enpi_api.examples.apps.cluster

Cluster

Cluster identifies sequence clusters based on user-defined parameters such as heavy and/or light chain CDR3 similarity, gene usage, and CDR3 length. Samples from different sequencing technologies can be combined to inform candidate selection and hit expansion.

Example Cluster run configuration

This example shows how to run Cluster on a clone collection, clustering its clones on CDR2 and CDR3 amino acids.

from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.tags import SequenceTags
from enpi_api.l2.types.cluster import SequenceFeatureIdentities
from enpi_api.l2.types.collection import CollectionId

with EnpiApiClient() as enpi_client:
    """We are assuming that the collection referenced below exists and its clones are valid"""

    collection_id = CollectionId(123)

    # Run Clustering with the collection specified above
    cluster_run = enpi_client.cluster_api.start(
        name="Example Clustering run",
        collection_ids=[collection_id],  # More collections could be passed here
        sequence_features=[SequenceTags.Cdr2AminoAcids, SequenceTags.Cdr3AminoAcids],  # Features on which the clones will be clustered
        identities=SequenceFeatureIdentities(Heavy=80),  # Identities used when clustering clones, applied to chosen Sequence Features.
        match_tags=[SequenceTags.Cdr3AminoAcidsLength],  # Determines if app should cluster on the same CDR3 Amino Acid Length
    ).wait()

    # After awaiting, `start` function returns information about our Clustering run
    print(cluster_run)

Get successful Cluster runs

Get all or a selection of successfully finished Cluster runs.

from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.types.collection import CollectionId

with EnpiApiClient() as enpi_client:
    """We are assuming that the collections referenced below exist and that
    they were passed into at least one successfully finished Clustering run"""
    collection_ids = [
        CollectionId(123),
        CollectionId(124),
    ]

    # When passing collection IDs to `get_cluster_runs`, function will return
    # all successfully finished Clustering runs that had those collections as their inputs
    cluster_runs = enpi_client.cluster_api.get_cluster_runs(
        collection_ids=collection_ids,  # Target collection IDs
    )
    print(cluster_runs)

    # Alternatively, if no collection IDs are provided, function will return
    # all the successfully finished Clustering runs available to the user
    all_cluster_runs = enpi_client.cluster_api.get_cluster_runs()
    print(all_cluster_runs)

Export clustered clones from a Cluster run

Export a result of a successful Cluster run - clustered clones - into a TSV file or a DataFrame.

from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.types.cluster import ClusterRunId, ExportClustersMode

with EnpiApiClient() as enpi_client:
    """We are assuming that the Clustering run referenced below ran successfully"""

    cluster_run_id = ClusterRunId("62195cb4-9e6e-4c44-b702-c3613848c15b")

    path = enpi_client.cluster_api.export_clusters_as_csv(
        cluster_run_id=cluster_run_id,  # ID of the Clustering run to export clusters from
        mode=ExportClustersMode.CLONES,  # Mode of the export, determines the final content of the exported file
        limit=100,  # Optional parameter; exports at most N clusters
        output_directory="/home/example_user/",  # Directory in which the exported file will be located
    ).wait()

    print(path)

    """A variant of export function above that returns a DataFrame is also available:

    df = enpi_client.cluster_api.export_clusters_as_df(
        cluster_run_id=cluster_run_id,  # ID of the Cluster run to export clusters from
        mode=ExportClustersMode.CLONES,  # Mode of the export, determines the final content of the exported file
        limit=100,  # Optional parameter; exports at most N clusters
    ).wait()

    print(df)
    """

View Source

 1'''
 2# Cluster
 3
 4Cluster identifies sequence clusters based on user-defined parameters such as heavy and/or light chain CDR3 similarity,
 5gene usage, and CDR3 length. Samples from different sequencing technologies can be combined to inform candidate selection and hit expansion.
 6
 7## Example Cluster run configuration
 8
 9This example shows how to run Cluster on a clone collection, clustering its clones on CDR2 and CDR3 amino acids.
10```python
11from enpi_api.l2.client.enpi_api_client import EnpiApiClient
12from enpi_api.l2.tags import SequenceTags
13from enpi_api.l2.types.cluster import SequenceFeatureIdentities
14from enpi_api.l2.types.collection import CollectionId
15
16with EnpiApiClient() as enpi_client:
17    """We are assuming that the collection referenced below exists and its clones are valid"""
18
19    collection_id = CollectionId(123)
20
21    # Run Clustering with the collection specified above
22    cluster_run = enpi_client.cluster_api.start(
23        name="Example Clustering run",
24        collection_ids=[collection_id],  # More collections could be passed here
25        sequence_features=[SequenceTags.Cdr2AminoAcids, SequenceTags.Cdr3AminoAcids],  # Features on which the clones will be clustered
26        identities=SequenceFeatureIdentities(Heavy=80),  # Identities used when clustering clones, applied to chosen Sequence Features.
27        match_tags=[SequenceTags.Cdr3AminoAcidsLength],  # Determines if app should cluster on the same CDR3 Amino Acid Length
28    ).wait()
29
30    # After awaiting, `start` function returns information about our Clustering run
31    print(cluster_run)
32
33```
34## Get successful Cluster runs
35
36Get all or a selection of successfully finished Cluster runs.
37```python
38from enpi_api.l2.client.enpi_api_client import EnpiApiClient
39from enpi_api.l2.types.collection import CollectionId
40
41with EnpiApiClient() as enpi_client:
42    """We are assuming that the collections referenced below exist and that
43    they were passed into at least one successfully finished Clustering run"""
44    collection_ids = [
45        CollectionId(123),
46        CollectionId(124),
47    ]
48
49    # When passing collection IDs to `get_cluster_runs`, function will return
50    # all successfully finished Clustering runs that had those collections as their inputs
51    cluster_runs = enpi_client.cluster_api.get_cluster_runs(
52        collection_ids=collection_ids,  # Target collection IDs
53    )
54    print(cluster_runs)
55
56    # Alternatively, if no collection IDs are provided, function will return
57    # all the successfully finished Clustering runs available to the user
58    all_cluster_runs = enpi_client.cluster_api.get_cluster_runs()
59    print(all_cluster_runs)
60
61```
62## Export clustered clones from a Cluster run
63
64Export a result of a successful Cluster run - clustered clones - into a TSV file or a DataFrame.
65```python
66from enpi_api.l2.client.enpi_api_client import EnpiApiClient
67from enpi_api.l2.types.cluster import ClusterRunId, ExportClustersMode
68
69with EnpiApiClient() as enpi_client:
70    """We are assuming that the Clustering run referenced below ran successfully"""
71
72    cluster_run_id = ClusterRunId("62195cb4-9e6e-4c44-b702-c3613848c15b")
73
74    path = enpi_client.cluster_api.export_clusters_as_csv(
75        cluster_run_id=cluster_run_id,  # ID of the Clustering run to export clusters from
76        mode=ExportClustersMode.CLONES,  # Mode of the export, determines the final content of the exported file
77        limit=100,  # Optional parameter; exports at most N clusters
78        output_directory="/home/example_user/",  # Directory in which the exported file will be located
79    ).wait()
80
81    print(path)
82
83    """A variant of export function above that returns a DataFrame is also available:
84
85    df = enpi_client.cluster_api.export_clusters_as_df(
86        cluster_run_id=cluster_run_id,  # ID of the Cluster run to export clusters from
87        mode=ExportClustersMode.CLONES,  # Mode of the export, determines the final content of the exported file
88        limit=100,  # Optional parameter; exports at most N clusters
89    ).wait()
90
91    print(df)
92    """
93
94```
95'''