enpi_api.examples.apps.cluster
Cluster
Cluster identifies sequence clusters based on user-defined parameters such as heavy and/or light chain CDR3 similarity, gene usage, and CDR3 length. Samples from different sequencing technologies can be combined to inform candidate selection and hit expansion.
Example Cluster run configuration
This example showcases how to run Cluster on a clone collection, clustering clones on CDR2 and CDR3 amino acids.
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.tags import SequenceTags
from enpi_api.l2.types.cluster import SequenceFeatureIdentities
from enpi_api.l2.types.collection import CollectionId
with EnpiApiClient() as enpi_client:
"""We are assuming that the collection referenced below exists and its clones are valid"""
collection_id = CollectionId(123)
# Run Clustering with the collection specified above
cluster_run = enpi_client.cluster_api.start(
name="Example Clustering run",
collection_ids=[collection_id], # More collections could be passed here
sequence_features=[SequenceTags.Cdr2AminoAcids, SequenceTags.Cdr3AminoAcids], # Features on which the clones will be clustered
identities=SequenceFeatureIdentities(Heavy=80), # Identities used when clustering clones, applied to chosen Sequence Features.
match_tags=[SequenceTags.Cdr3AminoAcidsLength], # Determines if app should cluster on the same CDR3 Amino Acid Length
).wait()
# After awaiting, `start` function returns information about our Clustering run
print(cluster_run)
Get successful Cluster runs
Get all or a selection of successfully finished Cluster runs.
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.types.collection import CollectionId
with EnpiApiClient() as enpi_client:
"""We are assuming that the collections referenced below exist and that
they were passed into at least one successfully finished Clustering run"""
collection_ids = [
CollectionId(123),
CollectionId(124),
]
# When passing collection IDs to `get_cluster_runs`, function will return
# all successfully finished Clustering runs that had those collections as their inputs
cluster_runs = enpi_client.cluster_api.get_cluster_runs(
collection_ids=collection_ids, # Target collection IDs
)
print(cluster_runs)
# Alternatively, if no collection IDs are provided, function will return
# all the successfully finished Clustering runs available to the user
all_cluster_runs = enpi_client.cluster_api.get_cluster_runs()
print(all_cluster_runs)
Export clustered clones from a Cluster run
Export a result of a successful Cluster run - clustered clones - into a TSV file or a DataFrame.
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.types.cluster import ClusterRunId, ExportClustersMode
with EnpiApiClient() as enpi_client:
"""We are assuming that the Clustering run referenced below ran successfully"""
cluster_run_id = ClusterRunId("62195cb4-9e6e-4c44-b702-c3613848c15b")
path = enpi_client.cluster_api.export_clusters_as_csv(
cluster_run_id=cluster_run_id, # ID of the Clustering run to export clusters from
mode=ExportClustersMode.CLONES, # Mode of the export, determines the final content of the exported file
limit=100, # Optional param, allows to export up to N clusters
output_directory="/home/example_user/", # Directory in which the exported file will be located
).wait()
print(path)
"""A variant of export function above that returns a DataFrame is also available:
df = enpi_client.cluster_api.export_clusters_as_df(
cluster_run_id=cluster_run_id, # ID of the Cluster run to export clusters from
mode=ExportClustersMode.CLONES, # Mode of the export, determines the final content of the exported file
limit=100, # Optional param, allows to export up to N clusters
).wait()
print(df)
"""
'''
# Cluster

Cluster identifies sequence clusters based on user-defined parameters such as heavy and/or light chain CDR3 similarity,
gene usage, and CDR3 length. Samples from different sequencing technologies can be combined to inform candidate selection and hit expansion.

##Example Cluster run configuration

This example showcases how to run Cluster on a clone collection, clustering clones on CDR2 and CDR3 amino acids.
```python
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.tags import SequenceTags
from enpi_api.l2.types.cluster import SequenceFeatureIdentities
from enpi_api.l2.types.collection import CollectionId

with EnpiApiClient() as enpi_client:
    """We are assuming that the collection referenced below exists and its clones are valid"""

    collection_id = CollectionId(123)

    # Run Clustering with the collection specified above
    cluster_run = enpi_client.cluster_api.start(
        name="Example Clustering run",
        collection_ids=[collection_id],  # More collections could be passed here
        sequence_features=[SequenceTags.Cdr2AminoAcids, SequenceTags.Cdr3AminoAcids],  # Features on which the clones will be clustered
        identities=SequenceFeatureIdentities(Heavy=80),  # Identities used when clustering clones, applied to chosen Sequence Features.
        match_tags=[SequenceTags.Cdr3AminoAcidsLength],  # Determines if app should cluster on the same CDR3 Amino Acid Length
    ).wait()

    # After awaiting, `start` function returns information about our Clustering run
    print(cluster_run)

```
##Get successful Cluster runs

Get all or a selection of successfully finished Cluster runs.
```python
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.types.collection import CollectionId

with EnpiApiClient() as enpi_client:
    """We are assuming that the collections referenced below exist and that
    they were passed into at least one successfully finished Clustering run"""
    collection_ids = [
        CollectionId(123),
        CollectionId(124),
    ]

    # When passing collection IDs to `get_cluster_runs`, function will return
    # all successfully finished Clustering runs that had those collections as their inputs
    cluster_runs = enpi_client.cluster_api.get_cluster_runs(
        collection_ids=collection_ids,  # Target collection IDs
    )
    print(cluster_runs)

    # Alternatively, if no collection IDs are provided, function will return
    # all the successfully finished Clustering runs available to the user
    all_cluster_runs = enpi_client.cluster_api.get_cluster_runs()
    print(all_cluster_runs)

```
##Export clustered clones from a Cluster run

Export a result of a successful Cluster run - clustered clones - into a TSV file or a DataFrame.
```python
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.types.cluster import ClusterRunId, ExportClustersMode

with EnpiApiClient() as enpi_client:
    """We are assuming that the Clustering run referenced below ran successfully"""

    cluster_run_id = ClusterRunId("62195cb4-9e6e-4c44-b702-c3613848c15b")

    path = enpi_client.cluster_api.export_clusters_as_csv(
        cluster_run_id=cluster_run_id,  # ID of the Clustering run to export clusters from
        mode=ExportClustersMode.CLONES,  # Mode of the export, determines the final content of the exported file
        limit=100,  # Optional param, allows to export up to N clusters
        output_directory="/home/example_user/",  # Directory in which the exported file will be located
    ).wait()

    print(path)

    """A variant of export function above that returns a DataFrame is also available:

    df = enpi_client.cluster_api.export_clusters_as_df(
        cluster_run_id=cluster_run_id,  # ID of the Cluster run to export clusters from
        mode=ExportClustersMode.CLONES,  # Mode of the export, determines the final content of the exported file
        limit=100,  # Optional param, allows to export up to N clusters
    ).wait()

    print(df)
    """

```
'''