enpi_api.examples.assign_metadata
Assign metadata to collections, clones or sequences
Various examples showcasing how you can assign metadata to collections, clones or sequences.
Add simple collection level metadata
An example script showcasing how to import minimal example metadata for a clone collection: we match the target collection with a filter and then apply an example tag value to an example collection-level tag.
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.types.collection import CollectionId
from enpi_api.l2.types.filter import MatchId, MatchIdTarget
from enpi_api.l2.types.import_metadata import Annotation, CollectionAnnotation
from enpi_api.l2.types.tag import Tag, TagArchetype, TagDataType, TagLevel
with EnpiApiClient() as enpi_client:
    # Precondition: the collection with the ID used below already exists.
    target_collection_id = CollectionId(123)

    # Look up the collection-level tag archetype that will hold the example
    # value; create it the first time this script runs.
    tag_api = enpi_client.tag_api
    archetype_name = "Example metadata import target tag"
    target_archetype: TagArchetype | None = tag_api.get_tag_archetype_by_name(
        TagLevel.COLLECTION,
        archetype_name,
    )
    if not target_archetype:
        pending_archetype = tag_api.create_tag_archetype(
            TagLevel.COLLECTION,
            TagDataType.TEXT,
            archetype_name,
        )
        # The archetype is only usable once its creation has completed
        target_archetype = pending_archetype.wait()

    # A filter that selects exactly one collection, matched on its ID
    single_collection_filter = enpi_client.filter_api.create_filter(
        name="A single collection filter",
        condition=MatchId(target=MatchIdTarget.COLLECTION, id=target_collection_id),
    )

    # The annotation: one fixed value written into the archetype found/created above
    example_value = Tag(id=target_archetype.id, value="Test example tag value")
    collection_annotation: Annotation = CollectionAnnotation(tags=[example_value])

    # Attach the annotation to every collection matched by the filter and
    # block until the import has finished
    import_job = enpi_client.collection_api.add_metadata(
        filter=single_collection_filter,
        annotation=collection_annotation,
    )
    import_job.wait()
Add heavy and productive boolean tag via metadata import
Add sequence-level metadata by using a filter to narrow down the target collection and a template specification to match certain tag values and annotate them accordingly. In this example we check whether sequences of a given collection have a "Heavy" chain and are productive at the same time, and we record the result as a new boolean, sequence-level tag.
import pandas as pd
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.tags import SequenceTags
from enpi_api.l2.types.collection import CollectionId
from enpi_api.l2.types.filter import MatchId, MatchIdTarget, TemplatedAndOperator, TemplatedMatchTag
from enpi_api.l2.types.import_metadata_templated import Annotation, sequence_annotation, template_tag
from enpi_api.l2.types.tag import TagArchetype, TagDataType, TagLevel
with EnpiApiClient() as enpi_client:
    # We are assuming that the collection referred to below exists and contains
    # Heavy, Lambda and Kappa sequences.
    # Based on the `Chain` and `Productive` tag values we add a new tag:
    # `This sequence is Heavy and Productive`.
    # The collection to annotate is matched by the filter, while the sequences
    # are matched by the template content - this way the script is reusable for
    # different collections.

    # Get the ID of the collection we want to add metadata to. Can be done with
    # `enpi_client.collection_api.get_collections_metadata()`
    collection_id = CollectionId(123)

    # Get or create the tag archetype that we are going to store the new boolean value in
    tag_name = "This sequence is Heavy and Productive"
    is_heavy_and_productive_tag: TagArchetype | None = enpi_client.tag_api.get_tag_archetype_by_name(
        TagLevel.SEQUENCE,
        tag_name,
    )
    if not is_heavy_and_productive_tag:
        # Wait for the creation to finish: the archetype ID is needed below
        is_heavy_and_productive_tag = enpi_client.tag_api.create_tag_archetype(
            TagLevel.SEQUENCE,
            TagDataType.BOOLEAN,
            tag_name,
        ).wait()

    # Build the filter that matches the target collection by ID and the
    # sequences by the values specified in the template
    metadata_import_filter = enpi_client.filter_api.create_templated_filter(
        name="Metadata import filter",
        shared=False,
        condition=TemplatedAndOperator(
            conditions=[
                # Plain matches use the data put into the filter itself
                MatchId(
                    target=MatchIdTarget.COLLECTION,
                    id=collection_id,  # Match our target ID
                ),
                # Templated matches use the data put into the template file/df
                TemplatedMatchTag(
                    tag_id=SequenceTags.Chain,
                ),
                TemplatedMatchTag(
                    tag_id=SequenceTags.Productive,
                ),
            ]
        ),
    )

    # Build the metadata template. Can also be in a CSV file format and
    # passed to `enpi_client.collection_api.add_metadata_from_file`.
    # Every (Chain, Productive) combination gets its own row, so that each
    # sequence matches exactly one row; only productive Heavy chains get `True`.
    metadata_template_df = pd.DataFrame(
        [
            ["Heavy", True, True],
            ["Heavy", False, False],
            ["Lambda", True, False],
            ["Lambda", False, False],
            ["Kappa", True, False],
            ["Kappa", False, False],
        ],
        columns=["Chain", "Productive", "This sequence is Heavy and Productive"],
    )

    # Specify the annotation to add and which tag it goes into; `template_tag`
    # means the value is taken from the template DataFrame
    metadata_annotation: Annotation = sequence_annotation(
        tags=[template_tag(is_heavy_and_productive_tag.id)],
    )

    # Import the templated metadata into the target collection and wait for
    # the import to complete
    enpi_client.collection_api.add_metadata_from_df(
        filter=metadata_import_filter,
        data_frame=metadata_template_df,
        annotation=metadata_annotation,
    ).wait()
Add some calculated sequence level metadata
This example showcases how you can add some calculated sequence level metadata to your collections.
The script will assume the following:
- There is already a collection with the name `Example collection` that we are targeting for this example.
The script will perform the following steps:
- Ensure that there is a tag that we want to assign the metadata to; for this example it will be a sequence-level tag called `Max sequence count within clone`.
- Export the collection so we can calculate the metadata.
- Compute the example metadata, which in this case is the highest sequence count within a clone.
- Assign the metadata to the collection.
import pandas as pd
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.tags import CollectionTags, SequenceTags
from enpi_api.l2.types.filter import (
MatchId,
MatchIdTarget,
TemplatedAndOperator,
TemplatedMatchId,
TemplatedMatchTag,
)
from enpi_api.l2.types.import_metadata_templated import (
Annotation,
sequence_annotation,
template_tag,
)
from enpi_api.l2.types.tag import TagDataType, TagLevel
with EnpiApiClient() as enpi_client:
    # We are assuming that there is a single collection with the name
    # referenced below already on the ENPICOM Platform
    collection_name = "Example collection"

    # Take the first collection with a matching name (the name is assumed to be
    # unique) and fail with a clear message when there is none
    collection = next(
        (
            candidate
            for candidate in enpi_client.collection_api.get_collections_metadata()
            if candidate.name() == collection_name
        ),
        None,
    )
    if collection is None:
        raise ValueError(f"No collection named {collection_name!r} was found")

    # Check if our tag already exists
    tag_key = "Max sequence count within clone"
    tag_level = TagLevel.SEQUENCE

    tag = enpi_client.tag_api.get_tag_archetype_by_name(key=tag_key, level=tag_level)
    if not tag:  # Then we need to create it
        tag = enpi_client.tag_api.create_tag_archetype(
            level=tag_level,
            data_type=TagDataType.INTEGER,
            key=tag_key,
        ).wait()  # We need to wait for the tag to be created before we can use it

    # We need to create a filter to be able to fetch the data
    # (named `export_filter` so that the builtin `filter` is not shadowed)
    export_filter = enpi_client.filter_api.create_filter(
        name="Filter to download data",
        # We are simply matching on the collection ID here
        condition=MatchId(target=MatchIdTarget.COLLECTION, id=collection.id),
    )

    # Export the data into a Pandas DataFrame
    exported_data_df = enpi_client.collection_api.get_as_df(
        # Even though we already specified the collection ID in the filter, we need to
        # specify it here as well to avoid accidentally querying too much data
        collection_ids=[collection.id],
        filter=export_filter,
        # We specify the tags that we want to export, the fewer tags we specify,
        # the faster the export will be
        tag_ids=[
            # This is what we are going to use to calculate the metadata with
            SequenceTags.SequenceCount,
        ],
    ).wait()  # This starts a long-running operation, we need to wait for it before using the result

    # We now perform the calculation of the metadata: every sequence gets the
    # maximum `Sequence Count` found within its clone. `transform` returns one
    # value per exported row, aligned with `exported_data_df`.
    max_count_per_sequence = exported_data_df.groupby("Unique Clone ID")["Sequence Count"].transform("max")

    # For assigning the metadata we will now use the templated method that can assign
    # different values per "match". For this, our metadata DataFrame will have 3 columns:
    # - "Name": to match the collection name
    # - "Unique Sequence ID": to match the sequence ID
    # - "Max sequence count within clone": this column contains the value that will be
    #   assigned to the sequence that matches the "Unique Sequence ID"
    metadata_filter = enpi_client.filter_api.create_templated_filter(
        name="Filter to add metadata",
        shared=False,
        condition=TemplatedAndOperator(
            conditions=[
                # It can never hurt to specifically match the collection ID or name, just to
                # be sure you do not accidentally assign metadata to the wrong collection
                TemplatedMatchTag(tag_id=CollectionTags.Name),
                TemplatedMatchId(target=MatchIdTarget.SEQUENCE),
            ]
        ),
    )

    # Specify the sequence-level annotation to add to the collection
    # We specify it as a `template_tag` because the value comes from the DataFrame
    metadata_annotation: Annotation = sequence_annotation([template_tag(tag.id)])

    # Create the metadata DataFrame with the aforementioned columns
    metadata_frame = pd.DataFrame(
        {
            "Name": collection.name(),  # Same collection name on every row
            "Unique Sequence ID": exported_data_df["Unique Sequence ID"],
            tag.key: max_count_per_sequence,  # The calculated metadata
        }
    ).reset_index(drop=True)

    # Apply metadata to the collection
    enpi_client.collection_api.add_metadata_from_df(
        filter=metadata_filter,
        annotation=metadata_annotation,
        data_frame=metadata_frame,
    ).wait()  # This starts a long-running operation, we wait for it to complete

    # At this point, the metadata should have been added to the collection
1''' 2# Assign metadata to collections, clones or sequences 3 4Various examples showcasing how you can assign metadata to collections, clones or sequences. 5 6##Add simple collection level metadata 7 8An example script 9showcasing how to import a minimal, example metadata for clone collection: we're matching the target collection with a filter 10and then applying an example tag value to a example collection level tag. 11```python 12from enpi_api.l2.client.enpi_api_client import EnpiApiClient 13from enpi_api.l2.types.collection import CollectionId 14from enpi_api.l2.types.filter import MatchId, MatchIdTarget 15from enpi_api.l2.types.import_metadata import Annotation, CollectionAnnotation 16from enpi_api.l2.types.tag import Tag, TagArchetype, TagDataType, TagLevel 17 18with EnpiApiClient() as enpi_client: 19 """We are assuming that the collection referred to below exists""" 20 21 collection_id = CollectionId(123) 22 23 # Get or create the tag archetype that we are gonna use to store the new example value in 24 tag_name = "Example metadata import target tag" 25 example_tag: TagArchetype | None = enpi_client.tag_api.get_tag_archetype_by_name( 26 TagLevel.COLLECTION, 27 tag_name, 28 ) 29 if not example_tag: 30 example_tag = enpi_client.tag_api.create_tag_archetype( 31 TagLevel.COLLECTION, 32 TagDataType.TEXT, 33 tag_name, 34 ).wait() 35 36 # Build the filter that matches our target collection by its ID 37 collection_filter = enpi_client.filter_api.create_filter( 38 name="A single collection filter", # Name of a filter 39 condition=MatchId( # This filter matches entities on ID value 40 target=MatchIdTarget.COLLECTION, # The entities we are filtering are collections 41 id=collection_id, # The expected collection ID value 42 ), 43 ) 44 45 # Specify the annotation to add and into which tag does it go 46 metadata_annotation: Annotation = CollectionAnnotation( 47 tags=[ 48 Tag( 49 id=example_tag.id, 50 value="Test example tag value", 51 ) 52 ] 53 ) 54 55 # Import templated 
metadata into the target collection 56 enpi_client.collection_api.add_metadata( 57 filter=collection_filter, 58 annotation=metadata_annotation, 59 ).wait() 60 61``` 62##Add heavy and productive boolean tag via metadata import 63 64Add sequence level metadata by using filter to narrow down the target collection and a template specification to match certain tag values and annotate them accordingly. 65In this example we check if sequences of a given collection have "Heavy" chain and are productive at the same time, which we write down as a new boolean, sequence level tag. 66 67```python 68import pandas as pd 69from enpi_api.l2.client.enpi_api_client import EnpiApiClient 70from enpi_api.l2.tags import SequenceTags 71from enpi_api.l2.types.collection import CollectionId 72from enpi_api.l2.types.filter import MatchId, MatchIdTarget, TemplatedAndOperator, TemplatedMatchTag 73from enpi_api.l2.types.import_metadata_templated import Annotation, sequence_annotation, template_tag 74from enpi_api.l2.types.tag import TagArchetype, TagDataType, TagLevel 75 76with EnpiApiClient() as enpi_client: 77 """We are assuming that a collection referred to below exists and contains Heavy, Lambda and Kappa sequences. 78 Based on `Chain` values and `Productive` tag values we will be adding a new tag: `This sequence is Heavy and Productive`. 79 We will be matching collection that we want to annotate with a filter and match the clones/sequences with 80 the template content - this way following script will be reusable for different collections.""" 81 82 # Get the ID of the collection we want to add metadata to. 
Can be done with `enpi_client.collection_api.get_collections_metadata()` 83 collection_id = CollectionId(123) 84 85 # Get or create the tag archetype that we are gonna use to store the new boolean value in 86 tag_name = "This sequence is Heavy and Productive" 87 is_heavy_and_productive_tag: TagArchetype | None = enpi_client.tag_api.get_tag_archetype_by_name( 88 TagLevel.SEQUENCE, 89 tag_name, 90 ) 91 if not is_heavy_and_productive_tag: 92 is_heavy_and_productive_tag = enpi_client.tag_api.create_tag_archetype( 93 TagLevel.SEQUENCE, 94 TagDataType.BOOLEAN, 95 tag_name, 96 ).wait() 97 98 # Build the filter that matches target collection by ID and sequences by values specified in the template 99 metadata_import_filter = enpi_client.filter_api.create_templated_filter( 100 name="Metadata import filter", 101 shared=False, 102 condition=TemplatedAndOperator( 103 conditions=[ 104 # Matches are using the data put into the filter 105 MatchId( 106 target=MatchIdTarget.COLLECTION, 107 id=collection_id, # Match our target ID 108 ), 109 # Templated matches are using the data put into template file/df 110 TemplatedMatchTag( 111 tag_id=SequenceTags.Chain, 112 ), 113 TemplatedMatchTag( 114 tag_id=SequenceTags.Productive, 115 ), 116 ] 117 ), 118 ) 119 120 # Build the metadata template. 
Can also be in a CSV file format and 121 # passed to `enpi_client.collection_api.add_metadata_from_file` 122 metadata_template_df = pd.DataFrame( 123 [ 124 ["Heavy", True, True], 125 ["Heavy", False, False], 126 ["Lambda", True, False], 127 ["Lambda", False, False], 128 ["Kappa", False, False], 129 ["Kappa", False, False], 130 ], 131 columns=["Chain", "Productive", "This sequence is Heavy and Productive"], 132 ) 133 134 # Specify the annotation to add and into which tag does it go 135 metadata_annotation: Annotation = sequence_annotation( 136 tags=[template_tag(is_heavy_and_productive_tag.id)], 137 ) 138 139 # Import templated metadata into the target collection 140 enpi_client.collection_api.add_metadata_from_df( 141 filter=metadata_import_filter, 142 data_frame=metadata_template_df, 143 annotation=metadata_annotation, 144 ).wait() 145 146``` 147##Add some calculated sequence level metadata 148 149This example showcases how you can 150add some calculated sequence level metadata to your collections. 151 152The script will assume the following: 153 154- There is already a collection with the name `My collection` that we are targeting for this example. 155 156The script will perform the following steps: 157 1581. Ensure that there is a tag that we want to assign the metadata to, for this example it will be a sequence level tag 159called `Max sequence count within clone`. 1602. Export the collection so we can calculate the metadata. 1613. Compute the example metadata, which in this case is the highest sequence count within a clone. 1624. Assign the metadata to the collection. 
163 164```python 165import pandas as pd 166from enpi_api.l2.client.enpi_api_client import EnpiApiClient 167from enpi_api.l2.tags import CollectionTags, SequenceTags 168from enpi_api.l2.types.filter import ( 169 MatchId, 170 MatchIdTarget, 171 TemplatedAndOperator, 172 TemplatedMatchId, 173 TemplatedMatchTag, 174) 175from enpi_api.l2.types.import_metadata_templated import ( 176 Annotation, 177 sequence_annotation, 178 template_tag, 179) 180from enpi_api.l2.types.tag import TagDataType, TagLevel 181 182with EnpiApiClient() as enpi_client: 183 """We are assuming that there is a single collection with the name referenced below already on the ENPICOM Platform""" 184 185 collection_name = "Example collection" 186 187 filtered_collections = [collection for collection in enpi_client.collection_api.get_collections_metadata() if collection.name() == collection_name] 188 collection = filtered_collections[0] # We assume that this name is unique here 189 190 # Check if our tag already exists 191 tag_key = "Max sequence count within clone" 192 tag_level = TagLevel.SEQUENCE 193 194 tag = enpi_client.tag_api.get_tag_archetype_by_name(key=tag_key, level=tag_level) 195 if not tag: # Then we need to create it 196 tag = enpi_client.tag_api.create_tag_archetype( 197 level=tag_level, 198 data_type=TagDataType.INTEGER, 199 key=tag_key, 200 ).wait() # We need to wait for the tag to be created before we can use it 201 202 # We need to create a filter to be able to fetch the data 203 filter = enpi_client.filter_api.create_filter( 204 name="Filter to download data", 205 # We are simply matching on the collection ID here 206 condition=MatchId(target=MatchIdTarget.COLLECTION, id=collection.id), 207 ) 208 209 # Export the data into a Pandas DataFrame 210 exported_data_df = enpi_client.collection_api.get_as_df( 211 # Even though we already specified the collection ID in the filter, we need to specify it here as well to avoid accidentally querying too much data 212 collection_ids=[collection.id], 
213 filter=filter, 214 # We specify the tags that we want to export, the fewer tags we specify, the faster the export will be 215 tag_ids=[ 216 # This is what we are going to use to calculate the metadata with 217 SequenceTags.SequenceCount, 218 ], 219 ).wait() # This starts a long-running operation, we need to wait for it if we want to use it further down in the script 220 221 # We now perform the calculation of the metadata, for each clone, we will get the maximum value of the `Sequence Count` tag 222 grouped_df = exported_data_df.groupby(["Unique Clone ID"]).max(numeric_only=True) 223 224 # For assigning the metadata we will now use the templated method that can assign different values per "match" 225 # For this, our metadata DataFrame will have 3 columns: 226 # - "Name": to match the collection name 227 # - "Unique Sequence ID": to match the sequence ID 228 # - "Max sequence count within clone": this column contains the value that will be assigned to the sequence that matches the "Unique Sequence ID" 229 metadata_filter = enpi_client.filter_api.create_templated_filter( 230 name="Filter to add metadata", 231 shared=False, 232 condition=TemplatedAndOperator( 233 conditions=[ 234 # It can never hurt to specifically match the collection ID or name, just to be sure you do not 235 # accidentally assign metadata to the wrong collection 236 TemplatedMatchTag(tag_id=CollectionTags.Name), 237 TemplatedMatchId(target=MatchIdTarget.SEQUENCE), 238 ] 239 ), 240 ) 241 242 # Specify the sequence-level annotation to add to the collection 243 # We specify it as a `template_tag` because the value comes from the DataFrame 244 metadata_annotation: Annotation = sequence_annotation([template_tag(tag.id)]) 245 246 # Create metadata dataframe with the aforementioned columns 247 metadata_frame = pd.DataFrame( 248 [ 249 [ 250 collection.name(), # We match on the "Name" 251 df_row[1]["Unique Sequence ID"], # We match on the "Unique Sequence ID" 252 grouped_df.loc[df_row[1]["Unique Clone 
ID"]]["Sequence Count"], # We add the calculated metadata 253 ] 254 for df_row in exported_data_df.iterrows() 255 ], 256 columns=["Name", "Unique Sequence ID", tag.key], 257 ) 258 259 # Apply metadata to the collection 260 enpi_client.collection_api.add_metadata_from_df( 261 filter=metadata_filter, 262 annotation=metadata_annotation, 263 data_frame=metadata_frame, 264 ).wait() # This starts a long-running operation, we need to wait for it if we want to use it further down in the script 265 266 # At this point, the metadata should have been added to the collection 267 268``` 269'''