enpi_api.examples.assign_metadata

Assign metadata to collections, clones or sequences

Various examples showcasing how you can assign metadata to collections, clones or sequences.

Add simple collection level metadata

An example script showcasing how to import minimal example metadata for a clone collection: we match the target collection with a filter and then apply an example tag value to an example collection-level tag.

from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.types.collection import CollectionId
from enpi_api.l2.types.filter import MatchId, MatchIdTarget
from enpi_api.l2.types.import_metadata import Annotation, CollectionAnnotation
from enpi_api.l2.types.tag import Tag, TagArchetype, TagDataType, TagLevel

with EnpiApiClient() as enpi_client:
    # Assumption: the collection referred to below already exists.
    collection_id = CollectionId(123)

    # Get or create the collection-level tag archetype that will store the new example value
    tag_name = "Example metadata import target tag"
    example_tag: TagArchetype | None = enpi_client.tag_api.get_tag_archetype_by_name(
        TagLevel.COLLECTION,
        tag_name,
    )
    if example_tag is None:
        # The lookup returned nothing, so create the archetype and wait until it is available
        example_tag = enpi_client.tag_api.create_tag_archetype(
            TagLevel.COLLECTION,
            TagDataType.TEXT,
            tag_name,
        ).wait()

    # Build the filter that matches our target collection by its ID
    collection_filter = enpi_client.filter_api.create_filter(
        name="A single collection filter",  # Name of the filter
        condition=MatchId(  # This filter matches entities on ID value
            target=MatchIdTarget.COLLECTION,  # The entities we are filtering are collections
            id=collection_id,  # The expected collection ID value
        ),
    )

    # Specify the annotation to add and the tag it goes into.
    # The value is fixed here (not templated): every collection matched by the filter gets the same value.
    metadata_annotation: Annotation = CollectionAnnotation(
        tags=[
            Tag(
                id=example_tag.id,
                value="Test example tag value",
            )
        ]
    )

    # Import the metadata into the target collection and wait for the operation to finish
    enpi_client.collection_api.add_metadata(
        filter=collection_filter,
        annotation=metadata_annotation,
    ).wait()

Add heavy and productive boolean tag via metadata import

Add sequence-level metadata by using a filter to narrow down the target collection and a template specification to match certain tag values and annotate them accordingly. In this example we check whether sequences of a given collection have a "Heavy" chain and are productive at the same time, and record the result in a new boolean, sequence-level tag.

import pandas as pd
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.tags import SequenceTags
from enpi_api.l2.types.collection import CollectionId
from enpi_api.l2.types.filter import MatchId, MatchIdTarget, TemplatedAndOperator, TemplatedMatchTag
from enpi_api.l2.types.import_metadata_templated import Annotation, sequence_annotation, template_tag
from enpi_api.l2.types.tag import TagArchetype, TagDataType, TagLevel

with EnpiApiClient() as enpi_client:
    # Assumption: the collection referred to below exists and contains Heavy, Lambda and Kappa sequences.
    # Based on the `Chain` and `Productive` tag values we add a new tag: `This sequence is Heavy and Productive`.
    # The collection to annotate is matched with a filter, while the clones/sequences are matched with
    # the template content - this way the script is reusable for different collections.

    # Get the ID of the collection we want to add metadata to. Can be done with `enpi_client.collection_api.get_collections_metadata()`
    collection_id = CollectionId(123)

    # Get or create the tag archetype that will store the new boolean value
    tag_name = "This sequence is Heavy and Productive"
    is_heavy_and_productive_tag: TagArchetype | None = enpi_client.tag_api.get_tag_archetype_by_name(
        TagLevel.SEQUENCE,
        tag_name,
    )
    if is_heavy_and_productive_tag is None:
        # The lookup returned nothing, so create the archetype and wait until it is available
        is_heavy_and_productive_tag = enpi_client.tag_api.create_tag_archetype(
            TagLevel.SEQUENCE,
            TagDataType.BOOLEAN,
            tag_name,
        ).wait()

    # Build the filter that matches the target collection by ID and sequences by values specified in the template
    metadata_import_filter = enpi_client.filter_api.create_templated_filter(
        name="Metadata import filter",
        shared=False,
        condition=TemplatedAndOperator(
            conditions=[
                # Plain matches use the data put into the filter itself
                MatchId(
                    target=MatchIdTarget.COLLECTION,
                    id=collection_id,  # Match our target ID
                ),
                # Templated matches use the data put into the template file/df
                TemplatedMatchTag(
                    tag_id=SequenceTags.Chain,
                ),
                TemplatedMatchTag(
                    tag_id=SequenceTags.Productive,
                ),
            ]
        ),
    )

    # Build the metadata template. Can also be in a CSV file format and
    # passed to `enpi_client.collection_api.add_metadata_from_file`.
    # Every (Chain, Productive) combination is listed exactly once so that each sequence
    # matches one row; only productive Heavy sequences get `True`.
    metadata_template_df = pd.DataFrame(
        [
            ["Heavy", True, True],
            ["Heavy", False, False],
            ["Lambda", True, False],
            ["Lambda", False, False],
            ["Kappa", True, False],
            ["Kappa", False, False],
        ],
        # The last column is named after the target tag; reuse `tag_name` so the two cannot drift apart
        columns=["Chain", "Productive", tag_name],
    )

    # Specify the annotation to add and the tag it goes into.
    # `template_tag` means the value is taken from the template DataFrame rather than fixed here.
    metadata_annotation: Annotation = sequence_annotation(
        tags=[template_tag(is_heavy_and_productive_tag.id)],
    )

    # Import the templated metadata into the target collection and wait for the operation to finish
    enpi_client.collection_api.add_metadata_from_df(
        filter=metadata_import_filter,
        data_frame=metadata_template_df,
        annotation=metadata_annotation,
    ).wait()

Add some calculated sequence level metadata

This example showcases how you can add some calculated sequence level metadata to your collections.

The script will assume the following:

There is already a collection with the name Example collection that we are targeting for this example.

The script will perform the following steps:

Ensure that the tag we want to assign the metadata to exists; for this example it will be a sequence-level tag called Max sequence count within clone.
Export the collection so we can calculate the metadata.
Compute the example metadata, which in this case is the highest sequence count within a clone.
Assign the metadata to the collection.

import pandas as pd
from enpi_api.l2.client.enpi_api_client import EnpiApiClient
from enpi_api.l2.tags import CollectionTags, SequenceTags
from enpi_api.l2.types.filter import (
    MatchId,
    MatchIdTarget,
    TemplatedAndOperator,
    TemplatedMatchId,
    TemplatedMatchTag,
)
from enpi_api.l2.types.import_metadata_templated import (
    Annotation,
    sequence_annotation,
    template_tag,
)
from enpi_api.l2.types.tag import TagDataType, TagLevel

with EnpiApiClient() as enpi_client:
    # Assumption: there is a single collection with the name referenced below already on the ENPICOM Platform.
    collection_name = "Example collection"

    # Take the first collection with a matching name (we assume the name is unique) and
    # fail with a descriptive error instead of a bare IndexError when nothing matches
    collection = next(
        (candidate for candidate in enpi_client.collection_api.get_collections_metadata() if candidate.name() == collection_name),
        None,
    )
    if collection is None:
        raise LookupError(f"No collection named {collection_name!r} was found")

    # Check if our tag already exists
    tag_key = "Max sequence count within clone"
    tag_level = TagLevel.SEQUENCE

    tag = enpi_client.tag_api.get_tag_archetype_by_name(key=tag_key, level=tag_level)
    if tag is None:  # Then we need to create it
        tag = enpi_client.tag_api.create_tag_archetype(
            level=tag_level,
            data_type=TagDataType.INTEGER,
            key=tag_key,
        ).wait()  # We need to wait for the tag to be created before we can use it

    # We need to create a filter to be able to fetch the data
    # (named `export_filter` so that the `filter` builtin is not shadowed)
    export_filter = enpi_client.filter_api.create_filter(
        name="Filter to download data",
        # We are simply matching on the collection ID here
        condition=MatchId(target=MatchIdTarget.COLLECTION, id=collection.id),
    )

    # Export the data into a Pandas DataFrame
    exported_data_df = enpi_client.collection_api.get_as_df(
        # Even though we already specified the collection ID in the filter, we need to specify it here as well to avoid accidentally querying too much data
        collection_ids=[collection.id],
        filter=export_filter,
        # We specify the tags that we want to export, the fewer tags we specify, the faster the export will be
        tag_ids=[
            # This is what we are going to use to calculate the metadata with
            SequenceTags.SequenceCount,
        ],
    ).wait()  # This starts a long-running operation, we need to wait for it if we want to use it further down in the script

    # We now perform the calculation of the metadata: for each sequence, the maximum value of the
    # `Sequence Count` tag within its clone. `transform("max")` returns one value per exported row
    # (aligned with `exported_data_df`), so no per-row lookup is needed and the column keeps its dtype.
    max_count_per_clone = exported_data_df.groupby("Unique Clone ID")["Sequence Count"].transform("max")

    # For assigning the metadata we will now use the templated method that can assign different values per "match"
    # For this, our metadata DataFrame will have 3 columns:
    # - "Name": to match the collection name
    # - "Unique Sequence ID": to match the sequence ID
    # - "Max sequence count within clone": this column contains the value that will be assigned to the sequence that matches the "Unique Sequence ID"
    metadata_filter = enpi_client.filter_api.create_templated_filter(
        name="Filter to add metadata",
        shared=False,
        condition=TemplatedAndOperator(
            conditions=[
                # It can never hurt to specifically match the collection ID or name, just to be sure you do not
                # accidentally assign metadata to the wrong collection
                TemplatedMatchTag(tag_id=CollectionTags.Name),
                TemplatedMatchId(target=MatchIdTarget.SEQUENCE),
            ]
        ),
    )

    # Specify the sequence-level annotation to add to the collection
    # We specify it as a `template_tag` because the value comes from the DataFrame
    metadata_annotation: Annotation = sequence_annotation([template_tag(tag.id)])

    # Create the metadata DataFrame with the aforementioned columns, one row per exported sequence
    metadata_frame = pd.DataFrame(
        {
            "Name": collection.name(),  # Same collection name on every row, we match on the "Name"
            "Unique Sequence ID": exported_data_df["Unique Sequence ID"],  # We match on the "Unique Sequence ID"
            tag.key: max_count_per_clone,  # We add the calculated metadata
        }
    ).reset_index(drop=True)  # Use a plain 0..n-1 index regardless of the exported frame's index

    # Apply metadata to the collection
    enpi_client.collection_api.add_metadata_from_df(
        filter=metadata_filter,
        annotation=metadata_annotation,
        data_frame=metadata_frame,
    ).wait()  # This starts a long-running operation, we wait so that the metadata is in place when the script ends

    # At this point, the metadata should have been added to the collection

View Source

  1'''
  2# Assign metadata to collections, clones or sequences
  3
  4Various examples showcasing how you can assign metadata to collections, clones or sequences.
  5
  6## Add simple collection level metadata
  7
  8An example script
  9showcasing how to import minimal example metadata for a clone collection: we match the target collection with a filter
 10and then apply an example tag value to an example collection-level tag.
 11```python
 12from enpi_api.l2.client.enpi_api_client import EnpiApiClient
 13from enpi_api.l2.types.collection import CollectionId
 14from enpi_api.l2.types.filter import MatchId, MatchIdTarget
 15from enpi_api.l2.types.import_metadata import Annotation, CollectionAnnotation
 16from enpi_api.l2.types.tag import Tag, TagArchetype, TagDataType, TagLevel
 17
 18with EnpiApiClient() as enpi_client:
 19    """We are assuming that the collection referred to below exists"""
 20
 21    collection_id = CollectionId(123)
 22
 23    # Get or create the tag archetype that we are gonna use to store the new example value in
 24    tag_name = "Example metadata import target tag"
 25    example_tag: TagArchetype | None = enpi_client.tag_api.get_tag_archetype_by_name(
 26        TagLevel.COLLECTION,
 27        tag_name,
 28    )
 29    if not example_tag:
 30        example_tag = enpi_client.tag_api.create_tag_archetype(
 31            TagLevel.COLLECTION,
 32            TagDataType.TEXT,
 33            tag_name,
 34        ).wait()
 35
 36    # Build the filter that matches our target collection by its ID
 37    collection_filter = enpi_client.filter_api.create_filter(
 38        name="A single collection filter",  # Name of a filter
 39        condition=MatchId(  # This filter matches entities on ID value
 40            target=MatchIdTarget.COLLECTION,  # The entities we are filtering are collections
 41            id=collection_id,  # The expected collection ID value
 42        ),
 43    )
 44
 45    # Specify the annotation to add and into which tag does it go
 46    metadata_annotation: Annotation = CollectionAnnotation(
 47        tags=[
 48            Tag(
 49                id=example_tag.id,
 50                value="Test example tag value",
 51            )
 52        ]
 53    )
 54
 55    # Import the metadata into the target collection
 56    enpi_client.collection_api.add_metadata(
 57        filter=collection_filter,
 58        annotation=metadata_annotation,
 59    ).wait()
 60
 61```
 62## Add heavy and productive boolean tag via metadata import
 63
 64Add sequence-level metadata by using a filter to narrow down the target collection and a template specification to match certain tag values and annotate them accordingly.
 65In this example we check whether sequences of a given collection have a "Heavy" chain and are productive at the same time, and record the result in a new boolean, sequence-level tag.
 66
 67```python
 68import pandas as pd
 69from enpi_api.l2.client.enpi_api_client import EnpiApiClient
 70from enpi_api.l2.tags import SequenceTags
 71from enpi_api.l2.types.collection import CollectionId
 72from enpi_api.l2.types.filter import MatchId, MatchIdTarget, TemplatedAndOperator, TemplatedMatchTag
 73from enpi_api.l2.types.import_metadata_templated import Annotation, sequence_annotation, template_tag
 74from enpi_api.l2.types.tag import TagArchetype, TagDataType, TagLevel
 75
 76with EnpiApiClient() as enpi_client:
 77    """We are assuming that a collection referred to below exists and contains Heavy, Lambda and Kappa sequences.
 78    Based on `Chain` values and `Productive` tag values we will be adding a new tag: `This sequence is Heavy and Productive`.
 79    We will be matching collection that we want to annotate with a filter and match the clones/sequences with
 80    the template content - this way following script will be reusable for different collections."""
 81
 82    # Get the ID of the collection we want to add metadata to. Can be done with `enpi_client.collection_api.get_collections_metadata()`
 83    collection_id = CollectionId(123)
 84
 85    # Get or create the tag archetype that we are gonna use to store the new boolean value in
 86    tag_name = "This sequence is Heavy and Productive"
 87    is_heavy_and_productive_tag: TagArchetype | None = enpi_client.tag_api.get_tag_archetype_by_name(
 88        TagLevel.SEQUENCE,
 89        tag_name,
 90    )
 91    if not is_heavy_and_productive_tag:
 92        is_heavy_and_productive_tag = enpi_client.tag_api.create_tag_archetype(
 93            TagLevel.SEQUENCE,
 94            TagDataType.BOOLEAN,
 95            tag_name,
 96        ).wait()
 97
 98    # Build the filter that matches target collection by ID and sequences by values specified in the template
 99    metadata_import_filter = enpi_client.filter_api.create_templated_filter(
100        name="Metadata import filter",
101        shared=False,
102        condition=TemplatedAndOperator(
103            conditions=[
104                # Matches are using the data put into the filter
105                MatchId(
106                    target=MatchIdTarget.COLLECTION,
107                    id=collection_id,  # Match our target ID
108                ),
109                # Templated matches are using the data put into template file/df
110                TemplatedMatchTag(
111                    tag_id=SequenceTags.Chain,
112                ),
113                TemplatedMatchTag(
114                    tag_id=SequenceTags.Productive,
115                ),
116            ]
117        ),
118    )
119
120    # Build the metadata template. Can also be in a CSV file format and
121    # passed to `enpi_client.collection_api.add_metadata_from_file`
122    metadata_template_df = pd.DataFrame(
123        [
124            ["Heavy", True, True],
125            ["Heavy", False, False],
126            ["Lambda", True, False],
127            ["Lambda", False, False],
128            ["Kappa", False, False],
129            ["Kappa", False, False],
130        ],
131        columns=["Chain", "Productive", "This sequence is Heavy and Productive"],
132    )
133
134    # Specify the annotation to add and into which tag does it go
135    metadata_annotation: Annotation = sequence_annotation(
136        tags=[template_tag(is_heavy_and_productive_tag.id)],
137    )
138
139    # Import templated metadata into the target collection
140    enpi_client.collection_api.add_metadata_from_df(
141        filter=metadata_import_filter,
142        data_frame=metadata_template_df,
143        annotation=metadata_annotation,
144    ).wait()
145
146```
147## Add some calculated sequence level metadata
148
149This example showcases how you can
150add some calculated sequence level metadata to your collections.
151
152The script will assume the following:
153
154- There is already a collection with the name `Example collection` that we are targeting for this example.
155
156The script will perform the following steps:
157
1581. Ensure that the tag we want to assign the metadata to exists; for this example it will be a sequence-level tag
159called `Max sequence count within clone`.
1602. Export the collection so we can calculate the metadata.
1613. Compute the example metadata, which in this case is the highest sequence count within a clone.
1624. Assign the metadata to the collection.
163
164```python
165import pandas as pd
166from enpi_api.l2.client.enpi_api_client import EnpiApiClient
167from enpi_api.l2.tags import CollectionTags, SequenceTags
168from enpi_api.l2.types.filter import (
169    MatchId,
170    MatchIdTarget,
171    TemplatedAndOperator,
172    TemplatedMatchId,
173    TemplatedMatchTag,
174)
175from enpi_api.l2.types.import_metadata_templated import (
176    Annotation,
177    sequence_annotation,
178    template_tag,
179)
180from enpi_api.l2.types.tag import TagDataType, TagLevel
181
182with EnpiApiClient() as enpi_client:
183    """We are assuming that there is a single collection with the name referenced below already on the ENPICOM Platform"""
184
185    collection_name = "Example collection"
186
187    filtered_collections = [collection for collection in enpi_client.collection_api.get_collections_metadata() if collection.name() == collection_name]
188    collection = filtered_collections[0]  # We assume that this name is unique here
189
190    # Check if our tag already exists
191    tag_key = "Max sequence count within clone"
192    tag_level = TagLevel.SEQUENCE
193
194    tag = enpi_client.tag_api.get_tag_archetype_by_name(key=tag_key, level=tag_level)
195    if not tag:  # Then we need to create it
196        tag = enpi_client.tag_api.create_tag_archetype(
197            level=tag_level,
198            data_type=TagDataType.INTEGER,
199            key=tag_key,
200        ).wait()  # We need to wait for the tag to be created before we can use it
201
202    # We need to create a filter to be able to fetch the data
203    filter = enpi_client.filter_api.create_filter(
204        name="Filter to download data",
205        # We are simply matching on the collection ID here
206        condition=MatchId(target=MatchIdTarget.COLLECTION, id=collection.id),
207    )
208
209    # Export the data into a Pandas DataFrame
210    exported_data_df = enpi_client.collection_api.get_as_df(
211        # Even though we already specified the collection ID in the filter, we need to specify it here as well to avoid accidentally querying too much data
212        collection_ids=[collection.id],
213        filter=filter,
214        # We specify the tags that we want to export, the fewer tags we specify, the faster the export will be
215        tag_ids=[
216            # This is what we are going to use to calculate the metadata with
217            SequenceTags.SequenceCount,
218        ],
219    ).wait()  # This starts a long-running operation, we need to wait for it if we want to use it further down in the script
220
221    # We now perform the calculation of the metadata, for each clone, we will get the maximum value of the `Sequence Count` tag
222    grouped_df = exported_data_df.groupby(["Unique Clone ID"]).max(numeric_only=True)
223
224    # For assigning the metadata we will now use the templated method that can assign different values per "match"
225    # For this, our metadata DataFrame will have 3 columns:
226    # - "Name": to match the collection name
227    # - "Unique Sequence ID": to match the sequence ID
228    # - "Max sequence count within clone": this column contains the value that will be assigned to the sequence that matches the "Unique Sequence ID"
229    metadata_filter = enpi_client.filter_api.create_templated_filter(
230        name="Filter to add metadata",
231        shared=False,
232        condition=TemplatedAndOperator(
233            conditions=[
234                # It can never hurt to specifically match the collection ID or name, just to be sure you do not
235                # accidentally assign metadata to the wrong collection
236                TemplatedMatchTag(tag_id=CollectionTags.Name),
237                TemplatedMatchId(target=MatchIdTarget.SEQUENCE),
238            ]
239        ),
240    )
241
242    # Specify the sequence-level annotation to add to the collection
243    # We specify it as a `template_tag` because the value comes from the DataFrame
244    metadata_annotation: Annotation = sequence_annotation([template_tag(tag.id)])
245
246    # Create metadata dataframe with the aforementioned columns
247    metadata_frame = pd.DataFrame(
248        [
249            [
250                collection.name(),  # We match on the "Name"
251                df_row[1]["Unique Sequence ID"],  # We match on the "Unique Sequence ID"
252                grouped_df.loc[df_row[1]["Unique Clone ID"]]["Sequence Count"],  # We add the calculated metadata
253            ]
254            for df_row in exported_data_df.iterrows()
255        ],
256        columns=["Name", "Unique Sequence ID", tag.key],
257    )
258
259    # Apply metadata to the collection
260    enpi_client.collection_api.add_metadata_from_df(
261        filter=metadata_filter,
262        annotation=metadata_annotation,
263        data_frame=metadata_frame,
264    ).wait()  # This starts a long-running operation, we need to wait for it if we want to use it further down in the script
265
266    # At this point, the metadata should have been added to the collection
267
268```
269'''