Source code for darwin.dataset.identifier

import re
from typing import Optional, Tuple, Union


class DatasetIdentifier:
    """
    SDK-side representation of a dataset identifier.

    An identifier is written as ``<team-slug>/<dataset-slug>:<version>``, where the
    team and version parts are optional. This class only stores the three parts and
    renders them back to that textual form; when no version is supplied the
    ``version`` attribute stays ``None`` (it is not replaced by ``latest`` here).

    Parameters
    ----------
    dataset_slug : str
        The slugified name of the dataset.
    team_slug : Optional[str], default: None
        The slugified name of the team.
    version : Optional[str], default: None
        The version of the identifier.

    Attributes
    ----------
    dataset_slug : str
        The slugified name of the dataset.
    team_slug : Optional[str], default: None
        The slugified name of the team.
    version : Optional[str], default: None
        The version of the identifier.
    """

    def __init__(
        self,
        dataset_slug: str,
        team_slug: Optional[str] = None,
        version: Optional[str] = None,
    ):
        self.dataset_slug = dataset_slug
        self.team_slug = team_slug
        self.version = version

    @classmethod
    def parse(cls, identifier: Union[str, "DatasetIdentifier"]) -> "DatasetIdentifier":
        """
        Builds a ``DatasetIdentifier`` from its textual form.

        Parameters
        ----------
        identifier : Union[str, DatasetIdentifier]
            Either an identifier string to be parsed, or an already constructed
            ``DatasetIdentifier``, which is handed back untouched.

        Returns
        -------
        DatasetIdentifier
            The given instance itself, or a new instance built from the parsed string.

        Raises
        ------
        ValueError
            If the ``identifier`` given is invalid.
        """
        if not isinstance(identifier, DatasetIdentifier):
            team, dataset, version = _parse(identifier)
            return cls(dataset_slug=dataset, team_slug=team, version=version)

        # Already an identifier object: return the very same instance, no copy is made.
        return identifier

    def __str__(self):
        # Assemble ``[team/]dataset[:version]``; falsy team/version parts are left out.
        pieces = []
        if self.team_slug:
            pieces.append(f"{self.team_slug}/")
        pieces.append(f"{self.dataset_slug}")
        if self.version:
            pieces.append(f":{self.version}")
        return "".join(pieces)
def _parse(slug: str) -> Tuple[Optional[str], str, Optional[str]]:
    """
    Splits a dataset identifier string into its team, dataset and version parts.

    Parameters
    ----------
    slug : str
        The identifier to split, in the form ``[<team-slug>/]<dataset-slug>[:<version>]``.

    Returns
    -------
    Tuple[Optional[str], str, Optional[str]]
        The team slug (``None`` when absent), the dataset slug, and the version
        (``None`` when absent), in that order.

    Raises
    ------
    ValueError
        If ``slug`` does not match the format checked by ``_is_slug_valid``.
    """
    if not _is_slug_valid(slug):
        raise ValueError(f"Invalid dataset identifier {slug}")

    # Validation guarantees at most one "/" and that it comes before any ":",
    # so a single split on "/" is enough to isolate the team.
    team: Optional[str] = None
    dataset = slug
    if "/" in slug:
        team, dataset = slug.split("/", 1)

    version: Optional[str] = None
    if ":" in dataset:
        # The version part may itself contain ":" (see ``_is_slug_valid``), so split
        # on the first ":" only; an unbounded split would yield more than two parts
        # and make the unpacking fail for identifiers that passed validation.
        dataset, version = dataset.split(":", 1)

    return team, dataset, version


def _is_slug_valid(slug: str) -> bool:
    """
    Tells whether ``slug`` is a well-formed dataset identifier.

    Parameters
    ----------
    slug : str
        The identifier string to check.

    Returns
    -------
    bool
        ``True`` if the whole string matches ``[<team>/]<dataset>[:<version>]``,
        ``False`` otherwise.
    """
    # Team and dataset slugs: letters, digits, "_", "." and "-".
    slug_format = r"[\_a-zA-Z0-9.-]+"
    # Versions accept the same characters plus ":".
    version_format = r"[\_a-zA-Z0-9.:-]+"
    return (
        re.fullmatch(rf"({slug_format}/)?{slug_format}(:{version_format})?", slug)
        is not None
    )