import datetime
import shutil
from enum import Enum
from pathlib import Path
from typing import Any, Dict, Optional
import requests
from darwin.dataset.identifier import DatasetIdentifier
[docs]
class ReleaseStatus(Enum):
PENDING = "pending"
COMPLETE = "complete"
FAILED = "failed"
[docs]
class Release:
"""
Represents a release/export. Releases created this way can only contain items with 'completed'
status.
Parameters
----------
dataset_slug : str
The slug of the dataset.
team_slug : str
the slug of the team.
version : str
The version of the ``Release``.
name : str
The name of the ``Release``.
status : ReleaseStatus
The status of the ``Release``.
url : Optional[str]
The full url used to download the ``Release``.
export_date : datetime.datetime
The ``datetime`` of when this release was created.
image_count : Optional[int]
Number of images in this ``Release``.
class_count : Optional[int]
Number of distinct classes in this ``Release``.
available : bool
If this ``Release`` is downloadable or not.
latest : bool
If this ``Release`` is the latest one or not.
format : str
Format for the file of this ``Release`` should it be downloaded.
Attributes
----------
dataset_slug : str
The slug of the dataset.
team_slug : str
the slug of the team.
version : str
The version of the ``Release``.
name : str
The name of the ``Release``.
status : ReleaseStatus
The status of the ``Release``.
url : Optional[str]
The full url used to download the ``Release``.
export_date : datetime.datetime
The ``datetime`` of when this release was created.
image_count : Optional[int]
Number of images in this ``Release``.
class_count : Optional[int]
Number of distinct classes in this ``Release``.
available : bool
If this ``Release`` is downloadable or not.
latest : bool
If this ``Release`` is the latest one or not.
format : str
Format for the file of this ``Release`` should it be downloaded.
"""
def __init__(
self,
dataset_slug: str,
team_slug: str,
version: str,
name: str,
status: ReleaseStatus,
url: Optional[str],
export_date: datetime.datetime,
image_count: Optional[int],
class_count: Optional[int],
available: bool,
latest: bool,
format: str,
):
self.dataset_slug = dataset_slug
self.team_slug = team_slug
self.version = version
self.name = name
self.status = ReleaseStatus(status)
self.url = url
self.export_date = export_date
self.image_count = image_count
self.class_count = class_count
self.available = available
self.latest = latest
self.format = format
[docs]
@classmethod
def parse_json(
cls, dataset_slug: str, team_slug: str, payload: Dict[str, Any]
) -> "Release":
"""
Given a json, parses it into a ``Release`` object instance.
Parameters
----------
dataset_slug : str
The slug of the dataset this ``Release`` belongs to.
team_slug : str
The slug of the team this ``Release``'s dataset belongs to.
payload : Dict[str, Any]
A Dictionary with the ``Release`` information. It must have a minimal format similar to:
.. code-block:: javascript
{
"version": "a_version",
"name": "a_name"
}
If no ``format`` key is found in ``payload``, the default will be ``json``.
Optional ``payload`` has no ``download_url`` key, then ``url``, ``available``,
``image_count``, ``class_count`` and ``latest`` will default to either ``None`` or
``False`` depending on the type.
A more complete format for this parameter would be similar to:
.. code-block:: javascript
{
"version": "a_version",
"name": "a_name",
"metadata": {
"num_images": 1,
"annotation_classes": []
},
"download_url": "http://www.some_url_here.com",
"latest": false,
"format": "a_format"
}
Returns
-------
Release
A ``Release`` created from the given payload.
"""
try:
export_date: datetime.datetime = datetime.datetime.strptime(
payload["inserted_at"], "%Y-%m-%dT%H:%M:%S%z"
)
except ValueError:
# For python version older than 3.7
export_date = datetime.datetime.strptime(
payload["inserted_at"], "%Y-%m-%dT%H:%M:%SZ"
)
if payload["download_url"] is None:
return cls(
dataset_slug=dataset_slug,
team_slug=team_slug,
version=payload["version"],
name=payload["name"],
status=payload["status"],
export_date=export_date,
url=None,
available=False,
image_count=None,
class_count=None,
latest=False,
format=payload.get("format", "json"),
)
return cls(
dataset_slug=dataset_slug,
team_slug=team_slug,
version=payload["version"],
name=payload["name"],
status=payload["status"],
image_count=payload["metadata"]["num_images"],
class_count=len(payload["metadata"]["annotation_classes"]),
export_date=export_date,
url=payload["download_url"],
available=True,
latest=payload["latest"],
format=payload.get("format", "json"),
)
[docs]
def download_zip(self, path: Path) -> Path:
"""
Downloads the release content into a zip file located by the given path.
Parameters
----------
path : Path
The path where the zip file will be located.
Returns
--------
Path
Same ``Path`` as provided in the parameters.
Raises
------
ValueError
If this ``Release`` object does not have a specified url.
"""
if not self.url:
raise ValueError("Release must have a valid url to download the zip.")
with requests.get(self.url, stream=True) as response:
with open(path, "wb") as download_file:
shutil.copyfileobj(response.raw, download_file)
return path
@property
def identifier(self) -> DatasetIdentifier:
"""DatasetIdentifier : The ``DatasetIdentifier`` for this ``Release``."""
return DatasetIdentifier(
team_slug=self.team_slug, dataset_slug=self.dataset_slug, version=self.name
)