from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from pydantic import BaseModel
from darwin.path_utils import construct_full_path
[docs]
@dataclass(frozen=True, eq=True)
class DatasetItem(BaseModel):
"""
DatasetItem represents files that can be images or videos which belong to a dataset.
"""
#: The id of this ``DatasetItem``.
id: int
#: The filename of this ``DatasetItem``.
filename: str
#: The status of this ``DatasetItem``. It can be ``"archived"``, ``"error"``, ``"uploading"``,
#: ``"processing"``, ``"new"``, ``"annotate"``, ``"review"`` or ``"complete"``.
status: str
#: Whether or not this item was soft deleted.
archived: bool
#: The size of this ``DatasetItem``\'s file in bytes.
filesize: int
#: The id of the ``Dataset`` this ``DatasetItem`` belongs to.
dataset_id: int
#: The slugified name of the ``Dataset`` this ``DatasetItem`` belongs to.
dataset_slug: str
#: The sequential value of this ``DatasetItem`` in relation to the ``Dataset`` it belongs to.
#: This allows us to know which items were added first and is used mostly for sorting purposes.
seq: int
#: The id of this ``DatasetItem``'s workflow. A ``None`` value means this ``DatasetItem`` is
#: new and was never worked on, or was reset to the new state.
current_workflow_id: Optional[int] = None
#: The darwin path to this ``DatasetItem``.
path: str
#: The names of each slot in the item, most items have a single slot corresponding to the file itself.
#: only used for v2 dataset items
slots: List[Any]
#: Information about the slot layout of the item including type, version, and slot names
layout: Dict
#: Metadata of this ``DatasetItem``'s workflow. A ``None`` value means this ``DatasetItem`` is
#: new and was never worked on, or was reset to the new state.
current_workflow: Optional[Dict[str, Any]] = None
@property
def full_path(self) -> str:
"""
The full POSIX relative path of this ``DatasetItem``.
"""
return construct_full_path(self.path, self.filename)
[docs]
@classmethod
def parse(cls, raw: Dict[str, Any], dataset_slug: str = "n/a") -> "DatasetItem":
"""
Parses the given dictionary into a ``DatasetItem``.
Parameters
----------
raw : Dict[str, Any]
The dictionary to parse.
Returns
-------
DatasetItem
A dataset item with the parsed information.
Raises
------
ValidationError
If any of the keys from the given dictionary do not have the correct format or are
missing.
"""
if "slots" in raw:
data = {
"id": raw["id"],
"filename": raw["name"],
"path": raw["path"],
"status": raw["status"],
"archived": raw["archived"],
"filesize": sum(file.get("size_bytes", 0) for file in raw["slots"]),
"dataset_id": raw["dataset_id"],
"dataset_slug": dataset_slug,
"seq": None,
"current_workflow_id": raw.get("workflow_data", {}).get("workflow_id"),
"current_workflow": raw.get("workflow_data"),
"slots": raw["slots"],
"layout": raw.get("layout"),
}
else:
data = {
"id": raw["id"],
"filename": raw["filename"],
"status": raw["status"],
"archived": raw["archived"],
"filesize": raw["file_size"],
"dataset_id": raw["dataset_id"],
"dataset_slug": dataset_slug,
"seq": raw["seq"],
"current_workflow_id": raw.get("current_workflow_id"),
"current_workflow": raw.get("current_workflow"),
"path": raw["path"],
"slots": [],
}
return DatasetItem(**data)