import datetime
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional
from xml.etree.ElementTree import Element, SubElement, tostring
import darwin.datatypes as dt
# Warning text for low-level functions that are slated to become private; it
# directs callers to ``export`` instead. The leading and trailing newlines are
# part of the message.
DEPRECATION_MESSAGE = """
This function is going to be turned into private. This means that breaking
changes in its interface and implementation are to be expected. We encourage using ``export``
instead of calling this low-level function directly.
"""
[docs]
def export(annotation_files: Iterator[dt.AnnotationFile], output_dir: Path) -> None:
    """
    Exports the given ``AnnotationFile``\\s into the cvat format inside of the given ``output_dir``.

    All files are serialised into one XML document that is written to
    ``output_dir / "output.xml"``. ``output_dir`` is created, together with any
    missing parent folders, when it does not exist yet.

    Parameters
    ----------
    annotation_files : Iterator[dt.AnnotationFile]
        The ``AnnotationFile``\\s to be exported.
    output_dir : Path
        The folder where the new cvat file will be.
    """
    output = _build_xml(list(annotation_files))
    # TODO, maybe an optional output name (like the dataset name if available)
    output_file_path = (output_dir / "output").with_suffix(".xml")
    # Make sure the target folder exists; otherwise open() raises
    # FileNotFoundError for a fresh output_dir.
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file_path, "wb") as f:
        f.write(tostring(output))
def _add_subelement_text(parent: Element, name: str, value: Any) -> Element:
    """
    Appends a child element to ``parent`` and fills in its text.

    Parameters
    ----------
    parent : Element
        The element that receives the new child.
    name : str
        The tag of the new child.
    value : Any
        The value to store; it is converted with ``str`` before being used as
        the child's text.

    Returns
    -------
    Element
        The newly created child element.
    """
    child = SubElement(parent, name)
    text = str(value)
    child.text = text
    return child
def _build_xml(annotation_files: List[dt.AnnotationFile]) -> Element:
    """
    Builds the complete ``annotations`` XML tree for the given files.

    The tree contains a ``version`` element (``1.1``), followed by the content
    added by ``_build_meta`` and then by ``_build_images``.

    Parameters
    ----------
    annotation_files : List[dt.AnnotationFile]
        The ``AnnotationFile``\\s to serialise.

    Returns
    -------
    Element
        The root ``annotations`` element.
    """
    lookup: Dict[str, int] = _build_label_lookup(annotation_files)

    annotations_root = Element("annotations")
    _add_subelement_text(annotations_root, "version", "1.1")

    # Both section builders share the same signature; meta must come first.
    for build_section in (_build_meta, _build_images):
        build_section(annotations_root, annotation_files, lookup)

    return annotations_root
def _build_images(
    root: Element,
    annotation_files: List[dt.AnnotationFile],
    label_lookup: Dict[str, int],
) -> None:
    """
    Adds one ``image`` element per annotation file to ``root``.

    Each ``image`` carries ``id`` (numbered from 1 in list order), ``name``,
    ``width`` and ``height`` attributes, and every annotation of the file is
    handed to ``_build_annotation``.

    Parameters
    ----------
    root : Element
        The element the ``image`` elements are appended to.
    annotation_files : List[dt.AnnotationFile]
        The files to write, in output order.
    label_lookup : Dict[str, int]
        Not used by this function.
    """
    for image_id, current_file in enumerate(annotation_files, start=1):
        image = SubElement(root, "image")
        image.set("id", str(image_id))
        image.set("name", current_file.filename)
        image.set("width", str(current_file.image_width))
        image.set("height", str(current_file.image_height))

        for annotation in current_file.annotations:
            _build_annotation(image, annotation)
def _build_annotation(image: Element, annotation: dt.Annotation) -> None:
    """
    Adds a ``box`` element for a bounding box annotation to ``image``.

    Annotations whose class type is not ``bounding_box`` are not written; a
    warning naming the skipped type is printed instead.

    Parameters
    ----------
    image : Element
        The ``image`` element that receives the ``box``.
    annotation : dt.Annotation
        The annotation to write. For bounding boxes its ``data`` is read
        through the keys ``x``, ``y``, ``w`` and ``h``.
    """
    annotation_class = annotation.annotation_class
    if annotation_class.annotation_type != "bounding_box":
        print(f"[warning] skipping {annotation_class.annotation_type}")
        return

    box = SubElement(image, "box")
    box.set("label", annotation_class.name)

    # Top-left corner is stored directly; bottom-right is top-left plus size.
    data = annotation.data
    box.set("xtl", str(data["x"]))
    box.set("ytl", str(data["y"]))
    box.set("xbr", str(data["x"] + data["w"]))
    box.set("ybr", str(data["y"] + data["h"]))
    box.set("occluded", "0")

    _build_attributes(box, annotation)
def _build_attributes(box: Element, annotation: dt.Annotation) -> None:
    """
    Adds ``attribute`` elements to ``box`` for the annotation's sub-annotations.

    The ``text`` and ``instance_id`` sub-annotations, when present, become
    attributes named ``__text`` and ``__instance_id`` whose text is the
    sub-annotation's data. Every entry of the ``attributes`` sub-annotation
    becomes an ``attribute`` element with empty text, named after the entry.

    Parameters
    ----------
    box : Element
        The ``box`` element that receives the attributes.
    annotation : dt.Annotation
        The annotation whose sub-annotations are looked up via ``get_sub``.
    """
    # Single-valued sub-annotations: (sub-annotation type, exported attribute name).
    single_valued = (("text", "__text"), ("instance_id", "__instance_id"))
    for sub_type, exported_name in single_valued:
        sub_annotation = annotation.get_sub(sub_type)
        if not sub_annotation:
            continue
        element = _add_subelement_text(box, "attribute", sub_annotation.data)
        element.attrib["name"] = exported_name

    attribute_list = annotation.get_sub("attributes")
    if not attribute_list:
        return
    for attribute_name in attribute_list.data:
        element = _add_subelement_text(box, "attribute", "")
        element.attrib["name"] = attribute_name
def _build_meta(
    root: Element,
    annotation_files: List[dt.AnnotationFile],
    label_lookup: Dict[str, int],
) -> None:
    """
    Adds the ``meta`` element, describing a single exported task, to ``root``.

    The task reports the number of annotation files as its ``size``, lists the
    labels from ``label_lookup`` and contains one segment. Owner details are
    fixed placeholder values.

    Parameters
    ----------
    root : Element
        The element the ``meta`` element is appended to.
    annotation_files : List[dt.AnnotationFile]
        The files being exported.
    label_lookup : Dict[str, int]
        The labels to list under ``labels``.
    """
    # Take the time once so "dumped", "created" and "updated" agree within
    # the same document instead of differing by a few microseconds.
    timestamp = str(datetime.datetime.now(tz=datetime.timezone.utc))

    meta: Element = SubElement(root, "meta")
    _add_subelement_text(meta, "dumped", timestamp)

    task: Element = SubElement(meta, "task")
    _add_subelement_text(task, "id", 1)
    _add_subelement_text(task, "name", "exported_task_from_darwin")
    _add_subelement_text(task, "size", len(annotation_files))
    _add_subelement_text(task, "mode", "annotation")
    # The CVAT XML format names this element "overlap" (was misspelled "overlapp").
    _add_subelement_text(task, "overlap", 0)
    # NOTE(review): the helper stringifies its value, so this writes the
    # literal text "None" - confirm whether an empty element is wanted instead.
    _add_subelement_text(task, "bugtracker", None)
    _add_subelement_text(task, "flipped", False)
    _add_subelement_text(task, "created", timestamp)
    _add_subelement_text(task, "updated", timestamp)

    labels: Element = SubElement(task, "labels")
    _build_labels(labels, label_lookup)

    segments: Element = SubElement(task, "segments")
    _build_segments(segments, annotation_files)

    owner: Element = SubElement(task, "owner")
    _add_subelement_text(owner, "username", "example_username")
    _add_subelement_text(owner, "email", "user@example.com")
def _build_segments(
    segments: Element, annotation_files: List[dt.AnnotationFile]
) -> None:
    """
    Adds a single ``segment`` element spanning all annotation files.

    The segment has id ``1``, starts at ``1`` and ends at the number of
    annotation files.

    Parameters
    ----------
    segments : Element
        The ``segments`` element that receives the ``segment``.
    annotation_files : List[dt.AnnotationFile]
        The files being exported; only their count is used.
    """
    segment = SubElement(segments, "segment")
    fields = (
        ("id", 1),
        ("start", 1),
        ("end", len(annotation_files)),
        ("url", "not applicable"),
    )
    for tag, value in fields:
        _add_subelement_text(segment, tag, value)
def _build_labels(labels: Element, label_lookup: Dict[str, int]) -> None:
    """
    Adds one ``label`` element per key of ``label_lookup`` to ``labels``.

    Every ``label`` holds a ``name`` element with the key and an empty
    ``attributes`` element. The values of ``label_lookup`` are not used.

    Parameters
    ----------
    labels : Element
        The ``labels`` element that receives the new children.
    label_lookup : Dict[str, int]
        The lookup whose keys are the label names, written in iteration order.
    """
    for label_name in label_lookup:
        entry = SubElement(labels, "label")
        _add_subelement_text(entry, "name", label_name)
        SubElement(entry, "attributes")
def _build_label_lookup(annotation_files: List[dt.AnnotationFile]) -> Dict[str, int]:
    """
    Assigns a running index to every distinct bounding box class name.

    Classes are visited file by file; a name gets the next free index (starting
    at 0) the first time it is seen. Classes whose type is not ``bounding_box``
    are ignored.

    Parameters
    ----------
    annotation_files : List[dt.AnnotationFile]
        The files whose ``annotation_classes`` are scanned.

    Returns
    -------
    Dict[str, int]
        Mapping from class name to its index, in first-seen order.
    """
    bounding_box_names = (
        annotation_class.name
        for annotation_file in annotation_files
        for annotation_class in annotation_file.annotation_classes
        if annotation_class.annotation_type == "bounding_box"
    )

    lookup: Dict[str, int] = {}
    for name in bounding_box_names:
        # setdefault keeps the index of a name that was already registered.
        lookup.setdefault(name, len(lookup))
    return lookup