from functools import partial, reduce
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set, cast
from jsonschema import validate
from darwin.datatypes import (
Annotation,
AnnotationClass,
AnnotationFile,
Point,
SubAnnotation,
make_bounding_box,
make_keypoint,
make_line,
make_polygon,
make_tag,
)
from darwin.importer.formats.labelbox_schemas import labelbox_export
from darwin.utils import attempt_decode
[docs]
def parse_path(path: Path) -> Optional[List[AnnotationFile]]:
    """
    Parses the given LabelBox export file and, when it is a ``.json`` file, returns one
    ``AnnotationFile`` per entry of the export.

    The file must have a structure similar to the following:

    .. code-block:: javascript

        [
            {
                "Label": {
                    "objects": [
                        {
                            "title": "SomeTitle",
                            "bbox": {"top": 3558, "left": 145, "height": 623, "width": 449}
                        },
                        { }
                    ],
                    "classifications": [
                        {
                            "value": "a_question",
                            "answer": {"value": "an_answer"}
                        }
                    ]
                },
                "External ID": "demo-image-7.jpg"
            },
            { }
        ]

    You can check the Labelbox Schemas in `labelbox_schemas.py`.

    Currently we support the following annotations:

    - bounding-box ``Image``: https://docs.labelbox.com/docs/bounding-box-json
    - polygon ``Image``: https://docs.labelbox.com/docs/polygon-json
    - point ``Image``: https://docs.labelbox.com/docs/point-json
    - polyline ``Image``: https://docs.labelbox.com/docs/polyline-json

    We also support conversion from question/answer to Annotation Tags for the following:

    - Radio Buttons
    - Checklists
    - Free Text

    Parameters
    ----------
    path: Path
        The path of the file to parse.

    Returns
    -------
    Optional[List[darwin.datatypes.AnnotationFile]]
        The AnnotationFiles with the parsed information from the file, or ``None`` if the
        file suffix is not ``.json``.

    Raises
    ------
    ValidationError
        If the decoded content does not satisfy the ``labelbox_export`` schema.
    ValueError
        If an object or classification inside a label has none of the supported shapes.
        To see a list of possible annotation formats go to:
        https://docs.labelbox.com/docs/annotation-types-1
    """
    if path.suffix == ".json":
        exported_entries = attempt_decode(path)
        # Validate the whole export up front so the converters can rely on its structure.
        validate(exported_entries, schema=labelbox_export)
        return [_convert(entry, path=path) for entry in exported_entries]

    return None
def _convert(file_data: Dict[str, Any], path) -> AnnotationFile:
    """
    Builds the ``AnnotationFile`` for a single entry of a LabelBox export.

    Parameters
    ----------
    file_data: Dict[str, Any]
        One entry of the export. Its ``"External ID"`` and ``"Label"`` keys are read, and
        inside the label its ``"objects"`` and ``"classifications"`` lists.
    path
        Path of the export file; stored unchanged on the resulting ``AnnotationFile``.

    Returns
    -------
    AnnotationFile
        A file whose annotations are the converted objects followed by the tags created
        from the classifications, with ``remote_path`` set to ``"/"``.
    """
    label = cast(Dict[str, Any], file_data.get("Label"))
    raw_objects = cast(List[Dict[str, Any]], label.get("objects"))
    raw_classifications = cast(List[Dict[str, Any]], label.get("classifications"))

    # Each classification may expand into several tags (checklists), so flatten one level.
    tag_annotations: List[Annotation] = [
        tag
        for classification in raw_classifications
        for tag in _convert_label_classifications(classification)
    ]
    shape_annotations: List[Annotation] = [
        _convert_label_objects(raw_object) for raw_object in raw_objects
    ]

    all_annotations: List[Annotation] = shape_annotations + tag_annotations
    used_classes: Set[AnnotationClass] = {
        annotation.annotation_class for annotation in all_annotations
    }

    return AnnotationFile(
        annotations=all_annotations,
        path=path,
        filename=str(file_data.get("External ID")),
        annotation_classes=used_classes,
        remote_path="/",
    )
def _convert_label_objects(obj: Dict[str, Any]) -> Annotation:
    """
    Converts one entry of a label's ``"objects"`` list into an ``Annotation``.

    The keys ``"bbox"``, ``"polygon"``, ``"point"`` and ``"line"`` are checked in that
    order and the first one holding a truthy value decides the annotation type.

    Parameters
    ----------
    obj: Dict[str, Any]
        The label object; its ``"title"`` is used as the annotation's title.

    Returns
    -------
    Annotation
        The annotation produced by the matching converter.

    Raises
    ------
    ValueError
        If none of the supported keys holds a truthy value.
    """
    title = str(obj.get("title"))

    # Order matters: it mirrors the precedence bbox > polygon > point > line.
    converters_by_key = (
        ("bbox", _to_bbox_annotation),
        ("polygon", _to_polygon_annotation),
        ("point", _to_keypoint_annotation),
        ("line", _to_line_annotation),
    )
    for key, to_annotation in converters_by_key:
        shape = obj.get(key)
        if shape:
            return to_annotation(shape, title)

    raise ValueError(f"Unknown label object {obj}")
def _convert_label_classifications(obj: Dict[str, Any]) -> List[Annotation]:
    """
    Converts one entry of a label's ``"classifications"`` list into tag annotations.

    Parameters
    ----------
    obj: Dict[str, Any]
        The classification. ``"value"`` is the question; the reply is read from
        ``"answer"`` (a string or a dictionary) or, when that is absent, from ``"answers"``.

    Returns
    -------
    List[Annotation]
        A single tag for a string or dictionary ``"answer"``, or one tag per entry of
        ``"answers"``.

    Raises
    ------
    ValueError
        If both ``"answer"`` and ``"answers"`` are missing or ``None``.
    """
    question = str(obj.get("value"))
    single_answer: Any = obj.get("answer")

    if single_answer is None:
        # No single answer: fall back to the checklist form.
        checklist = obj.get("answers")
        if checklist is None:
            raise ValueError(f"Unknown classification obj {obj}")
        return _to_tag_annotations_from_checklist(question, checklist)

    if isinstance(single_answer, str):
        return [_to_tag_annotations_from_free_text(question, single_answer)]

    return [_to_tag_annotations_from_radio_box(question, single_answer)]
def _to_bbox_annotation(bbox: Dict[str, Any], title: str) -> Annotation:
    """
    Creates a bounding-box annotation from a LabelBox ``bbox`` dictionary.

    Parameters
    ----------
    bbox: Dict[str, Any]
        Dictionary read for the keys ``"left"``, ``"top"``, ``"width"`` and ``"height"``.
    title: str
        Title given to the annotation.

    Returns
    -------
    Annotation
        The result of ``make_bounding_box(title, left, top, width, height)``.
    """
    left, top, width, height = cast(
        List[float], [bbox.get(key) for key in ("left", "top", "width", "height")]
    )
    return make_bounding_box(title, left, top, width, height)
def _to_polygon_annotation(polygon: List[Point], title: str) -> Annotation:
    """
    Creates a polygon annotation with the given title from a list of points.

    The points are handed to ``make_polygon`` unchanged, with ``None`` as its third
    argument.
    """
    polygon_annotation: Annotation = make_polygon(title, polygon, None)
    return polygon_annotation
def _to_keypoint_annotation(point: Point, title: str) -> Annotation:
    """
    Creates a keypoint annotation with the given title from a point.

    The coordinates are read from the point's ``"x"`` and ``"y"`` keys.
    """
    return make_keypoint(
        title,
        cast(float, point.get("x")),
        cast(float, point.get("y")),
    )
def _to_line_annotation(line: List[Point], title: str) -> Annotation:
    """
    Creates a line annotation with the given title from a list of points.

    The points are handed to ``make_line`` unchanged, with ``None`` as its third argument.
    """
    line_annotation: Annotation = make_line(title, line, None)
    return line_annotation
def _to_tag_annotations_from_radio_box(
    question: str, radio_button: Dict[str, Any]
) -> Annotation:
    """
    Creates a tag named ``"<question>:<answer>"`` for a radio-button classification.

    The answer is the string form of the radio button's ``"value"`` entry.
    """
    selected = radio_button.get("value")
    return make_tag(f"{question}:{selected!s}")
def _to_tag_annotations_from_checklist(question: str, checklist) -> List[Annotation]:
    """
    Creates one tag per checklist entry, each named ``"<question>:<value>"``.

    Parameters
    ----------
    question: str
        The question the checklist answers.
    checklist
        Iterable of entries; each entry's ``"value"`` is read with ``.get``.

    Returns
    -------
    List[Annotation]
        The tags, in the same order as the checklist entries.
    """
    return [make_tag(f"{question}:{option.get('value')}") for option in checklist]
def _to_tag_annotations_from_free_text(question: str, free_text: str) -> Annotation:
    """
    Creates a tag named after the question, carrying the free-text reply as a ``"text"``
    sub-annotation.
    """
    text_reply = SubAnnotation(annotation_type="text", data=free_text)
    return make_tag(question, [text_reply])
def _get_class(annotation: Annotation) -> AnnotationClass:
    """
    Returns the ``annotation_class`` attribute of the given annotation.
    """
    annotation_class: AnnotationClass = annotation.annotation_class
    return annotation_class
def _flat_map_list(the_list: List[List[Any]]) -> List[Any]:
# We do a flat_map here: https://stackoverflow.com/a/2082107/1337392
return reduce(list.__add__, the_list)
def _map_list(fun: Callable[[Any], Any], the_list: List[Any]) -> List[Any]:
return list(map(fun, the_list))