# Source code for darwin.options

import sys
from argparse import ArgumentParser, Namespace
from datetime import datetime
from typing import Any, Optional, Tuple

import argcomplete
from darwin.datatypes import AnnotatorReportGrouping



class Options:
    """
    Builds the ``darwin`` command line parser and parses the user's CLI options.

    The constructor registers every top-level command (``authenticate``,
    ``compression``, ``team``, ``convert``, ``validate``, ``dataset``,
    ``report``, ``version``, ``extract``) together with their nested actions
    and arguments, then enables shell completion through ``argcomplete``.

    Attributes
    ----------
    parser : ArgumentParser
        The fully configured root parser.
    """

    def __init__(self) -> None:
        """
        Creates the root parser and registers all commands and their arguments.

        The selected top-level command is stored in ``command``. Nested actions
        of ``dataset`` and ``report`` are stored in ``action`` and the nested
        action of ``extract`` is stored in ``extract_type``.
        """
        self.parser: ArgumentParser = ArgumentParser(
            description="Command line tool to create/upload/download datasets on darwin."
        )

        subparsers = self.parser.add_subparsers(dest="command")
        subparsers.add_parser("help", help="Show this help message and exit.")

        # AUTHENTICATE
        auth = subparsers.add_parser("authenticate", help="Authenticate the user. ")
        auth.add_argument("--api_key", type=str, help="API key to use.")
        auth.add_argument("--default_team", type=str, help="Default team to use.")
        auth.add_argument("--datasets_dir", type=str, help="Folder to store datasets.")

        # SET COMPRESSION LEVEL
        parser_compression = subparsers.add_parser(
            "compression", help="Set compression level."
        )
        parser_compression.add_argument(
            "compression_level",
            type=int,
            choices=range(0, 10),
            help="Compression level to use on uploaded data. 0 is no compression, 9 is the best.",
        )

        # SELECT TEAM
        parser_team = subparsers.add_parser("team", help="List or pick teams.")
        parser_team.add_argument(
            "team_name", nargs="?", type=str, help="Team name to use."
        )
        parser_team.add_argument(
            "-c",
            "--current",
            action="store_true",
            required=False,
            help="Shows only the current team.",
        )

        # CONVERT (local annotation files; `dataset convert` below works on a dataset)
        parser_convert_files = subparsers.add_parser(
            "convert", help="Converts darwin json to other annotation formats."
        )
        parser_convert_files.add_argument(
            "format", type=str, help="Annotation format to convert to."
        )
        parser_convert_files.add_argument(
            "files",
            type=str,
            nargs="+",
            help="Annotation files (or folders) to convert.",
        )
        parser_convert_files.add_argument(
            "output_dir", type=str, help="Where to store output files."
        )

        # VALIDATE SCHEMA
        parser_validate_schema = subparsers.add_parser(
            "validate", help="Validate annotation files against Darwin schema"
        )
        parser_validate_schema.add_argument(
            "location",
            help="Location of file/folder to validate. Accepts single files or a folder to search *.json files",
        )
        parser_validate_schema.add_argument(
            "--pattern",
            action="store_true",
            help="'location' is a Folder + File glob style pattern to search (eg: ./*.json)",
        )

        parser_validate_schema.add_argument(
            "--silent",
            action="store_true",
            help="Flag to suppress all output except errors to console",
        )
        parser_validate_schema.add_argument(
            "--output", help="name of file to write output json to"
        )

        # DATASET
        dataset = subparsers.add_parser(
            "dataset",
            help="Dataset related functions.",
            description="Arguments to interact with datasets",
        )
        dataset_action = dataset.add_subparsers(dest="action")

        # Remote
        parser_remote = dataset_action.add_parser(
            "remote", help="List remote datasets."
        )
        parser_remote.add_argument("-t", "--team", help="Specify team.")
        parser_remote.add_argument(
            "-a", "--all", action="store_true", help="List datasets for all teams."
        )

        # Local
        parser_local = dataset_action.add_parser(
            "local", help="List downloaded datasets."
        )
        parser_local.add_argument("-t", "--team", help="Specify team.")

        # Create
        parser_create = dataset_action.add_parser(
            "create", help="Creates a new dataset on darwin."
        )
        parser_create.add_argument("dataset", type=str, help="Dataset name.")

        # Path
        parser_path = dataset_action.add_parser(
            "path", help="Print local path to dataset."
        )
        parser_path.add_argument("dataset", type=str, help="Dataset name.")

        # Url
        parser_url = dataset_action.add_parser(
            "url", help="Print url to dataset on darwin."
        )
        parser_url.add_argument("dataset", type=str, help="Dataset name.")

        # Push
        parser_push = dataset_action.add_parser(
            "push", help="Upload data to an existing (remote) dataset."
        )
        parser_push.add_argument(
            "dataset",
            type=str,
            help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.",
        )
        parser_push.add_argument("files", type=str, nargs="+", help="Files to upload.")
        # NOTE: when the flag is omitted the value is the empty string, not an
        # empty list; when given, argparse produces a list of strings.
        parser_push.add_argument(
            "-e",
            "--exclude",
            type=str,
            nargs="+",
            default="",
            help="Excludes the files with the specified extension/s if a data folder is provided as data path.",
        )
        # No `type` is set, so the value stays a string ("native" or the raw
        # user input).
        parser_push.add_argument(
            "-f",
            "--fps",
            default="native",
            help="Frames per second for video split (recommended: 1), use 'native' to use the videos intrinsic fps.",
        )
        parser_push.add_argument(
            "--frames",
            action="store_true",
            help="Annotate a video as independent frames.",
        )

        parser_push.add_argument(
            "--extract_views",
            action="store_true",
            help="Upload a volume with all 3 orthogonal views.",
        )
        parser_push.add_argument(
            "--handle_as_slices",
            action="store_true",
            help="Upload DICOM files as slices",
        )

        parser_push.add_argument(
            "--path", type=str, default=None, help="Folder to upload the files into."
        )

        parser_push.add_argument(
            "--verbose", action="store_true", help="Flag to show upload details."
        )

        parser_push.add_argument(
            "-p",
            "--preserve-folders",
            action="store_true",
            help="Preserve the local folder structure in the dataset.",
        )
        parser_push.add_argument(
            "--item-merge-mode",
            type=str,
            choices=["slots", "series", "channels"],
            help="Specify the item merge mode: `slots`, `series`, or `channels`",
        )

        # Remove
        parser_remove = dataset_action.add_parser(
            "remove", help="Remove a remote or remote and local dataset."
        )
        parser_remove.add_argument(
            "dataset", type=str, help="Remote dataset name to delete."
        )

        # Export
        parser_export = dataset_action.add_parser(
            "export", help="Export a version of a dataset."
        )
        parser_export.add_argument(
            "dataset", type=str, help="Remote dataset name to export."
        )
        parser_export.add_argument(
            "name", type=str, help="Name with which the version gets tagged."
        )
        parser_export.add_argument(
            "--class-ids",
            type=str,
            nargs="+",
            help=(
                "List of annotation class ids. If present, it will only include items that have"
                " annotations with a class whose id matches."
            ),
        )
        parser_export.add_argument(
            "--include-authorship",
            default=False,
            action="store_true",
            help="Each annotation contains annotator and reviewer authorship metadata.",
        )
        parser_export.add_argument(
            "--include-url-token",
            default=False,
            action="store_true",
            help="Each annotation file includes a url with an access token. "
            "Warning, anyone with the url can access the images, even without being a team member.",
        )
        parser_export.add_argument(
            "--version",
            default=None,
            type=str,
            choices=["1.0", "2.0"],
            help="When used for V2 dataset, allows to force generation of either Darwin JSON 1.0 (Legacy) or newer 2.0. "
            "Omit this option to get your team's default.",
        )

        # Releases
        parser_dataset_version = dataset_action.add_parser(
            "releases", help="Available version of a dataset."
        )
        parser_dataset_version.add_argument(
            "dataset", type=str, help="Remote dataset name to list."
        )

        # Pull
        parser_pull = dataset_action.add_parser(
            "pull", help="Download a version of a dataset."
        )
        parser_pull.add_argument(
            "dataset", type=str, help="Remote dataset name to download."
        )
        parser_pull.add_argument(
            "--only-annotations",
            action="store_true",
            help="Download only annotations and no corresponding images.",
        )
        # `store_true` combined with `default=True` means `folders` is always
        # True in the parsed namespace; `--no-folders` is the separate opt-out.
        parser_pull.add_argument(
            "--folders",
            action="store_true",
            default=True,
            help="Recreates image folders.",
        )
        parser_pull.add_argument(
            "--no-folders",
            action="store_true",
            help="Does not recreate image folders.",
        )
        parser_pull.add_argument(
            "--video-frames",
            action="store_true",
            help="Pulls video frame images instead of video files.",
        )
        parser_pull.add_argument(
            "--retry",
            action="store_true",
            default=False,
            help="Repeatedly try to download the release if it is still processing.",
        )
        parser_pull.add_argument(
            "--retry-timeout",
            type=int,
            default=600,
            help="Total time to wait for the release to be ready for download.",
        )
        parser_pull.add_argument(
            "--retry-interval",
            type=int,
            default=10,
            help="Time to wait between retries of checking if the release is ready for download.",
        )
        # --force-slots and --ignore-slots cannot be combined.
        slots_group = parser_pull.add_mutually_exclusive_group()
        slots_group.add_argument(
            "--force-slots",
            action="store_true",
            help="Forces pull of all slots of items into deeper file structure ({prefix}/{item_name}/{slot_name}/{file_name}). "
            + "If your dataset includes items with multiple slots, or multiple source files per slot, this option becomes implicitly enabled.",
        )
        slots_group.add_argument(
            "--ignore-slots",
            action="store_true",
            help="Ignores slots and only pulls the first slot of each item into a flat file structure ({prefix}/{file_name}).",
        )

        # Import
        parser_import = dataset_action.add_parser(
            "import", help="Import data to an existing (remote) dataset."
        )
        parser_import.add_argument(
            "dataset",
            type=str,
            help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.",
        )
        parser_import.add_argument(
            "format", type=str, help="The format of the annotations to import."
        )
        parser_import.add_argument(
            "files",
            type=str,
            nargs="+",
            help="The location of the annotation files, or the folder where the annotation files are.",
        )
        parser_import.add_argument(
            "--append",
            action="store_true",
            help="Append annotations instead of overwriting.",
        )
        parser_import.add_argument(
            "--yes",
            action="store_true",
            help="Skips prompts for creating and adding classes to dataset.",
        )
        parser_import.add_argument(
            "--delete-for-empty",
            action="store_true",
            help="Empty annotations will delete annotations from remote files.",
        )
        parser_import.add_argument(
            "--import-annotators",
            action="store_true",
            help="Import annotators metadata from the annotation files, where available",
        )
        parser_import.add_argument(
            "--import-reviewers",
            action="store_true",
            help="Import reviewers metadata from the annotation files, where available",
        )
        parser_import.add_argument(
            "--overwrite",
            action="store_true",
            help="Bypass warnings about overwriting existing annotations.",
        )

        # Cpu limit for multiprocessing tasks
        def cpu_default_types(value: Any) -> Optional[int]:  # type: ignore
            """
            Converts the raw ``--cpu-limit`` value to an ``int``.

            Returns ``None`` when ``int()`` raises ``TypeError``. A ``ValueError``
            (e.g. a non-numeric string) is not caught here and propagates to
            the caller.
            """
            try:
                return int(value)
            except TypeError:
                return None

        # Both spellings are accepted; the first long option determines the
        # destination attribute (`cpu_limit`).
        parser_import.add_argument(
            "--cpu-limit",
            "--cpu_limit",
            type=cpu_default_types,
            required=False,
            default=1,
            help="Limits amount of cores used on machine to process results, default to single core",
        )

        # Convert
        parser_convert = dataset_action.add_parser(
            "convert", help="Converts darwin json to other annotation formats."
        )
        parser_convert.add_argument(
            "dataset",
            type=str,
            help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.",
        )
        parser_convert.add_argument(
            "format", type=str, help="Annotation format to convert to."
        )
        parser_convert.add_argument(
            "-o", "--output_dir", type=str, help="Where to store output files."
        )

        # Split
        parser_split = dataset_action.add_parser(
            "split",
            help="Splits a local dataset following random and stratified split types.",
        )
        parser_split.add_argument(
            "dataset", type=str, help="Local dataset name to split."
        )
        parser_split.add_argument(
            "-v",
            "--val-percentage",
            required=True,
            type=float,
            help="Validation percentage.",
        )
        parser_split.add_argument(
            "-t",
            "--test-percentage",
            required=True,
            type=float,
            help="Test percentage.",
        )
        parser_split.add_argument(
            "-s", "--seed", type=int, required=False, default=0, help="Split seed."
        )

        # List Files
        parser_files = dataset_action.add_parser(
            "files", help="Lists file in a remote dataset."
        )
        parser_files.add_argument(
            "dataset",
            type=str,
            help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.",
        )
        parser_files.add_argument(
            "--only-filenames", action="store_true", help="Only prints out filenames."
        )
        parser_files.add_argument(
            "--status",
            type=str,
            required=False,
            help="Comma separated list of statuses.",
        )
        parser_files.add_argument(
            "--path",
            type=str,
            required=False,
            help="List only files under PATH. This is useful if your dataset has a directory structure.",
        )
        parser_files.add_argument(
            "--sort-by",
            type=str,
            required=False,
            help="Sort remotely fetched files by the given direction. Defaults to 'updated_at:desc'.",
        )

        # Set file status
        parser_file_status = dataset_action.add_parser(
            "set-file-status", help="Sets the status of one or more files."
        )
        parser_file_status.add_argument(
            "dataset",
            type=str,
            help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.",
        )
        parser_file_status.add_argument("status", type=str, help="Status to change to.")
        parser_file_status.add_argument(
            "files", type=str, nargs="+", help="Files to change status."
        )

        # Delete files
        parser_delete_files = dataset_action.add_parser(
            "delete-files", help="Delete one or more files remotely."
        )
        parser_delete_files.add_argument(
            "dataset",
            type=str,
            help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.",
        )
        parser_delete_files.add_argument(
            "files", type=str, nargs="+", help="Files to delete."
        )
        parser_delete_files.add_argument(
            "-y",
            "--yes",
            default=False,
            action="store_true",
            required=False,
            help="Confirmation flag to delete the file without prompting for manual input.",
        )

        # Add comments
        parser_comment = dataset_action.add_parser("comment", help="Comment image.")
        parser_comment.add_argument(
            "dataset",
            type=str,
            help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'. ",
        )
        parser_comment.add_argument("file", type=str, help="File to comment")
        parser_comment.add_argument(
            "--text", type=str, help="Comment: list of words", required=True
        )
        parser_comment.add_argument(
            "--x",
            required=False,
            type=float,
            default=1,
            help="X coordinate for comment box",
        )
        parser_comment.add_argument(
            "--y",
            required=False,
            type=float,
            default=1,
            help="Y coordinate for comment box",
        )
        # For the two options below the first long option names the
        # destination, so the values are stored as `w` and `h`.
        parser_comment.add_argument(
            "--w",
            "--width",
            required=False,
            type=float,
            default=1,
            help="Comment box width in pixels",
        )
        parser_comment.add_argument(
            "--h",
            "--height",
            required=False,
            type=float,
            default=1,
            help="Comment box height in pixels",
        )

        # Help
        dataset_action.add_parser("help", help="Show this help message and exit.")

        # REPORT
        report = subparsers.add_parser(
            "report",
            help="Report related functions.",
            description="Arguments to interact with reports",
        )
        report_action = report.add_subparsers(dest="action")

        # Annotators
        parser_annotators = report_action.add_parser(
            "annotators", help="Report about the annotators."
        )
        # Comma-separated input is split into a list of stripped strings.
        parser_annotators.add_argument(
            "--datasets",
            default=[],
            type=lambda csv: [value.strip() for value in csv.split(",")],
            help="List of comma-separated dataset slugs to include in the report.",
        )
        parser_annotators.add_argument(
            "--start",
            required=True,
            type=lambda dt: datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S%z"),
            help="Report start DateTime in RFC3339 format (e.g. 2020-01-20T14:00:00Z).",
        )
        parser_annotators.add_argument(
            "--stop",
            required=True,
            type=lambda dt: datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S%z"),
            help="Report end DateTime in RFC3339 format (e.g. 2020-01-20T15:00:00Z).",
        )
        # The help text lists the values of AnnotatorReportGrouping; the list
        # is interpolated directly (no literal "f" prefix in the output).
        parser_annotators.add_argument(
            "--group-by",
            required=True,
            type=lambda csv: [value.strip() for value in csv.split(",")],
            help=f"Non-empty list of comma-separated grouping options for the report, any of: {[name.value for name in AnnotatorReportGrouping]}.",
        )
        parser_annotators.add_argument(
            "-r",
            "--pretty",
            action="store_true",
            default=False,
            help="Prints the results formatted in a rich table.",
        )

        # VERSION
        subparsers.add_parser(
            "version", help="Check current version of the repository. "
        )

        # EXTRACTION
        parser_extract = subparsers.add_parser(
            "extract", help="Extract and process media files"
        )
        extract_subparsers = parser_extract.add_subparsers(dest="extract_type")

        # Video artifacts
        parser_video = extract_subparsers.add_parser(
            "video-artifacts",
            help="Extract video artifacts for read-only registration in the Darwin platform",
            description="Process video files to generate streaming artifacts including HLS segments, "
            "thumbnails, frame extracts, and manifest files required for video playback "
            "in the V7 Darwin platform.",
        )
        parser_video.add_argument(
            "source_file",
            type=str,
            help="Path to input video file",
        )
        parser_video.add_argument(
            "-p",
            "--storage-key-prefix",
            type=str,
            required=True,
            help="Storage key prefix for generated files",
        )
        parser_video.add_argument(
            "-o",
            "--output-dir",
            type=str,
            required=True,
            help="Output directory for artifacts",
        )
        parser_video.add_argument(
            "-f",
            "--fps",
            type=float,
            default=0.0,
            help="Desired output FPS (0.0 for native)",
        )
        parser_video.add_argument(
            "-s",
            "--segment-length",
            type=int,
            default=2,
            help="Length of each segment in seconds",
        )
        parser_video.add_argument(
            "--repair",
            action="store_true",
            help="Checks video for errors and attempts to repair them",
        )

        # Must run after every argument is registered so completion sees the
        # whole command tree.
        argcomplete.autocomplete(self.parser)

    def parse_args(self) -> Tuple[Namespace, ArgumentParser]:
        """
        Parses and validates the CLI options.

        If no top-level command was given, prints the parser's help message
        and exits the process via ``sys.exit()``.

        Returns
        -------
        Tuple[Namespace, ArgumentParser]
            The tuple with the namespace and parser to use.
        """
        args = self.parser.parse_args()

        # An empty/None `command` means the user invoked the tool without a
        # sub-command: show usage instead of returning an unusable namespace.
        if not args.command:
            self.parser.print_help()
            sys.exit()

        return args, self.parser