# Source code for vocalpy.examples._examples

"""Functions for working with example data."""

from __future__ import annotations

import importlib.resources
import json
import os
import pathlib
import shutil
from enum import Enum
from typing import TYPE_CHECKING, Union

import pooch
import requests.exceptions
from attr import define

from .example_data import ExampleData

# Everything in this branch exists only for static type checkers:
# ``vocalpy`` is imported here, and not at module level,
# to avoid a circular import at runtime.
if TYPE_CHECKING:
    import vocalpy

    # Union of the types that loading an example can produce.
    # This name is not defined at runtime; it may only be used inside
    # annotations, which ``from __future__ import annotations`` keeps lazy.
    ExampleType = Union[
        pathlib.Path,
        vocalpy.Sound,
        vocalpy.Spectrogram,
        vocalpy.Annotation,
        ExampleData,
    ]


# ---- all `makefunc`s that return ExampleData for larger example datasets go here
def bfsongrepo_makefunc(
    path: pathlib.Path | list[pathlib.Path],
    return_path: bool = False,
    annot_format: str = "simple-seq",
) -> ExampleData:
    """Make example data from a set of paths to ``.wav`` and ``.csv`` files.

    Parameters
    ----------
    path : list of pathlib.Path
        Paths to the files that make up the example.
        Only paths whose suffix is ``.wav`` or ``.csv`` are used.
    return_path : bool
        If True, the returned :class:`ExampleData` holds the paths
        themselves, instead of the data loaded from them.
        Default is False.
    annot_format : str
        Passed as the ``format`` argument to
        :meth:`vocalpy.Annotation.read` for each ``.csv`` file.
        Default is ``"simple-seq"``.

    Returns
    -------
    example_data : ExampleData
        With attributes ``sound`` and ``annotation``,
        each of which is a list.
    """
    import vocalpy  # avoid circular import

    def _with_suffix(suffix: str) -> list[pathlib.Path]:
        # keeps the order of ``path``
        return [candidate for candidate in path if candidate.suffix == suffix]

    wav_paths = _with_suffix(".wav")
    csv_paths = _with_suffix(".csv")

    if return_path:
        return ExampleData(sound=wav_paths, annotation=csv_paths)

    sounds = [vocalpy.Sound.read(wav_path) for wav_path in wav_paths]
    annots = [
        vocalpy.Annotation.read(csv_path, format=annot_format)
        for csv_path in csv_paths
    ]
    return ExampleData(sound=sounds, annotation=annots)


def jourjine_et_al_2023_makefunc(
    path: pathlib.Path | list[pathlib.Path], return_path: bool = False
) -> ExampleData:
    """Make ``'jourjine-et-al-2023'`` example data.

    Parameters
    ----------
    path : list of pathlib.Path
        Paths to the files that make up the example.
        The first path with the suffix ``.wav`` and the first path
        with the suffix ``.csv`` are used.
    return_path : bool
        If True, the returned :class:`ExampleData` holds the two paths,
        instead of the data loaded from them. Default is False.

    Returns
    -------
    example_data : ExampleData
        With attributes ``sound`` and ``segments``.
    """
    import vocalpy  # avoid circular import

    # indexing with [0] raises IndexError when no file has the suffix
    wav_path = [item for item in path if item.suffix == ".wav"][0]
    csv_path = [item for item in path if item.suffix == ".csv"][0]

    if return_path:
        return ExampleData(sound=wav_path, segments=csv_path)

    sound = vocalpy.Sound.read(wav_path)
    # the sound is read first, since its sampling rate is needed here
    segments = vocalpy.Segments.from_csv(
        csv_path,
        samplerate=sound.samplerate,
        columns_map={
            "start_seconds": "start_s",
            "stop_seconds": "stop_s",
        },
    )
    return ExampleData(sound=sound, segments=segments)


def zblib_makefunc(
    path: pathlib.Path | list[pathlib.Path], return_path: bool = False
) -> ExampleData:
    """Make example data from a set of paths to ``.wav`` files.

    Parameters
    ----------
    path : list of pathlib.Path
        Paths to the files that make up the example.
        Only paths whose suffix is ``.wav`` are used.
    return_path : bool
        If True, the returned :class:`ExampleData` holds the paths
        themselves, instead of the sounds read from them.
        Default is False.

    Returns
    -------
    example_data : ExampleData
        With the attribute ``sound``, a list.
    """
    import vocalpy  # avoid circular import

    wav_paths = [candidate for candidate in path if candidate.suffix == ".wav"]
    if return_path:
        return ExampleData(sound=wav_paths)

    sounds = [vocalpy.Sound.read(wav_path) for wav_path in wav_paths]
    return ExampleData(sound=sounds)


# Every ``makefunc`` defined above must be listed here,
# so that it can be looked up by name.
MAKEFUNCS = [
    bfsongrepo_makefunc,
    jourjine_et_al_2023_makefunc,
    zblib_makefunc,
]

# Maps the ``__name__`` of each function in ``MAKEFUNCS`` to the function.
MAKEFUNCS_MAP = dict((func.__name__, func) for func in MAKEFUNCS)

# The kinds of example data. Members are numbered from 1, in the order given.
# The first argument is the name given to the generated class; it has to
# match the variable that the class is bound to, so that the repr is
# accurate and members can be pickled by reference.
ExampleTypes = Enum(
    "ExampleTypes", ["Sound", "Spectrogram", "Annotation", "ExampleData"]
)


@define
class Example:
    """Class that represents example data.

    Attributes
    ----------
    name : str
        Human-readable name of example data
    description : str
        Description of example data,
        including any relevant citations.
    type : ExampleTypes
        Type of data.
        A :class:`Enum` member that is used
        in the :meth:`Example.load` method
        to determine how to load the data.
    requires_download: bool
        If ``True``, this example data requires a download.
        The :meth:`Example.load` method will call :mod:`pooch`.
    filename : str
        For examples that are a single file,
        this is the name of the file.
        For examples that are multiple files,
        this is the name of the archive
        downloaded from Zenodo with :mod:`pooch`.
    path : pathlib.Path, optional
        For examples that are a single file,
        this is the path to the file.
    makefunc : callable, optional
        For examples that are multiple files,
        this is a function that returns an
        :class:`ExampleData` instance
        with attributes containing the
        multiple files.
    makefunc_kwargs : dict, optional
        A :class:`dict` of keyword arguments
        to pass into :attr:`Example.makefunc`.
    annot_format : str, optional
        For examples of type ``ExampleTypes.Annotation``,
        the ``format`` argument passed to
        :meth:`vocalpy.Annotation.read` by :meth:`Example.load`.

    Notes
    -----
    This dataclass is used to load metadata from
    `vocalpy/examples/example-metadata.json`.
    """

    name: str
    description: str
    type: ExampleTypes
    requires_download: bool
    filename: str | None
    path: pathlib.Path | None = None
    makefunc: callable | None = None
    makefunc_kwargs: dict | None = None
    annot_format: str | None = None

    @classmethod
    def from_metadata(
        cls,
        description_filename: str,
        example_type: str,
        name: str | None = None,
        filename: str | None = None,
        requires_download: bool = False,
        makefunc_name: str | None = None,
        makefunc_kwargs: dict | None = None,
        annot_format: str | None = None,
    ):
        """Create a :class:`Example` instance from metadata.

        Parameters
        ----------
        description_filename: str
            Name of text file that contains description of
            example data, including any relevant citations.
            The file is read from the ``vocalpy.examples`` package.
        example_type: str
            String name of example type,
            that should match one member of the :class:`Enum`
            ``ExampleTypes``.
            The :meth:`Example.load` method uses this
            to determine how to load the example.
        name: str, optional
            A human-readable name for the example.
            If None, defaults to filename.
        filename : str, optional
            For examples that are a single file,
            the name of the file.
        requires_download : bool
            If True, the example data requires a download.
            Default is False.
        makefunc_name : string, optional
            For examples that are multiple files,
            the name of the function that
            returns an instance of :class:`ExampleData`
            with attributes that contain data loaded
            from the files.
        makefunc_kwargs : dict, optional
            A :class:`dict` of keyword arguments to
            pass into the ``makefunc``.
            Optional, default is None.
        annot_format : str, optional
            For examples of type ``Annotation``,
            the annotation format used to read the file.
            Optional, default is None.

        Returns
        -------
        example : Example
            Instance of :class:`Example` dataclass

        Raises
        ------
        ValueError
            If both ``name`` and ``filename`` are None.
        KeyError
            If ``example_type`` is not the name of a member of
            ``ExampleTypes``, or if ``makefunc_name`` is not the
            name of a known ``makefunc``.
        """
        if filename is None and name is None:
            raise ValueError(
                "`name` and `filename` for example can't both be None"
            )

        if name is None:
            name = filename

        package_files = importlib.resources.files("vocalpy.examples")

        # Read explicitly as UTF-8, so that the result does not
        # depend on the locale of the machine.
        description = package_files.joinpath(description_filename).read_text(
            encoding="utf-8"
        )

        type_ = ExampleTypes[example_type]

        # ``path`` must be bound even when there is no filename;
        # otherwise the call to ``cls`` below raises UnboundLocalError.
        path = package_files.joinpath(filename) if filename else None

        makefunc = (
            MAKEFUNCS_MAP[makefunc_name] if makefunc_name is not None else None
        )

        return cls(
            name=name,
            description=description,
            type=type_,
            requires_download=requires_download,
            filename=filename,
            path=path,
            makefunc=makefunc,
            makefunc_kwargs=makefunc_kwargs,
            annot_format=annot_format,
        )

    def __attrs_post_init__(self):
        """Validate ``name`` and ``type`` after the instance is initialized."""
        if self.name is None:
            raise ValueError("`name` can't be None")

        if not isinstance(self.type, ExampleTypes):
            raise ValueError(
                f"example type '{self.type}' is not one of the ExampleTypes: {ExampleTypes}"
            )

    def load(self, return_path: bool):
        """Load this example, or get the path(s) to its file(s).

        Parameters
        ----------
        return_path : bool
            If True, return the path(s) instead of the loaded data.

        Returns
        -------
        example
            For examples of type ``ExampleData``, whatever
            :attr:`Example.makefunc` returns.
            Otherwise the path, if ``return_path`` is True,
            or else the result of calling the ``read`` method of
            :class:`vocalpy.Sound`, :class:`vocalpy.Spectrogram`,
            or :class:`vocalpy.Annotation` with the path.

        Raises
        ------
        ConnectionError
            If the example requires a download and the registry
            cannot be loaded because of a connection error.
        ValueError
            If an attribute needed to load this example is None:
            ``filename`` for a download, ``makefunc`` for the type
            ``ExampleData``, ``annot_format`` for the type ``Annotation``.
        """
        import vocalpy  # avoid circular import

        if self.requires_download:
            if self.filename is None:
                raise ValueError(
                    f"Example '{self.name}' requires a download, "
                    "but its `filename` is None"
                )
            try:
                POOCH.load_registry_from_doi()
            except requests.exceptions.ConnectionError as e:
                raise ConnectionError(
                    "Unable to connect to registry to download example dataset. "
                    "This may be due to an issue with an internet connection."
                ) from e
            # archives are unpacked after they are fetched
            if self.filename.endswith(".tar.gz"):
                path = POOCH.fetch(self.filename, processor=pooch.Untar())
            elif self.filename.endswith(".zip"):
                path = POOCH.fetch(self.filename, processor=pooch.Unzip())
            else:
                path = POOCH.fetch(self.filename)
            if isinstance(path, list):
                path = [pathlib.Path(path_) for path_ in path]
            else:
                path = pathlib.Path(path)
        else:
            path = self.path

        if isinstance(path, list):
            # enforce consistent sorting across platforms
            path = sorted(path)

        if self.type is ExampleTypes.ExampleData:
            if self.makefunc is None:
                raise ValueError(
                    f"Example '{self.name}' has type `ExampleData`, "
                    "but its `makefunc` is None"
                )
            # ``makefunc_kwargs`` is optional; forward it when it is set
            extra_kwargs = self.makefunc_kwargs or {}
            return self.makefunc(path, return_path=return_path, **extra_kwargs)

        if return_path:
            return path

        if self.type is ExampleTypes.Sound:
            return vocalpy.Sound.read(path)
        if self.type is ExampleTypes.Spectrogram:
            return vocalpy.Spectrogram.read(path)
        if self.type is ExampleTypes.Annotation:
            if self.annot_format is None:
                raise ValueError(
                    f"Example '{self.name}' has type `Annotation`, "
                    "but its `annot_format` is None"
                )
            return vocalpy.Annotation.read(path, format=self.annot_format)

        # Only reachable if ``type`` was reassigned after validation.
        raise ValueError(
            f"example type '{self.type}' is not one of the ExampleTypes: {ExampleTypes}"
        )


# ---- metadata for all examples -----------------
EXAMPLE_METADATA_JSON_PATH = pathlib.Path(
    importlib.resources.files("vocalpy.examples").joinpath(
        "example-metadata.json"
    )
)
# Opened explicitly as UTF-8, so that parsing the JSON
# does not depend on the locale of the machine.
with EXAMPLE_METADATA_JSON_PATH.open("r", encoding="utf-8") as fp:
    ALL_EXAMPLE_METADATA = json.load(fp)

# One ``Example`` per entry in the JSON file; each entry is passed
# as keyword arguments to ``Example.from_metadata``.
EXAMPLES = [
    Example.from_metadata(**example_metadata)
    for example_metadata in ALL_EXAMPLE_METADATA
]

# Maps the name of each example to the ``Example`` instance.
REGISTRY = {example_.name: example_ for example_ in EXAMPLES}

# ---- pooch set-up ------------------------------
# Name of the environment variable that overrides where data is cached.
VOCALPY_DATA_DIR = "VOCALPY_DATA_DIR"

ZENODO_DATASET_BASE_URL = "doi:10.5281/zenodo.10685639"

# ``registry`` is None here; ``Example.load`` fills it in
# by calling ``POOCH.load_registry_from_doi``.
POOCH = pooch.create(
    path=pooch.os_cache("vocalpy"),
    base_url=ZENODO_DATASET_BASE_URL,
    registry=None,
    env=VOCALPY_DATA_DIR,
)


def get_cache_dir() -> pathlib.Path:
    """Return path to directory where example data is cached.

    Returns
    -------
    cache_dir : pathlib.Path
        The directory named by the environment variable
        ``VOCALPY_DATA_DIR``, if that variable is set to a
        non-empty value; otherwise ``POOCH.abspath``.
    """
    env_dir = os.environ.get(VOCALPY_DATA_DIR)
    # An empty value is treated as unset: ``pathlib.Path("")`` is the
    # current directory, which must never be reported as the cache.
    if env_dir:
        # Always return a Path, as annotated, and expand a leading "~".
        return pathlib.Path(env_dir).expanduser()
    return POOCH.abspath
def clear_cache() -> None:
    """Clear the cache of example data.

    Removes the directory returned by :func:`get_cache_dir`,
    along with everything inside it.
    """
    shutil.rmtree(get_cache_dir())
def example(name: str, return_path: bool = False) -> ExampleType:
    """Get an example from :mod:`vocalpy.examples`.

    To see all available example data,
    call :func:`vocalpy.examples.show`.

    Parameters
    ----------
    name : str
        Name of the example.
        To see names of examples and the associated metadata,
        call :func:`vocalpy.examples.show`.
    return_path : bool
        If True, return the path to the example data.
        Default is False.

    Returns
    -------
    example : vocalpy.Sound, vocalpy.Spectrogram, vocalpy.Annotation, ExampleData, pathlib.Path
        By default, the example data read into an instance of the
        corresponding VocalPy data type, e.g., a :class:`vocalpy.Sound`.
        Examples that consist of multiple files are returned as an
        :class:`ExampleData` instance.
        If ``return_path`` is True, the path to the example data is
        returned instead; for examples that consist of multiple files,
        this is an :class:`ExampleData` instance that holds the paths.

    Raises
    ------
    ValueError
        If there is no example data with the name ``name``.

    Examples
    --------
    >>> sound = voc.example('bells.wav')
    >>> spect = voc.spectrogram(sound)
    >>> voc.plot.spect(spect)

    If you want the path(s) to where the data is on your local machine,
    set `return_path` to `True`.
    This is useful to demonstrate :mod:`vocalpy` functionality
    that loads files, or to work with the data in the files
    in some other way.

    >>> sound_path = voc.example('bells.wav', return_path=True)
    >>> sound = voc.Sound.read(sound_path)

    Notes
    -----
    By default, local files will be cached in the directory
    given by :func:`get_cache_dir`.
    You can override this by setting an environment variable
    ``VOCALPY_DATA_DIR`` prior to importing VocalPy:

    >>> import os
    >>> os.environ['VOCALPY_DATA_DIR'] = '/path/to/store/data'
    >>> import vocalpy as voc

    See Also
    --------
    vocalpy.examples.show
    """
    # every value in the registry is an ``Example``, so None means "missing"
    found = REGISTRY.get(name)
    if found is None:
        raise ValueError(
            f"No example data found with name: {name}. "
            "To see the names of all example data, call `vocalpy.examples.show()`"
        )
    return found.load(return_path=return_path)
def show() -> None:
    """Show example data in :mod:`vocalpy`.

    Prints a header, and then for each example
    its ``name`` and its ``description``,
    where ``name`` is the string you would pass to
    :func:`vocalpy.example` to retrieve the example,
    and ``description`` is a string containing metadata:
    the source of the data, and any relevant citation.

    See Also
    --------
    vocalpy.example

    Notes
    -----
    To retrieve an example, call :func:`vocalpy.example`.
    """
    rule = "=" * 72
    print("VocalPy example data")
    print(rule)
    # the loop variable is not named ``example``, so that it does not
    # shadow the function of the same name in this module
    for entry in EXAMPLES:
        print(f"name: {entry.name}\ndescription:\n{entry.description}\n")