Source code for vocalpy.feature_extractor
"""Class that represents the step in a pipeline that extracts features."""
from __future__ import annotations
import collections.abc
import inspect
from typing import TYPE_CHECKING, Mapping
import dask
import dask.diagnostics
if TYPE_CHECKING:
from . import Features, Params, Sound
[docs]
class FeatureExtractor:
    """Class that represents the step in a pipeline
    that extracts features.

    Attributes
    ----------
    callback : Callable
        Callable that takes a :class:`Sound` or :class:`Segment`
        and returns :class:`Features`.
    params : dict
        Parameters for extracting :class:`Features`.
        Passed as keyword arguments to ``callback``.
    """

    def __init__(
        self,
        callback: collections.abc.Callable,
        params: Mapping | Params | None = None,
    ):
        """Initialize a new :class:`FeatureExtractor`.

        Parameters
        ----------
        callback : Callable
            Called as ``callback(sound, **params)`` by :meth:`extract`.
        params : Mapping, Params, or None
            Keyword arguments for ``callback``. If None (the default),
            every parameter of ``callback`` that declares a default value
            is used, with that default.

        Raises
        ------
        ValueError
            If ``callback`` is not callable, or if ``params`` contains
            names that are not parameters of ``callback``.
        TypeError
            If ``params`` is not None, a ``Mapping``, or a ``Params``.
        """
        if not callable(callback):
            raise ValueError(
                f"`callback` should be callable, but `callable({callback})` returns False"
            )
        self.callback = callback

        if params is not None and not isinstance(
            params, collections.abc.Mapping
        ):
            # Imported here rather than at module level to avoid a circular
            # import; only needed when ``params`` is not a plain mapping.
            from . import Params

            if not isinstance(params, Params):
                raise TypeError(
                    f"`params` should be a `Mapping` or `Params` but type was: {type(params)}"
                )

        signature = inspect.signature(callback)

        if params is None:
            # Fall back on the defaults declared in the callback's signature.
            params = {
                name: param.default
                for name, param in signature.parameters.items()
                if param.default is not inspect.Parameter.empty
            }
        else:
            # Coerce to a new dict so that later changes to the caller's
            # object cannot silently change this extractor.
            params = {**params}
            invalid_params = [
                name for name in params if name not in signature.parameters
            ]
            if invalid_params:
                raise ValueError(
                    f"Invalid params for callback: {invalid_params}\n"
                    f"Callback parameters are: {signature.parameters}"
                )

        self.params = params

    def __repr__(self):
        # Not every callable has ``__qualname__`` (e.g. ``functools.partial``
        # objects or instances defining ``__call__``), so fall back on the
        # callable's own repr instead of raising ``AttributeError``.
        callback_name = getattr(self.callback, "__qualname__", None)
        if callback_name is None:
            callback_name = repr(self.callback)
        return f"FeatureExtractor(callback={callback_name}, params={self.params})"

    def extract(
        self, sound: Sound | list[Sound], parallelize: bool = True
    ) -> Features | list[Features]:
        """Extract features by calling ``self.callback`` with ``self.params``.

        Parameters
        ----------
        sound : Sound or list of Sound
            A single :class:`Sound` or a ``list`` of them.
        parallelize : bool
            Only used when ``sound`` is a ``list``. If True (the default),
            each call is wrapped with ``dask.delayed`` and the results are
            computed together, inside a ``dask`` progress bar.
            If False, the callback is called on each item in a plain loop.

        Returns
        -------
        features : Features or list of Features
            The return value of ``callback`` for a single :class:`Sound`,
            or a list of return values, one per item, for a ``list``.

        Raises
        ------
        TypeError
            If ``sound`` is neither a :class:`Sound` nor a ``list``,
            or if it is a ``list`` with items that are not :class:`Sound`.
        """
        from . import Features, Sound

        if not isinstance(sound, (list, Sound)):
            raise TypeError(
                "`sound` must be an instance of a `Sound` "
                f"or a `list` of `Sound` instances, "
                f"but type was: {type(sound)}"
            )

        if isinstance(sound, list) and not all(
            isinstance(sound_, Sound) for sound_ in sound
        ):
            types = {type(el) for el in sound}
            raise TypeError(
                "A `list` passed to `FeatureExtractor.extract` must be all `Sound` instances, "
                f"but found the following types: {types}"
            )

        # define nested function so vars are in scope and ``dask`` can call it
        def _to_features(sound_: Sound) -> Features:
            return self.callback(sound_, **self.params)

        if isinstance(sound, Sound):
            return _to_features(sound)

        # From here on ``sound`` is a list that has already been validated.
        if not parallelize:
            return [_to_features(sound_) for sound_ in sound]

        graph = dask.delayed(
            [dask.delayed(_to_features)(sound_) for sound_ in sound]
        )
        with dask.diagnostics.ProgressBar():
            return graph.compute()