# Source code for vocalpy.segmenter
"""Class that represents the segmenting step in a pipeline."""
from __future__ import annotations
import collections.abc
import inspect
from typing import TYPE_CHECKING, Callable, Mapping
import dask
import dask.diagnostics
from .audio_file import AudioFile
from .params import Params
from .sound import Sound
from .spectrogram_maker import validate_sound
if TYPE_CHECKING:
from .segments import Segments
# Keyword arguments that :class:`Segmenter` falls back to when it is
# constructed with neither a ``callback`` nor ``params``; in that case
# they are passed to the default callback, ``vocalpy.segment.meansquared``.
DEFAULT_SEGMENT_PARAMS = dict(
    threshold=5000,
    min_dur=0.02,
    min_silent_dur=0.002,
)
[docs]
class Segmenter:
    """Class that represents the segmenting step in a pipeline.

    Attributes
    ----------
    callback : callable, optional
        The function or :class:`Callable` class instance
        that is used to segment.
        If not specified, defaults to
        :func:`vocalpy.segment.meansquared`.
    params : Mapping or Params, optional.
        Parameters passed to ``callback``.
        A :class:`Mapping` of keyword arguments,
        or one of the :class:`Params` classes that
        represents parameters, e.g.,
        :class:`vocalpy.segment.MeanSquaredParams`.
        If not specified, defaults to
        :const:`vocalpy.segmenter.DEFAULT_SEGMENT_PARAMS`.
    """

    def __init__(
        self,
        callback: Callable | None = None,
        params: Mapping | Params | None = None,
    ):
        """Initialize a new :class:`vocalpy.Segmenter` instance.

        Parameters
        ----------
        callback : callable, optional
            The function or :class:`Callable` class instance
            that is used to segment.
            If not specified, defaults to
            :func:`vocalpy.segment.meansquared`.
        params : Mapping or Params, optional.
            Parameters passed to ``callback``.
            A :class:`Mapping` of keyword arguments,
            or one of the :class:`Params` classes that
            represents parameters, e.g.,
            :class:`vocalpy.segment.MeanSquaredParams`.
            If not specified and ``callback`` is also not specified,
            defaults to a copy of
            :data:`vocalpy.segmenter.DEFAULT_SEGMENT_PARAMS`.
            If not specified but ``callback`` is, defaults to
            the default values declared in the signature of ``callback``.

        Raises
        ------
        ValueError
            If ``callback`` is not callable, or if ``params`` contains
            names that are not parameters of ``callback``.
        TypeError
            If ``params`` is neither a :class:`Mapping` nor a :class:`Params`.
        """
        if callback is None:
            # NOTE(review): imported locally rather than at module level,
            # presumably to avoid a circular import -- confirm.
            from vocalpy.segment import meansquared

            callback = meansquared
            # if callback was None and we use the default,
            # **and** params is None, we set these default params
            if params is None:
                # copy, so that mutating ``self.params`` on one instance
                # can never change the module-level defaults
                params = dict(DEFAULT_SEGMENT_PARAMS)
        else:
            # validate *before* introspecting: ``inspect.signature`` raises
            # its own TypeError when handed something that is not callable
            if not callable(callback):
                raise ValueError(
                    f"`callback` should be callable, but `callable({callback})` returns False"
                )
            # if we *don't* use the default callback **and** params is None,
            # then we instead get the defaults for the specified callback
            if params is None:
                params = {
                    name: param.default
                    for name, param in inspect.signature(callback).parameters.items()
                    if param.default is not inspect.Parameter.empty
                }

        if not isinstance(params, (collections.abc.Mapping, Params)):
            raise TypeError(
                f"`params` should be a `Mapping` or `Params` but type was: {type(params)}"
            )
        if isinstance(params, Params):
            # coerce to dict
            params = {**params}

        # every key in params must be a parameter the callback declares
        signature = inspect.signature(callback)
        invalid_params = [
            param for param in params if param not in signature.parameters
        ]
        if invalid_params:
            raise ValueError(
                f"Invalid params for callback: {invalid_params}\n"
                f"Callback parameters are: {signature.parameters}"
            )

        self.callback = callback
        self.params = params

    def __repr__(self):
        """Return a string showing the callback and params of this instance."""
        # instances of callable classes and ``functools.partial`` objects
        # have no ``__qualname__``; fall back to their own repr
        callback_name = getattr(self.callback, "__qualname__", None)
        if callback_name is None:
            callback_name = repr(self.callback)
        return f"Segmenter(callback={callback_name}, params={self.params})"

    def segment(
        self,
        sound: Sound | AudioFile | list[Sound | AudioFile],
        parallelize: bool = True,
    ) -> Segments | list[Segments]:
        """Segment sound.

        Parameters
        ----------
        sound : vocalpy.Sound, vocalpy.AudioFile, or list
            A :class:`vocalpy.Sound` or :class:`vocalpy.AudioFile` instance,
            or a list of such instances, to segment.
            Any :class:`vocalpy.AudioFile` is first loaded
            with :meth:`vocalpy.Sound.read` using its ``path``.
        parallelize : bool
            If True, parallelize segmentation using :mod:`dask`.
            Only has an effect when a list is passed in.

        Returns
        -------
        segments : vocalpy.Segments, list
            If a single :class:`~vocalpy.Sound` or :class:`~vocalpy.AudioFile`
            is passed in, the result of calling ``callback`` on it is returned,
            i.e., a single set of :class:`~vocalpy.Segments`.
            If a list is passed in,
            a list of :class:`~vocalpy.Segments` will be returned.
        """
        validate_sound(sound)

        # define nested function so vars are in scope and ``dask`` can call it
        def _to_segments(sound_: Sound | AudioFile) -> Segments:
            if isinstance(sound_, AudioFile):
                sound_ = Sound.read(sound_.path)
            return self.callback(sound_, **self.params)

        if isinstance(sound, (Sound, AudioFile)):
            return _to_segments(sound)

        if not parallelize:
            return [_to_segments(sound_) for sound_ in sound]

        # build one delayed task per sound, wrap the list of tasks in a
        # single delayed object, then compute them all with a progress bar
        graph = dask.delayed(
            [dask.delayed(_to_segments)(sound_) for sound_ in sound]
        )
        with dask.diagnostics.ProgressBar():
            return graph.compute()