# Source code for vocalpy.segmenter

"""Class that represents the segmenting step in a pipeline."""

from __future__ import annotations

import collections.abc
import inspect
from typing import TYPE_CHECKING, Callable, Mapping

import dask
import dask.diagnostics

from .audio_file import AudioFile
from .params import Params
from .sound import Sound
from .spectrogram_maker import validate_sound

if TYPE_CHECKING:
    from .segments import Segments


# Keyword arguments that ``Segmenter`` falls back on when it is created
# with neither a ``callback`` nor ``params``, i.e., when the default
# callback :func:`vocalpy.segment.meansquared` is used.
DEFAULT_SEGMENT_PARAMS = dict(
    threshold=5000,
    min_dur=0.02,
    min_silent_dur=0.002,
)


class Segmenter:
    """Class that represents the segmenting step in a pipeline.

    Attributes
    ----------
    callback : callable
        The function or :class:`Callable` class instance
        that is used to segment.
        If not specified, defaults to
        :func:`vocalpy.segment.meansquared`.
    params : dict
        Keyword arguments passed to ``callback``.
        Always stored as a new :class:`dict`, built from the
        :class:`Mapping` or :class:`Params` instance given at
        initialization, e.g., a
        :class:`vocalpy.segment.MeanSquaredParams`.
        If neither ``callback`` nor ``params`` is specified, a copy of
        :const:`vocalpy.segmenter.DEFAULT_SEGMENT_PARAMS`.
        If only ``callback`` is specified, the default values
        declared in the signature of ``callback``.
    """

    def __init__(
        self,
        callback: Callable | None = None,
        params: Mapping | Params | None = None,
    ):
        """Initialize a new :class:`vocalpy.Segmenter` instance.

        Parameters
        ----------
        callback : callable, optional
            The function or :class:`Callable` class instance
            that is used to segment.
            If not specified, defaults to
            :func:`vocalpy.segment.meansquared`.
        params : Mapping or Params, optional
            Parameters passed to ``callback``.
            A :class:`Mapping` of keyword arguments,
            or one of the :class:`Params` classes that represents
            parameters, e.g.,
            :class:`vocalpy.segment.MeanSquaredParams`.
            If not specified and ``callback`` is not specified either,
            defaults to
            :data:`vocalpy.segmenter.DEFAULT_SEGMENT_PARAMS`.
            If not specified but ``callback`` is, defaults to the
            default values found in the signature of ``callback``.

        Raises
        ------
        ValueError
            If ``callback`` is not callable, or if ``params`` has keys
            that are not parameters of ``callback``.
        TypeError
            If ``params`` is neither a :class:`Mapping`
            nor a :class:`Params` instance.
        """
        if callback is None:
            from vocalpy.segment import meansquared

            callback = meansquared
            # if callback was None and we use the default,
            # **and** params is None, we set these default params
            if params is None:
                params = DEFAULT_SEGMENT_PARAMS
        elif not callable(callback):
            # checked *before* any call to ``inspect.signature``,
            # which raises its own TypeError for non-callables
            raise ValueError(
                f"`callback` should be callable, but `callable({callback})` returns False"
            )
        elif params is None:
            # if we *don't* use the default callback **and** params is None,
            # then we instead get the defaults for the specified callback
            params = {
                name: param.default
                for name, param in inspect.signature(callback).parameters.items()
                if param.default is not inspect.Parameter.empty
            }
        self.callback = callback

        # Always store a fresh dict, so that this instance never shares
        # state with the module-level defaults or the caller's mapping
        if isinstance(params, collections.abc.Mapping):
            params = dict(params)
        elif isinstance(params, Params):
            # coerce to dict
            params = {**params}
        else:
            raise TypeError(
                f"`params` should be a `Mapping` or `Params` but type was: {type(params)}"
            )

        signature = inspect.signature(callback)
        invalid_params = [
            param for param in params if param not in signature.parameters
        ]
        if invalid_params:
            raise ValueError(
                f"Invalid params for callback: {invalid_params}\n"
                f"Callback parameters are: {signature.parameters}"
            )
        self.params = params

    def __repr__(self):
        # Callable class instances and ``functools.partial`` objects
        # have no ``__qualname__``, so fall back to their own repr
        callback_name = getattr(self.callback, "__qualname__", None)
        if callback_name is None:
            callback_name = repr(self.callback)
        return f"Segmenter(callback={callback_name}, params={self.params})"

    def segment(
        self,
        sound: Sound | AudioFile | list[Sound | AudioFile],
        parallelize: bool = True,
    ) -> Segments | list[Segments]:
        """Segment sound.

        Parameters
        ----------
        sound : vocalpy.Sound, vocalpy.AudioFile, or list
            A :class:`vocalpy.Sound` or :class:`vocalpy.AudioFile`
            instance, or a list of such instances, to segment.
            Any :class:`vocalpy.AudioFile` is first loaded with
            :meth:`vocalpy.Sound.read`.
        parallelize : bool
            If True, parallelize segmentation using :mod:`dask`.
            Only has an effect when ``sound`` is a list.

        Returns
        -------
        segments : vocalpy.Segments, list
            If a single :class:`~vocalpy.Sound` or
            :class:`~vocalpy.AudioFile` is passed in,
            the result of calling ``callback`` on it is returned.
            If a list is passed in, a list with one result
            per item is returned.
        """
        validate_sound(sound)

        # define nested function so vars are in scope and ``dask`` can call it
        def _to_segments(sound_: Sound | AudioFile) -> Segments:
            if isinstance(sound_, AudioFile):
                sound_ = Sound.read(sound_.path)
            return self.callback(sound_, **self.params)

        if isinstance(sound, (Sound, AudioFile)):
            return _to_segments(sound)

        if not parallelize:
            return [_to_segments(sound_) for sound_ in sound]

        delayed_to_segments = dask.delayed(_to_segments)
        # wrapping the list of delayed calls gives a single graph
        # whose ``compute`` returns the list of results
        graph = dask.delayed([delayed_to_segments(sound_) for sound_ in sound])
        with dask.diagnostics.ProgressBar():
            return graph.compute()