# Source code for vocalpy.sound

"""Class that represents a sound."""

from __future__ import annotations

import pathlib
import reprlib
import warnings
from typing import TYPE_CHECKING

import numpy as np
import numpy.typing as npt
import soundfile

from ._vendor import evfuncs
from .audio_file import AudioFile

if TYPE_CHECKING:
    from .segments import Segments


class Sound:
    """Class that represents a sound.

    Attributes
    ----------
    data : numpy.ndarray
        The audio signal as a :class:`numpy.ndarray`,
        where the dimensions are (channels, samples).
    samplerate : int
        The sampling rate the audio signal was acquired at, in Hertz.
    channels : int
        The number of channels in the audio signal.
        Determined from the first dimension of ``data``.
    samples : int
        The number of samples in the audio signal.
        Determined from the last dimension of ``data``.
    duration : float
        Duration of the sound in seconds.
        Determined from the last dimension of ``data``
        and the ``samplerate``.

    Examples
    --------
    A :class:`~vocalpy.Sound` is read from a file.

    >>> sound_path = voc.example("bl26lb16.wav", return_path=True)
    >>> sound = voc.Sound.read(sound_path)
    >>> sound
    vocalpy.Sound(data=array([[-0.00... 0.00912476]]), samplerate=32000)

    The :class:`~vocalpy.Sound` class is designed as a
    domain-specific data container with attributes
    that help us avoid cluttering up code with variables
    that track the sampling rate, number of channels,
    and duration of the file.

    >>> sound = voc.example("bl26lb16.wav")
    >>> print(sound.samplerate)
    32000
    >>> print(sound.channels)
    1
    >>> print(sound.duration)
    5.76446875

    You can :func:`print` a :class:`~vocalpy.Sound` to see
    all the properties that are derived from the sampling rate
    and the shape of the underlying data array:
    the number of channels, the number of samples,
    and the duration in seconds.

    >>> sound = voc.example("bl26lb16.wav")
    >>> print(sound)
    vocalpy.Sound(data=array([[-0.00... 0.00912476]]), samplerate=32000), channels=1, samples=184463, duration=5.764)

    The :mod:`vocalpy` package tries to provide functions
    that take :class:`~vocalpy.Sound` instances as inputs,
    and return other domain-specific types as outputs,
    such as :class:`~vocalpy.Segments`,
    :class:`~vocalpy.Spectrogram`, and :class:`~vocalpy.Features`.
    If instead you need to work with the digital audio signal
    directly as a numpy array, you can access it through
    the :attr:`~vocalpy.Sound.data` attribute.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound_arr = sound.data

    Sound can be written to a file as well,
    in any format supported by :mod:`soundfile`.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound.write("bl26lb16-copy.wav")

    We can clip a sound to an arbitrary duration
    using the :meth:`~vocalpy.Sound.clip` method.
    This is useful if there are long, relatively silent periods
    before or after the animal sounds that we are interested in.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound_clip = sound.clip(0.1, 1.5)
    >>> print(sound_clip.duration)
    1.4

    If we want to clip from a start time to the end of the sound,
    we can just specify a time for `start`.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound_clip = sound.clip(0.5)
    >>> print(sound_clip.duration)
    5.26446875

    Likewise, if we want to clip from the start of the sound
    we can just specify a time for `stop`.
    Notice that we need to use a keyword argument here,
    since `start` is the first argument to :meth:`~vocalpy.Sound.clip`.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound_clip = sound.clip(stop=0.5)
    >>> print(sound_clip.duration)
    0.5

    If we want to segment an audio file into periods of animal sounds
    and periods of background, we can do that with one of the
    algorithms in :mod:`vocalpy.segment`.
    This will give us a :class:`~vocalpy.Segments` instance
    that we can then pass into the :meth:`~vocalpy.Sound.segment` method
    to get back a :class:`list` of :class:`~vocalpy.Sound` instances,
    one for each segment.

    >>> sound = voc.example("bl26lb16.wav")
    >>> segments = voc.segment.meansquared(sound, threshold=1000, min_dur=0.0002, min_silent_dur=0.004)
    >>> syllables = sound.segment(segments)
    >>> len(syllables)
    26

    You can also index a :class:`~vocalpy.Sound`
    as you would a :class:`numpy.array`
    and this will give you back a new :class:`~vocalpy.Sound`.
    One place where this is useful is when you have multi-channel audio,
    and you only want one channel, or you want to iterate over the channels.

    >>> sound = voc.example("fruitfly-song-multichannel.wav")
    >>> a_channel = sound[0, :]
    >>> print(a_channel)
    vocalpy.Sound(data=array([[-0.00...-0.00723267]]), samplerate=10000), channels=1, samples=15000, duration=1.500)
    >>> for channel in sound:
    ...     print(channel)
    vocalpy.Sound(data=array([[-0.00...-0.00723267]]), samplerate=10000), channels=1, samples=15000, duration=1.500)
    vocalpy.Sound(data=array([[ 0.01... 0.00268555]]), samplerate=10000), channels=1, samples=15000, duration=1.500)
    vocalpy.Sound(data=array([[ 0.00...-0.00100708]]), samplerate=10000), channels=1, samples=15000, duration=1.500)

    This works with other methods of indexing, as shown below.

    >>> sound = voc.example("bl26lb16.wav")
    >>> print(sound.data.shape)
    (1, 184463)
    >>> decimated = sound[:, ::10]  # keep every 10th sample -- not true downsampling, we don't change the sampling rate

    Note that we are just passing indexing directly
    to the underlying :class:`numpy.array`,
    not re-implementing the API.
    """

    def __init__(
        self,
        data: npt.NDArray,
        samplerate: int,
    ):
        """Initialize a :class:`Sound` from an array and a sampling rate.

        Parameters
        ----------
        data : numpy.ndarray
            Audio signal with 1 or 2 dimensions.
            A 1-dimensional array is given a leading channel dimension,
            so that ``data`` is always stored as (channels, samples).
        samplerate : int
            Sampling rate in Hertz. Must be a positive :class:`int`.

        Raises
        ------
        TypeError
            If ``data`` is not a :class:`numpy.ndarray`,
            or ``samplerate`` is not an :class:`int`.
        ValueError
            If ``data`` does not have 1 or 2 dimensions,
            or ``samplerate`` is not positive.
        """
        if not isinstance(data, np.ndarray):
            raise TypeError(
                f"Sound array `data` should be a numpy array, "
                f"but type was {type(data)}."
            )
        if data.ndim not in (1, 2):
            raise ValueError(
                f"Sound array `data` should have either 1 or 2 dimensions, "
                f"but number of dimensions was {data.ndim}."
            )
        if data.ndim == 1:
            # always store with an explicit channel dimension
            data = data[np.newaxis, :]
        if data.shape[0] > data.shape[1]:
            # more "channels" than samples usually means the array is transposed
            warnings.warn(
                "The ``data`` passed in has more channels than samples: the number of channels (data.shape[0]) "
                f"is {data.shape[0]} and the number of samples (data.shape[1]) is {data.shape[1]}. "
                "You may need to verify you have passed in the data correctly.",
                stacklevel=2,
            )
        self.data = data

        if not isinstance(samplerate, int):
            raise TypeError(
                f"Type of ``samplerate`` must be int but was: {type(samplerate)}"
            )
        if not samplerate > 0:
            raise ValueError(
                f"Value of ``samplerate`` must be a positive integer, but was {samplerate}."
            )
        self.samplerate = samplerate

    @property
    def channels(self):
        """Number of channels, the size of the first dimension of ``data``."""
        return self.data.shape[0]

    @property
    def samples(self):
        """Number of samples, the size of the second dimension of ``data``."""
        return self.data.shape[1]

    @property
    def duration(self):
        """Duration in seconds: number of samples divided by ``samplerate``."""
        return self.data.shape[1] / self.samplerate

    def __repr__(self):
        return (
            f"vocalpy.{type(self).__name__}("
            f"data={reprlib.repr(self.data)}, "
            f"samplerate={self.samplerate})"
        )

    def __str__(self):
        # like ``__repr__``, plus the properties derived from ``data`` and ``samplerate``
        return (
            f"vocalpy.{type(self).__name__}("
            f"data={reprlib.repr(self.data)}, "
            f"samplerate={self.samplerate}), "
            f"channels={self.channels}, "
            f"samples={self.samples}, "
            f"duration={self.duration:.3f})"
        )

    def __eq__(self, other):
        if other.__class__ is not self.__class__:
            return NotImplemented
        # compare the cheap scalar first so the array comparison can be skipped
        return bool(
            self.samplerate == other.samplerate
            and np.array_equal(self.data, other.data)
        )

    def __ne__(self, other):
        if other.__class__ is not self.__class__:
            return NotImplemented
        return not self.__eq__(other)

    @classmethod
    def read(
        cls,
        path: str | pathlib.Path,
        dtype: npt.DTypeLike = np.float64,
        **kwargs,
    ) -> "Self":  # noqa: F821
        """Read audio from ``path``.

        Parameters
        ----------
        path : str, pathlib.Path
            Path to file from which audio data should be read.
        dtype : numpy.typing.DTypeLike
            Data type of the returned audio data. Default is ``numpy.float64``.
            For files whose name ends with ``cbin``, must be one of
            ``numpy.int16``, ``numpy.float32``, or ``numpy.float64``;
            the float types are scaled by ``1 / 32768.0``.
            For all other files, passed to :func:`soundfile.read`.
        **kwargs : dict, optional
            Other arguments to :func:`soundfile.read`,
            refer to :mod:`soundfile` documentation for details.
            Note that :meth:`vocalpy.Sound.read` passes in the argument
            ``always_2d=True``, because we require `Sound.data`
            to always have a "channel" dimension.

        Returns
        -------
        sound : vocalpy.Sound
            A :class:`vocalpy.Sound` instance with ``data`` read from ``path``.

        Raises
        ------
        FileNotFoundError
            If ``path`` does not exist.
        ValueError
            If the file name ends with ``cbin`` and ``dtype`` is not supported.
        """
        path = pathlib.Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Sound file not found at path: {path}")

        if path.name.endswith("cbin"):
            data, samplerate = evfuncs.load_cbin(path)
            if dtype in (np.float32, np.float64):
                # for consistency with soundfile,
                # we scale the cbin int16 data to range [-1.0, 1.0] when we cast to float
                # Next line is from https://stackoverflow.com/a/42544738/4906855, see comments there
                data = data.astype(dtype) / 32768.0
            elif dtype == np.int16:
                pass
            else:
                raise ValueError(
                    f"Invalid ``dtype`` for cbin audio: {dtype}. "
                    "Must be one of {numpy.int16, np.float32, np.float64}"
                )
            # evfuncs always gives us 1-dim, so we add channel dimension
            data = data[np.newaxis, :]
        else:
            data, samplerate = soundfile.read(
                path, always_2d=True, dtype=dtype, **kwargs
            )
            # dimensions (samples, channels) -> (channels, samples)
            data = data.transpose((1, 0))

        return cls(data=data, samplerate=samplerate)

    def write(self, path: str | pathlib.Path, **kwargs) -> AudioFile:
        """Write audio data to a file.

        Parameters
        ----------
        path : str, pathlib.Path
            Path to file that audio data should be saved in.
        **kwargs : dict, optional
            Extra arguments to :func:`soundfile.write`.
            Refer to :mod:`soundfile` documentation for details.

        Returns
        -------
        audio_file : vocalpy.AudioFile
            An :class:`~vocalpy.AudioFile` constructed with ``path``.

        Raises
        ------
        ValueError
            If the name of ``path`` ends with ``cbin``.
        """
        path = pathlib.Path(path)
        if path.name.endswith("cbin"):
            raise ValueError(
                "Extension for `path` was 'cbin', but `vocalpy.Sound.write` cannot write to the cbin format. "
                "Audio data from cbin files can be converted to wav as follows:\n"
                ">>> sound.data = sound.data.astype(np.float32) / 32768.0\n"
                "The above converts the int16 values to float values between -1.0 and 1.0. "
                "You can then save the data as a wav file:\n"
                ">>> sound.write('path.wav')\n"
            )
        # next line: swap axes because soundfile expects dimensions to be (samples, channels)
        soundfile.write(
            file=path,
            data=self.data.transpose((1, 0)),
            samplerate=self.samplerate,
            **kwargs,
        )
        return AudioFile(path=path)

    def __iter__(self):
        """Iterate over channels, yielding a single-channel :class:`Sound` for each."""
        for channel in self.data:
            yield Sound(
                data=channel[np.newaxis, ...],
                samplerate=self.samplerate,
            )

    def __getitem__(self, key):
        """Index the underlying ``data`` array, returning a new :class:`Sound`.

        ``key`` is passed directly to :attr:`data`;
        it must be an :class:`int`, :class:`slice`, or :class:`tuple`.
        """
        if not isinstance(key, (int, tuple, slice)):
            raise TypeError(
                f"Sound can be indexed with integer or slice, but type was: {type(key)}"
            )
        try:
            return Sound(
                data=self.data[key],
                samplerate=self.samplerate,
            )
        except IndexError as e:
            raise IndexError(
                f"Invalid integer or slice for Sound with {self.data.shape[0]} channels: {key}"
            ) from e

    def segment(self, segments: Segments) -> list[Sound]:
        """Segment a sound, using a set of :class:`~vocalpy.Segments`.

        Parameters
        ----------
        segments : vocalpy.Segments
            A :class:`~vocalpy.Segments` instance,
            the output of a segmenting function in :mod:`vocalpy.segment`.

        Returns
        -------
        sounds : list
            A list of :class:`~vocalpy.Sound` instances,
            one for every segment in :class:`~vocalpy.Segments`.
            Empty if ``segments`` contains no segments.

        Raises
        ------
        TypeError
            If ``segments`` is not an instance of :class:`~vocalpy.Segments`.
        ValueError
            If the last segment ends after the last sample of this sound.

        Examples
        --------
        >>> sound = voc.example("bells.wav")
        >>> segments = voc.segment.meansquared(sound)
        >>> syllables = sound.segment(segments)
        >>> len(syllables)
        10

        Notes
        -----
        The :meth:`Sound.segment` method is used with the output
        of functions from :mod:`vocalpy.segment`,
        an instance of :class:`~vocalpy.Segments`.
        If you need to clip a :class:`~vocalpy.Sound` at arbitrary times,
        use the :meth:`~vocalpy.Sound.clip` method.

        See Also
        --------
        vocalpy.segment
        Sound.clip
        """
        # imported here rather than at module level; at module level
        # ``Segments`` is only imported under ``TYPE_CHECKING``
        from .segments import Segments

        if not isinstance(segments, Segments):
            raise TypeError(
                f"`segments` argument should be an instance of `vocalpy.Segments`, but type is: {type(segments)}"
            )
        if segments.samplerate != self.samplerate:
            warnings.warn(
                f"The `samplerate` attribute of `segments, {segments.samplerate}, "
                f"does not equal the `samplerate` of this `Sound`, {self.samplerate}. "
                "You may want to check the source of the segments.",
                stacklevel=2,
            )

        # with no segments there is no "last segment" to bounds-check
        # (indexing with [-1] would raise IndexError), and nothing to return
        if len(segments.start_inds) == 0:
            return []

        last_offset = segments.start_inds[-1] + segments.lengths[-1]
        if last_offset > self.data.shape[-1]:
            raise ValueError(
                f"The offset of the last segment in `segments`, {last_offset}, "
                f"is greater than the last sample of this `Sound`, {self.data.shape[-1]}"
            )

        return [
            Sound(
                data=self.data[:, start_ind : start_ind + length],  # noqa: E203
                samplerate=self.samplerate,
            )
            for start_ind, length in zip(segments.start_inds, segments.lengths)
        ]

    def clip(self, start: float = 0.0, stop: float | None = None) -> Sound:
        """Make a clip from this :class:`~vocalpy.Sound` that starts at time
        ``start`` in seconds and ends at time ``stop``.

        Parameters
        ----------
        start : float
            Start time for clip, in seconds. Default is 0.
        stop : float, optional
            Stop time for clip, in seconds.
            Default is None, in which case the clip runs
            to the end of this :class:`~vocalpy.Sound`.

        Returns
        -------
        clip : vocalpy.Sound
            A new :class:`~vocalpy.Sound` with duration ``stop - start``.

        Raises
        ------
        TypeError
            If ``start`` or ``stop`` is not a float.
        ValueError
            If ``start`` is negative, if ``start`` or ``stop`` is greater than
            the duration of this sound, or if ``stop`` is less than ``start``.

        Examples
        --------
        >>> sound = voc.example('bl26lb16.wav')
        >>> clip = sound.clip(1.5, 2.5)
        >>> clip.duration
        1.0

        Notes
        -----
        The :meth:`~vocalpy.Sound.clip` method is used to clip a
        :class:`~vocalpy.Sound` at arbitrary times.
        If you need to segment an audio file into periods of animal sounds
        and periods of background, use one of the functions
        in :mod:`vocalpy.segment` to get an instance of
        :class:`~vocalpy.Segments`, that you can then use
        with the :meth:`Sound.segment` method.

        See Also
        --------
        Sound.segment
        """
        if not isinstance(start, (float, np.floating)):
            raise TypeError(
                f"The `start` time for the clip must be a float type, but type was {type(start)}."
            )
        if start < 0.0:
            raise ValueError(
                f"Value for `start` time must be a non-negative number, but was: {start}"
            )
        if start > self.duration:
            raise ValueError(
                f"Value for `start` time, {start}, cannot be greater than this `Sound`'s duration, {self.duration}"
            )
        start_ind = int(start * self.samplerate)

        if stop is None:
            # don't use stop ind, instead go all the way to the end
            return Sound(
                data=self.data[:, start_ind:],
                samplerate=self.samplerate,
            )

        if not isinstance(stop, (float, np.floating)):
            # report the type of ``stop``, the argument that actually failed validation
            raise TypeError(
                f"The `stop` time for the clip must be a float type, but type was {type(stop)}."
            )
        if stop < start:
            raise ValueError(
                f"Value for `stop`, {stop}, is less than value for `start`, {start}. "
                "Please specify a `stop` time for the clip greater than the `start` time."
            )
        if stop > self.duration:
            raise ValueError(
                f"Value for `stop` time, {stop}, cannot be greater than this `Sound`'s duration, {self.duration}"
            )
        stop_ind = int(stop * self.samplerate)
        return Sound(
            data=self.data[:, start_ind:stop_ind],
            samplerate=self.samplerate,
        )

    def to_mono(self) -> Sound:
        """Convert a :class:`~vocalpy.Sound` to mono
        by averaging samples across channels.

        Returns
        -------
        sound : vocalpy.Sound
            This same instance if it already has a single channel,
            otherwise a new single-channel :class:`~vocalpy.Sound`.

        Examples
        --------
        >>> sound = voc.example("WhiLbl0010")
        >>> print(sound.channels)
        2
        >>> sound_mono = sound.to_mono()
        >>> print(sound_mono.channels)
        1

        Note that feature extraction functions operate on channels
        independently, so it may speed up your analysis
        to convert multi-channel audio to mono,
        if you do not need to consider channels independently.

        >>> import timeit
        >>> import numpy as np
        >>> sound = voc.example("WhiLbl0010")
        >>> sound_mono = sound.to_mono()
        >>> np.mean(timeit.repeat("voc.feature.biosound(sound)", number=5, globals=globals()))
        np.float64(19.713963174959645)
        >>> np.mean(timeit.repeat("voc.feature.biosound(sound_mono)", number=5, globals=globals()))
        np.float64(9.917085491772742)

        Notes
        -----
        This method uses the :func:`librosa.to_mono` function.
        """
        if self.data.shape[0] == 1:
            return self

        # imported lazily so that librosa is only needed for multi-channel sounds
        import librosa

        return Sound(
            data=librosa.to_mono(self.data), samplerate=self.samplerate
        )