Source code for vocalpy.sound

"""Class that represents a sound."""

from __future__ import annotations

import pathlib
import reprlib
import warnings
from typing import TYPE_CHECKING

import numpy as np
import numpy.typing as npt
import soundfile

from ._vendor import evfuncs
from .audio_file import AudioFile

if TYPE_CHECKING:
    from .segments import Segments



[docs]
class Sound:
    """Class that represents a sound.

    Attributes
    ----------
    data : numpy.ndarray
        The audio signal as a :class:`numpy.ndarray`,
        where the dimensions are (channels, samples).
    samplerate : int
        The sampling rate the audio signal was acquired at, in Hertz.
    channels : int
        The number of channels in the audio signal.
        Determined from the first dimension of ``data``.
    samples : int
        The number of samples in the audio signal.
        Determined from the last dimension of ``data``.
    duration : float
        Duration of the sound in seconds.
        Determined from the last dimension of ``data``
        and the ``samplerate``.

    Examples
    --------

    A :class:`~vocalpy.Sound` is read from a file.

    >>> sound_path = voc.example("bl26lb16.wav", return_path=True)
    >>> sound = voc.Sound.read(sound_path)
    >>> sound
    vocalpy.Sound(data=array([[-0.00... 0.00912476]]), samplerate=32000)

    The :class:`~vocalpy.Sound` class is designed as a
    domain-specific data container with attributes that
    help us avoid cluttering up code with variables
    that track the sampling rate, number of channels,
    and duration of the file.

    >>> sound = voc.example("bl26lb16.wav")
    >>> print(sound.samplerate)
    32000
    >>> print(sound.channels)
    1
    >>> print(sound.duration)
    5.76446875

    You can :func:`print` a :class:`~vocalpy.Sound`
    to see all the properties that are derived from
    the sampling rate and the shape of the
    underlying data array: the number of channels,
    the number of samples, and the duration in seconds.

    >>> sound = voc.example("bl26lb16.wav")
    >>> print(sound)
    vocalpy.Sound(data=array([[-0.00... 0.00912476]]), samplerate=32000), channels=1, samples=184463, duration=5.764)

    The :mod:`vocalpy` package tries to provide
    functions that take :class:`~vocalpy.Sound` instances as inputs,
    and return other domain-specific types as outputs,
    such as :class:`~vocalpy.Segments`, :class:`~vocalpy.Spectrogram`,
    and :class:`~vocalpy.Features`.
    If instead you need to work with the
    digital audio signal directly as a numpy array,
    you can access it through the :attr:`~vocalpy.Sound.data`
    attribute.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound_arr = sound.data

    Sound can be written to a file as well,
    in any format supported by :mod:`soundfile`.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound.write("bl26lb16-copy.wav")

    We can clip a sound to an arbitrary duration
    using the :meth:`~vocalpy.Sound.clip` method.
    This is useful if there are long, relatively silent periods
    before or after the animal sounds that we are interested in.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound_clip = sound.clip(0.1, 1.5)
    >>> print(sound_clip.duration)
    1.4

    If we want to clip from a start time to the end of the sound,
    we can just specify a time for `start`.
    The duration of the clip is then the duration of the
    original sound minus the `start` time.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound_clip = sound.clip(0.5)
    >>> print(sound_clip.duration)
    5.26446875

    Likewise, if we want to clip from the start of the sound
    we can just specify a time for `stop`.
    Notice that we need to use a keyword argument here,
    since `start` is the first argument to :meth:`~vocalpy.Sound.clip`.

    >>> sound = voc.example("bl26lb16.wav")
    >>> sound_clip = sound.clip(stop=0.5)
    >>> print(sound_clip.duration)
    0.5

    If we want to segment an audio file
    into periods of animal sounds and periods of background,
    we can do that with one of the algorithms in
    :mod:`vocalpy.segment`. This will give us a
    :class:`~vocalpy.Segments` instance that we can then pass into
    the :meth:`~vocalpy.Sound.segment` method to get back
    a :class:`list` of :class:`~vocalpy.Sound` instances,
    one for each segment.

    >>> sound = voc.example("bl26lb16.wav")
    >>> segments = voc.segment.meansquared(sound, threshold=1000, min_dur=0.0002, min_silent_dur=0.004)
    >>> syllables = sound.segment(segments)
    >>> len(syllables)
    26

    You can also index a :class:`~vocalpy.Sound` as you would a
    :class:`numpy.ndarray` and this will give you back a new
    :class:`~vocalpy.Sound`.
    One place where this is useful is when you have multi-channel
    audio, and you only want one channel, or you want to iterate
    over the channels.

    >>> sound = voc.example("fruitfly-song-multichannel.wav")
    >>> a_channel = sound[0, :]
    >>> print(a_channel)
    vocalpy.Sound(data=array([[-0.00...-0.00723267]]), samplerate=10000), channels=1, samples=15000, duration=1.500)
    >>> for channel in sound:
    ...     print(channel)
    vocalpy.Sound(data=array([[-0.00...-0.00723267]]), samplerate=10000), channels=1, samples=15000, duration=1.500)
    vocalpy.Sound(data=array([[ 0.01... 0.00268555]]), samplerate=10000), channels=1, samples=15000, duration=1.500)
    vocalpy.Sound(data=array([[ 0.00...-0.00100708]]), samplerate=10000), channels=1, samples=15000, duration=1.500)

    This works with other methods of indexing,
    as shown below.

    >>> sound = voc.example("bl26lb16.wav")
    >>> print(sound.data.shape)
    (1, 184463)
    >>> decimated = sound[:, ::10]  # keep every 10th sample -- not true downsampling, we don't change the sampling rate
    >>> print(decimated.data.shape)
    (1, 18447)

    Note that we are just passing indexing directly
    to the underlying :class:`numpy.ndarray`,
    not re-implementing the API.
    """


[docs]
    def __init__(
        self,
        data: npt.NDArray,
        samplerate: int,
    ):
        if not isinstance(data, np.ndarray):
            raise TypeError(
                f"Sound array `data` should be a numpy array, "
                f"but type was {type(data)}."
            )
        if not (data.ndim == 1 or data.ndim == 2):
            raise ValueError(
                f"Sound array `data` should have either 1 or 2 dimensions, "
                f"but number of dimensions was {data.ndim}."
            )
        if data.ndim == 1:
            data = data[np.newaxis, :]

        if data.shape[0] > data.shape[1]:
            warnings.warn(
                "The ``data`` passed in has more channels than samples: the number of channels (data.shape[0]) "
                f"is {data.shape[0]} and the number of samples (data.shape[1]) is {data.shape[1]}. "
                "You may need to verify you have passed in the data correctly.",
                stacklevel=2,
            )

        self.data = data

        if not isinstance(samplerate, int):
            raise TypeError(
                f"Type of ``samplerate`` must be int but was: {type(samplerate)}"
            )
        if not samplerate > 0:
            raise ValueError(
                f"Value of ``samplerate`` must be a positive integer, but was {samplerate}."
            )
        self.samplerate = samplerate


    @property
    def channels(self):
        return self.data.shape[0]

    @property
    def samples(self):
        return self.data.shape[1]

    @property
    def duration(self):
        return self.data.shape[1] / self.samplerate

    def __repr__(self):
        return (
            f"vocalpy.{self.__class__.__name__}("
            f"data={reprlib.repr(self.data)}, "
            f"samplerate={self.samplerate})"
        )

    def __str__(self):
        return (
            f"vocalpy.{self.__class__.__name__}("
            f"data={reprlib.repr(self.data)}, "
            f"samplerate={self.samplerate}), "
            f"channels={self.channels}, "
            f"samples={self.samples}, "
            f"duration={self.duration:.3f})"
        )

    def __eq__(self, other):
        if other.__class__ is not self.__class__:
            return NotImplemented
        return all(
            [
                np.array_equal(self.data, other.data),
                self.samplerate == other.samplerate,
            ]
        )

    def __ne__(self, other):
        if other.__class__ is not self.__class__:
            return NotImplemented
        return not self.__eq__(other)


[docs]
    @classmethod
    def read(
        cls,
        path: str | pathlib.Path,
        dtype: npt.DTypeLike = np.float64,
        **kwargs,
    ) -> "Self":  # noqa: F821
        """Read audio from ``path``.

        Parameters
        ----------
        path : str, pathlib.Path
            Path to file from which audio data should be read.
        **kwargs : dict, optional
            Other arguments to :func:`soundfile.read`:, refer to
            :module:`soundfile` documentation for details.
            Note that :method:`vocalpy.Sound.read` passes in the argument
            ``always_2d=True``, because we require `Sound.data`
            to always have a "channel" dimension.

        Returns
        -------
        sound : vocalpy.Sound
            A :class:`vocalpy.Sound` instance with ``data``
            read from ``path``.
        """
        path = pathlib.Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Sound file not found at path: {path}")

        if path.name.endswith("cbin"):
            data, samplerate = evfuncs.load_cbin(path)
            if dtype in (np.float32, np.float64):
                # for consistency with soundfile,
                # we scale the cbin int16 data to range [-1.0, 1.0] when we cast to float
                # Next line is from https://stackoverflow.com/a/42544738/4906855, see comments there
                data = data.astype(dtype) / 32768.0
            elif dtype == np.int16:
                pass
            else:
                raise ValueError(
                    f"Invalid ``dtype`` for cbin audio: {dtype}. "
                    "Must be one of {numpy.int16, np.float32, np.float64}"
                )
            # evfuncs always gives us 1-dim, so we add channel dimension
            data = data[np.newaxis, :]
        else:
            data, samplerate = soundfile.read(
                path, always_2d=True, dtype=dtype, **kwargs
            )
            data = data.transpose(
                (1, 0)
            )  # dimensions (samples, channels) -> (channels, samples)

        return cls(data=data, samplerate=samplerate)



[docs]
    def write(self, path: str | pathlib.Path, **kwargs) -> AudioFile:
        """Write audio data to a file.

        Parameters
        ----------
        path : str, pathlib.Path
            Path to file that audio data should be saved in.
        **kwargs: dict, optional
            Extra arguments to :func:`soundfile.write`.
            Refer to :module:`soundfile` documentation for details.
        """
        path = pathlib.Path(path)
        if path.name.endswith("cbin"):
            raise ValueError(
                "Extension for `path` was 'cbin', but `vocalpy.Sound.write` cannot write to the cbin format. "
                "Audio data from cbin files can be converted to wav as follows:\n"
                ">>> sound.data = sound.data.astype(np.float32) / 32768.0\n"
                "The above converts the int16 values to float values between -1.0 and 1.0. "
                "You can then save the data as a wav file:\n"
                ">>> sound.write('path.wav')\n"
            )
        # next line: swap axes because soundfile expects dimensions to be (samples, channels)
        soundfile.write(
            file=path,
            data=self.data.transpose((1, 0)),
            samplerate=self.samplerate,
            **kwargs,
        )
        return AudioFile(path=path)


    def __iter__(self):
        for channel in self.data:
            yield Sound(
                data=channel[np.newaxis, ...],
                samplerate=self.samplerate,
            )

    def __getitem__(self, key):
        if isinstance(key, (int, tuple, slice)):
            try:
                return Sound(
                    data=self.data[key],
                    samplerate=self.samplerate,
                )
            except IndexError as e:
                raise IndexError(
                    f"Invalid integer or slice for Sound with {self.data.shape[0]} channels: {key}"
                ) from e
        else:
            raise TypeError(
                f"Sound can be indexed with integer or slice, but type was: {type(key)}"
            )


[docs]
    def segment(self, segments: Segments) -> list[Sound]:
        """Segment a sound, using a set of :class:`~vocalpy.Segments`.

        Parameters
        ----------
        segments : vocalpy.Segments
            A :class:`~vocalpy.Segments` instance,
            the output of a segmenting function
            in :mod:`vocalpy.segment`.

        Returns
        -------
        sounds : list
            A list of :class:`~vocalpy.Sound` instances,
            one for every segment in :class:`~vocalpy.Segments`.
            The list is empty if ``segments`` has no segments.

        Raises
        ------
        TypeError
            If ``segments`` is not an instance of :class:`~vocalpy.Segments`.
        ValueError
            If the last segment ends after the last sample of this
            :class:`~vocalpy.Sound`.

        Warns
        -----
        UserWarning
            If the ``samplerate`` of ``segments`` does not equal
            the ``samplerate`` of this :class:`~vocalpy.Sound`.

        Examples
        --------
        >>> sound = voc.example("bells.wav")
        >>> segments = voc.segment.meansquared(sound)
        >>> syllables = sound.segment(segments)
        >>> len(syllables)
        10

        Notes
        -----
        The :meth:`Sound.segment` method is used with the output
        of functions from :mod:`vocalpy.segment`, an instance of
        :class:`~vocalpy.Segments`. If you need to clip a
        :class:`~vocalpy.Sound` at arbitrary times, use the
        :meth:`~vocalpy.Sound.clip` method.

        See Also
        --------
        vocalpy.segment
        Sound.clip
        """
        # imported here because at module level this name is only imported for type checking
        from .segments import Segments

        if not isinstance(segments, Segments):
            raise TypeError(
                f"`segments` argument should be an instance of `vocalpy.Segments`, but type is: {type(segments)}"
            )
        if segments.samplerate != self.samplerate:
            warnings.warn(
                f"The `samplerate` attribute of `segments, {segments.samplerate}, "
                f"does not equal the `samplerate` of this `Sound`, {self.samplerate}. "
                "You may want to check the source of the segments.",
                stacklevel=2,
            )

        # With no segments there is no "last segment" to validate, and indexing
        # with -1 below would raise an IndexError, so we return early.
        if len(segments.start_inds) == 0:
            return []

        # Only the last segment is checked against the length of the sound.
        last_offset = segments.start_inds[-1] + segments.lengths[-1]
        if last_offset > self.data.shape[-1]:
            raise ValueError(
                f"The offset of the last segment in `segments`, {last_offset}, "
                f"is greater than the last sample of this `Sound`, {self.data.shape[-1]}"
            )

        return [
            Sound(
                data=self.data[:, start_ind : start_ind + length],  # noqa: E203
                samplerate=self.samplerate,
            )
            for start_ind, length in zip(segments.start_inds, segments.lengths)
        ]



[docs]
    def clip(self, start: float = 0.0, stop: float | None = None) -> Sound:
        """Make a clip from this :class:`~vocalpy.Sound` that starts at time
        ``start`` in seconds and ends at time ``stop``.

        Parameters
        ----------
        start : float
            Start time for clip, in seconds.
            Default is 0.
        stop : float, optional.
            Stop time for clip, in seconds.
            Default is None, in which case
            the value will be set to the
            :attr:`~vocalpy.Sound.duration`
            of this :class:`~vocalpy.Sound`.

        Returns
        -------
        clip : vocalpy.Sound
            A new :class:`~vocalpy.Sound` with
            duration ``stop - start``.

        Examples
        --------
        >>> sound = voc.example('bl26lb16.wav')
        >>> clip = sound.clip(1.5, 2.5)
        >>> clip.duration
        1.0

        Notes
        -----
        The :meth:`~vocalpy.Sound.clip` method is used to clip a
        :class:`~vocalpy.Sound` at arbitrary times.
        If you need to segment an audio file into periods of
        animal sounds and periods of background,
        use one of the functions in :mod:`vocalpy.segment`
        to get an instance of :class:`~vocalpy.Segments`,
        that you can then use with the :meth`Sound.segment` method.

        See Also
        --------
        Sound.segment
        """
        if not isinstance(start, (float, np.floating)):
            raise TypeError(
                f"The `start` time for the clip must be a float type, but type was {type(start)}."
            )
        if start < 0.0:
            raise ValueError(
                f"Value for `start` time must be a non-negative number, but was: {start}"
            )
        if start > self.duration:
            raise ValueError(
                f"Value for `start` time, {start}, cannot be greater than this `Sound`'s duration, {self.duration}"
            )
        start_ind = int(start * self.samplerate)

        if stop is None:
            return Sound(
                # don't use stop ind, instead go all the way to the end
                data=self.data[:, start_ind:],
                samplerate=self.samplerate,
            )
        else:
            if not isinstance(stop, (float, np.floating)):
                raise TypeError(
                    f"The `stop` time for the clip must be a float type, but type was {type(start)}."
                )
            if stop < start:
                raise ValueError(
                    f"Value for `stop`, {stop}, is less than value for `start`, {start}. "
                    "Please specify a `stop` time for the clip greater than the `start` time."
                )
            if stop > self.duration:
                raise ValueError(
                    f"Value for `stop` time, {stop}, cannot be greater than this `Sound`'s duration, {self.duration}"
                )
            stop_ind = int(stop * self.samplerate)
            return Sound(
                data=self.data[:, start_ind:stop_ind],
                samplerate=self.samplerate,
            )



[docs]
    def to_mono(self):
        """Convert a :class:`~vocalpy.Sound` to mono by averaging samples across channels.

        Examples
        --------

        >>> sound = voc.examples("WhiLbl0010")
        >>> print(sound.channels)
        2
        >>> sound_mono = sound.to_mono()
        >>> print(sound.channels)
        1

        Note that feature extraction functions operate on channels independently,
        so it may speed up your analysis to convert multi-channel audio to mono,
        if you do not need to consider channels indepedently.

        >>> import timeit
        >>> import numpy as np
        >>> sound = voc.examples("WhiLbl0010")
        >>> sound_mono = sound.to_mono()
        >>> np.mean(timeit.repeat("voc.feature.biosound(sound)", number=5, globals=globals()))
        np.float64(19.713963174959645)
        >>> np.mean(timeit.repeat("voc.feature.biosound(sound_mono)", number=5, globals=globals()))
        np.float64(9.917085491772742)

        Notes
        -----
        This method uses the :func:`librosa.to_mono` function.
        """
        if self.data.shape[0] == 1:
            return self
        else:
            import librosa

            return Sound(
                data=librosa.to_mono(self.data), samplerate=self.samplerate
            )