Source code for vocalpy.segments

"""Classes that represent line segments returned by segmenting algorithms."""

from __future__ import annotations

import json
import numbers
import pathlib
import reprlib

import numpy as np
import numpy.typing as npt
import pandas as pd


class Segments:
    """Class that represents a set of line segments returned by a segmenting algorithm.

    This class represents the result of algorithms that segment
    a signal into a series of consecutive, non-overlapping
    2-D line segments :math:`S`.
    Each segment :math:`s_i` in a :class:`Segments` instance
    has an integer start index and length.
    The start index is computed by the segmenting algorithm.
    For algorithms that find segments by thresholding energy,
    the length will be equal to the stop index computed by the algorithm
    minus the start index, plus one (to account for how Python indexes).
    The stop index is the last index above threshold for a segment.
    For a list of such algorithms, call :func:`vocalpy.segment.line.list`.
    For algorithms that segment spectrograms into boxes, see :class:`Boxes`.

    Attributes
    ----------
    start_inds : numpy.ndarray
        Index of the sample where each segment starts.
    lengths : numpy.ndarray
        Length of each segment, in number of samples.
    samplerate : int
        The sampling rate used to convert sample indices
        and lengths to times in seconds.
    labels : list, optional
        A :class:`list` of strings, where each string is the label for each segment.
        Defaults to a list of empty strings, one per segment.

    Examples
    --------
    :class:`Segments` are returned by the segmenting algorithms that return a set of
    line segments (as opposed to segmenting algorithms that return a set of boxes).

    >>> bfsongrepo = voc.example('bfsongrepo')
    >>> sound = bfsongrepo.sounds[0]
    >>> segments = voc.segment.meansquared(sound, threshold=1500, min_dur=0.2, min_silent_dur=0.02)
    >>> segments
    Segments(start_inds=array([ 22293...4425, 220495]), lengths=array([ 8012,... 6935, 7896]), samplerate=32000, labels=['', '', '', '', '', '', ...])  # noqa

    Because audio data is a digital signal with discrete samples,
    segments are defined in terms of start indices and lengths.
    Thus, the start index of each segment is the index of the sample where it starts
    --also known as a "boundary"--and the length is given in number of samples.
    However, we often want to think of segment times in terms of seconds.
    We can get the start times of segments in seconds
    with the :attr:`~Segments.start_times` property,
    and we can get the duration of segments in seconds
    with the :attr:`~Segments.durations` property.

    >>> segments.start_times
    array([0.69665625, 1.801375  , 2.26390625, 2.7535625 , 3.5885    ,
           6.38828125, 6.89046875])
    >>> segments.durations
    array([0.250375  , 0.33278125, 0.31      , 0.23625   , 0.308625  ,
           0.21671875, 0.24675   ])

    This is possible because each set of :class:`Segments` has a
    :attr:`~Segments.samplerate` attribute, that can be used to convert
    from sample numbers to seconds.
    This attribute is taken from the :class:`vocalpy.Sound`
    that was segmented to produce the :class:`Segments` in the first place.

    Depending on the segmenting algorithm,
    the start of one segment may not be the same as the end of
    the segment that precedes it.
    In this case we may want to find where the segments stop.
    We can do so with the :attr:`~Segments.stop_inds`
    and :attr:`~Segments.stop_times` properties.

    To actually get a :class:`Sound` for every segment in a set of :class:`Segments`,
    we can pass the :class:`Segments` into the :meth:`vocalpy.Sound.segment` method.

    >>> segment_sounds = sound.segment(segments)

    This might seem verbose, but it has a couple of advantages.
    The first is that the :class:`Segments` can be saved in a json file,
    so they can be loaded again and used to segment a sound
    without needing to re-run the segmentation.
    You can use a naming convention so that each sound file
    has a segments file paired with it: e.g., if the sound file is named
    ``"mouse1-day1-bout1.wav"``, then the json file could be named
    ``"mouse1-day1-bout1.segments.json"``.

    >>> segments.to_json(path='mouse1-day1-bout1.segments.json')

    A set of :class:`Segments` is then loaded with the
    :meth:`~Segments.from_json` method.

    >>> segments = voc.Segments.from_json(path='mouse1-day1-bout1.segments.json')

    The second advantage of representing :class:`Segments` separately
    is that they can then be used to compute metrics for segmentation.
    Note that here we are using the :attr:`~Segments.all_times` property,
    that gives us all the boundary times in seconds.

    >>> sounds = voc.example('bfsongrepo', return_type='sound')
    >>> segments = voc.segment.meansquared(sound, threshold=1500, min_dur=0.2, min_silent_dur=0.02)
    >>> annots = voc.example('bfsongrepo', return_type='annotation')
    >>> ref = np.sort(np.concatenate((annots[0].seq.onsets, annots[0].seq.offsets)))
    >>> hyp = segments.all_times
    >>> prec, _ = voc.metrics.segmentation.ir.precision(reference=ref, hypothesis=hyp)

    See Also
    --------
    Boxes
    """

    def __init__(
        self,
        start_inds: npt.NDArray,
        lengths: npt.NDArray,
        samplerate: int,
        labels: list[str] | None = None,
    ) -> None:
        """Validate the arguments and store them as attributes.

        Parameters
        ----------
        start_inds : numpy.ndarray
            Start index of each segment. Must have an integer dtype and,
            unless empty, be 1-dimensional, non-negative
            and strictly increasing.
        lengths : numpy.ndarray
            Length of each segment in samples. Must have an integer dtype and,
            unless empty, be 1-dimensional with the same number of elements
            as ``start_inds``, all of them at least 1.
        samplerate : int
            Sampling rate; must be a positive :class:`int`.
        labels : list of str, optional
            One label per segment. If ``None``, every segment
            is labeled with an empty string.

        Raises
        ------
        TypeError
            If ``start_inds`` or ``lengths`` is not a :class:`numpy.ndarray`,
            if ``samplerate`` is not an :class:`int`,
            or if ``labels`` is neither ``None`` nor a :class:`list`.
        ValueError
            If any of the value constraints listed above is violated.
        """
        if not isinstance(start_inds, np.ndarray):
            raise TypeError(
                f"`start_inds` must be a numpy array but type was: {type(start_inds)}"
            )
        if not isinstance(lengths, np.ndarray):
            raise TypeError(
                f"`lengths` must be a numpy array but type was: {type(lengths)}"
            )

        if not issubclass(start_inds.dtype.type, numbers.Integral):
            raise ValueError(
                f"`start_inds` must have an integer dtype, but dtype was: {start_inds.dtype}"
            )
        if not issubclass(lengths.dtype.type, numbers.Integral):
            raise ValueError(
                f"`lengths` must have an integer dtype, but dtype was: {lengths.dtype}"
            )

        # When both arrays are empty there are no segments to validate.
        if not (start_inds.size == 0 and lengths.size == 0):
            if start_inds.ndim != 1:
                raise ValueError(
                    "`start_inds` for `Segments` should be 1-dimensional array "
                    f"but start_inds.ndim was: {start_inds.ndim}"
                )
            if lengths.ndim != 1:
                raise ValueError(
                    f"`lengths` for `Segments` should be 1-dimensional array but lengths.ndim was: {lengths.ndim}"
                )
            if start_inds.size != lengths.size:
                raise ValueError(
                    "`start_inds` and `lengths` of `Segments` must have same number of elements. "
                    f"`start_inds` has {start_inds.size} elements and `lengths` has {lengths.size} elements."
                )
            if not np.all(start_inds >= 0):
                raise ValueError(
                    "Values of `start_inds` for `Segments` must all be non-negative."
                )
            if not np.all(start_inds[1:] > start_inds[:-1]):
                raise ValueError(
                    "Values of `start_inds` for `Segments` must be strictly increasing."
                )
            if not np.all(lengths >= 1):
                raise ValueError(
                    "Values of `lengths` for `Segments` must all be positive."
                )

        if not isinstance(samplerate, int):
            raise TypeError(
                f"Type of ``samplerate`` must be int but was: {type(samplerate)}"
            )
        if not samplerate > 0:
            raise ValueError(
                f"Value of ``samplerate`` must be a positive integer, but was {samplerate}."
            )

        if labels is not None:
            if not isinstance(labels, list):
                raise TypeError(
                    f"`labels` must be a list but type was: {type(labels)}"
                )
            if not all(isinstance(lbl, str) for lbl in labels):
                types = {type(lbl) for lbl in labels}
                raise ValueError(
                    f"`labels` of `Segments` must be a list of strings, but found the following types: {types}"
                )
            if len(labels) != start_inds.size:
                raise ValueError(
                    "`labels` for `Segments` must have same number of elements as `start_inds`. "
                    f"`labels` has {len(labels)} elements but `start_inds` has {start_inds.size} elements."
                )
        else:
            # if labels is None then default to empty strings
            labels = [""] * start_inds.shape[0]

        self.start_inds = start_inds
        self.lengths = lengths
        self.samplerate = samplerate
        self.labels = labels

    @property
    def stop_inds(self):
        """Indices of where segments stop.

        Returns ``self.start_inds + self.lengths``.
        """
        return self.start_inds + self.lengths

    @property
    def all_inds(self):
        """Start and stop indices of segments.

        Returns the following:

        .. code-block:: python

           np.unique(np.concatenate((self.start_inds, self.stop_inds)))
        """
        return np.unique(np.concatenate((self.start_inds, self.stop_inds)))

    @property
    def start_times(self):
        """Start times of segments.

        Returns ``self.start_inds / self.samplerate``.
        """
        return self.start_inds / self.samplerate

    @property
    def durations(self):
        """Durations of segments.

        Returns ``self.lengths / self.samplerate``.
        """
        return self.lengths / self.samplerate

    @property
    def stop_times(self):
        """Stop times of segments.

        Returns ``self.start_times + self.durations``.
        """
        return self.start_times + self.durations

    @property
    def all_times(self):
        """Start and stop times of segments.

        Returns the following:

        .. code-block:: python

           np.unique(np.concatenate((self.start_times, self.stop_times)))
        """
        return np.unique(np.concatenate((self.start_times, self.stop_times)))

    def __repr__(self):
        # reprlib abbreviates long arrays and lists so the repr stays short
        return (
            f"Segments(start_inds={reprlib.repr(self.start_inds)}, lengths={reprlib.repr(self.lengths)}, "
            f"samplerate={self.samplerate!r}, labels={reprlib.repr(self.labels)})"
        )

    def to_json(self, path: str | pathlib.Path) -> None:
        """Save :class:`Segments` to a json file.

        Parameters
        ----------
        path : str, pathlib.Path
            The path where the json file should be saved
            with these :class:`Segments`.
        """
        path = pathlib.Path(path)
        json_dict = {
            # numpy arrays are not json-serializable; convert to lists
            "start_inds": self.start_inds.tolist(),
            "lengths": self.lengths.tolist(),
            "samplerate": self.samplerate,
            "labels": self.labels,
        }
        with path.open("w") as fp:
            json.dump(json_dict, fp)

    @classmethod
    def from_json(cls, path: str | pathlib.Path) -> "Segments":
        """Load :class:`Segments` from a json file.

        Parameters
        ----------
        path : str, pathlib.Path
            The path to the json file to load the :class:`Segments` from.

        Returns
        -------
        segments : Segments
        """
        path = pathlib.Path(path)
        with path.open("r") as fp:
            json_dict = json.load(fp)
        return cls(
            # an explicit dtype keeps empty lists from becoming float arrays
            np.array(json_dict["start_inds"], dtype=int),
            np.array(json_dict["lengths"], dtype=int),
            json_dict["samplerate"],
            json_dict["labels"],
        )

    # Column names that values of ``columns_map`` may take in ``from_csv``.
    VALID_COLUMNS_MAP_VALUES = ["start_s", "stop_s", "start_ind", "length"]

    @classmethod
    def from_csv(
        cls,
        csv_path: str | pathlib.Path,
        samplerate: int,
        columns_map: dict | None = None,
        default_label: str | None = None,
        read_csv_kwargs: dict | None = None,
    ):
        """Create a :class:`~vocalpy.Segments` instance from a csv file.

        The csv file can either have the column names
        ``{"start_ind", "length", "label"}``, that will be used directly
        as the :class:`~vocalpy.Segments` attributes
        ``start_inds``, ``lengths``, and ``labels``, respectively,
        or it can have the column names ``{"start_s", "stop_s", "label"}``,
        where ``"start_s"`` and ``"stop_s"`` refer to times in seconds.
        Times in seconds are converted to sample indices and lengths
        by multiplying by ``samplerate`` and rounding to the nearest sample.

        The ``label`` column is not required, and if it is not found,
        the ``labels`` will default to empty strings.
        You can change this behavior by specifying a ``default_label``
        that will be used for all the segments if no ``label`` column is found,
        instead of an empty string.

        If one of these sets of columns
        (``{"start_ind", "length"}`` or ``{"start_s", "stop_s"}``)
        is not found in the csv, then an error will be raised.
        You can have the :meth:`vocalpy.Segments.from_csv` method
        rename columns for you after it loads the csv file
        into a :class:`pandas.DataFrame` using the ``columns_map`` argument;
        see example below.
        All other columns are ignored;
        you do not need to somehow remove them to load the file.

        Parameters
        ----------
        csv_path : string or pathlib.Path
            Path to csv file.
        samplerate : int
            The sampling rate of the audio signal
            that was segmented to produce these segments.
        columns_map : dict, optional
            Mapping that will be used to rename columns in the csv.
            Values must be drawn from :attr:`VALID_COLUMNS_MAP_VALUES`.
        default_label : str, optional
            String, a default that is assigned as the label to all segments.
        read_csv_kwargs : dict, optional
            Keyword arguments to pass to :func:`pandas.read_csv` function.

        Returns
        -------
        segments : vocalpy.Segments

        Raises
        ------
        TypeError
            If ``samplerate`` is not an int, if ``read_csv_kwargs`` or
            ``columns_map`` is not a dict, or if ``default_label`` is used
            but is not a string.
        ValueError
            If ``samplerate`` is less than 1, if ``columns_map`` has
            non-string keys or values or values that are not valid
            column names, or if neither required pair of columns
            is found in the csv.

        Examples
        --------
        The main use of this method is to load a set of line segments
        from a csv file created by another library or a script.

        If the column names in the csv do not match the column names
        that `vocalpy.Segments` expects, you can have the
        `vocalpy.Segments.from_csv` method rename the columns for you
        after loading the csv, using the `columns_map` argument.

        Here is an example of renaming columns to the expected names
        "start_s" and "stop_s". After renaming, the values in these columns
        are then converted to the starting indices and lengths of segments
        using the `samplerate`.

        >>> jourjine = voc.example("jourjine-et-al-2023", return_path=True)
        >>> sound = voc.Sound.read(jourjine.sound)
        >>> csv_path = jourjine.segments
        >>> columns_map = {"start_seconds": "start_s", "stop_seconds": "stop_s"}
        >>> segments = voc.Segments.from_csv(csv_path, samplerate=sound.samplerate, columns_map=columns_map)
        >>> print(segments)
        Segments(start_inds=array([ 131... 149767168]), lengths=array([40447,...29696, 25087]), samplerate=250000, labels=['', '', '', '', '', '', ...])

        Notes
        -----
        This method is provided as a convenience for the case where
        you have a segmentation saved in a csv file,
        e.g., from a :class:`pandas.DataFrame`,
        that was created by another library or script.
        If you are working mainly with :mod:`vocalpy`,
        you should prefer to load a set of segments with
        :meth:`~vocalpy.Segments.from_json`,
        and to save the set of segments with
        :meth:`~vocalpy.Segments.to_json`,
        since this avoids needing to keep track of
        the `samplerate` value separately.
        """
        if not isinstance(samplerate, int):
            raise TypeError(
                f"The `samplerate` argument must be an int but type was: {type(samplerate)}"
            )
        if samplerate < 1:
            raise ValueError(
                f"The `samplerate` argument must be a positive number but value was: {samplerate}"
            )

        if read_csv_kwargs is None:
            read_csv_kwargs = {}
        elif not isinstance(read_csv_kwargs, dict):
            raise TypeError(
                f"The `read_csv_kwargs` must be a `dict` but type was: {type(read_csv_kwargs)}"
            )

        df = pd.read_csv(csv_path, **read_csv_kwargs)

        if columns_map is not None:
            if not isinstance(columns_map, dict):
                raise TypeError(
                    f"The `columns_map` argument must be a `dict` but type was: {type(columns_map)}"
                )
            if not all(
                isinstance(k, str) and isinstance(v, str)
                for k, v in columns_map.items()
            ):
                raise ValueError(
                    "The `columns_map` argument must be a dict that maps string keys to string values, "
                    "but not all keys and values were strings."
                )
            invalid_values = [
                v
                for v in columns_map.values()
                if v not in cls.VALID_COLUMNS_MAP_VALUES
            ]
            if invalid_values:
                raise ValueError(
                    f"The `columns_map` argument must map keys (column names in the csv) "
                    'to either {"start_s", "stop_s"} or {"start_ind", "length"}. '
                    f"The following values are invalid: {invalid_values}"
                )
            # keys of `columns_map` that are not columns in the csv are ignored
            df = df.rename(columns=columns_map)

        if "label" not in df.columns and default_label is not None:
            if not isinstance(default_label, str):
                raise TypeError(
                    f"The `default_label` argument must be a string but type was: {type(default_label)}"
                )
            df["label"] = default_label

        labels = df["label"].tolist() if "label" in df.columns else None

        if "start_ind" in df.columns and "length" in df.columns:
            return cls(
                start_inds=df["start_ind"].to_numpy(),
                lengths=df["length"].to_numpy(),
                labels=labels,
                samplerate=samplerate,
            )

        if "start_s" in df.columns and "stop_s" in df.columns:
            start_s = df["start_s"].to_numpy(dtype=float)
            stop_s = df["stop_s"].to_numpy(dtype=float)
            # Round to the nearest sample instead of truncating:
            # floating point error can put a product just below
            # the intended integer, e.g. ``0.29 * 100 == 28.999999999999996``,
            # and truncation would then be off by one sample.
            start_inds = np.round(start_s * samplerate).astype(int)
            lengths = np.round((stop_s - start_s) * samplerate).astype(int)
            return cls(
                start_inds=start_inds,
                lengths=lengths,
                labels=labels,
                samplerate=samplerate,
            )

        raise ValueError(
            "The csv file loaded from `csv_path` must either have columns {'start_ind', 'length'} "
            "or {'start_s', 'stop_s'}, but neither pair was found. "
            f"Columns in the `pandas.DataFrame` loaded from the csv file are: {df.columns}\n"
            "To have the `vocalpy.Segments.from_csv` method rename the columns for you, "
            "use the `columns_map` argument. Type `help(voc.Segments)` or, in iPython, `voc.Segments?`, "
            "to see examples of using this and other arguments."
        )

    def __len__(self):
        """Return the number of segments."""
        # use the stored array; `start_times` would allocate a new float array
        return len(self.start_inds)

    def __eq__(self, other: object) -> bool:
        """Return True if ``other`` is a :class:`Segments` with equal attributes.

        Two instances are equal when ``start_inds``, ``lengths``,
        ``samplerate`` and ``labels`` are all equal.
        """
        if not isinstance(other, Segments):
            # lets Python try the reflected comparison;
            # ``==`` still evaluates to False for unrelated types
            return NotImplemented
        return (
            np.array_equal(self.start_inds, other.start_inds)
            and np.array_equal(self.lengths, other.lengths)
            and self.samplerate == other.samplerate
            and self.labels == other.labels
        )