Source code for vocalpy.metrics.segmentation.ir

"""Metrics for segmentation adapted from information retrieval."""

from __future__ import annotations

import attr
import numpy as np
import numpy.typing as npt

from ... import validators


def find_hits(
    hypothesis: npt.NDArray,
    reference: npt.NDArray,
    tolerance: float | int | None = None,
    decimals: int | bool | None = None,
) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]:
    r"""Find hits in arrays of event times.

    This is a helper function used to compute information retrieval metrics.
    Specifically, this function is called by
    :func:`~vocalpy.metrics.segmentation.ir.precision_recall_fscore`.

    An element in ``hypothesis`` with value :math:`t_h` is a candidate hit
    if it falls within the closed interval around some value :math:`t_0`
    in ``reference`` defined by ``tolerance`` (:math:`\Delta t`):
    :math:`t_0 - \Delta t \leq t_h \leq t_0 + \Delta t`.
    The interval is inclusive, so the default tolerance of zero
    still counts exact matches as hits.

    Hits are one-to-one: every element in ``reference`` is paired with at most
    one element in ``hypothesis``, and vice versa. Candidate pairs are accepted
    greedily in order of increasing absolute difference, so when several
    candidates compete for the same element, the pair with the smallest
    difference is the one considered a hit.

    Both ``hypothesis`` and ``reference`` must be 1-dimensional arrays of
    non-negative, strictly increasing values, with the same dtype.
    If you have two arrays ``onsets`` and ``offsets``, you can concatenate
    those into a single valid array of boundary times using
    :func:`concat_starts_and_stops` that you can then pass to this function.

    Parameters
    ----------
    hypothesis : numpy.ndarray
        Boundaries, e.g., onsets or offsets of segments,
        as computed by some method.
    reference : numpy.ndarray
        Ground truth boundaries that the hypothesized
        boundaries ``hypothesis`` are compared to.
    tolerance : float or int, optional
        Tolerance, in the same units as the boundary times (e.g., seconds).
        Must be non-negative, a ``float`` when the arrays are floating point
        and an ``int`` when the arrays are integers.
        Default is None, in which case it is set to ``0``
        (either float or int, depending on the dtype of ``reference``).
    decimals : int or False, optional
        The number of decimal places to round both ``hypothesis``
        and ``reference`` to, using :func:`numpy.round`.
        This mitigates inflated error rates due to floating point error.
        Rounding is only applied to floating point arrays.
        To avoid rounding, e.g. to compute strict precision and recall,
        pass in the value ``False``.
        Default is None, which for floating point arrays means 3,
        assuming that the values are in seconds
        and should be rounded to milliseconds.
        Must be left as None when the arrays are integers.

    Returns
    -------
    hits_ref : numpy.ndarray
        The indices of hits in ``reference``, in increasing order.
        Always has an integer dtype, even when there are no hits,
        so that it can be used directly as an index.
    hits_hyp : numpy.ndarray
        The indices of hits in ``hypothesis``,
        aligned element-wise with ``hits_ref``.
    diffs : numpy.ndarray
        Absolute differences :math:`|hit^{ref}_i - hit^{hyp}_i|`,
        i.e., ``np.abs(reference[hits_ref] - hypothesis[hits_hyp])``
        (computed after rounding, when rounding is applied).

    Raises
    ------
    ValueError
        If ``tolerance`` is negative, if ``decimals`` is negative or is
        neither ``False`` nor an integer, or if ``decimals`` is specified
        for integer arrays.
    TypeError
        If the type of ``tolerance`` does not match the dtype of the arrays,
        or if ``tolerance`` is None and the dtype is neither
        floating point nor integer.

    Notes
    -----
    The input arrays are first checked with
    ``validators.is_valid_boundaries_array`` and
    ``validators.have_same_dtype``; any exception those raise propagates.
    """
    validators.is_valid_boundaries_array(hypothesis)  # 1-d, non-negative, strictly increasing
    validators.is_valid_boundaries_array(reference)
    validators.have_same_dtype(hypothesis, reference)

    is_floating = issubclass(reference.dtype.type, np.floating)
    is_integer = issubclass(reference.dtype.type, np.integer)

    if tolerance is None:
        if is_floating:
            tolerance = 0.0
        elif is_integer:
            tolerance = 0
        else:
            # Without this, the comparison below fails with an opaque
            # "'<' not supported between NoneType and int" TypeError.
            raise TypeError(
                "Cannot determine a default ``tolerance`` for arrays with dtype "
                f"{reference.dtype}; dtype must be floating point or integer"
            )

    if tolerance < 0:
        raise ValueError(
            f"``tolerance`` must be a non-negative number but was: {tolerance}"
        )

    # ``None``, ``False`` and ``0`` are all falsy and all acceptable here.
    if decimals and not isinstance(decimals, int):
        raise ValueError(
            f"``decimals`` must either be ``False`` or an integer but was: {decimals}"
        )

    if is_floating:
        if not isinstance(tolerance, float):
            raise TypeError(
                "If ``hypothesis`` and ``reference`` are floating, tolerance must be a float also, "
                f"but type was: {type(tolerance)}"
            )
        if decimals is None:
            decimals = 3
        if decimals < 0:
            raise ValueError(
                f"``decimals`` must be a non-negative number but was: {decimals}"
            )
        if decimals is not False:
            # we assume float values are in units of seconds and round to ``decimals``,
            # the default is 3 to indicate "milliseconds"
            reference = np.round(reference, decimals=decimals)
            hypothesis = np.round(hypothesis, decimals=decimals)

    if is_integer:
        if not isinstance(tolerance, int):
            raise TypeError(
                "If ``hypothesis`` and ``reference`` are integers, tolerance must be an integer also, "
                f"but type was: {type(tolerance)}"
            )
        if decimals is not None:
            raise ValueError(
                "Cannot specify a ``decimals`` value when dtype of arrays is int"
            )

    # Pairwise absolute differences, shape (reference.size, hypothesis.size).
    # Computed as max - min rather than abs(ref - hyp) so that unsigned
    # integer dtypes cannot wrap around; the result is otherwise identical.
    diffs = np.maximum.outer(reference, hypothesis) - np.minimum.outer(
        reference, hypothesis
    )
    hits_ref, hits_hyp = np.nonzero(diffs <= tolerance)
    diffs_in_window = diffs[hits_ref, hits_hyp]

    # Now force the hits to be one-to-one. We visit candidate pairs from the
    # smallest absolute difference to the largest (the stable sort keeps ties
    # in row-major order) and accept a pair only if neither of its elements
    # has been matched already.
    order = np.argsort(diffs_in_window, kind="stable")
    matched_ref: set[int] = set()
    matched_hyp: set[int] = set()
    kept = []  # positions in ``hits_ref`` / ``hits_hyp`` of the accepted pairs
    candidates = zip(
        hits_ref[order].tolist(), hits_hyp[order].tolist(), order.tolist()
    )
    for ref_ind, hyp_ind, position in candidates:
        if ref_ind in matched_ref or hyp_ind in matched_hyp:
            continue
        matched_ref.add(ref_ind)
        matched_hyp.add(hyp_ind)
        kept.append(position)

    # An explicit integer dtype keeps the (possibly empty) outputs usable
    # as index arrays; ``np.array([])`` would default to float64.
    kept = np.array(kept, dtype=np.intp)
    hits_ref_out = hits_ref[kept]
    hits_hyp_out = hits_hyp[kept]
    diffs_out = diffs_in_window[kept]

    sort_inds = np.argsort(hits_ref_out)
    return hits_ref_out[sort_inds], hits_hyp_out[sort_inds], diffs_out[sort_inds]
@attr.define
class IRMetricData:
    """Container for the hits behind an information retrieval metric.

    Holds the data needed to compute metrics like precision and recall
    for estimated event times compared to reference event times.
    The attributes are the three arrays returned by
    :func:`vocalpy.metrics.segmentation.ir.find_hits`.

    Instances of this class are returned along with the value
    of the computed metric. The values can be useful when computing
    additional statistics, e.g., the classes of segments that had higher
    or lower precision or recall, or the distribution of differences
    between reference times and estimated times for some class of events.

    Attributes
    ----------
    hits_ref : numpy.ndarray
        The indices of hits in ``reference``.
    hits_hyp : numpy.ndarray
        The indices of hits in ``hypothesis``.
    diffs : numpy.ndarray
        Absolute differences :math:`|hit^{ref}_i - hit^{hyp}_i|`,
        i.e., ``np.abs(reference[hits_ref] - hypothesis[hits_hyp])``.
    """

    hits_ref: npt.NDArray
    hits_hyp: npt.NDArray
    diffs: npt.NDArray
def precision_recall_fscore(
    hypothesis: npt.NDArray,
    reference: npt.NDArray,
    metric: str,
    tolerance: float | int | None = None,
    decimals: int | bool | None = None,
) -> tuple[float, int, IRMetricData]:
    r"""Helper function that computes precision, recall, and the F-score.

    Since all these metrics require computing the number of true positives,
    and F-score is a combination of precision and recall,
    we rely on this helper function to compute them.
    You can compute each directly without needing the ``metric`` argument
    that this function requires by calling the appropriate function:
    :func:`~vocalpy.metrics.segmentation.ir.precision`,
    :func:`~vocalpy.metrics.segmentation.ir.recall`, and
    :func:`~vocalpy.metrics.segmentation.ir.fscore`.
    See docstrings of those functions for definitions of the metrics
    in terms of segmentation algorithms.

    Precision, recall, and F-score are computed using hits found with
    :func:`vocalpy.metrics.segmentation.ir.find_hits`.
    See docstring of that function for more detail on how hits are computed.

    Both ``hypothesis`` and ``reference`` must be 1-dimensional arrays of
    non-negative, strictly increasing values.
    If you have two arrays ``onsets`` and ``offsets``, you can concatenate
    those into a single valid array of boundary times using
    :func:`concat_starts_and_stops` that you can then pass to this function.

    Parameters
    ----------
    hypothesis : numpy.ndarray
        Boundaries, e.g., onsets or offsets of segments,
        as computed by some method.
    reference : numpy.ndarray
        Ground truth boundaries that the hypothesized
        boundaries ``hypothesis`` are compared to.
    metric : str
        The name of the metric to compute.
        One of: ``{"precision", "recall", "fscore"}``.
    tolerance : float or int, optional
        Tolerance, in the same units as the boundary times (e.g., seconds).
        An element in ``hypothesis`` can count as a true positive
        if it lies within plus or minus ``tolerance`` of a boundary
        in ``reference``.
        Default is None, in which case it is set to ``0``
        (either float or int, depending on the dtype of
        ``hypothesis`` and ``reference``).
        See notes for more detail.
    decimals : int or False, optional
        The number of decimal places to round both ``hypothesis``
        and ``reference`` to, using :func:`numpy.round`.
        This mitigates inflated error rates due to floating point error.
        Rounding is only applied if both ``hypothesis`` and ``reference``
        are floating point values. To avoid rounding,
        e.g. to compute strict precision and recall,
        pass in the value ``False``.
        Default is None, which for floating point arrays means 3,
        assuming that the values are in seconds
        and should be rounded to milliseconds.

    Returns
    -------
    metric_value : float
        Value for ``metric``.
    n_tp : int
        The number of true positives.
    metric_data : IRMetricData
        Instance of :class:`IRMetricData` with indices of hits in both
        ``hypothesis`` and ``reference``, and the absolute difference
        between times in ``hypothesis`` and ``reference`` for the hits.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the three supported names.

    Notes
    -----
    If both ``hypothesis`` and ``reference`` are empty, the score is 1.0
    with zero hits; if only one of them is empty, the score is 0.0.

    The addition of a tolerance parameter is based on [1]_.
    This is also sometimes known as a "collar" [2]_
    or "forgiveness collar" [3]_.
    The value for the tolerance can be determined by visual inspection
    of the distribution; see for example [4]_.

    References
    ----------
    .. [1] Kemp, T., Schmidt, M., Westphal, M., & Waibel, A. (2000, June).
       Strategies for automatic segmentation of audio data.
       In 2000 ieee international conference on acoustics, speech,
       and signal processing. proceedings (cat. no. 00ch37100)
       (Vol. 3, pp. 1423-1426). IEEE.
    .. [2] Jordán, P. G., & Giménez, A. O. (2023).
       Advances in Binary and Multiclass Sound Segmentation
       with Deep Learning Techniques.
    .. [3] NIST. (2009). The 2009 (RT-09) Rich Transcription Meeting
       Recognition Evaluation Plan.
       <https://web.archive.org/web/20100606041157if_/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf>
    .. [4] Du, P., & Troyer, T. W. (2006).
       A segmentation algorithm for zebra finch song at the note level.
       Neurocomputing, 69(10-12), 1375-1379.
    """
    if metric not in {"precision", "recall", "fscore"}:
        raise ValueError(
            f'``metric`` must be one of: {{"precision", "recall", "fscore"}} but was: {metric}'
        )

    n_ref, n_hyp = len(reference), len(hypothesis)
    if n_ref == 0 or n_hyp == 0:
        # Edge case: if both reference and hypothesis have a length of zero,
        # we have a score of 1.0 but no hits. This is to avoid punishing the
        # correct hypothesis that there are no boundaries.
        # See https://github.com/vocalpy/vocalpy/issues/170
        # If only one of the two has no boundaries, we get no score.
        score = 1.0 if (n_ref == 0 and n_hyp == 0) else 0.0
        no_hits = IRMetricData(
            # integer dtype so the empty arrays remain valid index arrays
            hits_ref=np.array([], dtype=np.intp),
            hits_hyp=np.array([], dtype=np.intp),
            diffs=np.array([]),
        )
        return score, 0, no_hits

    hits_ref, hits_hyp, diffs = find_hits(
        hypothesis, reference, tolerance, decimals
    )
    metric_data = IRMetricData(hits_ref, hits_hyp, diffs)
    n_tp = hits_hyp.size

    # Both sizes are known to be non-zero here, so neither division can fail.
    precision_ = n_tp / hypothesis.size
    recall_ = n_tp / reference.size

    if metric == "precision":
        return precision_, n_tp, metric_data
    if metric == "recall":
        return recall_, n_tp, metric_data

    # metric == "fscore"
    if np.isclose(precision_, 0.0) and np.isclose(recall_, 0.0):
        # avoids divide-by-zero that would give NaN
        return 0.0, n_tp, metric_data
    fscore_ = 2 * (precision_ * recall_) / (precision_ + recall_)
    return fscore_, n_tp, metric_data
def precision(
    hypothesis: npt.NDArray,
    reference: npt.NDArray,
    tolerance: float | int | None = None,
    decimals: int | bool | None = None,
) -> tuple[float, int, IRMetricData]:
    r"""Compute precision :math:`P` for a segmentation.

    Computes the metric from a hypothesized vector of boundaries
    ``hypothesis`` returned by a segmentation algorithm
    and a reference vector of boundaries ``reference``,
    e.g., boundaries cleaned by a human expert
    or boundaries from a benchmark dataset.

    Precision is defined as the number of true positives (:math:`T_p`)
    over the number of true positives
    plus the number of false positives (:math:`F_p`):
    :math:`P = \frac{T_p}{T_p+F_p}`.

    The number of true positives ``n_tp`` is computed by calling
    :func:`vocalpy.metrics.segmentation.ir.find_hits`.
    This function then computes the precision as
    ``precision = n_tp / hypothesis.size``.

    Both ``hypothesis`` and ``reference`` must be 1-dimensional arrays of
    non-negative, strictly increasing values.
    If you have two arrays ``onsets`` and ``offsets``, you can concatenate
    those into a single valid array of boundary times using
    :func:`concat_starts_and_stops` that you can then pass to this function.

    Parameters
    ----------
    hypothesis : numpy.ndarray
        Boundaries, e.g., onsets or offsets of segments,
        as computed by some method.
    reference : numpy.ndarray
        Ground truth boundaries that the hypothesized
        boundaries ``hypothesis`` are compared to.
    tolerance : float or int, optional
        Tolerance, in the same units as the boundary times (e.g., seconds).
        An element in ``hypothesis`` can count as a true positive
        if it lies within plus or minus ``tolerance`` of a boundary
        in ``reference``.
        Default is None, in which case it is set to ``0``
        (either float or int, depending on the dtype of
        ``hypothesis`` and ``reference``).
    decimals : int or False, optional
        The number of decimal places to round both ``hypothesis``
        and ``reference`` to, using :func:`numpy.round`.
        This mitigates inflated error rates due to floating point error.
        Rounding is only applied if both ``hypothesis`` and ``reference``
        are floating point values. To avoid rounding,
        e.g. to compute strict precision and recall,
        pass in the value ``False``.
        Default is None, which for floating point arrays means 3,
        assuming that the values are in seconds
        and should be rounded to milliseconds.

    Returns
    -------
    precision : float
        Value for precision, computed as described above.
    n_tp : int
        The number of true positives.
    metric_data : IRMetricData
        Instance of :class:`IRMetricData` with indices of hits in both
        ``hypothesis`` and ``reference``, and the absolute difference
        between times in ``hypothesis`` and ``reference`` for the hits.

    Examples
    --------
    >>> hypothesis = np.array([1, 6, 10, 16])
    >>> reference = np.array([0, 5, 10, 15])
    >>> prec, n_tp, ir_metric_data = vocalpy.metrics.segmentation.ir.precision(hypothesis, reference, tolerance=0)
    >>> print(prec)
    0.25
    >>> print(ir_metric_data.hits_hyp)
    [2]

    >>> hypothesis = np.array([0, 1, 5, 10])
    >>> reference = np.array([0, 5, 10])
    >>> prec, n_tp, ir_metric_data = vocalpy.metrics.segmentation.ir.precision(hypothesis, reference, tolerance=1)
    >>> print(prec)
    0.75
    >>> print(ir_metric_data.hits_hyp)
    [0 2 3]

    Notes
    -----
    The addition of a tolerance parameter is based on [1]_.
    This is also sometimes known as a "collar" [2]_
    or "forgiveness collar" [3]_.
    The value for the tolerance can be determined by visual inspection
    of the distribution; see for example [4]_.

    References
    ----------
    .. [1] Kemp, T., Schmidt, M., Westphal, M., & Waibel, A. (2000, June).
       Strategies for automatic segmentation of audio data.
       In 2000 ieee international conference on acoustics, speech,
       and signal processing. proceedings (cat. no. 00ch37100)
       (Vol. 3, pp. 1423-1426). IEEE.
    .. [2] Jordán, P. G., & Giménez, A. O. (2023).
       Advances in Binary and Multiclass Sound Segmentation
       with Deep Learning Techniques.
    .. [3] NIST. (2009). The 2009 (RT-09) Rich Transcription Meeting
       Recognition Evaluation Plan.
       https://web.archive.org/web/20100606041157if_/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf
    .. [4] Du, P., & Troyer, T. W. (2006).
       A segmentation algorithm for zebra finch song at the note level.
       Neurocomputing, 69(10-12), 1375-1379.
    """
    return precision_recall_fscore(
        hypothesis=hypothesis,
        reference=reference,
        metric="precision",
        tolerance=tolerance,
        decimals=decimals,
    )
def recall(
    hypothesis: npt.NDArray,
    reference: npt.NDArray,
    tolerance: float | int | None = None,
    decimals: int | bool | None = None,
) -> tuple[float, int, IRMetricData]:
    r"""Compute recall :math:`R` for a segmentation.

    Computes the metric from a hypothesized vector of boundaries
    ``hypothesis`` returned by a segmentation algorithm
    and a reference vector of boundaries ``reference``,
    e.g., boundaries cleaned by a human expert
    or boundaries from a benchmark dataset.

    Recall (:math:`R`) is defined as the number of true positives
    (:math:`T_p`) over the number of true positives
    plus the number of false negatives (:math:`F_n`):
    :math:`R = \frac{T_p}{T_p + F_n}`.

    The number of true positives ``n_tp`` is computed by calling
    :func:`vocalpy.metrics.segmentation.ir.find_hits`.
    This function then computes the recall as
    ``recall = n_tp / reference.size``.

    Both ``hypothesis`` and ``reference`` must be 1-dimensional arrays of
    non-negative, strictly increasing values.
    If you have two arrays ``onsets`` and ``offsets``, you can concatenate
    those into a single valid array of boundary times using
    :func:`concat_starts_and_stops` that you can then pass to this function.

    Parameters
    ----------
    hypothesis : numpy.ndarray
        Boundaries, e.g., onsets or offsets of segments,
        as computed by some method.
    reference : numpy.ndarray
        Ground truth boundaries that the hypothesized
        boundaries ``hypothesis`` are compared to.
    tolerance : float or int, optional
        Tolerance, in the same units as the boundary times (e.g., seconds).
        An element in ``hypothesis`` can count as a true positive
        if it lies within plus or minus ``tolerance`` of a boundary
        in ``reference``.
        Default is None, in which case it is set to ``0``
        (either float or int, depending on the dtype of
        ``hypothesis`` and ``reference``).
    decimals : int or False, optional
        The number of decimal places to round both ``hypothesis``
        and ``reference`` to, using :func:`numpy.round`.
        This mitigates inflated error rates due to floating point error.
        Rounding is only applied if both ``hypothesis`` and ``reference``
        are floating point values. To avoid rounding,
        e.g. to compute strict precision and recall,
        pass in the value ``False``.
        Default is None, which for floating point arrays means 3,
        assuming that the values are in seconds
        and should be rounded to milliseconds.

    Returns
    -------
    recall : float
        Value for recall, computed as described above.
    n_tp : int
        The number of true positives.
    metric_data : IRMetricData
        Instance of :class:`IRMetricData` with indices of hits in both
        ``hypothesis`` and ``reference``, and the absolute difference
        between times in ``hypothesis`` and ``reference`` for the hits.

    Examples
    --------
    >>> hypothesis = np.array([1, 6, 10, 16])
    >>> reference = np.array([0, 5, 10, 15])
    >>> rec, n_tp, ir_metric_data = vocalpy.metrics.segmentation.ir.recall(hypothesis, reference, tolerance=0)
    >>> print(rec)
    0.25
    >>> print(ir_metric_data.hits_hyp)
    [2]

    >>> hypothesis = np.array([0, 1, 5, 10])
    >>> reference = np.array([0, 5, 10])
    >>> rec, n_tp, ir_metric_data = vocalpy.metrics.segmentation.ir.recall(hypothesis, reference, tolerance=1)
    >>> print(rec)
    1.0
    >>> print(ir_metric_data.hits_hyp)
    [0 2 3]

    Notes
    -----
    The addition of a tolerance parameter is based on [1]_.
    This is also sometimes known as a "collar" [2]_
    or "forgiveness collar" [3]_.
    The value for the tolerance can be determined by visual inspection
    of the distribution; see for example [4]_.

    References
    ----------
    .. [1] Kemp, T., Schmidt, M., Westphal, M., & Waibel, A. (2000, June).
       Strategies for automatic segmentation of audio data.
       In 2000 ieee international conference on acoustics, speech,
       and signal processing. proceedings (cat. no. 00ch37100)
       (Vol. 3, pp. 1423-1426). IEEE.
    .. [2] Jordán, P. G., & Giménez, A. O. (2023).
       Advances in Binary and Multiclass Sound Segmentation
       with Deep Learning Techniques.
    .. [3] NIST. (2009). The 2009 (RT-09) Rich Transcription Meeting
       Recognition Evaluation Plan.
       https://web.archive.org/web/20100606041157if_/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf
    .. [4] Du, P., & Troyer, T. W. (2006).
       A segmentation algorithm for zebra finch song at the note level.
       Neurocomputing, 69(10-12), 1375-1379.
    """
    return precision_recall_fscore(
        hypothesis=hypothesis,
        reference=reference,
        metric="recall",
        tolerance=tolerance,
        decimals=decimals,
    )
def fscore(
    hypothesis: npt.NDArray,
    reference: npt.NDArray,
    tolerance: float | int | None = None,
    decimals: int | bool | None = None,
) -> tuple[float, int, IRMetricData]:
    r"""Compute the F-score for a segmentation.

    Computes the metric from a hypothesized vector of boundaries
    ``hypothesis`` returned by a segmentation algorithm
    and a reference vector of boundaries ``reference``,
    e.g., boundaries cleaned by a human expert
    or boundaries from a benchmark dataset.

    The F-score can be interpreted as a harmonic mean of the precision
    and recall, where an F-score reaches its best value at 1
    and worst score at 0. The relative contribution of precision
    and recall to the F-score are equal.
    The formula for the F-score is:
    ``f_score = 2 * (precision * recall) / (precision + recall)``

    Both ``hypothesis`` and ``reference`` must be 1-dimensional arrays of
    non-negative, strictly increasing values.
    If you have two arrays ``onsets`` and ``offsets``, you can concatenate
    those into a single valid array of boundary times using
    :func:`concat_starts_and_stops` that you can then pass to this function.

    Parameters
    ----------
    hypothesis : numpy.ndarray
        Boundaries, e.g., onsets or offsets of segments,
        as computed by some method.
    reference : numpy.ndarray
        Ground truth boundaries that the hypothesized
        boundaries ``hypothesis`` are compared to.
    tolerance : float or int, optional
        Tolerance, in the same units as the boundary times (e.g., seconds).
        An element in ``hypothesis`` can count as a true positive
        if it lies within plus or minus ``tolerance`` of a boundary
        in ``reference``.
        Default is None, in which case it is set to ``0``
        (either float or int, depending on the dtype of
        ``hypothesis`` and ``reference``).
    decimals : int or False, optional
        The number of decimal places to round both ``hypothesis``
        and ``reference`` to, using :func:`numpy.round`.
        This mitigates inflated error rates due to floating point error.
        Rounding is only applied if both ``hypothesis`` and ``reference``
        are floating point values. To avoid rounding,
        e.g. to compute strict precision and recall,
        pass in the value ``False``.
        Default is None, which for floating point arrays means 3,
        assuming that the values are in seconds
        and should be rounded to milliseconds.

    Returns
    -------
    f_score : float
        Value for F-score, computed as described above.
    n_tp : int
        The number of true positives.
    metric_data : IRMetricData
        Instance of :class:`IRMetricData` with indices of hits in both
        ``hypothesis`` and ``reference``, and the absolute difference
        between times in ``hypothesis`` and ``reference`` for the hits.

    Examples
    --------
    >>> hypothesis = np.array([1, 6, 10, 16])
    >>> reference = np.array([0, 5, 10, 15])
    >>> f_score, n_tp, ir_metric_data = vocalpy.metrics.segmentation.ir.fscore(hypothesis, reference, tolerance=0)
    >>> print(f_score)
    0.25
    >>> print(ir_metric_data.hits_hyp)
    [2]

    Here precision is 3/4 and recall is 3/3, so the F-score is 6/7:

    >>> hypothesis = np.array([0, 1, 5, 10])
    >>> reference = np.array([0, 5, 10])
    >>> f_score, n_tp, ir_metric_data = vocalpy.metrics.segmentation.ir.fscore(hypothesis, reference, tolerance=1)
    >>> print(f_score)
    0.8571428571428571
    >>> print(ir_metric_data.hits_hyp)
    [0 2 3]

    Notes
    -----
    The addition of a tolerance parameter is based on [1]_.
    This is also sometimes known as a "collar" [2]_
    or "forgiveness collar" [3]_.
    The value for the tolerance can be determined by visual inspection
    of the distribution; see for example [4]_.

    References
    ----------
    .. [1] Kemp, T., Schmidt, M., Westphal, M., & Waibel, A. (2000, June).
       Strategies for automatic segmentation of audio data.
       In 2000 ieee international conference on acoustics, speech,
       and signal processing. proceedings (cat. no. 00ch37100)
       (Vol. 3, pp. 1423-1426). IEEE.
    .. [2] Jordán, P. G., & Giménez, A. O. (2023).
       Advances in Binary and Multiclass Sound Segmentation
       with Deep Learning Techniques.
    .. [3] NIST. (2009). The 2009 (RT-09) Rich Transcription Meeting
       Recognition Evaluation Plan.
       https://web.archive.org/web/20100606041157if_/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf
    .. [4] Du, P., & Troyer, T. W. (2006).
       A segmentation algorithm for zebra finch song at the note level.
       Neurocomputing, 69(10-12), 1375-1379.
    """
    return precision_recall_fscore(
        hypothesis=hypothesis,
        reference=reference,
        metric="fscore",
        tolerance=tolerance,
        decimals=decimals,
    )
def concat_starts_and_stops(
    starts: npt.NDArray, stops: npt.NDArray
) -> npt.NDArray:
    """Concatenate arrays of start and stop times
    into a single array of boundary times.

    Some segmenting algorithms return lists of segments
    denoted by the start and stop times of each segment.
    (You may also see these times called "onsets" and "offsets".)
    Typically, such segmenting algorithms work by setting a threshold
    on some acoustic feature, e.g. the Root-Mean-Square of the spectral power.
    This means the segments will be separated by brief "silent gaps"
    (periods below threshold).
    To compute metrics for segmentation like precision and recall,
    you may want to combine the start and stop times
    into a single array of boundary times.
    Such an approach is valid if we think of a "silent gap" as a segment
    whose start time is the stop time/offset of the preceding segment.
    If you have arrays of start and stop times, you can concatenate them
    into a single array of boundary times with this function.

    Both ``starts`` and ``stops`` must be 1-dimensional arrays of
    non-negative, strictly increasing values, with the same ``dtype``.
    The two arrays ``starts`` and ``stops`` must be the same length,
    and all start times must be less than the corresponding stop times,
    i.e., ``np.all(starts < stops)`` should evaluate to ``True``.

    Parameters
    ----------
    starts : numpy.ndarray
        Array of start times of segments.
    stops : numpy.ndarray
        Array of stop times of segments.

    Returns
    -------
    boundaries : numpy.ndarray
        The array of boundary times, concatenated and then sorted
        in ascending order. No values are removed, so a time that
        appears in both ``starts`` and ``stops`` appears twice.

    Raises
    ------
    ValueError
        If ``starts`` and ``stops`` have different sizes,
        or if any element in ``starts`` is not less than
        the corresponding element in ``stops``.

    Notes
    -----
    The input arrays are first checked with
    ``validators.is_valid_boundaries_array`` and
    ``validators.have_same_dtype``; any exception those raise propagates.

    Examples
    --------
    >>> starts = np.array([0, 8, 16, 24])
    >>> stops = np.array([4, 12, 20, 28])
    >>> concat_starts_and_stops(starts, stops)
    array([ 0,  4,  8, 12, 16, 20, 24, 28])

    >>> starts = np.array([0.000, 8.000, 16.000, 24.000])
    >>> stops = np.array([4.000, 12.000, 20.000, 28.000])
    >>> concat_starts_and_stops(starts, stops)
    array([ 0.,  4.,  8., 12., 16., 20., 24., 28.])
    """
    validators.is_valid_boundaries_array(starts)  # 1-d, non-negative, strictly increasing
    validators.is_valid_boundaries_array(stops)
    validators.have_same_dtype(starts, stops)

    if starts.size != stops.size:
        raise ValueError(
            "Boundary arrays ``starts`` and ``stops`` must have same lengths--"
            "every element in ``starts`` must have a corresponding element in ``stops``--"
            f"but sizes were different: starts.size={starts.size}, stops.size={stops.size}"
        )

    starts_before_stops = starts < stops
    if not np.all(starts_before_stops):
        # Report every offending index, including starts that merely *equal*
        # their stop; looking only for ``starts > stops`` would list nothing
        # in that case even though the check failed.
        not_less = np.nonzero(~starts_before_stops)[0]
        raise ValueError(
            "Every element in ``starts`` must be less than the corresponding element in ``stops``, "
            "but some values in ``starts`` were greater than or equal: "
            f"values at indices {not_less}"
        )

    return np.sort(np.concatenate((starts, stops)))