Source code for vocalpy.signal.audio

"""Signal processing functions for audio."""

from __future__ import annotations

import numbers
import warnings
from typing import Sequence

import numpy as np
import numpy.typing as npt
import scipy.signal

from ..sound import Sound


[docs] def bandpass_filtfilt( sound: Sound, freq_cutoffs: Sequence[int] = (500, 10000) ) -> Sound: """Filter audio with band-pass filter, then perform zero-phase filtering with :func:`scipy.signal.filtfilt`. Parameters ---------- sound: vocalpy.Sound An audio signal. freq_cutoffs : Sequence Cutoff frequencies for bandpass filter. List or tuple with two elements, default is ``(500, 10000)``. Returns ------- sound : vocalpy.Sound New audio instance """ if freq_cutoffs[0] <= 0: raise ValueError( "Low frequency cutoff {} is invalid, must be greater than zero.".format( freq_cutoffs[0] ) ) nyquist_rate = sound.samplerate / 2 if freq_cutoffs[1] >= nyquist_rate: raise ValueError( f"High frequency cutoff ({freq_cutoffs[1]}) is invalid, must be less than Nyquist rate: {nyquist_rate}." ) if sound.data.shape[-1] < 387: numtaps = 64 elif sound.data.shape[-1] < 771: numtaps = 128 elif sound.data.shape[-1] < 1539: numtaps = 256 else: numtaps = 512 cutoffs = np.asarray( [freq_cutoffs[0] / nyquist_rate, freq_cutoffs[1] / nyquist_rate] ) # code on which this is based, bandpass_filtfilt.m, says it uses Hann(ing) # window to design filter, but default for matlab's fir1 # is actually Hamming # note that first parameter for scipy.signal.firwin is filter *length* # whereas argument to matlab's fir1 is filter *order* # for linear FIR, filter length is filter order + 1 b = scipy.signal.firwin(numtaps + 1, cutoffs, pass_zero=False) a = np.zeros((numtaps + 1,)) a[0] = 1 # make an "all-zero filter" padlen = np.max((b.shape[-1] - 1, a.shape[-1] - 1)) filtered = scipy.signal.filtfilt(b, a, sound.data, padlen=padlen) return Sound(data=filtered, samplerate=sound.samplerate)
[docs] def meansquared( sound: Sound, freq_cutoffs=(500, 10000), smooth_win: int = 2 ) -> npt.NDArray: """Convert audio to a Root-Mean-Square-like trace. This function first applies a band-pass filter, and then rectifies the audio signal by squaring. Finally, it smooths by taking the average within a window of size ``smooth_win``. Parameters ---------- sound: vocalpy.Sound An audio signal. Multi-channel is supported. freq_cutoffs : Iterable Cutoff frequencies for bandpass filter. List or tuple with two elements, default is ``(500, 10000)``. smooth_win : integer Size of smoothing window, in milliseconds. Default is ``2``. Returns ------- meansquared : numpy.ndarray The ``vocalpy.Sound.data`` after squaring and smoothing. See Also -------- vocalpy.segment.meansquared """ sound = bandpass_filtfilt(sound, freq_cutoffs) data = np.array(sound.data) if issubclass(data.dtype.type, numbers.Integral): while np.any(np.abs(data) > np.sqrt(np.iinfo(data.dtype).max)): warnings.warn( f"Values in `data` would overflow when squaring because of dtype, {data.dtype};" f"casting to a larger integer dtype to avoid", stacklevel=2, ) # make a new dtype string, endianness + type, plus the current itemsize squared new_dtype_str = str(data.dtype)[:-1] + str( int(str(data.dtype)[-1]) ** 2 ) data = data.astype(np.dtype(new_dtype_str)) squared = np.power(data, 2) len = np.round(sound.samplerate * smooth_win / 1000).astype(int) h = np.ones((len,)) / len # to convolve per channel, seems like the best we can do is a list comprehension # followed by re-building the array. # see https://scipy-cookbook.readthedocs.io/items/ApplyFIRFilter.html smooth = np.array([np.convolve(squared_, h) for squared_ in squared]) # next two lines are basically the same as np.convolve(mode='valid'), but off by one # I think I did it this way to exactly replicate code in evsonganaly offset = round((smooth.shape[-1] - data.shape[-1]) / 2) return smooth[:, offset : data.shape[-1] + offset] # noqa: E203