Spaces:

jamtur01
/

MMaDA

Runtime error

App Files Files Community

MMaDA / venv /lib /python3.11 /site-packages /torchmetrics /functional /audio /pesq.py

jamtur01

Upload folder using huggingface_hub

9c6594c verified about 1 month ago

raw

history blame contribute delete

5.03 kB

	# Copyright The Lightning team.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	from typing import Any

	import numpy as np
	import torch
	from torch import Tensor

	from torchmetrics.utilities.checks import _check_same_shape
	from torchmetrics.utilities.imports import _MULTIPROCESSING_AVAILABLE, _PESQ_AVAILABLE

	# pytest-doctestplus convention: the doctests of the listed objects are only run when the
	# listed modules can be imported, so the examples below are skipped if ``pesq`` is missing.
	__doctest_requires__ = {("perceptual_evaluation_speech_quality",): ["pesq"]}


	def perceptual_evaluation_speech_quality(
	    preds: Tensor,
	    target: Tensor,
	    fs: int,
	    mode: str,
	    keep_same_device: bool = False,
	    n_processes: int = 1,
	) -> Tensor:
	    r"""Calculate `Perceptual Evaluation of Speech Quality`_ (PESQ).

	    It's a recognized industry standard for audio quality that takes into considerations characteristics such as: audio
	    sharpness, call volume, background noise, clipping, audio interference etc. PESQ returns a score between -0.5 and
	    4.5 with the higher scores indicating a better quality.

	    This metric is a wrapper for the `pesq package`_. Note that input will be moved to `cpu` to perform the metric
	    calculation.

	    .. hint::
	        Using this metric requires you to have ``pesq`` installed. Either install as ``pip install
	        torchmetrics[audio]`` or ``pip install pesq``. Note that ``pesq`` will compile with your currently
	        installed version of numpy, meaning that if you upgrade numpy at some point in the future you will
	        most likely have to reinstall ``pesq``.

	    Args:
	        preds: float tensor with shape ``(...,time)``
	        target: float tensor with shape ``(...,time)``
	        fs: sampling frequency, should be 16000 or 8000 (Hz)
	        mode: ``'wb'`` (wide-band) or ``'nb'`` (narrow-band)
	        keep_same_device: whether to move the pesq value to the device of preds
	        n_processes: integer specifying the number of processes to run in parallel for the metric calculation.
	            Only applies to batches of data and if ``multiprocessing`` package is installed.

	    Returns:
	        Float tensor of PESQ values. For 1D inputs this is a single value. For inputs with more than one
	        dimension all leading dimensions are flattened and a 1D tensor with one value per sample is returned;
	        entries for which the backend did not produce a numeric score are dropped, so the result may hold
	        fewer values than there are samples. An empty batch yields an empty tensor.

	    Raises:
	        ModuleNotFoundError:
	            If ``pesq`` package is not installed
	        ValueError:
	            If ``fs`` is not either ``8000`` or ``16000``
	        ValueError:
	            If ``mode`` is not either ``"wb"`` or ``"nb"``
	        RuntimeError:
	            If ``preds`` and ``target`` do not have the same shape

	    Example:
	        >>> from torch import randn
	        >>> from torchmetrics.functional.audio.pesq import perceptual_evaluation_speech_quality
	        >>> preds = randn(8000)
	        >>> target = randn(8000)
	        >>> perceptual_evaluation_speech_quality(preds, target, 8000, 'nb')
	        tensor(2.2885)
	        >>> perceptual_evaluation_speech_quality(preds, target, 16000, 'wb')
	        tensor(1.6805)

	    """
	    if not _PESQ_AVAILABLE:
	        raise ModuleNotFoundError(
	            "PESQ metric requires that pesq is installed."
	            " Either install as `pip install torchmetrics[audio]` or `pip install pesq`."
	        )
	    # Imported lazily so that this module can be imported without the optional dependency.
	    import pesq as pesq_backend

	    def _issubtype_number(x: Any) -> bool:
	        return np.issubdtype(type(x), np.number)

	    # Element-wise mask selecting numeric entries. ``otypes`` is given explicitly because
	    # ``np.vectorize`` otherwise infers the output type by calling the function on the first
	    # element and raises ``ValueError`` for size-0 input (i.e. an empty batch).
	    _filter_error_msg = np.vectorize(_issubtype_number, otypes=[bool])

	    if fs not in (8000, 16000):
	        raise ValueError(f"Expected argument `fs` to either be 8000 or 16000 but got {fs}")
	    if mode not in ("wb", "nb"):
	        raise ValueError(f"Expected argument `mode` to either be 'wb' or 'nb' but got {mode}")
	    _check_same_shape(preds, target)

	    if preds.ndim == 1:
	        # Single signal: the backend takes the reference (target) first, then the degraded signal (preds).
	        pesq_val_np = pesq_backend.pesq(fs, target.detach().cpu().numpy(), preds.detach().cpu().numpy(), mode)
	        pesq_val = torch.tensor(pesq_val_np)
	    else:
	        # Collapse every leading dimension into one batch axis of shape ``(batch, time)``.
	        preds_np = preds.reshape(-1, preds.shape[-1]).detach().cpu().numpy()
	        target_np = target.reshape(-1, preds.shape[-1]).detach().cpu().numpy()

	        if _MULTIPROCESSING_AVAILABLE and n_processes != 1:
	            pesq_val_np = pesq_backend.pesq_batch(fs, target_np, preds_np, mode, n_processor=n_processes)
	            pesq_val_np = np.array(pesq_val_np)
	        else:
	            pesq_val_np = np.empty(shape=(preds_np.shape[0]))
	            for b, (target_row, preds_row) in enumerate(zip(target_np, preds_np)):
	                pesq_val_np[b] = pesq_backend.pesq(fs, target_row, preds_row, mode)
	        # Keep only the numeric entries before converting to a float32 tensor.
	        pesq_val = torch.from_numpy(pesq_val_np[_filter_error_msg(pesq_val_np)].astype(np.float32))
	        pesq_val = pesq_val.reshape(len(pesq_val))

	    if keep_same_device:
	        return pesq_val.to(preds.device)

	    return pesq_val