AOMediaCodec
diff --git a/‎tests/README.md‎
Lines changed: 71 additions & 14 deletions b/‎tests/README.md‎
Lines changed: 71 additions & 14 deletions
diff --git a/‎tests/dsp_utils.py‎
Lines changed: 165 additions & 0 deletions b/‎tests/dsp_utils.py‎
Lines changed: 165 additions & 0 deletions
diff --git a/‎proto/arbitrary_obu.proto‎ ‎tests/proto/arbitrary_obu.proto‎proto/arbitrary_obu.proto renamed to tests/proto/arbitrary_obu.proto b/‎proto/arbitrary_obu.proto‎ ‎tests/proto/arbitrary_obu.proto‎proto/arbitrary_obu.proto renamed to tests/proto/arbitrary_obu.proto
diff --git a/‎proto/audio_element.proto‎ ‎tests/proto/audio_element.proto‎proto/audio_element.proto renamed to tests/proto/audio_element.proto b/‎proto/audio_element.proto‎ ‎tests/proto/audio_element.proto‎proto/audio_element.proto renamed to tests/proto/audio_element.proto
diff --git a/‎proto/audio_frame.proto‎ ‎tests/proto/audio_frame.proto‎proto/audio_frame.proto renamed to tests/proto/audio_frame.proto b/‎proto/audio_frame.proto‎ ‎tests/proto/audio_frame.proto‎proto/audio_frame.proto renamed to tests/proto/audio_frame.proto
diff --git a/‎proto/codec_config.proto‎ ‎tests/proto/codec_config.proto‎proto/codec_config.proto renamed to tests/proto/codec_config.proto b/‎proto/codec_config.proto‎ ‎tests/proto/codec_config.proto‎proto/codec_config.proto renamed to tests/proto/codec_config.proto
diff --git a/‎proto/ia_sequence_header.proto‎ ‎tests/proto/ia_sequence_header.proto‎proto/ia_sequence_header.proto renamed to tests/proto/ia_sequence_header.proto b/‎proto/ia_sequence_header.proto‎ ‎tests/proto/ia_sequence_header.proto‎proto/ia_sequence_header.proto renamed to tests/proto/ia_sequence_header.proto
diff --git a/‎proto/mix_presentation.proto‎ ‎tests/proto/mix_presentation.proto‎proto/mix_presentation.proto renamed to tests/proto/mix_presentation.proto b/‎proto/mix_presentation.proto‎ ‎tests/proto/mix_presentation.proto‎proto/mix_presentation.proto renamed to tests/proto/mix_presentation.proto
diff --git a/‎proto/obu_header.proto‎ ‎tests/proto/obu_header.proto‎proto/obu_header.proto renamed to tests/proto/obu_header.proto b/‎proto/obu_header.proto‎ ‎tests/proto/obu_header.proto‎proto/obu_header.proto renamed to tests/proto/obu_header.proto
diff --git a/‎proto/param_definitions.proto‎ ‎tests/proto/param_definitions.proto‎proto/param_definitions.proto renamed to tests/proto/param_definitions.proto b/‎proto/param_definitions.proto‎ ‎tests/proto/param_definitions.proto‎proto/param_definitions.proto renamed to tests/proto/param_definitions.proto
@@ -34,7 +34,7 @@ Theses file describe metadata about the test vector to encode an
 -   `base_test`: The recommended textproto to diff against.
 -   Other fields refer to the OBUs and data within the test vector.
 
-# Input WAV files
+## Input WAV files
 
 Test vectors may have multiple substreams with several input .wav files. These
 .wav files may be shared with other test vectors. The .textproto file has a
@@ -68,7 +68,7 @@ Title                                                | Summary
 `Transport_TOA_5s.wav`                               | Short clip of vehicles driving by using third-order ambisonics.                                                     | 16       | 48kHz       | pcm_s16le | 5s
 `Transport_9.1.6_5s.wav`                             | Short clip of vehicles driving by using 9.1.6.                                                                      | 16       | 48kHz       | pcm_s16le | 5s
 
-# Output WAV files
+## Output WAV files
 
 Output wav files are based on the
 [layout](https://aomediacodec.github.io/iamf/#syntax-layout) in the mix
@@ -93,25 +93,82 @@ Sound System 12         | IAMF                     | C
 Sound System 13         | IAMF                     | FL, FR, FC, LFE, BL, BR, FLc, FRc, SiL, SiR, TpFL, TpFR, TpBL, TpBR, TpSiL, TpSiR
 Binaural Layout         | IAMF                     | L2, R2
 
-# Verification
+## Decode and Verification
 
-For test cases using Opus or AAC codecs, the average PSNR value must exceed 30, and for the other codecs, an average PSNR value exceeding 80 is considered PASS.
-You can use `psnr_calc.py` file to calculate PSNR between reference and generated output.
+For test cases with lossy codecs (Opus or AAC), the average PSNR value must
+exceed 30; otherwise, the average PSNR must exceed 80.
 
-- How to use `psnr_calc.py` script:
-    ```
-    python psnr_calc.py
-      --dir <directory path containing the target file and reference file>
-      --target <target wav file name>
-      --ref <reference wav file name>
-    ```
+`run_decode_and_psnr_test.py` runs the decoder on all reference test cases and
+compares each decoded output against its reference using PSNR.
+
+Prerequisites:
+
+-   The path to a built `iamfdec`, usually
+    `libiamf/code/test/tools/iamfdec/iamfdec`
+-   `protoc`, and the compiled `libiamf/tests/proto` files.
+-   A python environment with `scipy`, `protobuf`, `tqdm`, `numpy`, `librosa`.
+
+Note that example commands below assume a working directory of `libiamf/tests`.
+
+To compile the proto files run
+
+`protoc -I=proto/ --python_out=proto/ proto/*.proto`
+
+To set up a python environment using pip
+
+```
+python3 -m venv venv
+source venv/bin/activate
+pip install scipy protobuf tqdm numpy librosa
+```
+
+Run the test suite.
+
+Arguments:
 
-- Calculate PSNR values of multiple wav files
+`iamfdec_path`, full path to the built `iamfdec` tool. `test_file_directory`,
+full path to folder containing `.textproto` and reference `.wav` files.
+`working_directory`, full path to write audio files produced by `iamfdec`.
+`csv_summary`, optionally included, full path and filename to write a summary of
+test results.
+
+```
+python3 run_decode_and_psnr_test.py --iamfdec_path /your/full/path/to/libiamf/code/test/tools/iamfdec/iamfdec --test_file_directory /your/full/path/to/libiamf/tests/ --working_directory /your/path/for/scratch/wav/files --csv_summary /your/path/to/write/summary.csv
+```
+
+For a simple configuration, this example will dump all files to the current
+working directory.
+
+`python3 run_decode_and_psnr_test.py --iamfdec_path
+../code/test/tools/iamfdec/iamfdec --test_file_directory $PWD --csv_summary
+$PWD/summary.csv -w $PWD`
+
+Extra arguments:
+
+`regex_filter`, optionally included, regex to filter output files. For example
+`--regex_filter="000100"` will run a single file, or
+`--regex_filter="0001\d{2}"` will process files in the range [test_000100,
+test_000199]. `verbose_test_summary`, turns on verbose logging.
+`--preserve_output_files`, set to keep the output generated `.wav` files,
+otherwise they are deleted.
+
+## Verification Only
+
+For test cases using Opus or AAC codecs, the average PSNR value must exceed 30,
+and for the other codecs, an average PSNR value exceeding 80 is considered PASS.
+You can use `psnr_calc.py` file to calculate PSNR between reference and
+generated output.
+
+-   How to use `psnr_calc.py` script: `python psnr_calc.py --dir <directory path
+    containing the target file and reference file> --target <target wav file
+    name> --ref <reference wav file name> --verbose`
+
+-   Calculate PSNR values of multiple wav files
 
     Multiple files can be entered as `::`
 
     ```
     Example:
 
     python psnr_calc.py --dir . --target target1.wav::target2.wav --ref ref1.wav::ref2.wav
-    ```
+    ```
@@ -0,0 +1,165 @@
+"""PSNR calculation utilities."""
+
+import logging
+import math
+import wave
+import numpy as np
+import scipy.io.wavfile as wavfile
+import librosa
+
+
+def calc_per_channel_psnr_pcm(
+    ref_signal: np.ndarray, signal: np.ndarray, sampwidth_bytes: int
+):
+  """Computes the PSNR of `signal` against `ref_signal` for every channel.
+
+  Args:
+    ref_signal: The reference PCM samples as a numpy array. A 1-D array is
+      treated as mono; otherwise channels are taken along axis 1.
+    signal: The PCM samples to compare, as a numpy array.
+    sampwidth_bytes: The sample width in bytes (e.g. 2 for 16-bit, 3 for
+      24-bit). Must be greater than 1.
+
+  Returns:
+    A list holding one PSNR value in dB per channel. A channel whose samples
+    match the reference exactly is reported as `np.inf`.
+  """
+  assert (
+      sampwidth_bytes > 1
+  ), "Supports sample format: [pcm_s16le, pcm_s24le, pcm_s32le]"
+  peak = 2 ** (sampwidth_bytes * 8) - 1
+
+  # Widen to 64 bits so the difference and its square cannot overflow.
+  error = ref_signal.astype("int64") - signal.astype("int64")
+  mse = np.mean(error**2, axis=0, dtype="float64")
+
+  # A 1-D input is a mono signal and yields a scalar MSE.
+  channel_count = ref_signal.shape[1] if ref_signal.ndim > 1 else 1
+
+  psnr_list = []
+  for ch in range(channel_count):
+    channel_mse = mse if channel_count <= 1 else mse[ch]
+    if channel_mse != 0:
+      psnr_value = 10 * math.log10(peak**2 / channel_mse)
+      psnr_list.append(psnr_value)
+      logging.debug("ch#%d PSNR: %f dB", ch, psnr_value)
+    else:
+      # Identical channels have zero error, i.e. unbounded PSNR.
+      psnr_list.append(np.inf)
+      logging.debug("ch#%d PSNR: inf", ch)
+
+  return psnr_list
+
+
+def calc_per_channel_lsd_pcm(ref_signal: np.ndarray,
+                             signal: np.ndarray,
+                             sampling_rate: int):
+  """Calculates the log spectral distance using Mel bins between two signals.
+
+  Args:
+    ref_signal: The reference signal as a numpy array of an integer dtype. A
+      1-D array is treated as mono; otherwise channels are taken along axis 1.
+    signal: The signal to compare as a numpy array of an integer dtype.
+    sampling_rate: The sampling rate of the signals in Hz.
+
+  Returns:
+    A list holding one log spectral distance value in dB per channel.
+  """
+  # Small offset that keeps log10 finite for empty mel bins.
+  eps = 1e-4
+
+  # Convert to float by scaling with the largest value of the integer dtype.
+  ref_signal = ref_signal / np.iinfo(ref_signal.dtype).max
+  signal = signal / np.iinfo(signal.dtype).max
+
+  lsd_list = []
+
+  # To support mono channel
+  num_channels = 1 if ref_signal.shape[1:] == () else ref_signal.shape[1]
+  for i in range(num_channels):
+    ref_channel = ref_signal[:, i] if num_channels > 1 else ref_signal
+    signal_channel = signal[:, i] if num_channels > 1 else signal
+
+    # Compute mel spectrogram
+    mel_ref = librosa.feature.melspectrogram(y=ref_channel, sr=sampling_rate)
+    mel_signal = librosa.feature.melspectrogram(y=signal_channel,
+                                                sr=sampling_rate)
+
+    log_mel_ref = 10 * np.log10(mel_ref + eps)
+    log_mel_signal = 10 * np.log10(mel_signal + eps)
+
+    diff_squared = (log_mel_ref - log_mel_signal) ** 2
+
+    # Average across mel bins, which is the 0th dimension
+    lsd_per_frame = np.sqrt(np.mean(diff_squared, axis=0))
+
+    # Drop any singleton dimensions before averaging over frames.
+    lsd_per_frame = np.squeeze(lsd_per_frame)
+
+    lsd_value = np.mean(lsd_per_frame)
+    lsd_list.append(lsd_value)
+    # Fixed format string: the channel index placeholder was missing its '%'.
+    logging.debug('ch#%d LSD: %f dB', i, lsd_value)
+
+  return lsd_list
+
+
+def calc_score_wav(ref_filepath: str, target_filepath: str, metric: str):
+  """Calculates the score between two WAV files.
+
+  Args:
+    ref_filepath: Path to the reference WAV file.
+    target_filepath: Path to the target WAV file to compare.
+    metric: one of 'PSNR' or 'LSD'.
+
+  Returns:
+    The score in dB, averaged over all channels, or None when `metric` is not
+    one of the supported values.
+
+  Raises:
+    ValueError: If the wav files have different samplerate, channels,
+      bit-depth or number of samples.
+  """
+  # Read the headers inside a `with` block so both handles are closed even
+  # when one of the consistency checks below raises.
+  with wave.open(ref_filepath, "rb") as ref_wav, wave.open(
+      target_filepath, "rb"
+  ) as target_wav:
+    # Check sampling rate
+    if ref_wav.getframerate() != target_wav.getframerate():
+      raise ValueError(
+          "Sampling rate of reference file and comparison file are different:"
+          f" {ref_filepath} vs {target_filepath}"
+      )
+
+    # Check number of channels
+    if ref_wav.getnchannels() != target_wav.getnchannels():
+      raise ValueError(
+          "Number of channels of reference file and comparison file are"
+          f" different: {ref_filepath} vs {target_filepath}"
+      )
+
+    # Check number of samples
+    if ref_wav.getnframes() != target_wav.getnframes():
+      raise ValueError(
+          "Number of samples of reference file and comparison file are"
+          f" different: {ref_filepath} vs {target_filepath}"
+      )
+
+    # Check bit depth
+    if ref_wav.getsampwidth() != target_wav.getsampwidth():
+      raise ValueError(
+          "Bit depth of reference file and comparison file are different:"
+          f" {ref_filepath} vs {target_filepath}"
+      )
+
+    # Keep the header values needed after the handles are closed.
+    sampwidth_bytes = ref_wav.getsampwidth()
+    sampling_rate = ref_wav.getframerate()
+
+  # Open wav as a np array
+  # NOTE(review): scipy documents 24-bit PCM as returned left-justified in
+  # int32 -- confirm that a 3-byte sample width gives the intended PSNR peak.
+  _, ref_data = wavfile.read(ref_filepath)
+  _, target_data = wavfile.read(target_filepath)
+
+  if metric == 'PSNR':
+    scores_list = calc_per_channel_psnr_pcm(
+        ref_data, target_data, sampwidth_bytes
+    )
+  elif metric == 'LSD':
+    scores_list = calc_per_channel_lsd_pcm(ref_data, target_data,
+                                           sampling_rate)
+  else:
+    # Unknown metric: preserved behavior is to return None rather than raise.
+    return None
+
+  return np.mean(scores_list)