|
49 | 49 | 'voicing', # frame-level RMS of harmonic parts (dB), float32[T_s,] |
50 | 50 | 'tension', # frame-level tension (logit), float32[T_s,] |
51 | 51 | ] |
| 52 | +WAV_CANDIDATE_EXTENSIONS = ['.wav', '.flac'] |
52 | 53 | DS_INDEX_SEP = '#' |
53 | 54 |
|
54 | 55 | # These operators are used as global variables due to a PyTorch shared memory bug on Windows platforms. |
@@ -129,13 +130,26 @@ def require(attr, optional=False): |
129 | 130 | raise ValueError(f'Missing required attribute {attr} of item \'{item_name}\'.') |
130 | 131 | return value |
131 | 132 |
|
| 133 | + wav_fn = None |
| 134 | + for ext in WAV_CANDIDATE_EXTENSIONS: |
| 135 | + candidate_fn = raw_data_dir / 'wavs' / f'{item_name}{ext}' |
| 136 | + if candidate_fn.exists(): |
| 137 | + wav_fn = candidate_fn |
| 138 | + break |
| 139 | + if wav_fn is None and not self.prefer_ds: |
| 140 | + raise FileNotFoundError( |
| 141 | + f'Waveform file not found for item \'{item_name}\'. ' |
| 142 | + f'Candidate extensions: {WAV_CANDIDATE_EXTENSIONS}\n' |
| 143 | + f'If you are using DS files instead of waveform files, please set \'prefer_ds\' to true.' |
| 144 | + ) |
| 145 | + |
132 | 146 | temp_dict = { |
133 | 147 | 'ds_idx': item_idx, |
134 | 148 | 'spk_id': self.spk_map[spk], |
135 | 149 | 'spk_name': spk, |
136 | 150 | 'language_id': self.lang_map[lang], |
137 | 151 | 'language_name': lang, |
138 | | - 'wav_fn': str(raw_data_dir / 'wavs' / f'{item_name}.wav'), |
| 152 | + 'wav_fn': str(wav_fn) if wav_fn is not None else None, |
139 | 153 | 'lang_seq': [ |
140 | 154 | ( |
141 | 155 | self.lang_map[lang if '/' not in p else p.split('/', maxsplit=1)[0]] |
@@ -288,10 +302,8 @@ def process_item(self, item_name, meta_data, binarization_args): |
288 | 302 | processed_input['mel2ph'] = mel2ph.cpu().numpy() |
289 | 303 |
|
290 | 304 | # Below: extract actual f0, convert to pitch and calculate delta pitch |
291 | | - if pathlib.Path(meta_data['wav_fn']).exists(): |
| 305 | + if meta_data['wav_fn'] is not None: |
292 | 306 | waveform, _ = librosa.load(meta_data['wav_fn'], sr=hparams['audio_sample_rate'], mono=True) |
293 | | - elif not self.prefer_ds: |
294 | | - raise FileNotFoundError(meta_data['wav_fn']) |
295 | 307 | else: |
296 | 308 | waveform = None |
297 | 309 |
|
|
0 commit comments