-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathhyperparameterTuningExample.m
More file actions
74 lines (62 loc) · 2.44 KB
/
hyperparameterTuningExample.m
File metadata and controls
74 lines (62 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
% Put the helper functions stored in the utils folder on the MATLAB path.
addpath utils

% Download the dataset if it does not already exist.
% The full dev-train-100 dataset is around 6 GB of data, so this step may
% take a while. The system temporary folder is used as the download root.
downloadRoot = tempdir;
disp('Downloading...')
datasetFolder = downloadDataset(downloadRoot);
disp('Finished!')
% Create an audioDatastore object to access the LibriSpeech audio data,
% including the files in every subfolder of the dataset folder.
ADS = audioDatastore(datasetFolder,'IncludeSubfolders',true);

% Extract the speaker label from the file path: the label is the text between
% the dataset folder prefix and the next file separator.
ADS.Labels = extractBetween(ADS.Files,fullfile(datasetFolder,filesep),filesep);

% To train the network with data from all 251 speakers, set reducedDataSet to false.
% To run this example quickly with data from just six speakers, set reducedDataSet to true.
% The reduced set keeps only the speakers whose numeric label is below 50.
reducedDataSet = true;
if reducedDataSet
    % str2double converts a cell array of character vectors in one call, so no
    % cellfun/anonymous function is needed.
    isKeptSpeaker = str2double(ADS.Labels) < 50;
    ADS = subset(ADS,isKeptSpeaker);
end

% Keep only the first 30% of the files of each label to shrink the dataset further.
ADS = splitEachLabel(ADS,0.3);

% Split the audio files into training and test data.
% 80% of the audio files are assigned to the training set and 20% are assigned to the test set.
[ADSTrain,ADSTest] = splitEachLabel(ADS,0.8);
% Set the parameters for preprocessing.
% One file is read only to obtain the sample rate; the audio samples themselves
% are not needed, so that output is discarded.
[~,dsInfo] = read(ADSTrain);
% Rewind the datastore so the probe read above does not leave it positioned
% past the first file when the data is preprocessed below.
reset(ADSTrain);
Fs = dsInfo.SampleRate;

% Frame and overlap durations in seconds, converted to a number of samples.
frameDuration = 200e-3;
overlapDuration = 40e-3;
frameLength = floor(Fs*frameDuration);
overlapLength = round(Fs*overlapDuration);

% Convert both datastores to predictor/label arrays.
% NOTE(review): preprocessAudioData is a project helper not shown in this file;
% the layout of its outputs should be confirmed against its definition.
disp('Preprocessing Data...')
[XTrain,YTrain] = preprocessAudioData(ADSTrain,frameLength,overlapLength,Fs);
[XTest,YTest] = preprocessAudioData(ADSTest,frameLength,overlapLength,Fs);
% Save training and test data to .mat
disp('Saving...')
fullDataFile = 'SpeakerIdentificationProject/audioTrainingData.mat';
% Create the destination folder when it is missing; otherwise save would fail
% only after the time-consuming preprocessing has already finished.
fullDataFolder = fileparts(fullDataFile);
if ~isfolder(fullDataFolder)
    mkdir(fullDataFolder);
end
save(fullDataFile,'XTrain','YTrain','XTest','YTest');
% Save reduced dataset with uniform distribution of labels.
% For every label found in the training data, keep the first trainSize training
% rows and the first testSize test rows.
trainSize = 50;
testSize = 5;
labels = unique(YTrain);

% Collect the row indices to keep and index the data once at the end. Copying
% into fixed-size slots (end+1:end+trainSize) fails with a dimension mismatch
% whenever a label has fewer rows than requested, because find returns only
% the matches that exist.
trainRows = [];
testRows = [];
for k = 1:numel(labels)
    label = labels(k);
    trainIdx = find(YTrain == label, trainSize);
    testIdx = find(YTest == label, testSize);
    if numel(trainIdx) < trainSize || numel(testIdx) < testSize
        % Keep whatever is available, but tell the user that the result is
        % no longer uniformly distributed across labels.
        warning('hyperparameterTuningExample:tooFewObservations', ...
            'Label %s has only %d training and %d test observations (requested %d and %d).', ...
            char(string(label)), numel(trainIdx), numel(testIdx), trainSize, testSize);
    end
    trainRows = [trainRows; trainIdx(:)]; %#ok<AGROW>
    testRows = [testRows; testIdx(:)]; %#ok<AGROW>
end

% Rows are ordered label by label, in the order returned by unique.
XTrain = XTrain(trainRows,:,:);
YTrain = YTrain(trainRows,:);
XTest = XTest(testRows,:,:);
YTest = YTest(testRows,:);
save('SpeakerIdentificationProject/smallerAudioTrainingData.mat','XTrain','YTrain','XTest','YTest');
% Remove all variables from the workspace. The training and test data were
% written to .mat files earlier in this script, so they can be reloaded from disk.
clear
% Check Python configuration
% NOTE(review): checkPythonSetup is not defined in this file; presumably it is a
% helper from the utils folder added to the path at the top of the script - confirm.
checkPythonSetup
% Open Experiment Manager
experimentManager