Skip to content

Commit 75670c7

Browse files
authored
Merge pull request #390 from sbates-idrc/C2LC-668
C2LC-668: Add logic to select voice for announcement
2 parents 235afae + c9bc566 commit 75670c7

3 files changed

Lines changed: 305 additions & 2 deletions

File tree

src/AudioManagerImpl.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import CharacterState from './CharacterState';
55
import type {IntlShape} from 'react-intl';
66
import {AudioManager} from './types';
77
import SceneDimensions from './SceneDimensions';
8+
import {selectSpeechSynthesisVoice} from './Utils.js';
89

910
const SamplerDefs = {
1011
// The percussion instruments we use actually don't vary their pitch, so we use the same sample at different
@@ -204,6 +205,7 @@ export default class AudioManagerImpl implements AudioManager {
204205
right90: Sampler,
205206
right180: Sampler
206207
};
208+
speechSynthesisVoices: Array<SpeechSynthesisVoice>;
207209

208210
constructor(audioEnabled: boolean, announcementsEnabled: boolean, sonificationEnabled: boolean) {
209211
this.audioEnabled = audioEnabled;
@@ -215,6 +217,9 @@ export default class AudioManagerImpl implements AudioManager {
215217

216218
this.samplers = {};
217219

220+
this.updateSpeechSynthesisVoices();
221+
window.speechSynthesis.onvoiceschanged = this.updateSpeechSynthesisVoices;
222+
218223
Object.keys(SamplerDefs).forEach((samplerKey) => {
219224
const samplerDef = SamplerDefs[samplerKey];
220225
const sampler = new Sampler(samplerDef);
@@ -223,6 +228,10 @@ export default class AudioManagerImpl implements AudioManager {
223228
});
224229
}
225230

231+
// Refresh the cached list of speech synthesis voices from
// window.speechSynthesis.getVoices().
// Declared as an arrow function property so that `this` stays bound to this
// instance when the function is installed as the
// window.speechSynthesis.onvoiceschanged handler in the constructor.
updateSpeechSynthesisVoices = () => {
232+
this.speechSynthesisVoices = window.speechSynthesis.getVoices();
233+
};
234+
226235
playAnnouncement(messageIdSuffix: string, intl: IntlShape, messagePayload: any) {
227236
if (this.audioEnabled && this.announcementsEnabled) {
228237
if (window.speechSynthesis.speaking || window.speechSynthesis.pending) {
@@ -231,6 +240,15 @@ export default class AudioManagerImpl implements AudioManager {
231240
const messageId = "Announcement." + messageIdSuffix;
232241
const toAnnounce = intl.formatMessage({ id: messageId}, messagePayload);
233242
const utterance = new SpeechSynthesisUtterance(toAnnounce);
243+
244+
// TODO: When we support non-English UI language(s),
245+
// ensure that the language is specified correctly
246+
utterance.voice = selectSpeechSynthesisVoice(
247+
'en',
248+
window.navigator.language,
249+
this.speechSynthesisVoices
250+
);
251+
234252
window.speechSynthesis.speak(utterance);
235253
}
236254
}

src/Utils.js

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,77 @@ function isLoopBlock(blockType: string): boolean {
219219
return blockType === 'startLoop' || blockType === 'endLoop';
220220
}
221221

222+
// Select the voice to use for a speech synthesis utterance.
223+
// This function exists to work around a bug in Safari on Mac where
224+
// making a call to window.speechSynthesis.speak() with a
225+
// SpeechSynthesisUtterance with an unset voice causes no speech to happen.
226+
// See: https://issues.fluidproject.org/browse/C2LC-668
227+
//
228+
// utteranceLangTag: BCP 47 language tag
229+
// userLangTag: BCP 47 language tag, as from window.navigator.language
230+
// voices: available voices, as returned from window.speechSynthesis.getVoices()
231+
//
232+
// For details on BCP 47, see: https://datatracker.ietf.org/doc/html/rfc5646
233+
//
234+
function selectSpeechSynthesisVoice(utteranceLangTag: ?string,
235+
userLangTag: ?string,
236+
voices: ?Array<SpeechSynthesisVoice>): SpeechSynthesisVoice | null {
237+
238+
if (utteranceLangTag == null
239+
|| utteranceLangTag.length < 2
240+
|| userLangTag == null
241+
|| userLangTag.length < 2
242+
|| voices == null
243+
|| voices.length === 0) {
244+
return null;
245+
}
246+
247+
const utteranceLanguage = utteranceLangTag.substring(0, 2);
248+
249+
// Stage 1: filter by language
250+
251+
let stage1 = [];
252+
253+
// If the user's language tag has the same language as the utterance,
254+
// look for voices that match the user's language tag. So that users
255+
// hear the speech with their preferred pronunciation, if applicable.
256+
257+
if (userLangTag.startsWith(utteranceLanguage)) {
258+
stage1 = voices.filter(voice => voice.lang === userLangTag);
259+
}
260+
261+
// If we haven't found any matches yet, and the utterance language is
262+
// 'en', look for voices for 'en-US'
263+
264+
if (stage1.length === 0 && utteranceLanguage === 'en') {
265+
stage1 = voices.filter(voice => voice.lang === 'en-US');
266+
}
267+
268+
// Finally, look for voices with the same language as the utterance
269+
270+
if (stage1.length === 0) {
271+
stage1 = voices.filter(voice => voice.lang.startsWith(utteranceLanguage));
272+
}
273+
274+
// Stage 2: Prefer voices with default: true
275+
276+
const defaultVoices = stage1.filter(voice => voice.default);
277+
const stage2 = defaultVoices.length > 0 ? defaultVoices : stage1;
278+
279+
// Stage 3: Prefer voices with localService: true
280+
281+
const localVoices = stage2.filter(voice => voice.localService);
282+
const stage3 = localVoices.length > 0 ? localVoices : stage2;
283+
284+
// Stage 4: Pick the voice
285+
286+
if (stage3.length === 0) {
287+
return null;
288+
} else {
289+
return stage3[0];
290+
}
291+
}
292+
222293
export {
223294
decodeCoordinate,
224295
decodeDirection,
@@ -236,5 +307,6 @@ export {
236307
getStartingPositionFromString,
237308
isLoopBlock,
238309
makeDelayedPromise,
239-
parseLoopLabel
310+
parseLoopLabel,
311+
selectSpeechSynthesisVoice
240312
};

src/Utils.test.js

Lines changed: 214 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// @flow
22

3-
import { decodeCoordinate, decodeDirection, encodeCoordinate, encodeDirection, extend, isLoopBlock, generateEncodedProgramURL, getThemeFromString, getWorldFromString, getStartingPositionFromString, focusByQuerySelector, focusFirstInNodeList, focusLastInNodeList, generateLoopLabel, parseLoopLabel } from './Utils.js';
3+
import { decodeCoordinate, decodeDirection, encodeCoordinate, encodeDirection, extend, isLoopBlock, generateEncodedProgramURL, getThemeFromString, getWorldFromString, getStartingPositionFromString, focusByQuerySelector, focusFirstInNodeList, focusLastInNodeList, generateLoopLabel, parseLoopLabel, selectSpeechSynthesisVoice } from './Utils.js';
44
import React from 'react';
55
import Adapter from 'enzyme-adapter-react-16';
66
import { mount, configure } from 'enzyme';
@@ -224,3 +224,216 @@ test('Test isLoopBlock', () => {
224224
expect(isLoopBlock('endLoop')).toEqual(true);
225225
expect(isLoopBlock('forward1')).toEqual(false);
226226
});
227+
228+
describe('selectSpeechSynthesisVoice', () => {
229+
describe('Check parameters', () => {
230+
const voices = (([
231+
{
232+
default: true,
233+
lang: 'en-US',
234+
localService: true,
235+
name: 'Voice',
236+
voiceURI: 'Voice'
237+
}
238+
]: any): Array<SpeechSynthesisVoice>);
239+
240+
test('When utteranceLangTag or userLangTag is bad, return null', () => {
241+
expect(selectSpeechSynthesisVoice(null, 'en', voices)).toBeNull();
242+
expect(selectSpeechSynthesisVoice('', 'en', voices)).toBeNull();
243+
expect(selectSpeechSynthesisVoice('a', 'en', voices)).toBeNull();
244+
expect(selectSpeechSynthesisVoice('en', null, voices)).toBeNull();
245+
expect(selectSpeechSynthesisVoice('en', '', voices)).toBeNull();
246+
expect(selectSpeechSynthesisVoice('en', 'a', voices)).toBeNull();
247+
});
248+
249+
test('When there are no voices, return null', () => {
250+
expect(selectSpeechSynthesisVoice('en', 'en', [])).toBeNull();
251+
expect(selectSpeechSynthesisVoice('en', 'en', null)).toBeNull();
252+
});
253+
});
254+
255+
describe('Selection by language', () => {
256+
const voices = (([
257+
{
258+
default: true,
259+
lang: 'en-CA',
260+
localService: true,
261+
name: 'Voice',
262+
voiceURI: 'Voice'
263+
},
264+
{
265+
default: true,
266+
lang: 'en-US',
267+
localService: true,
268+
name: 'Voice',
269+
voiceURI: 'Voice'
270+
},
271+
{
272+
default: true,
273+
lang: 'fr-CA',
274+
localService: true,
275+
name: 'Voice',
276+
voiceURI: 'Voice'
277+
},
278+
{
279+
default: true,
280+
lang: 'fr-FR',
281+
localService: true,
282+
name: 'Voice',
283+
voiceURI: 'Voice'
284+
}
285+
]: any): Array<SpeechSynthesisVoice>);
286+
287+
test('When available, match the user language', () => {
288+
expect(selectSpeechSynthesisVoice('en', 'en-CA', voices)).toBe(voices[0]);
289+
expect(selectSpeechSynthesisVoice('en', 'en-US', voices)).toBe(voices[1]);
290+
expect(selectSpeechSynthesisVoice('fr', 'fr-CA', voices)).toBe(voices[2]);
291+
expect(selectSpeechSynthesisVoice('fr', 'fr-FR', voices)).toBe(voices[3]);
292+
});
293+
294+
test('When utterance is English and user language is English, but we have no matching voice, find en-US', () => {
295+
expect(selectSpeechSynthesisVoice('en', 'en-GB', voices)).toBe(voices[1]);
296+
});
297+
298+
test('When utterance is English and user language is not, find en-US', () => {
299+
expect(selectSpeechSynthesisVoice('en', 'fr-FR', voices)).toBe(voices[1]);
300+
});
301+
302+
test('When utterance is English, user language is not, and there is no en-US, find the first English', () => {
303+
const noEnUSvoices = (([
304+
{
305+
default: true,
306+
lang: 'fr-CA',
307+
localService: true,
308+
name: 'Voice',
309+
voiceURI: 'Voice'
310+
},
311+
{
312+
default: true,
313+
lang: 'fr-FR',
314+
localService: true,
315+
name: 'Voice',
316+
voiceURI: 'Voice'
317+
},
318+
{
319+
default: true,
320+
lang: 'en-CA',
321+
localService: true,
322+
name: 'Voice',
323+
voiceURI: 'Voice'
324+
},
325+
{
326+
default: true,
327+
lang: 'en-GB',
328+
localService: true,
329+
name: 'Voice',
330+
voiceURI: 'Voice'
331+
}
332+
]: any): Array<SpeechSynthesisVoice>);
333+
334+
expect(selectSpeechSynthesisVoice('en', 'fr-FR', noEnUSvoices)).toBe(noEnUSvoices[2]);
335+
});
336+
337+
test('When utterance is not English, and user language is the same, but we have no matching voice, find the first voice for the utterance language', () => {
338+
expect(selectSpeechSynthesisVoice('fr', 'fr-CH', voices)).toBe(voices[2]);
339+
});
340+
341+
test('When utterance is not English, and user language is not the same, find the first voice for the utterance language', () => {
342+
expect(selectSpeechSynthesisVoice('fr', 'en-US', voices)).toBe(voices[2]);
343+
});
344+
345+
test('When there is no match, return null', () => {
346+
const noEnVoices = (([
347+
{
348+
default: true,
349+
lang: 'fr-CA',
350+
localService: true,
351+
name: 'Voice',
352+
voiceURI: 'Voice'
353+
},
354+
{
355+
default: true,
356+
lang: 'fr-FR',
357+
localService: true,
358+
name: 'Voice',
359+
voiceURI: 'Voice'
360+
}
361+
]: any): Array<SpeechSynthesisVoice>);
362+
363+
expect(selectSpeechSynthesisVoice('en', 'fr-FR', noEnVoices)).toBeNull();
364+
});
365+
});
366+
367+
test('Prefer voices with default: true, then localService: true', () => {
368+
const voices = (([
369+
{
370+
default: false,
371+
lang: 'fr-CA',
372+
localService: false,
373+
name: 'Voice',
374+
voiceURI: 'Voice'
375+
},
376+
{
377+
default: false,
378+
lang: 'fr-FR',
379+
localService: true,
380+
name: 'Voice',
381+
voiceURI: 'Voice'
382+
},
383+
{
384+
default: false,
385+
lang: 'en-CA',
386+
localService: true,
387+
name: 'Voice',
388+
voiceURI: 'Voice'
389+
},
390+
{
391+
default: true,
392+
lang: 'en-CA',
393+
localService: false,
394+
name: 'Voice',
395+
voiceURI: 'Voice'
396+
},
397+
{
398+
default: true,
399+
lang: 'en-US',
400+
localService: false,
401+
name: 'Voice',
402+
voiceURI: 'Voice'
403+
},
404+
{
405+
default: true,
406+
lang: 'en-US',
407+
localService: true,
408+
name: 'Voice',
409+
voiceURI: 'Voice'
410+
}
411+
]: any): Array<SpeechSynthesisVoice>);
412+
413+
expect(selectSpeechSynthesisVoice('fr', 'fr-CA', voices)).toBe(voices[0]);
414+
expect(selectSpeechSynthesisVoice('fr', 'en-US', voices)).toBe(voices[1]);
415+
expect(selectSpeechSynthesisVoice('en', 'en-CA', voices)).toBe(voices[3]);
416+
expect(selectSpeechSynthesisVoice('en', 'en-US', voices)).toBe(voices[5]);
417+
});
418+
419+
test('When there are multiple matches, pick the first', () => {
420+
const voices = (([
421+
{
422+
default: true,
423+
lang: 'en-US',
424+
localService: true,
425+
name: 'Voice',
426+
voiceURI: 'Voice'
427+
},
428+
{
429+
default: true,
430+
lang: 'en-US',
431+
localService: true,
432+
name: 'Voice',
433+
voiceURI: 'Voice'
434+
}
435+
]: any): Array<SpeechSynthesisVoice>);
436+
437+
expect(selectSpeechSynthesisVoice('en', 'en-US', voices)).toBe(voices[0]);
438+
});
439+
});

0 commit comments

Comments
 (0)