Merge pull request #390 from sbates-idrc/C2LC-668

sbates-idrc · web-flow · commit 75670c725b9c · 2022-09-08T10:17:04.000-04:00
C2LC-668: Add logic to select voice for announcement
diff --git a/src/AudioManagerImpl.js b/src/AudioManagerImpl.js
@@ -5,6 +5,7 @@ import CharacterState from './CharacterState';
 import type {IntlShape} from 'react-intl';
 import {AudioManager} from './types';
 import SceneDimensions from './SceneDimensions';
+import {selectSpeechSynthesisVoice} from './Utils.js';
 
 const SamplerDefs = {
     // The percussion instruments we use actually don't vary their pitch, so we use the same sample at different
@@ -204,6 +205,7 @@ export default class AudioManagerImpl implements AudioManager {
         right90: Sampler,
         right180: Sampler
     };
+    speechSynthesisVoices: Array<SpeechSynthesisVoice>;
 
     constructor(audioEnabled: boolean, announcementsEnabled: boolean, sonificationEnabled: boolean) {
         this.audioEnabled = audioEnabled;
@@ -215,6 +217,9 @@ export default class AudioManagerImpl implements AudioManager {
 
         this.samplers = {};
 
+        this.updateSpeechSynthesisVoices();
+        window.speechSynthesis.onvoiceschanged = this.updateSpeechSynthesisVoices;
+
         Object.keys(SamplerDefs).forEach((samplerKey) => {
             const samplerDef = SamplerDefs[samplerKey];
             const sampler = new Sampler(samplerDef);
@@ -223,6 +228,10 @@ export default class AudioManagerImpl implements AudioManager {
         });
     }
 
+    updateSpeechSynthesisVoices = () => {
+        this.speechSynthesisVoices = window.speechSynthesis.getVoices();
+    };
+
     playAnnouncement(messageIdSuffix: string, intl: IntlShape, messagePayload: any) {
         if (this.audioEnabled && this.announcementsEnabled) {
             if (window.speechSynthesis.speaking || window.speechSynthesis.pending) {
@@ -231,6 +240,15 @@ export default class AudioManagerImpl implements AudioManager {
             const messageId = "Announcement." + messageIdSuffix;
             const toAnnounce = intl.formatMessage({ id: messageId}, messagePayload);
             const utterance = new SpeechSynthesisUtterance(toAnnounce);
+
+            // TODO: When we support non-English UI language(s),
+            //       ensure that the language is specified correctly
+            utterance.voice = selectSpeechSynthesisVoice(
+                'en',
+                window.navigator.language,
+                this.speechSynthesisVoices
+            );
+
             window.speechSynthesis.speak(utterance);
         }
     }
diff --git a/src/Utils.js b/src/Utils.js
@@ -219,6 +219,77 @@ function isLoopBlock(blockType: string): boolean {
     return blockType === 'startLoop' || blockType === 'endLoop';
 }
 
+// Select the voice to use for a speech systhesis utterance.
+// This function exists to work around a bug in Safari on Mac where
+// making a call to window.speechSynthesis.speak() with a
+// SpeechSynthesisUtterance with an unset voice causes no speech to happen.
+// See: https://issues.fluidproject.org/browse/C2LC-668
+//
+// utteranceLangTag: BCP 47 language tag
+// userLangTag: BCP 47 language tag, as from window.navigator.language
+// voices: available voices, as returned from window.speechSynthesis.getVoices()
+//
+// For details on BCP 47, see: https://datatracker.ietf.org/doc/html/rfc5646
+//
+function selectSpeechSynthesisVoice(utteranceLangTag: ?string,
+    userLangTag: ?string,
+    voices: ?Array<SpeechSynthesisVoice>): SpeechSynthesisVoice | null {
+
+    if (utteranceLangTag == null
+            || utteranceLangTag.length < 2
+            || userLangTag == null
+            || userLangTag.length < 2
+            || voices == null
+            || voices.length === 0) {
+        return null;
+    }
+
+    const utteranceLanguage = utteranceLangTag.substring(0, 2);
+
+    // Stage 1: filter by language
+
+    let stage1 = [];
+
+    // If the user's language tag has the same language as the utterance,
+    // look for voices that match the user's language tag. So that users
+    // hear the speech with their preferred pronunciation, if applicable.
+
+    if (userLangTag.startsWith(utteranceLanguage)) {
+        stage1 = voices.filter(voice => voice.lang === userLangTag);
+    }
+
+    // If we haven't found any matches yet, and the utterance language is
+    // 'en', look for voices for 'en-US'
+
+    if (stage1.length === 0 && utteranceLanguage === 'en') {
+        stage1 = voices.filter(voice => voice.lang === 'en-US');
+    }
+
+    // Finally, look for voices with the same language as the utterance
+
+    if (stage1.length === 0) {
+        stage1 = voices.filter(voice => voice.lang.startsWith(utteranceLanguage));
+    }
+
+    // Stage 2: Prefer voices with default: true
+
+    const defaultVoices = stage1.filter(voice => voice.default);
+    const stage2 = defaultVoices.length > 0 ? defaultVoices : stage1;
+
+    // Stage 3: Prefer voices with localService: true
+
+    const localVoices = stage2.filter(voice => voice.localService);
+    const stage3 = localVoices.length > 0 ? localVoices : stage2;
+
+    // Stage 4: Pick the voice
+
+    if (stage3.length === 0) {
+        return null;
+    } else {
+        return stage3[0];
+    }
+}
+
 export {
     decodeCoordinate,
     decodeDirection,
@@ -236,5 +307,6 @@ export {
     getStartingPositionFromString,
     isLoopBlock,
     makeDelayedPromise,
-    parseLoopLabel
+    parseLoopLabel,
+    selectSpeechSynthesisVoice
 };
diff --git a/src/Utils.test.js b/src/Utils.test.js
@@ -1,6 +1,6 @@
 // @flow
 
-import { decodeCoordinate, decodeDirection, encodeCoordinate, encodeDirection, extend, isLoopBlock, generateEncodedProgramURL, getThemeFromString, getWorldFromString, getStartingPositionFromString, focusByQuerySelector, focusFirstInNodeList, focusLastInNodeList, generateLoopLabel, parseLoopLabel } from './Utils.js';
+import { decodeCoordinate, decodeDirection, encodeCoordinate, encodeDirection, extend, isLoopBlock, generateEncodedProgramURL, getThemeFromString, getWorldFromString, getStartingPositionFromString, focusByQuerySelector, focusFirstInNodeList, focusLastInNodeList, generateLoopLabel, parseLoopLabel, selectSpeechSynthesisVoice } from './Utils.js';
 import React from 'react';
 import Adapter from 'enzyme-adapter-react-16';
 import { mount, configure } from 'enzyme';
@@ -224,3 +224,216 @@ test('Test isLoopBlock', () => {
     expect(isLoopBlock('endLoop')).toEqual(true);
     expect(isLoopBlock('forward1')).toEqual(false);
 });
+
+describe('selectSpeechSynthesisVoice', () => {
+    describe('Check parameters', () => {
+        const voices = (([
+            {
+                default: true,
+                lang: 'en-US',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            }
+        ]: any): Array<SpeechSynthesisVoice>);
+
+        test('When utteranceLangTag or userLangTag is bad, return null', () => {
+            expect(selectSpeechSynthesisVoice(null, 'en', voices)).toBeNull();
+            expect(selectSpeechSynthesisVoice('',   'en', voices)).toBeNull();
+            expect(selectSpeechSynthesisVoice('a',  'en', voices)).toBeNull();
+            expect(selectSpeechSynthesisVoice('en', null, voices)).toBeNull();
+            expect(selectSpeechSynthesisVoice('en', '',   voices)).toBeNull();
+            expect(selectSpeechSynthesisVoice('en', 'a',  voices)).toBeNull();
+        });
+
+        test('When there are no voices, return null', () => {
+            expect(selectSpeechSynthesisVoice('en', 'en', [])).toBeNull();
+            expect(selectSpeechSynthesisVoice('en', 'en', null)).toBeNull();
+        });
+    });
+
+    describe('Selection by language', () => {
+        const voices = (([
+            {
+                default: true,
+                lang: 'en-CA',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: true,
+                lang: 'en-US',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: true,
+                lang: 'fr-CA',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: true,
+                lang: 'fr-FR',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            }
+        ]: any): Array<SpeechSynthesisVoice>);
+
+        test('When available, match the user language', () => {
+            expect(selectSpeechSynthesisVoice('en', 'en-CA', voices)).toBe(voices[0]);
+            expect(selectSpeechSynthesisVoice('en', 'en-US', voices)).toBe(voices[1]);
+            expect(selectSpeechSynthesisVoice('fr', 'fr-CA', voices)).toBe(voices[2]);
+            expect(selectSpeechSynthesisVoice('fr', 'fr-FR', voices)).toBe(voices[3]);
+        });
+
+        test('When utterance is English and user language is English, but we have no matching voice, find en-US', () => {
+            expect(selectSpeechSynthesisVoice('en', 'en-GB', voices)).toBe(voices[1]);
+        });
+
+        test('When utterance is English and user language is not, find en-US', () => {
+            expect(selectSpeechSynthesisVoice('en', 'fr-FR', voices)).toBe(voices[1]);
+        });
+
+        test('When utterance is English, user language is not, and there is no en-US, find the first English', () => {
+            const noEnUSvoices = (([
+                {
+                    default: true,
+                    lang: 'fr-CA',
+                    localService: true,
+                    name: 'Voice',
+                    voiceURI: 'Voice'
+                },
+                {
+                    default: true,
+                    lang: 'fr-FR',
+                    localService: true,
+                    name: 'Voice',
+                    voiceURI: 'Voice'
+                },
+                {
+                    default: true,
+                    lang: 'en-CA',
+                    localService: true,
+                    name: 'Voice',
+                    voiceURI: 'Voice'
+                },
+                {
+                    default: true,
+                    lang: 'en-GB',
+                    localService: true,
+                    name: 'Voice',
+                    voiceURI: 'Voice'
+                }
+            ]: any): Array<SpeechSynthesisVoice>);
+
+            expect(selectSpeechSynthesisVoice('en', 'fr-FR', noEnUSvoices)).toBe(noEnUSvoices[2]);
+        });
+
+        test('When utterance is not English, and user language is the same, but we have no matching voice, find the first voice for the utterance language', () => {
+            expect(selectSpeechSynthesisVoice('fr', 'fr-CH', voices)).toBe(voices[2]);
+        });
+
+        test('When utterance is not English, and user language is not the same, find the first voice for the utterance language', () => {
+            expect(selectSpeechSynthesisVoice('fr', 'en-US', voices)).toBe(voices[2]);
+        });
+
+        test('When there is no match, return null', () => {
+            const noEnVoices = (([
+                {
+                    default: true,
+                    lang: 'fr-CA',
+                    localService: true,
+                    name: 'Voice',
+                    voiceURI: 'Voice'
+                },
+                {
+                    default: true,
+                    lang: 'fr-FR',
+                    localService: true,
+                    name: 'Voice',
+                    voiceURI: 'Voice'
+                }
+            ]: any): Array<SpeechSynthesisVoice>);
+
+            expect(selectSpeechSynthesisVoice('en', 'fr-FR', noEnVoices)).toBeNull();
+        });
+    });
+
+    test('Prefer voices with default: true, then localService: true', () => {
+        const voices = (([
+            {
+                default: false,
+                lang: 'fr-CA',
+                localService: false,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: false,
+                lang: 'fr-FR',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: false,
+                lang: 'en-CA',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: true,
+                lang: 'en-CA',
+                localService: false,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: true,
+                lang: 'en-US',
+                localService: false,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: true,
+                lang: 'en-US',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            }
+        ]: any): Array<SpeechSynthesisVoice>);
+
+        expect(selectSpeechSynthesisVoice('fr', 'fr-CA', voices)).toBe(voices[0]);
+        expect(selectSpeechSynthesisVoice('fr', 'en-US', voices)).toBe(voices[1]);
+        expect(selectSpeechSynthesisVoice('en', 'en-CA', voices)).toBe(voices[3]);
+        expect(selectSpeechSynthesisVoice('en', 'en-US', voices)).toBe(voices[5]);
+    });
+
+    test('When there are multiple matches, pick the first', () => {
+        const voices = (([
+            {
+                default: true,
+                lang: 'en-US',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            },
+            {
+                default: true,
+                lang: 'en-US',
+                localService: true,
+                name: 'Voice',
+                voiceURI: 'Voice'
+            }
+        ]: any): Array<SpeechSynthesisVoice>);
+
+        expect(selectSpeechSynthesisVoice('en', 'en-US', voices)).toBe(voices[0]);
+    });
+});