-
Notifications
You must be signed in to change notification settings - Fork 33
Expand file tree
/
Copy pathtranscribe.go
More file actions
290 lines (270 loc) · 12.8 KB
/
transcribe.go
File metadata and controls
290 lines (270 loc) · 12.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
/*
Copyright © 2022 AssemblyAI support@assemblyai.com
*/
package cmd
import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"
"strings"
S "github.com/AssemblyAI/assemblyai-cli/schemas"
U "github.com/AssemblyAI/assemblyai-cli/utils"
"github.com/spf13/cobra"
)
// transcribeCmd implements the `transcribe <url | path>` subcommand.
//
// Its Run function reads the command-line flags registered in init, validates
// the combinations that are checked locally (dual channel vs. speaker labels,
// boost_param values, summarization type/model, PII redaction policies,
// language code vs. language detection, custom spelling), fills in
// S.TranscribeParams and S.TranscribeFlags, and hands both to U.Transcribe.
// Any validation failure is reported through U.PrintError and aborts the
// command before U.Transcribe is called.
var transcribeCmd = &cobra.Command{
	Use:   "transcribe <url | path>",
	Short: "Transcribe and understand audio with a single AI-powered API",
	Long: `Automatically convert audio and video files and live audio streams to text with AssemblyAI's Speech-to-Text APIs.
Do more with Audio Intelligence - summarization, content moderation, topic detection, and more.
Powered by cutting-edge AI models.`,
	Args: cobra.MinimumNArgs(1),
	Run: func(cmd *cobra.Command, args []string) {
		var params S.TranscribeParams
		var flags S.TranscribeFlags

		// fail reports a problem to the user in the same shape every check
		// in this command uses. Callers return right after invoking it.
		fail := func(err error, message string) {
			U.PrintError(S.PrintErrorProps{Error: err, Message: message})
		}

		// Defensive: cobra.MinimumNArgs(1) above should already reject an
		// empty argument list, but the positional argument is indexed below.
		args = cmd.Flags().Args()
		if len(args) == 0 {
			fail(errors.New("Please provide a URL or a file path"), "Please provide a local file or a URL to be transcribed.")
			return
		}
		params.AudioURL = args[0]

		// Errors from the typed getters are deliberately ignored: they only
		// occur for an unknown name or a type mismatch, and every flag read
		// here is registered with the matching type in init.
		flagSet := cmd.Flags()
		flags.Json, _ = flagSet.GetBool("json")
		flags.Poll, _ = flagSet.GetBool("poll")
		flags.Srt, _ = flagSet.GetBool("srt")
		params.AutoChapters, _ = flagSet.GetBool("auto_chapters")
		params.AutoHighlights, _ = flagSet.GetBool("auto_highlights")
		params.ContentModeration, _ = flagSet.GetBool("content_moderation")
		params.DualChannel, _ = flagSet.GetBool("dual_channel")
		params.Disfluencies, _ = flagSet.GetBool("disfluencies")
		params.EntityDetection, _ = flagSet.GetBool("entity_detection")
		params.FormatText, _ = flagSet.GetBool("format_text")
		params.Punctuate, _ = flagSet.GetBool("punctuate")
		params.RedactPii, _ = flagSet.GetBool("redact_pii")
		params.SentimentAnalysis, _ = flagSet.GetBool("sentiment_analysis")
		params.SpeakerLabels, _ = flagSet.GetBool("speaker_labels")
		params.TopicDetection, _ = flagSet.GetBool("topic_detection")
		params.Summarization, _ = flagSet.GetBool("summarization")
		wordBoost, _ := flagSet.GetString("word_boost")

		// Dual channel and speaker labels are mutually exclusive. It is only
		// an error when the user explicitly passed --speaker_labels;
		// otherwise the value is quietly switched off.
		if params.DualChannel && params.SpeakerLabels {
			if flagSet.Lookup("speaker_labels").Changed {
				fail(errors.New("Speaker labels are not supported for dual channel audio"), "Speaker labels are not supported for dual channel audio")
				return
			}
			params.SpeakerLabels = false
		}

		// boost_param is only consulted when word_boost is supplied.
		if wordBoost != "" {
			params.WordBoost = strings.Split(wordBoost, ",")
			boostParam, _ := flagSet.GetString("boost_param")
			switch boostParam {
			case "", "low", "default", "high":
				// Accepted values; the empty string means "not specified".
			default:
				fail(errors.New("Invalid boost_param"), "Please provide a valid boost_param. Valid values are low, default, or high.")
				return
			}
			params.BoostParam = &boostParam
		}

		if params.Summarization {
			// Summarization forces punctuation and text formatting on,
			// regardless of what the user passed for those flags.
			params.Punctuate = true
			params.FormatText = true
			if params.AutoChapters {
				fail(errors.New("Auto chapters are not supported for summarization"), "Auto chapters are not supported for summarization")
				return
			}

			params.SummaryType, _ = flagSet.GetString("summary_type")
			if _, ok := S.SummarizationTypeMapReverse[params.SummaryType]; !ok {
				fail(errors.New("Invalid summary type"), "Invalid summary type. To know more about Summarization, head over to https://assemblyai.com/docs/audio-intelligence#summarization")
				return
			}

			summaryModel, _ := flagSet.GetString("summary_model")
			if summaryModel != "" {
				allowedTypes, ok := S.SummarizationModelMap[summaryModel]
				if !ok {
					fail(errors.New("Invalid summary model"), "Invalid summary model. To know more about Summarization, head over to https://assemblyai.com/docs/audio-intelligence#summarization")
					return
				}
				if !U.Contains(allowedTypes, params.SummaryType) {
					fail(errors.New("Invalid summary model"), "Cant use summary model "+summaryModel+" with summary type "+params.SummaryType+". To know more about Summarization, head over to https://assemblyai.com/docs/audio-intelligence#summarization")
					return
				}
				if summaryModel == "conversational" && !params.SpeakerLabels {
					fail(errors.New("Speaker labels required for conversational summary model"), "Speaker labels are required for conversational summarization. To know more about Summarization, head over to https://assemblyai.com/docs/audio-intelligence#summarization")
					return
				}
				params.SummaryModel = summaryModel
			}
		}

		// speech_model is independent of summarization. It used to be read
		// only inside the summarization branch, so the flag was ignored for
		// plain transcriptions. The pointer is left nil when the flag is
		// empty so that no empty model name is stored.
		if speechModel, _ := flagSet.GetString("speech_model"); speechModel != "" {
			params.SpeechModel = &speechModel
		}

		if params.RedactPii {
			policies, _ := flagSet.GetString("redact_pii_policies")
			policiesArray := strings.Split(policies, ",")
			for _, policy := range policiesArray {
				if _, ok := S.PIIRedactionPolicyMap[policy]; !ok {
					fail(errors.New("Invalid redaction policy"), fmt.Sprintf("%s is not a valid policy. See https://www.assemblyai.com/docs/Models/pii_redaction for the complete list of supported policies.", policy))
					return
				}
			}
			params.RedactPiiPolicies = policiesArray
		}

		// The webhook auth header flags are only honoured together with a
		// webhook URL.
		webhook, _ := flagSet.GetString("webhook_url")
		if webhook != "" {
			params.WebhookURL = webhook
			webhookHeaderName, _ := flagSet.GetString("webhook_auth_header_name")
			webhookHeaderValue, _ := flagSet.GetString("webhook_auth_header_value")
			if webhookHeaderName != "" {
				params.WebhookAuthHeaderName = webhookHeaderName
			}
			if webhookHeaderValue != "" {
				params.WebhookAuthHeaderValue = webhookHeaderValue
			}
		}

		// language_code and language_detection are mutually exclusive.
		languageDetection, _ := flagSet.GetBool("language_detection")
		languageCode, _ := flagSet.GetString("language_code")
		switch {
		case languageDetection && languageCode != "":
			fail(errors.New("Invalid language detection"), "Please provide either language detection or language code, not both.")
			return
		case languageCode != "":
			if _, ok := S.LanguageMap[languageCode]; !ok {
				fail(errors.New("Invalid language code"), "Invalid language code. See https://www.assemblyai.com/docs/Concepts/faq#supported-languages for supported languages.")
				return
			}
			params.LanguageCode = &languageCode
			params.LanguageDetection = false
		case languageDetection:
			params.LanguageDetection = true
		}

		// custom_spelling is either a path to a JSON file or the JSON itself.
		customSpelling, _ := flagSet.GetString("custom_spelling")
		if customSpelling != "" {
			parsedCustomSpelling := []S.CustomSpelling{}
			_, statErr := os.Stat(customSpelling)
			if statErr == nil {
				// The value names something that exists: read it as a file.
				file, err := os.Open(customSpelling)
				if err != nil {
					fail(err, "Error opening custom spelling file")
					return
				}
				byteCustomSpelling, err := ioutil.ReadAll(file)
				// Close right away instead of deferring, so the descriptor
				// is not held open while U.Transcribe runs.
				file.Close()
				if err != nil {
					fail(err, "Error reading custom spelling file")
					return
				}
				if err := json.Unmarshal(byteCustomSpelling, &parsedCustomSpelling); err != nil {
					fail(err, "Error parsing custom spelling file")
					return
				}
			} else if err := json.Unmarshal([]byte(customSpelling), &parsedCustomSpelling); err != nil {
				// Stat can fail for reasons other than "not exist" on a
				// perfectly valid inline JSON value (for example a name that
				// is too long for the filesystem), so inline parsing is
				// always tried first. Only when that fails too is the stat
				// error used to pick the message.
				if os.IsNotExist(statErr) {
					fail(err, "Invalid custom spelling. Please provide a valid custom spelling JSON.")
				} else {
					fail(statErr, "Error opening custom spelling file")
				}
				return
			}

			if err := U.ValidateCustomSpelling(parsedCustomSpelling); err != nil {
				fail(err, "Invalid custom spelling. Please provide a valid custom spelling JSON.")
				return
			}
			params.CustomSpelling = parsedCustomSpelling
		}

		U.Transcribe(params, flags)
	},
}
// init registers every flag of the transcribe command and attaches the
// command to rootCmd.
//
// The user-facing flags are persistent flags, declared in two tables (boolean
// flags first, then string flags) and registered in table order. The "test"
// flag is a local, hidden flag.
func init() {
	persistent := transcribeCmd.PersistentFlags()

	// Boolean flags: name, one-letter shorthand ("" for none), default, help.
	boolFlags := []struct {
		name      string
		shorthand string
		value     bool
		usage     string
	}{
		{"auto_chapters", "s", false, "A \"summary over time\" for the audio file transcribed."},
		{"auto_highlights", "a", false, "Automatically detect important phrases and words in the text."},
		{"content_moderation", "c", false, "Detect if sensitive content is spoken in the file."},
		{"disfluencies", "D", false, "Include Filler Words in your transcripts"},
		{"dual_channel", "d", false, "Enable dual channel"},
		{"entity_detection", "e", false, "Identify a wide range of entities that are spoken in the audio file."},
		{"format_text", "f", true, "Enable text formatting"},
		{"json", "j", false, "If true, the CLI will output the JSON."},
		{"language_detection", "n", false, "Identify the dominant language that’s spoken in an audio file."},
		{"poll", "p", true, "The CLI will poll the transcription until it's complete."},
		{"punctuate", "u", true, "Enable automatic punctuation."},
		{"redact_pii", "r", false, "Remove personally identifiable information from the transcription."},
		{"sentiment_analysis", "x", false, "Detect the sentiment of each sentence of speech spoken in the file."},
		{"speaker_labels", "l", false, "Automatically detect the number of speakers in your audio file, and each word in the transcription text can be associated with its speaker."},
		{"srt", "", false, "Generate an SRT file for the audio file transcribed."},
		{"summarization", "m", false, "Generate a single abstractive summary of the entire audio."},
		{"topic_detection", "t", false, "Label the topics that are spoken in the file."},
	}
	for _, f := range boolFlags {
		persistent.BoolP(f.name, f.shorthand, f.value, f.usage)
	}

	// String flags: name, one-letter shorthand ("" for none), default, help.
	stringFlags := []struct {
		name      string
		shorthand string
		value     string
		usage     string
	}{
		{"boost_param", "z", "", "Control how much weight should be applied to your boosted keywords/phrases. This value can be either low, default, or high."},
		{"custom_spelling", "", "", "Specify how words are spelled or formatted in the transcript text."},
		{"language_code", "g", "", "Specify the language of the speech in your audio file."},
		{"redact_pii_policies", "i", "drug,number_sequence,person_name", "The list of PII policies to redact, comma-separated without space in-between. Required if the redact_pii flag is true."},
		{"summary_type", "y", "bullets", "Type of summary generated."},
		{"webhook_auth_header_name", "b", "", "Containing the header's name which will be inserted into the webhook request"},
		{"webhook_auth_header_value", "o", "", "The value of the header that will be inserted into the webhook request."},
		{"webhook_url", "w", "", "Receive a webhook once your transcript is complete."},
		{"word_boost", "k", "", "The value of this flag MUST be used surrounded by quotes. Any term included will have its likelihood of being transcribed boosted."},
		{"summary_model", "q", "informative", "The model used to generate the summary."},
		{"speech_model", "", "", "The speech model to use for the transcription. Allowed values: see https://www.assemblyai.com/docs/api-reference/transcripts/submit#request.body.speech_model"},
	}
	for _, f := range stringFlags {
		persistent.StringP(f.name, f.shorthand, f.value, f.usage)
	}

	// Local flag used by test runs; hidden from help output.
	local := transcribeCmd.Flags()
	local.Bool("test", false, "Flag for test executing purpose")
	local.MarkHidden("test")

	rootCmd.AddCommand(transcribeCmd)
}