-
Notifications
You must be signed in to change notification settings - Fork 58
Expand file tree
/
Copy pathBreakSentences.java
More file actions
73 lines (66 loc) · 2.81 KB
/
BreakSentences.java
File metadata and controls
73 lines (66 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
* microsoft-translator-java-api
*
* Copyright 2012 Jonathan Griggs <jonathan.griggs at gmail.com>.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.memetix.mst.sentence;
import java.net.URL;
import java.net.URLEncoder;
import com.memetix.mst.MicrosoftTranslatorAPI;
import com.memetix.mst.language.Language;
/**
* BreakSentences
*
* Provides an interface to the Microsoft Translator BreakSentences service
*
* This service is basically a utility for determining how Microsoft Translator is
* interpreting sentence breaks within a given string of text
*
* Uses the AJAX Interface V2 - see: http://msdn.microsoft.com/en-us/library/ff512395.aspx
*
* @author Jonathan Griggs <jonathan.griggs at gmail.com>
*/
public final class BreakSentences extends MicrosoftTranslatorAPI {
    private static final String SERVICE_URL = "http://api.microsofttranslator.com/V2/Ajax.svc/BreakSentences?";

    /** Largest text payload, measured in encoded bytes, accepted by the local size check. */
    private static final int MAX_TEXT_BYTES = 10240;

    // prevent instantiation
    private BreakSentences() {
    }

    /**
     * Reports the number of sentences detected and the length of those sentences
     *
     * The arguments are validated locally first; only then is the request URL
     * assembled (both values URL-encoded) and handed to {@code retrieveIntArray}.
     *
     * @param text The String to break into sentences; must not be null
     * @param fromLang The Language of origin; must not be null or AUTO_DETECT
     * @return an array of integers representing the size of each detected sentence
     * @throws NullPointerException if {@code text} or {@code fromLang} is null
     * @throws RuntimeException if the encoded text exceeds 10,240 bytes or
     *         {@code fromLang} is {@code Language.AUTO_DETECT}
     * @throws Exception on any other error raised while validating, building the
     *         URL or retrieving the response
     */
    public static Integer[] execute(final String text, final Language fromLang) throws Exception {
        // Run the basic service validations first
        validateServiceState(text, fromLang);
        final URL url = new URL(SERVICE_URL
                + PARAM_SENTENCES_LANGUAGE + URLEncoder.encode(fromLang.toString(), ENCODING)
                + PARAM_TEXT_SINGLE + URLEncoder.encode(text, ENCODING));
        return retrieveIntArray(url);
    }

    /**
     * Validates the request arguments, then delegates to the no-argument
     * {@code validateServiceState()} that this class does not declare itself.
     *
     * @param text the text that will be sent; must not be null
     * @param fromLang the origin language; must not be null or AUTO_DETECT
     * @throws NullPointerException if {@code text} or {@code fromLang} is null
     * @throws RuntimeException if the text is too large or the language is AUTO_DETECT
     * @throws Exception if the encoding lookup or the delegated validation fails
     */
    private static void validateServiceState(final String text, final Language fromLang) throws Exception {
        // Fail with a descriptive message instead of a bare NPE from text.getBytes(...).
        if (text == null) {
            throw new NullPointerException("text must not be null");
        }
        // The limit applies to the encoded byte length, not the character count.
        final int byteLength = text.getBytes(ENCODING).length;
        if (byteLength > MAX_TEXT_BYTES) {
            throw new RuntimeException("TEXT_TOO_LARGE - Microsoft Translator (BreakSentences) can handle up to 10,240 bytes per request");
        }
        // A null language would slip past the AUTO_DETECT check below and only
        // blow up later while the URL is being built, so reject it here.
        if (fromLang == null) {
            throw new NullPointerException("fromLang must not be null");
        }
        if (Language.AUTO_DETECT.equals(fromLang)) {
            // NOTE: message text (including its spelling) is kept verbatim because
            // callers may match on it.
            throw new RuntimeException("BreakSentences does not support AUTO_DETECT Langauge. Please specify the origin language");
        }
        validateServiceState();
    }
}