-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdeva2ipa.cs
More file actions
285 lines (242 loc) · 10.5 KB
/
deva2ipa.cs
File metadata and controls
285 lines (242 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
using System;
using System.Collections;
using System.Configuration;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace CST.Conversion
{
/// <summary>
/// Command-line tool and converter that transliterates Unicode Devanagari text
/// into the International Phonetic Alphabet (IPA).
/// </summary>
class Deva2IPA
{
    /// <summary>
    /// Matches a Devanagari consonant (U+0915..U+0939) that is NOT immediately followed by a
    /// dependent vowel sign or virama (U+093E..U+094D) or by a literal 'a'.
    /// The lookahead consumes nothing, so adjacent consonants and a consonant at the very end
    /// of the input are all handled in a single pass.
    /// </summary>
    private static readonly Regex BareConsonant =
        new Regex("([\u0915-\u0939])(?![\u093E-\u094Da])");

    /// <summary>
    /// Program entry point. Expects the input file path and the output directory
    /// (a third argument is accepted but ignored).
    /// </summary>
    /// <param name="args">Command-line arguments: input file, output directory.</param>
    static void Main(string[] args)
    {
        try
        {
            if (args.Length < 2 || args.Length > 3)
            {
                PrintUsage();
                return;
            }

            // validate args
            FileInfo fi = new FileInfo(args[0]);
            if (!fi.Exists)
            {
                Console.WriteLine("Input file does not exist");
                return;
            }

            DirectoryInfo di = new DirectoryInfo(args[1]);
            if (!di.Exists)
            {
                Console.Write("Output directory does not exist. Would you like to create it?[y/n] ");

                // ReadLine returns null when stdin is closed/redirected and "" when the user just
                // presses Enter; both are treated as "no" instead of throwing.
                string answer = Console.ReadLine();
                if (answer == null || !answer.StartsWith("y", StringComparison.OrdinalIgnoreCase))
                {
                    return;
                }

                di.Create();
            }

            Deva2IPA d2 = new Deva2IPA();
            d2.InputFilePath = args[0];

            // The output keeps the input's file name. Note that if the output directory is the
            // input file's own directory, the input file is overwritten.
            d2.OutputFilePath = Path.Combine(di.FullName, fi.Name);

            // IPA is case-sensitive, so no capitalization parameters are needed.
            d2.Convert();
        }
        catch (Exception ex)
        {
            // Top-level handler: report the failure together with the arguments that caused it.
            Console.WriteLine("-------- BEGIN EXCEPTION in Deva2IPA Main ---------------");
            Console.WriteLine("Exception: " + ex);
            Console.WriteLine("Args:");
            foreach (string arg in args)
            {
                Console.WriteLine(arg);
            }
            Console.WriteLine("---------- END EXCEPTION in Deva2IPA Main -------------");
        }
    }

    /// <summary>
    /// Prints the command-line syntax to the console.
    /// </summary>
    static void PrintUsage()
    {
        Console.WriteLine("Transliterates Unicode Devanagari to International Phonetic Alphabet (IPA)");
        Console.WriteLine("syntax:");
        // Both arguments are required by Main, so the output directory is not shown as optional.
        Console.WriteLine("deva2ipa input output_directory");
    }

    // end static methods

    // Maps a single Devanagari (or zero-width) character to its IPA replacement string.
    // Keys are boxed chars; every value is a string.
    private readonly Hashtable deva2IPA;

    /// <summary>
    /// Builds the Devanagari-to-IPA character table.
    /// </summary>
    public Deva2IPA()
    {
        deva2IPA = new Hashtable();

        // Niggahita (Nasal)
        // Common Pali realization is velar nasal /ŋ/ at end of syllable
        deva2IPA['\u0902'] = "ŋ";

        // --- Independent Vowels ---
        deva2IPA['\u0905'] = "a";  // a - short /a/
        deva2IPA['\u0906'] = "aː"; // aa - long /a:/
        deva2IPA['\u0907'] = "i";  // i
        deva2IPA['\u0908'] = "iː"; // ii
        deva2IPA['\u0909'] = "u";  // u
        deva2IPA['\u090A'] = "uː"; // uu
        deva2IPA['\u090F'] = "eː"; // e (Long by default in open syllables)
        deva2IPA['\u0913'] = "oː"; // o (Long by default in open syllables)

        // --- Gutturals (Velars) ---
        deva2IPA['\u0915'] = "k";  // ka
        deva2IPA['\u0916'] = "kʰ"; // kha (Aspirated)
        deva2IPA['\u0917'] = "g";  // ga
        deva2IPA['\u0918'] = "gʱ"; // gha (Breathy voiced)
        deva2IPA['\u0919'] = "ŋ";  // nga

        // --- Palatals ---
        // Pali 'c' is a voiceless palatal stop /c/, not affricate /tʃ/ in strict phonetics,
        // though /tʃ/ is common. Using /c/ for precision.
        deva2IPA['\u091A'] = "c";  // ca
        deva2IPA['\u091B'] = "cʰ"; // cha
        deva2IPA['\u091C'] = "ɟ";  // ja (Voiced palatal stop)
        deva2IPA['\u091D'] = "ɟʱ"; // jha
        deva2IPA['\u091E'] = "ɲ";  // nya

        // --- Retroflexes (Cerebrals) ---
        deva2IPA['\u091F'] = "ʈ";  // ta
        deva2IPA['\u0920'] = "ʈʰ"; // tha
        deva2IPA['\u0921'] = "ɖ";  // da
        deva2IPA['\u0922'] = "ɖʱ"; // dha
        deva2IPA['\u0923'] = "ɳ";  // na

        // --- Dentals ---
        // Pali t/d are true dentals, distinct from English alveolar.
        // Represented by the bridge diacritic below.
        deva2IPA['\u0924'] = "t̪";  // ta
        deva2IPA['\u0925'] = "t̪ʰ"; // tha
        deva2IPA['\u0926'] = "d̪";  // da
        deva2IPA['\u0927'] = "d̪ʱ"; // dha
        deva2IPA['\u0928'] = "n";  // na (Dental/Alveolar)

        // --- Labials ---
        deva2IPA['\u092A'] = "p";  // pa
        deva2IPA['\u092B'] = "pʰ"; // pha
        deva2IPA['\u092C'] = "b";  // ba
        deva2IPA['\u092D'] = "bʱ"; // bha
        deva2IPA['\u092E'] = "m";  // ma

        // --- Liquids, Fricatives, etc. ---
        deva2IPA['\u092F'] = "j"; // ya (Palatal approximant)
        deva2IPA['\u0930'] = "r"; // ra (Trill)
        deva2IPA['\u0932'] = "l"; // la
        deva2IPA['\u0935'] = "ʋ"; // va (Labiodental approximant, more accurate than 'v' or 'w')
        deva2IPA['\u0938'] = "s"; // sa
        deva2IPA['\u0939'] = "h"; // ha
        deva2IPA['\u0933'] = "ɭ"; // la (Retroflex lateral)

        // --- Dependent Vowel Signs ---
        deva2IPA['\u093E'] = "aː"; // aa
        deva2IPA['\u093F'] = "i";  // i
        deva2IPA['\u0940'] = "iː"; // ii
        deva2IPA['\u0941'] = "u";  // u
        deva2IPA['\u0942'] = "uː"; // uu
        deva2IPA['\u0947'] = "eː"; // e
        deva2IPA['\u094B'] = "oː"; // o
        deva2IPA['\u094D'] = "";   // Virama (suppresses inherent vowel)

        // --- Numerals ---
        // Devanagari digits are written out as ASCII digits.
        deva2IPA['\u0966'] = "0";
        deva2IPA['\u0967'] = "1";
        deva2IPA['\u0968'] = "2";
        deva2IPA['\u0969'] = "3";
        deva2IPA['\u096A'] = "4";
        deva2IPA['\u096B'] = "5";
        deva2IPA['\u096C'] = "6";
        deva2IPA['\u096D'] = "7";
        deva2IPA['\u096E'] = "8";
        deva2IPA['\u096F'] = "9";

        // Special characters
        deva2IPA['\u0970'] = "."; // Abbreviation
        deva2IPA['\u200C'] = "";  // ZWNJ
        deva2IPA['\u200D'] = "";  // ZWJ
    }

    /// <summary>Path of the Devanagari file read by <see cref="Convert"/>.</summary>
    public string InputFilePath
    {
        get { return inputFilePath; }
        set { inputFilePath = value; }
    }
    private string inputFilePath;

    /// <summary>Path of the IPA file written by <see cref="Convert"/>.</summary>
    public string OutputFilePath
    {
        get { return outputFilePath; }
        set { outputFilePath = value; }
    }
    private string outputFilePath;

    /// <summary>
    /// Reads <see cref="InputFilePath"/>, transliterates its content to IPA and writes the
    /// result to <see cref="OutputFilePath"/> as UTF-8, replacing any existing file.
    /// </summary>
    public void Convert()
    {
        string devStr;
        // using-blocks guarantee the file handles are released even if reading/writing throws.
        using (StreamReader sr = new StreamReader(InputFilePath))
        {
            devStr = sr.ReadToEnd();
        }

        // Update stylesheet reference if it exists.
        devStr = devStr.Replace("tipitaka-deva.xsl", "tipitaka-ipa.xsl");

        // Remove the Devanagari abbreviation sign before an ellipsis, so it is not
        // turned into an extra "." by the character table.
        devStr = devStr.Replace("\u0970\u2026", "\u2026");

        string str = ConvertToIPA(devStr);

        // Handle dandas (punctuation), then tidy the spacing they leave behind.
        str = ConvertDandas(str);
        str = CleanupPunctuation(str);

        // UTF-8 is required for the IPA symbols.
        using (StreamWriter sw = new StreamWriter(OutputFilePath, false, Encoding.UTF8))
        {
            sw.Write(str);
        }
    }

    /// <summary>
    /// Transliterates Devanagari characters in <paramref name="devStr"/> to IPA.
    /// Characters that are not in the table (markup, Latin text, dandas, ...) are copied unchanged.
    /// </summary>
    /// <param name="devStr">Text containing Devanagari characters.</param>
    /// <returns>The text with every mapped character replaced by its IPA string.</returns>
    public string ConvertToIPA(string devStr)
    {
        // Consonants carry an inherent 'a' unless followed by a vowel sign or virama.
        // Insert it explicitly; the literal "a" is not in the table and passes through as-is.
        // If the table entry for short a ever changes (e.g. to "ɐ"), change this string too.
        devStr = BareConsonant.Replace(devStr, "${1}a");

        StringBuilder sb = new StringBuilder(devStr.Length);
        foreach (char c in devStr)
        {
            // Single lookup: the table never stores null, so null means "not mapped".
            string ipa = (string)deva2IPA[c];
            if (ipa != null)
            {
                sb.Append(ipa);
            }
            else
            {
                sb.Append(c);
            }
        }
        return sb.ToString();
    }

    /// <summary>
    /// Replaces danda punctuation: inside gatha paragraphs a single danda (U+0964) becomes ","
    /// and a double danda (U+0965) becomes "."; inside centred paragraphs double dandas are
    /// removed; everywhere else a single danda becomes "." and a double danda " || ".
    /// </summary>
    /// <param name="str">Text that may contain dandas and &lt;p rend="..."&gt; markup.</param>
    /// <returns>The text with all dandas replaced.</returns>
    public string ConvertDandas(string str)
    {
        // No RegexOptions.Singleline is used, so '.' does not match a newline and only
        // paragraphs whose opening and closing tags are on the same line are matched here.
        str = Regex.Replace(str, "<p rend=\"gatha[a-z0-9]*\">.+?</p>",
            new MatchEvaluator(this.ConvertGathaDandas));
        str = Regex.Replace(str, "<p rend=\"centre\">.+?</p>",
            new MatchEvaluator(this.RemoveNamoTassaDandas));

        // Default fallback for everything not handled above.
        str = str.Replace("\u0964", ".");
        str = str.Replace("\u0965", " || ");
        return str;
    }

    /// <summary>
    /// Match evaluator for gatha paragraphs: single danda -> ",", double danda -> ".".
    /// </summary>
    /// <param name="m">The matched paragraph.</param>
    /// <returns>The paragraph text with its dandas replaced.</returns>
    public string ConvertGathaDandas(Match m)
    {
        string str = m.Value;
        str = str.Replace("\u0964", ","); // Comma for mid-gatha break
        str = str.Replace("\u0965", "."); // Period for end
        return str;
    }

    /// <summary>
    /// Match evaluator for centred paragraphs: removes double dandas.
    /// </summary>
    /// <param name="m">The matched paragraph.</param>
    /// <returns>The paragraph text without double dandas.</returns>
    public string RemoveNamoTassaDandas(Match m)
    {
        string str = m.Value;
        return str.Replace("\u0965", "");
    }

    /// <summary>
    /// Removes the single space that precedes a comma, question mark, exclamation mark,
    /// semicolon or full stop.
    /// </summary>
    /// <param name="str">Text to tidy.</param>
    /// <returns>The text with " ," " ?" " !" " ;" " ." collapsed onto the punctuation mark.</returns>
    public string CleanupPunctuation(string str)
    {
        str = str.Replace(" ,", ",");
        str = str.Replace(" ?", "?");
        str = str.Replace(" !", "!");
        str = str.Replace(" ;", ";");
        str = str.Replace(" .", ".");
        return str;
    }
}
}