-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlepitest.js
More file actions
411 lines (339 loc) · 31.5 KB
/
lepitest.js
File metadata and controls
411 lines (339 loc) · 31.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
////// LEPITEST DATA INTEGRITY CHECK
// Data points under test; queried before the padding paragraphs below are created.
var data = $$('p');
////1 Start data check results log
console.log('→ Run lepitest data integrity check ...');
// `log` accumulates one line per issue, `i` is the running issue counter.
var log = '';
var i = 0;
// Add some padding to avoid loops slamming into NULL values when hitting the end of a div.
// Two passes, each appending one empty <p> to every <div>, because some checks look two siblings ahead.
['first pass', 'second pass'].forEach(() => {
  $$('div').forEach((group) => {
    group.appendChild(document.createElement("p"));
  });
});
// Trim v datapoints back to only show one species name
$$('.v').forEach((parentTaxon) => {
  parentTaxon.innerHTML = parentTaxon.innerHTML.replace(/ ×.+/, '');
});
// Remove ID links
$$('.link').forEach((idLink) => {
  idLink.remove();
});
////2 Simple conditional batch checks
// Applies every single-data-point rule to each element collected in `data`.
// A failed rule increments the global issue counter `i` and appends one numbered line to the global `log` string.
data.forEach(f=>{
const d = f.innerHTML;
// innerHTML serializes "&" as "&amp;"; the author-separator spacing rules need the literal character.
const plain = d.replace(/&amp;/g,'&');
// Message fragments: every message names the data type (via convert) and the raw content of the data point.
const sd = ' detected at position "' + convert(f.classList) + ' ' + d + '". ';
const s1 = '] Invalid name' + sd;
const s2 = '] Stray taxon' + sd;
const s3 = '] Invalid data type' + sd;
const s4 = '] Invalid data order' + sd;
const s5 = '] Invalid data point' + sd;
const sf = 'The suffix of this taxon type must be "';
// Special characters and capitalization
// The first alternative of the second regex is an explicit double space; a single space would flag every multi-word name.
if (!f.matches('.xp') && /( |\t)$/.test(d) || /( {2}|\.\.|--| † †| agg. agg.)/g.test(d) || f.matches('p:not(.b,.z,.l,.xp)') && /"/g.test(d) || f.matches('.n,.r2,.p2') && / ,/g.test(d)) {i++; log += '[#' + i + s1 + 'Check for excess spaces, tabs, special characters or suffixes.\n'};
// A-Za-z instead of A-z: the range A-z would also admit the characters [ \ ] ^ _ and the backtick.
if (f.matches('[class^="x"]:not(.xp),.f,.y,.t,.j,.h,.g,.i,.sh') && /[^A-Za-z]/g.test(d) || f.matches('.b') && /[^A-Za-z -]/g.test(f.firstElementChild.innerHTML) || f.matches('.z') && /[^A-Za-z]/g.test(f.firstElementChild.innerHTML)) {i++; log += '[#' + i + s1 + 'Allowed characters are A-Z and a-z only. Check for invalid spaces, tabs, line-breaks and special characters.\n'};
if (f.matches('.o,.a,.ae,.c,.k,.u,.ue,.w,.v,.v2,.s,.s2,.s3,.s4') && /[^A-Za-z.†× -]/g.test(d)) {i++; log += '[#' + i + s1 + 'Allowed characters are A-Z, a-z and single spaces or hyphens only. Check for invalid tabs, line-breaks and special characters.\n'};
// NOTE(review): in the fourth clause below, " -." inside the character class is a range from space to dot, not three literals — confirm whether that is intended.
if (f.matches('p:not(.b,.z,.l,.p,.xp)') && /[^A-ZÄČÖÕÜ]/.test(d.substring(0,1)) || f.matches('.xp') && /[^A-Z]/.test(d.substring(1,2)) || f.matches('.b,.z') && /[^A-Z]/.test(f.firstElementChild.innerHTML.substring(0,1)) || f.matches('p:not(.xp,.b,.z,.n,.r,.r2,.l,.p,.p2,.e,.e2,.d,.d2)') && /[^a-z -.†×;]/.test(d.substring(1,d.length)) || f.matches('.xp') && /[^a-z]/.test(d.substring(2,d.length-2)) || f.matches('.b,.z') && /[^a-z -]/.test(f.firstElementChild.innerHTML.substring(1,f.firstElementChild.innerHTML.length))) {i++; log += '[#' + i + s1 + 'Check for correct placement of uppercase and lowercase letters.\n'};
if (f.matches('.d,.d2,.e,.e2') && (/ [a-z]/g.test(d) && !(/(and|of|du|und|von)/.test(d)) || /( And | Of | Du | Und | Von )/.test(d))) {i++; log += '[#' + i + s1 + 'Check for correct placement of uppercase and lowercase letters.\n'};
if (f.matches('.d,.d2,.e,.e2') && /\d/g.test(d)) {i++; log += '[#' + i + s1 + 'No numeric characters allowed in common names, use words instead.\n'};
// Common typos, spelling and grammar mistakes
if (f.matches('.e,.e2') && /Acrea/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Acraea" for typos.\n'};
if (f.matches('.e,.e2') && /Butterfy/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Butterfly" for typos.\n'};
if (f.matches('.e,.e2') && /Emporor/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Emperor" for typos.\n'};
if (f.matches('.d,.d2') && /(A|a)craea/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Acräa" for typos.\n'};
if (f.matches('.d,.d2') && /Chineische/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Chinesische" for typos.\n'};
if (f.matches('.d,.d2') && /(Dickopf|kopfalter)/g.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Dickkopffalter" for typos.\n'};
if (f.matches('.d,.d2') && /fügel/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Flügel" for typos.\n'};
if (f.matches('.d,.d2') && /(fügler|flüger)/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "flügler" for typos.\n'};
if (f.matches('.d,.d2') && /Gelbich/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "gelblich" for typos.\n'};
if (f.matches('.d,.d2') && /(G|g)elbing/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Gelbling" for typos.\n'};
if (f.matches('.d,.d2') && /Gemeine/.test(d)) {i++; log += '[#' + i + s1 + '"Gemein" is an obsolescent german term for "Gewöhnlich". Use the latter term instead.\n'};
if (f.matches('.d,.d2') && /Perlmutter/.test(d)) {i++; log += '[#' + i + s1 + '"Perlmutter" is an older, nowadays less common german term for "Perlmutt". Use the latter term instead.\n'};
if (f.matches('.d,.d2') && /Passionsblumenfalter/.test(d)) {i++; log += '[#' + i + s1 + '"Passionsblumenfalter" is an older, needlessly long german term for "Passionsfalter". Use the latter term instead.\n'};
if (f.matches('.d,.d2') && /Gewönliche/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Gewöhnlich" for typos.\n'};
if (f.matches('.d,.d2') && /Gross/.test(d)) {i++; log += '[#' + i + s1 + '"Gross" is an outdated spelling of "Groß".\n'};
if (f.matches('.d,.d2') && /(ikansche|ikanisce|ikanishe)/.test(d)) {i++; log += '[#' + i + s1 + 'Check the terms "Amerikanische/Afrikanische" for typos.\n'};
if (f.matches('.d,.d2') && /(P|p)auenauge/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Pfauenauge" for typos.\n'};
if (f.matches('.d,.d2') && /(Scwalben|Shwalben)/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Schwalben" for typos.\n'};
if (f.matches('.d,.d2') && /(scwanz|shwanz)/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "schwanz" for typos.\n'};
if (f.matches('.d,.d2') && /Tibetanisch/.test(d)) {i++; log += '[#' + i + s1 + '"Tibetanisch" is an incorrect spelling of "Tibetisch".\n'};
if (f.matches('.d,.d2') && /(W|w)eiss/.test(d)) {i++; log += '[#' + i + s1 + '"Weiss" is an outdated spelling of "Weiß".\n'};
if (f.matches('.e,.e2,.d,.d2') && /&/.test(d)) {i++; log += '[#' + i + s1 + 'Use the full word "and/und" instead of the "&" character as an abbreviation.\n'};
if (f.matches('.e,.e2,.d,.d2') && /St\./.test(d)) {i++; log += '[#' + i + s1 + 'Use the full word "Sankt/Saint" instead of the "St." abbreviation.\n'};
if (f.matches('.e,.e2,.d,.d2') && /Mt\./.test(d)) {i++; log += '[#' + i + s1 + 'Use the full word "Mount" instead of the "Mt." abbreviation.\n'};
if (f.matches('.e,.e2,.d,.d2') && /(’|´)/.test(d)) {i++; log += '[#' + i + s1 + 'For an apostrophe, please use the simple U+0027 "typewriter apostrophe" character found on every keyboard.\n'};
if (f.matches('.e,.e2,.d,.d2') && /(Phillipin|Phillippin)/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Philippin" for typos.\n'};
if (f.matches('.e,.e2,.d,.d2') && /Saimes/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Siamese" for typos.\n'};
if (f.matches('p:not(.r,.r2,.l,.p,.p2,.xp)') && /(P|p)aplio/.test(d)) {i++; log += '[#' + i + s1 + 'Check the term "Papilio" for typos.\n'};
if (f.matches('.a,.ae,.c,.k,.u,.ue,.e,.e2,.d,.d2') && /( -|- )/g.test(d)) {i++; log += '[#' + i + s1 + 'Check for excess spaces before/after the hyphen.\n'};
// Taxon suffixes
if (f.matches('.sh') && !(/(a|e|i)$/.test(d))) {i++; log += '[#' + i + s1 + 'The suffix of this synonym must match a valid higher level taxon.\n'};
if (f.matches('[class^="x"]:not(.x,.xs,.xp)') && !(/a$/.test(d)) || f.matches('.xp') && !(/a"$/.test(d))) {i++; log += '[#' + i + s1 + sf + 'a".\n'};
if (f.matches('.x') && !(/oidea$/.test(d))) {i++; log += '[#' + i + s1 + sf + 'oidea".\n'};
if (f.matches('.xs') && !(/iformes$/.test(d))) {i++; log += '[#' + i + s1 + sf + 'iformes".\n'};
if (f.matches('.f') && !(/idae$/.test(d))) {i++; log += '[#' + i + s1 + sf + 'idae".\n'};
if (f.matches('.y') && !(/inae$/.test(d))) {i++; log += '[#' + i + s1 + sf + 'inae".\n'};
if (f.matches('.t') && !(/ini$/.test(d))) {i++; log += '[#' + i + s1 + sf + 'ini".\n'};
if (f.matches('.j') && !(/ina$/.test(d))) {i++; log += '[#' + i + s1 + sf + 'ina".\n'};
if (f.matches('.h') && !(/iti$/.test(d))) {i++; log += '[#' + i + s1 + sf + 'iti".\n'};
if (f.matches('p:not(.c)') && (/ agg\.$/.test(d))) {i++; log += '[#' + i + s1 + 'Use the AGG datatype to add the "agg." suffix to a species complex.\n'};
// AUT and REF errors
if (f.matches('.n') && (!(/^\D+, \d{4}$/.test(d)) || d.substring(d.length-4,d.length) < 1758)) {i++; log += '[#' + i + s5 + "Make sure the AUT ends with a valid date in YYYY format, starting at 1758.\n"};
if (f.matches('.r,.r2') && (!(/^\D+, \d{4}$/.test(d)) || d.substring(d.length-4,d.length) < 1998)) {i++; log += '[#' + i + s5 + "Make sure the REF ends with a valid date in YYYY format, starting at 1998.\n"};
if (f.matches('.n,.r,.r2') && d.substring(d.length-4,d.length) > date.substring(0,4)) {i++; log += '[#' + i + s5 + "You cannot use a future date for anything already published.\n"};
if (f.matches('.r') && (/&/.test(d) || (d.match(/,/g) || []).length > 1 || (d.match(/et al/g) || []).length > 1)) {i++; log += '[#' + i + s5 + 'Only list the first author when using the r data type. The "et al." prefix is added automatically for this data type and therefore should not be present in the raw data.\n'};
// The separator-spacing test is part of the class-guarded condition and runs on the decoded text;
// previously the "g" flag sat inside the pattern and the test applied to every data point.
if (f.matches('.r2') && (/&/.test(d) && (d.match(/\W /g) || []).length < 2 || (d.match(/&/g) || []).length > 1 || (d.match(/,/g) || []).length > 1 || /(\S&|&\S|,\S)/.test(plain))) {i++; log += '[#' + i + s5 + 'Do not list more than 2 authors when using the r2 data type. If there are 2 authors listed, make sure they are properly separated by a space-separated "&" character.\n'};
if (f.matches('.n') && ((d.match(/&/g) || []).length > 1 || (d.match(/,/g) || []).length > 2 || /(\S&|&\S|,\S)/.test(plain))) {i++; log += '[#' + i + s5 + 'Do not list more than 3 authors when using the n data type. If there is more than one author listed, make sure the last one is properly separated by a space-separated "&" character.\n'};
if (f.matches('.p') && !(/^doi:10./.test(d))) {i++; log += '[#' + i + s5 + 'Make sure this REF_ID is a proper DOI, without any additional "doi:" or URL suffix.\n'};
if (f.matches('.p2') && !(/^ISSN-L \d{4}-\d{3}(\d|X), \d/.test(d))) {i++; log += '[#' + i + s5 + "Make sure this REF_ID is a proper ISSN + Volume designation.\n"};
// Data order
if (f.matches('.x9') && !(f.matches('p:first-of-type'))) {i += 1; log += '[#' + i + s2 + 'Only a single taxon of this type can exist at the very top of the list.\n'};
if (f.matches('.f,.i,.o,.a,.ae,.c,.k,.u,.ue,.w,.v,.v2') && f.matches('p:first-of-type') || f.matches('.k,.u,.ue') && f.matches('p:nth-of-type(2)')) {i++; log += '[#' + i + s2 + 'It must be located within a valid parent taxon.\n'};
if (f.matches('.g') && f.nextElementSibling.matches('.o,.k,.u,.ue,.v,.v2,.e,.e2,.d,.d2,.b,.z,.n,.s,.s2,.s3,.s4,.sh,.l,.p,.p2') || f.matches('.i,.o') && f.nextElementSibling.matches('.k,.u,.ue,.v,.v2,.e,.e2,.d,.d2,.b,.z,.n,.s,.s2,.s3,.s4,.sh,.l,.p,.p2')) {i++; log += '[#' + i + s2 + 'There is at least one misplaced datapoint at the beginning of this GEN/SUBGEN/SPGR, e.g. SP subdata lacking its SP datapoint or a SPGR not located within a SUBGEN. The correct hierarchy is: GEN > SUBGEN > SPGR > SP > SP subdata.\n'};
if (f.matches('p.g~p:is(.e2,.d2)')) {i++; log += '[#' + i + s4 + 'This type 2 common name must be located below a SUPERFAM, FAM, SUBFAM, TRI, SUBTRI or INFRATRI taxon.\n'};
if (f.matches('p.g~p.sh')) {i++; log += '[#' + i + s4 + 'This synonym must be located below a ORD, PARAPHYLUM, SUPERFAM, FAM, SUBFAM, TRI, SUBTRI or INFRATRI taxon.\n'};
if (f.matches('.e,.d,.s,.b,.z,.n,.s2,.s3,.s4') && !(f.matches('p:is(.a,.ae,.c,.w)~p'))) {i++; log += '[#' + i + s4 + 'Type 1 common names, BAS, TS and SYN must be located below a SP/AGG/HYBR taxon.\n'};
if (f.matches('p.d+p.e') || f.matches('p.d2+p.e2')) {i++; log += '[#' + i + s4 + 'German common names should be placed after english common names.\n'};
if (f.matches('p:is(.b,.z,.n,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2)+p:is(.e,.d)') || f.matches('p.sh+p:is(.e2,.d2)')) {i++; log += '[#' + i + s4 + 'BAS, TS, AUT, SYN, SYN_H, SSP, SEG and PARENT should be placed after common names.\n'};
if (f.matches('p:is(.s,.s2,.s3,.s4)+p:is(.b,.z,.n)')) {i++; log += '[#' + i + s4 + 'Synonyms should be placed after basionyms and type species designations.\n'};
if (f.matches('p:is(.z,.n)+p.b')) {i++; log += '[#' + i + s4 + 'Type species designations should be placed after basionyms. There can also be only one basionym listed per species-type taxon.\n'};
if (f.matches('p:is(.u,.ue,.k,.v,.v2)+p:is(.b,.z,.n,.s,.s2,.s3,.s4)')) {i++; log += '[#' + i + s4 + 'SSP, SEG and PARENT should be placed after basionyms, type species designations and synonyms.\n'};
if (f.matches('p:is(.p,.p2)+p:is(.e,.e2,.d,.d2,.b,.n,.z,[class^="s"],.u,.ue,.k,.v,.v2)')) {i++; log += '[#' + i + s4 + 'REF, REF_TITLE and REF_ID should be placed after common names, basionyms, type species designations, synonyms, SSP, SEG and PARENT.\n'};
if (f.matches('.v,.v2') && !(f.matches(':is(.w)~p'))) {i++; log += '[#' + i + s4 + 'PARENT taxons must be located below a HYBR taxon.\n'};
if (f.matches('.n') && !(f.matches(':is(.b,.z)+p')) || f.matches('.b,.z') && !(/, \d\d\d\d<\/span>/.test(d))) {i++; log += '[#' + i + s4 + 'Every BAS and TS must be directly followed by a proper AUT.\n'};
if (f.matches('.v2') && !(f.matches('p:is(.v)+p'))) {i++; log += '[#' + i + s4 + 'A hybrid PARENT taxon must be directly followed by the second PARENT.\n'};
if (f.matches('.l') && !(f.matches('p:is(.r,.r2)+p')) || f.matches('.p,.p2') && !(f.matches('p.l+p'))) {i++; log += '[#' + i + s4 + 'Every REF must be followed by a proper REF_TITLE and REF_ID.\n'};
// Data types
if (f.matches('p:not(.x9,.x8,.x7,.x6,.x5,.x4,.x3,.x2,.x,.xs,.xp,.f,.y,.t,.j,.h,.g,.i,.o,.a,.ae,.c,.b,.z,.n,.k,.u,.ue,.w,.v,.v2,.e,.e2,.d,.d2,.s,.s2,.s3,.s4,.sh,.r,.r2,.l,.p,.p2)')) {i++; log += '[#' + i + s3 + 'Use a valid data type.\n'};
});
////3 Check for correct alphabetical order
// Each loop walks the siblings following a parent taxon and compares neighbouring entries of one kind.
// localeCompare only guarantees the SIGN of its result, so "wrong order" is tested with > 0, never == 1.
var s5 = '] Subdata duplicate detected at position "';
var s6 = '] Wrong subdata sorting detected at position "';
// Order of SEG within AGG
$$('.c').forEach(f=>{
  let sib = f.nextElementSibling;
  while (sib) {
    const next = sib.nextElementSibling;
    if (sib.matches('.k') && next.matches('.k')) {
      const order = sib.innerHTML.localeCompare(next.innerHTML, 'en');
      if (order === 0) {i++; log += '[#' + i + s5 + 'AGG ' + f.innerHTML + '". "SEG ' + sib.innerHTML + '" is listed twice within this AGG.\n'}
      if (order > 0) {i++; log += '[#' + i + s6 + 'AGG ' + f.innerHTML + '". Place "SEG ' + sib.innerHTML + '" in its correct alphabetical order within this AGG.\n'}
    }
    // Stop as soon as the following element no longer belongs to this AGG's subdata.
    if (next.matches('p:not(.e,.d,.b,.n,.s,.s2,.s3,.s4,.k)')) break;
    sib = next;
  }
});
// Order of TS within SP/AGG/HYBR
$$('.a,.ae,.c,.w').forEach(f=>{
  let sib = f.nextElementSibling;
  while (sib) {
    if (sib.matches('.z')) {
      // The element directly after a TS is skipped; the next TS candidate sits two siblings ahead.
      const second = sib.nextElementSibling.nextElementSibling;
      if (second.matches('.z')) {
        const order = sib.firstElementChild.innerHTML.localeCompare(second.firstElementChild.innerHTML, 'en');
        if (order === 0) {i++; log += '[#' + i + s5 + convert(f.classList) + ' ' + f.innerHTML + '". "TS ' + sib.firstElementChild.innerHTML + '" is listed twice within this ' + convert(f.classList) + '.\n'}
        if (order > 0) {i++; log += '[#' + i + s6 + convert(f.classList) + ' ' + f.innerHTML + '". Place "TS ' + sib.firstElementChild.innerHTML + '" in its correct alphabetical order within this ' + convert(f.classList) + '.\n'}
      }
    }
    if (sib.nextElementSibling.matches('p:not(.e,.d,.b,.z,.n)')) break;
    sib = sib.nextElementSibling;
  }
});
// Order of SYN within SP/AGG/HYBR
$$('.a,.ae,.c,.w').forEach(f=>{
  let sib = f.nextElementSibling;
  while (sib) {
    const next = sib.nextElementSibling;
    if (sib.matches('.s,.s2,.s3,.s4') && next.matches('.s,.s2,.s3,.s4')) {
      const order = sib.innerHTML.localeCompare(next.innerHTML, 'en');
      if (order === 0) {i++; log += '[#' + i + s5 + convert(f.classList) + ' ' + f.innerHTML + '". "SYN ' + sib.innerHTML + '" is listed twice within this ' + convert(f.classList) + '.\n'}
      if (order > 0) {i++; log += '[#' + i + s6 + convert(f.classList) + ' ' + f.innerHTML + '". Place "SYN ' + sib.innerHTML + '" in its correct alphabetical order within this ' + convert(f.classList) + '.\n'}
    }
    if (next.matches('p:not(.e,.d,.b,.n,.s,.s2,.s3,.s4)')) break;
    sib = next;
  }
});
// Order of SYN_H within higher taxons
$$('.x9,.x,.xp,.f,.y,.t,.j,.h').forEach(f=>{
  // The scan starts at the first child of the element that follows the taxon.
  let sib = f.nextElementSibling.firstElementChild;
  while (sib) {
    const next = sib.nextElementSibling;
    if (sib.matches('.sh') && next.matches('.sh')) {
      const order = sib.innerHTML.localeCompare(next.innerHTML, 'en');
      if (order === 0) {i++; log += '[#' + i + s5 + convert(f.classList) + ' ' + f.innerHTML + '". "SYN_H ' + sib.innerHTML + '" is listed twice within this ' + convert(f.classList) + '.\n'}
      if (order > 0) {i++; log += '[#' + i + s6 + convert(f.classList) + ' ' + f.innerHTML + '". Place "SYN_H ' + sib.innerHTML + '" in its correct alphabetical order within this ' + convert(f.classList) + '.\n'}
    }
    if (next.matches('p:not(.e2,.d2,.sh)')) break;
    sib = next;
  }
});
// Order of SSP within SP
$$('.a,.ae').forEach(f=>{
  let sib = f.nextElementSibling;
  while (sib) {
    const next = sib.nextElementSibling;
    if (sib.matches('.u,.ue') && next.matches('.u,.ue')) {
      const order = sib.innerHTML.localeCompare(next.innerHTML, 'en');
      if (order === 0) {i++; log += '[#' + i + s5 + convert(f.classList) + ' ' + f.innerHTML + '". "'+ convert(sib.classList) + ' ' + sib.innerHTML + '" is listed twice within this ' + convert(f.classList) + '.\n'}
      if (order > 0) {i++; log += '[#' + i + s6 + convert(f.classList) + ' ' + f.innerHTML + '". Place "'+ convert(sib.classList) + ' ' + sib.innerHTML + '" in its correct alphabetical order within this ' + convert(f.classList) + '.\n'}
    }
    if (next.matches('p:not(.e,.d,.b,.n,.s,.s2,.s3,.s4,.u,.ue)')) break;
    sib = next;
  }
});
// Order of PARENT within HYBR
$$('.w').forEach(f=>{
  let sib = f.nextElementSibling;
  while (sib) {
    const next = sib.nextElementSibling;
    if (sib.matches('.v') && next.matches('.v2')) {
      const order = sib.innerHTML.localeCompare(next.innerHTML, 'en');
      if (order === 0) {i++; log += '[#' + i + s5 + convert(f.classList) + ' ' + f.innerHTML + '". "'+ convert(sib.classList) + ' ' + sib.innerHTML + '" is listed twice within this ' + convert(f.classList) + '.\n'}
      if (order > 0) {i++; log += '[#' + i + s6 + convert(f.classList) + ' ' + f.innerHTML + '". Place "'+ convert(sib.classList) + ' ' + sib.innerHTML + '" in its correct alphabetical order within this ' + convert(f.classList) + '.\n'}
    }
    if (next.matches('p:not(.e,.d,.b,.n,.s,.s2,.s3,.s4,.v,.v2)')) break;
    sib = next;
  }
});
// Order of SP/AGG/HYBR within GEN/SUBGEN/SPGR
$$('.g,.i,.o').forEach(f=>{
  const testArray = [];
  let sib = f.nextElementSibling;
  while (sib) {
    if (sib.matches('.a,.ae,.c')) testArray.push(sib.innerHTML);
    // The " ×" marker is dropped from HYBR names before they are compared.
    if (sib.matches('.w')) testArray.push(sib.innerHTML.replace(' ×',''));
    if (sib.matches('p:not(.r,.r2,.l,.p,.p2,.e,.d,.b,.n,.a,.ae,.c,.w,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2)')) break;
    sib = sib.nextElementSibling;
  }
  for (let t = 0; t < testArray.length - 1; t++) {
    const order = testArray[t].localeCompare(testArray[t+1], 'en');
    if (order === 0) {i++; log += '[#' + i + s5 + convert(f.classList) + ' ' + f.innerHTML + '". The species "' + testArray[t] + '" is listed twice within this ' + convert(f.classList) + '.\n'}
    if (order > 0) {i++; log += '[#' + i + s6 + convert(f.classList) + ' ' + f.innerHTML + '". Place the species "' + testArray[t] + '" in its correct alphabetical order within this ' + convert(f.classList) + '.\n'}
  }
});
// Order of SPGR within SUBGEN
$$('.i').forEach(f=>{
  const testArray = [];
  let sib = f.nextElementSibling;
  while (sib) {
    if (sib.matches('.o')) testArray.push(sib.innerHTML);
    if (sib.matches('p:not(.r,.r2,.l,.p,.p2,.e,.d,.b,.n,.o,.a,.ae,.c,.w,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2)')) break;
    sib = sib.nextElementSibling;
  }
  for (let t = 0; t < testArray.length - 1; t++) {
    const order = testArray[t].localeCompare(testArray[t+1], 'en');
    if (order === 0) {i++; log += '[#' + i + s5 + 'SUBGEN ' + f.innerHTML + '". "SPGR ' + testArray[t] + '" is listed twice within this SUBGEN.\n'}
    if (order > 0) {i++; log += '[#' + i + s6 + 'SUBGEN ' + f.innerHTML + '". Place "SPGR ' + testArray[t] + '" in its correct alphabetical order within this SUBGEN.\n'}
  }
});
// Order of GEN within FAM/SUBFAM/TRI/SUBTRI/INFRATRI
$$('.f,.y,.t,.j,.h').forEach(f=>{
  const testArray = [];
  let sib = f.nextElementSibling.firstElementChild;
  while (sib) {
    if (sib.matches('.g')) testArray.push(sib.innerHTML);
    if (sib.matches('p:not(.r,.r2,.l,.p,.p2,.e,.e2,.d,.d2,.b,.n,.g,.i,.o,.a,.ae,.c,.w,.s,.s2,.s3,.s4,.sh,.u,.ue,.k,.v,.v2)')) break;
    sib = sib.nextElementSibling;
  }
  for (let t = 0; t < testArray.length - 1; t++) {
    const order = testArray[t].localeCompare(testArray[t+1], 'en');
    if (order === 0) {i++; log += '[#' + i + s5 + convert(f.classList) + ' ' + f.innerHTML + '". "GEN ' + testArray[t] + '" is listed twice within this ' + convert(f.classList) + '.\n'}
    if (order > 0) {i++; log += '[#' + i + s6 + convert(f.classList) + ' ' + f.innerHTML + '". Place "GEN ' + testArray[t] + '" in its correct alphabetical order within this ' + convert(f.classList) + '.\n'}
  }
});
////4 Check for correct hierarchical order
// Each loop scans the subdata following a species-level taxon and reports child types that taxon may not contain.
var s7 = '] Wrong subdata hierarchy detected at position "';
// SSP within AGG/HYBR
$$('.c,.w').forEach(f=>{
  let sib = f.nextElementSibling;
  while (sib) {
    // Label the parent by its real type: this loop visits HYBR as well as AGG.
    if (sib.matches('.u,.ue')) {i++; log += '[#' + i + s7 + convert(f.classList) + ' ' + f.innerHTML + '". AGG/HYBR can' + "'t contain SSP, only SEG/PARENT.\n"}
    if (sib.matches('p:not(.e,.d,.b,.z,.n,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2,.r,.r2,.l,.p,.p2)')) break;
    sib = sib.nextElementSibling;
  }
});
// SEG within SP/HYBR
$$('.a,.ae,.w').forEach(f=>{
  let sib = f.nextElementSibling;
  while (sib) {
    if (sib.matches('.k')) {i++; log += '[#' + i + s7 + convert(f.classList) + ' ' + f.innerHTML + '". Species and their hybrids can' + "'t contain SEG, only SSP/PARENT.\n"}
    if (sib.matches('p:not(.e,.d,.b,.z,.n,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2,.r,.r2,.l,.p,.p2)')) break;
    sib = sib.nextElementSibling;
  }
});
// PARENT within SP/AGG
$$('.a,.ae,.c').forEach(f=>{
  let sib = f.nextElementSibling;
  while (sib) {
    if (sib.matches('.v,.v2')) {i++; log += '[#' + i + s7 + convert(f.classList) + ' ' + f.innerHTML + '". Species can' + "'t contain species hybrid PARENT datapoints, these belong to HYBR taxons.\n"}
    if (sib.matches('p:not(.e,.d,.b,.z,.n,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2,.r,.r2,.l,.p,.p2)')) break;
    sib = sib.nextElementSibling;
  }
});
////5 SUBGEN and SPGR specific checks
// Check for incorrect SPGR names
// For every SPGR, gathers the species/AGG names that follow it and reports the group
// when none of them equals "<genus> <lower-cased group name>".
$$('.o').forEach((group) => {
  // Index 0 holds the lower-cased group name, the remaining entries hold the member names.
  const names = [group.innerHTML.toLowerCase()];
  for (let node = group.nextElementSibling; node; node = node.nextElementSibling) {
    if (node.matches('.a,.ae,.c')) {
      names.push(node.innerHTML.replace(' agg.','').replace(' †',''));
    }
    if (node.matches('p:not(.a,.ae,.c,.w,.e,.d,.b,.z,.n,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2,.r,.r2,.l,.p,.p2)')) {
      break;
    }
  }
  // The genus is the second word of the joined list, i.e. the first word of the first member name.
  const words = names.toString().replace(',',' ').split(' ');
  const genus = words[1];
  const eponym = genus + ' ' + names[0];
  const hasEponymousSpecies = names.slice(1).some((member) => eponym.localeCompare(member, 'en') === 0);
  if (!hasEponymousSpecies) {
    i++;
    log += '[#' + i + '] Invalid name detected for SPGR "' + group.innerHTML + '" within GEN "' + genus + '". Its name does not match its eponymous species.\n';
  }
});
// Check for GEN and its first SUBGEN not having equal names
// Scans the elements following each GEN for its first SUBGEN and reports a name mismatch.
$$('.g').forEach(f=>{
  let firstSubgen = null;
  let sib = f.nextElementSibling;
  while (sib) {
    // Stop at the first SUBGEN only; an unconditional break here would end the scan after a single sibling.
    if (sib.matches('.i')) {firstSubgen = sib.innerHTML; break;}
    // Give up once an element outside the listed data types is reached.
    if (sib.matches('p:not(.w,.e,.d,.b,.n,.s,.s2,.s3,.s4,.v,.v2,.r,.r2,.l,.p,.p2)')) break;
    sib = sib.nextElementSibling;
  }
  if (firstSubgen !== null && f.innerHTML !== firstSubgen) {i++; log += '[#' + i + '] Invalid name detected for first SUBGEN of GEN "' + f.innerHTML + '". Both should have equal names.\n'}
});
// Check for GEN containing only a single SUBGEN
// Counts the SUBGEN entries following each GEN and reports the GEN when exactly one is found.
$$('.g').forEach((genus) => {
  let subgenCount = 0;
  for (let node = genus.nextElementSibling; node; node = node.nextElementSibling) {
    if (node.matches('.i')) {
      subgenCount += 1;
    }
    if (node.matches('p:not(.i,.o,.a,.ae,.c,.w,.e,.d,.b,.z,.n,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2,.r,.r2,.l,.p,.p2)')) {
      break;
    }
  }
  if (subgenCount !== 1) return;
  i++;
  log += '[#' + i + '] Invalid number of SUBGEN detected for GEN "' + genus.innerHTML + '". There should be none or more than one present.\n';
});
// Check for SUBGEN containing only a single SPGR
// Counts the SPGR entries following each SUBGEN and reports the SUBGEN when exactly one is found.
$$('.i').forEach((subgenus) => {
  let spgrCount = 0;
  for (let node = subgenus.nextElementSibling; node; node = node.nextElementSibling) {
    if (node.matches('.o')) {
      spgrCount += 1;
    }
    if (node.matches('p:not(.o,.a,.ae,.c,.w,.e,.d,.b,.z,.n,.s,.s2,.s3,.s4,.u,.ue,.k,.v,.v2,.r,.r2,.l,.p,.p2)')) {
      break;
    }
  }
  if (spgrCount !== 1) return;
  i++;
  log += '[#' + i + '] Invalid number of SPGR detected for SUBGEN "' + subgenus.innerHTML + '". There should be none or more than one present.\n';
});
////6 Check for duplicate common names and taxa
// Names are collected per category, sorted so equal entries end up adjacent, then compared pairwise.
var s8 = '] Duplicate name detected for ';
var s9 = '] Duplicate basionym, synonym or TS designation detected for ';
var s10 = '] Duplicate synonymic (sub)species detected for ';
// Create test arrays
var testArrayE = [];
var testArrayE2 = [];
var testArrayD = [];
var testArrayD2 = [];
var testArrayTX = [];
var testArrayBS = [];
var testArraySY = [];
// Collect all names
$$('p[id]:not(.o),.e,.e2,.d,.d2').forEach((node) => {
  // Element ids use underscores where the readable name has spaces.
  const spacedId = () => node.id.replace(/_/g,' ');
  if (node.matches('.e')) testArrayE.push(node.innerHTML);
  if (node.matches('.e2')) testArrayE2.push(node.innerHTML);
  if (node.matches('.d')) testArrayD.push(node.innerHTML);
  if (node.matches('.d2')) testArrayD2.push(node.innerHTML);
  if (node.matches('p[id]')) testArrayTX.push(spacedId());
  // For these two lists everything up to the last "$" of the id is dropped.
  if (node.matches('.b,.z,.s,.s2,.s3,.s4')) testArrayBS.push(spacedId().replace(/.+\$/,''));
  if (node.matches('.a,.u,.k,.s,.s2,.s3,.s4')) testArraySY.push(spacedId().replace(/.+\$/,''));
});
// Sort the names
[testArrayE, testArrayE2, testArrayD, testArrayD2, testArrayTX, testArrayBS, testArraySY].forEach((names) => {
  names.sort();
});
// Check the arrays for duplicates
for (let t = 1; t < testArrayE.length; t++) {
  const previous = testArrayE[t - 1];
  if (previous.localeCompare(testArrayE[t], 'en') !== 0) continue;
  i++;
  log += '[#' + i + s8 + '"NAME_EN (type 1) ' + previous + '". One of them should be considered invalid.\n';
}
for (let t = 1; t < testArrayE2.length; t++) {
  const previous = testArrayE2[t - 1];
  if (previous.localeCompare(testArrayE2[t], 'en') !== 0) continue;
  i++;
  log += '[#' + i + s8 + '"NAME_EN (type 2) ' + previous + '". One of them should be considered invalid.\n';
}
for (let t = 1; t < testArrayD.length; t++) {
  const previous = testArrayD[t - 1];
  if (previous.localeCompare(testArrayD[t], 'de') !== 0) continue;
  i++;
  log += '[#' + i + s8 + '"NAME_DE (type 1) ' + previous + '". One of them should be considered invalid.\n';
}
for (let t = 1; t < testArrayD2.length; t++) {
  const previous = testArrayD2[t - 1];
  if (previous.localeCompare(testArrayD2[t], 'de') !== 0) continue;
  i++;
  log += '[#' + i + s8 + '"NAME_DE (type 2) ' + previous + '". One of them should be considered invalid.\n';
}
// The remaining three loops overwrite the reported entry with its capitalized display form.
for (let t = 1; t < testArrayTX.length; t++) {
  if (testArrayTX[t - 1].localeCompare(testArrayTX[t], 'en') !== 0) continue;
  const bare = testArrayTX[t - 1].replace(/.+\$/,'');
  testArrayTX[t - 1] = bare.charAt(0).toUpperCase() + bare.slice(1);
  i++;
  log += '[#' + i + s8 + 'taxon "' + testArrayTX[t - 1] + '". One of them must be considered invalid.\n';
}
for (let t = 1; t < testArrayBS.length; t++) {
  if (testArrayBS[t - 1].localeCompare(testArrayBS[t], 'en') !== 0) continue;
  testArrayBS[t - 1] = testArrayBS[t - 1].charAt(0).toUpperCase() + testArrayBS[t - 1].slice(1);
  i++;
  log += '[#' + i + s9 + 'taxon "' + testArrayBS[t - 1] + '". One of them should be deleted.\n';
}
for (let t = 1; t < testArraySY.length; t++) {
  if (testArraySY[t - 1].localeCompare(testArraySY[t], 'en') !== 0) continue;
  testArraySY[t - 1] = testArraySY[t - 1].charAt(0).toUpperCase() + testArraySY[t - 1].slice(1);
  i++;
  log += '[#' + i + s10 + 'taxon "' + testArraySY[t - 1] + '". Either the taxon or the synonym must be considered invalid.\n';
}
////7 Check for loose characters not being part of any data
// Flags a group when a closing </div> is followed by something other than (an optional line break and) a tag,
// or when the group's markup contains more than one line break.
$$('div[id]').forEach(f=>{
  const html = f.innerHTML;
  // match() returns null when there is no line break at all; fall back to an empty list instead of throwing.
  const lineBreaks = (html.match(/(\n)/g) || []).length;
  if (/\/div\>(?!\n?\<)/.test(html) || lineBreaks > 1) {i++; log += '[#' + i + '] Loose characters or empty lines detected within group ' + f.firstElementChild.innerHTML + '. These characters should either be turned into proper data points or be deleted!\n'}
});
////8 Remove padding, calculate download size, finish log, show infos and results
// Every <p> without a class attribute is removed again.
$$('p:not([class])').forEach((padding) => {
  padding.remove();
});
// `count` is defined outside this section; the factor turns it into the megabyte estimate shown below.
var size = count * 0.0000236;
var info1 = 'CHECK AND EXPORT DATASET\n⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n- Export format: TSV\n- Estimated download size: ' + size.toFixed(1) + ' MB\n\n';
var info2 = i > 0
  ? "WARNING: Data integrity issues found! Keep this in mind when using the data!\n- If you're a contributor, please fix the issues before opening a pull request.\n- If you're a visitor, please report this problem at https://github.com/lepitaxa/lepitaxa.github.io/issues.\n\n"
  : "No data integrity issues found!\n\n";
var results = '⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\nLepitest Log:\n⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n[#0] Data points checked: ' + data.length + '. Issues found: ' + i + '.\n' + log;
// The full log goes to the console; the alert additionally shows the export info and the verdict.
console.log(results);
alert(info1 + info2 + results);
////9 Run tsv converter, rebuild the ID links afterwards
console.log('⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n→ Converting data to TSV format ...');
// TSV converter: two metadata rows, then one "<data type>\t<content>" row per exported element.
var file_cont = 'META_DATASET\tLepitaxa-' + version + '\nMETA_ORIGIN\thttps://lepitaxa.github.io';
$$('p:not(.b,.z,.l),.bs,.zs,.ls').forEach((node) => {
  const row = convert(node.classList) + '\t' + node.innerHTML;
  file_cont = file_cont + '\n' + row;
});
// Generate, click and remove download link, rebuild IDs
var file_link = document.createElement('a');
file_link.setAttribute('download', 'Lepitaxa-' + version + '.tsv');
file_link.setAttribute('href', 'data:text/tsv;charset=utf-8,' + encodeURIComponent(file_cont));
file_link.click();
console.log('→ Download ready!');
file_link.remove();
// Re-create the ID links by prepending an anchor with class "link" to every SP/SP_EXT/AGG/HYBR/GEN element.
$$('.a,.ae,.c,.w,.g').forEach((taxon) => {
  const anchor = document.createElement('a');
  anchor.href = 'https://lepitaxa.github.io#' + taxon.id;
  anchor.title = '→ Lepitaxa-Link';
  anchor.classList.add('link');
  taxon.prepend(anchor);
});
////// AUX FUNCTION - Class list data type extractor
/**
 * Turns an element's class list into the upper-case data type label used in log messages and the TSV export.
 *
 * The class list is stringified, the substrings "en", "de", "hide" and "open" and all spaces are removed,
 * and the remaining short code is replaced by its label (e.g. "a" -> "SP", "x9" -> "ORD").
 * Codes are replaced one after another, first occurrence only. Multi-character codes run first so
 * that e.g. "ae" is not consumed by the single-letter "a" rule. Digits are stripped before the
 * single-letter pass, which makes "e2", "s3", "r2" ... share the label of their base code.
 *
 * @param {{toString(): string}} dt - class list (or any value whose string form is a space-separated class list)
 * @returns {string} the data type label; unknown codes are returned partially or not translated
 */
function convert(dt) {
  // Multi-character codes, applied while digits are still present.
  const compoundCodes = [
    ['x9', 'ORD'], ['x8', 'SUBORD'], ['x7', 'INFRAORD'], ['x6', 'PARVORD'], ['x5', 'MICROORD'],
    ['x4', 'SECT'], ['x3', 'SUBSECT'], ['x2', 'INFRASECT'], ['xs', 'SERIES'], ['xp', 'PARAPHYLUM'],
    ['ae', 'SP_EXT'], ['ue', 'SSP_EXT'], ['bs', 'BAS'], ['zs', 'TS'], ['ls', 'REF_TITLE'],
    ['sh', 'SYN_H'], ['w', 'HYBR'],
  ];
  // Single-letter codes. Every label is upper-case, so an earlier replacement is never hit by a later rule.
  // "b" and "z" map to the same labels as their "bs"/"zs" counterparts.
  const simpleCodes = [
    ['x', 'SUPERFAM'], ['f', 'FAM'], ['y', 'SUBFAM'], ['t', 'TRI'], ['j', 'SUBTRI'], ['h', 'INFRATRI'],
    ['g', 'GEN'], ['i', 'SUBGEN'], ['o', 'SPGR'], ['a', 'SP'], ['c', 'AGG'], ['k', 'SEG'], ['u', 'SSP'],
    ['v', 'PARENT'], ['n', 'AUT'], ['r', 'REF'], ['l', 'REF_TITLE'], ['p', 'REF_ID'], ['e', 'NAME_EN'],
    ['d', 'NAME_DE'], ['s', 'SYN'], ['b', 'BAS'], ['z', 'TS'],
  ];
  let label = dt.toString().replace(/(en|de|hide|open)/g, '').replace(/ /g, '');
  for (const [code, name] of compoundCodes) {
    label = label.replace(code, name);
  }
  label = label.replace(/\d/g, '');
  for (const [code, name] of simpleCodes) {
    label = label.replace(code, name);
  }
  return label;
}