forked from openscriptures/api
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathschema.txt
More file actions
49 lines (44 loc) · 2.97 KB
/
schema.txt
File metadata and controls
49 lines (44 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#Pseudo-SQL
# work(id=1, title="WLC Ketiv", variants_for_work_id=NULL, variant_bit=00000001)
# work(id=2, title="WLC Qere", variants_for_work_id=1, variant_bit=00000010)
# token(id=1, data="A", position=1, work_id=1, variant_bits=00000001) #only Ketiv
# token(id=2, data="a", position=1, work_id=1, variant_bits=00000010) #only Qere
# token(id=3, data="B", position=2, work_id=1, variant_bits=00000011) #both Ketiv & Qere
# work: one row per text. A variant text (e.g. "WLC Qere") is its own row that
# points at its parent text (e.g. "WLC Ketiv") through variants_for_work_id;
# a parent text has variants_for_work_id = NULL.
# NOTE(review): variant_bit is a single BYTE and the examples at the top of the
# file give each work one bit, which would cap a parent plus its variants at
# 8 works - confirm that limit is acceptable.
work(
    id INTEGER PRIMARY KEY AUTO_INCREMENT, # typed INTEGER to match the INTEGER columns that reference work(id)
    osis_id VARCHAR(255) NOT NULL INDEX,
    lang VARCHAR(16) NOT NULL,
    title VARCHAR(255) NOT NULL,
    variants_for_work_id INTEGER NULL FOREIGN KEY work(id) COMMENT "If a work contains variants from other works, then those variants can be assigned to another work which is then associated with the parent work via this relationship; for example, the main work could be WLC Ketiv, but there could be another WLC Qere variant work that would be subordinate to it",
    variant_bit BYTE NOT NULL DEFAULT 00000001 COMMENT "This is a bit mask which is applied to token.variant_bits to obtain all of the tokens that occur in this work alone"
)
# token: the atomic units of a text. Tokens that belong only to a variant work
# are still stored under the parent work's id and are told apart by
# variant_bits; alternative readings of the same slot share a position
# (tokens 1 and 2 in the example at the top of the file).
token(
    id INTEGER PRIMARY KEY AUTO_INCREMENT, # typed INTEGER so the token(id) foreign keys have a declared type to match
    data VARCHAR(255) NOT NULL COMMENT "Token data could be word, punctuation, whitespace, or any other atomic textual unit.",
    position INTEGER NOT NULL,
    work_id INTEGER NOT NULL FOREIGN KEY work(id) COMMENT "This points to a work whose variants_for_work_id IS NULL",
    variant_bits BYTE NOT NULL DEFAULT 00000001 COMMENT "The bits in this byte are flags indicating which works it is associated with.",
    unified_token_id INTEGER NULL FOREIGN KEY token(id) COMMENT "When works are merged together; unification"
)
#TODO: What to do about transposed tokens? How to discover and represent the relationship?
/* This is stand-off markup to indicate suprasegmental features */
# structure: a typed span of tokens (book, chapter, verse, ...) described by
# references to its first and last token rather than by markup stored inside
# the token data.
# NOTE(review): nothing here states whether end_token_id is inclusive - confirm.
structure(
    id INTEGER PRIMARY KEY AUTO_INCREMENT,
    type ENUM('book', 'bookGroup', 'chapter', 'verse', 'section', 'subSection', 'title', 'paragraph', 'quotation', 'questionable-1', 'questionable-2' /*…*/) NOT NULL, # data type first, then constraints, like every other column
    variant_bits BYTE NOT NULL COMMENT "The bits in this byte are flags indicating which works it is associated with.",
    /* For each of the tokens referenced, their work_id must be the same */
    start_token_id INTEGER NOT NULL FOREIGN KEY token(id),
    end_token_id INTEGER NOT NULL FOREIGN KEY token(id),
    start_marker_token_id INTEGER NULL FOREIGN KEY token(id) COMMENT "In a quotation, this would point to the quotation mark, after which the start_token_id would point.",
    end_marker_token_id INTEGER NULL FOREIGN KEY token(id) # presumably the closing counterpart of start_marker_token_id (e.g. the closing quotation mark) - confirm
)
/* These relate to interlinearization; these could also be used for unification instead of relying on unified_token_id */
# linkage_cluster: a link between two works, identified by work_id_1 and
# work_id_2. The tokens taking part in a cluster are presumably listed in
# linkage_cluster_token - confirm.
linkage_cluster(
    id INTEGER PRIMARY KEY AUTO_INCREMENT, #TODO: This needs to be stable too!
    work_id_1 INTEGER NOT NULL FOREIGN KEY work(id),
    work_id_2 INTEGER NOT NULL FOREIGN KEY work(id)
)
# linkage_cluster_token: membership rows, one per token that takes part in a
# linkage cluster.
# NOTE(review): the original definition ended with a dangling comma after
# token_id and had no column pointing at linkage_cluster, so a row could not
# say which cluster its token belongs to. linkage_cluster_id is the assumed
# missing column - confirm against the intended design.
linkage_cluster_token(
    id INTEGER PRIMARY KEY AUTO_INCREMENT, #TODO: This needs to be stable too!
    linkage_cluster_id INTEGER NOT NULL FOREIGN KEY linkage_cluster(id),
    token_id INTEGER NOT NULL FOREIGN KEY token(id)
)