Skip to content

Commit 79bcc1c

Browse files
committed
fixing Swift App Find feature
1 parent 7e7382f commit 79bcc1c

5 files changed

Lines changed: 300 additions & 6 deletions

File tree

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Direct Lookup Bypass: If OdyCy fails (returns the word itself or unknown), try to find the word in Text-Fabric by stripping accents from both the query and the TF index.
2+
Normalization Utility: Create a GreekNormalizer class that can strip all diacritics (accents, breathings, iota subscripts) to produce a "naked" Greek string (e.g., εἴληφα -> ειληφα, είληφα -> ειληφα).
3+
Convert είληφα -> ειληφα (unaccented).
4+
Convert TF words: εἴληφα -> ειληφα.
5+
Match! We found εἴληφα.
6+
Look up lemma of εἴληφα in TF -> λαμβάνω.
7+
This completely bypasses OdyCy for this case, OR we can feed εἴληφα to OdyCy if we really want to use it. But TF is the ground truth for this app.
8+
Requires creating a utility that strips all diacritics using unicodedata decomposition and filtering.
9+
Root Cause: Mismatch between user input (monotonic/unaccented) and index/model expectation (polytonic).
10+
Fix Strategy: "Accent-Insensitive Lookup".
11+
Feasibility: High. Python's unicodedata handles this well.
12+
Performance: Acceptable for CLI/Worker (~100-200ms overhead to scan index).
13+
Implementation:
14+
Add strip_accents helper.
15+
OdyCy & Polytonic Greek: External Validation
16+
Findings
17+
Designed for Ancient Greek: OdyCy is explicitly built for Ancient Greek NLP [1][2].
18+
Polytonic vs. Monotonic: Ancient Greek uses the polytonic system (multi-accent). Modern Greek uses the monotonic system (single accent) introduced in 1982 [5][6].
19+
Diacritic Importance: In Ancient Greek, diacritics (accents/breathings) are semantically and morphologically significant. A model trained on this data expects these features for accurate disambiguation.
20+
No "Normalization" Feature: Documentation lists lemmatization, POS tagging, and parsing, but does not list "monotonic-to-polytonic normalization" as a feature [1][3].
21+
Conclusion
22+
The search results validate the empirical findings: OdyCy expects polytonic input because it is an Ancient Greek model. It treats monotonic or unaccented input as "out of distribution" or incorrect orthography, leading to lemmatization failures.

macos/Sources/BibleApp/App/ServerManager.swift

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,8 @@ class ServerManager: ObservableObject {
5252
task.currentDirectoryPath = path
5353
task.executableURL = URL(fileURLWithPath: "/bin/zsh")
5454

55-
// Command to start uvicorn
56-
// We use -c to run the full command string
57-
// Check if .venv exists, otherwise might need another way or assume standard layout
58-
let command = ".venv/bin/uvicorn api.main:app --app-dir src --port 8000"
55+
// Command to start uvicorn (using module path without --app-dir)
56+
let command = ".venv/bin/uvicorn src.api.main:app --port 8000"
5957
task.arguments = ["-c", command]
6058

6159
let pipe = Pipe()

macos/Sources/BibleApp/Models/VerseModels.swift

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,26 @@ struct SearchResult: Codable, Identifiable {
7474
let chapter: Int
7575
let verse: Int
7676
}
77+
78+
// MARK: - FindResponse
79+
/// Decoded reply of the find worker for a single looked-up word.
struct FindResponse: Codable {
    /// Lemma the results were collected for.
    let lemma: String
    /// Word form reported back by the worker; the UI compares it with `lemma`
    /// to decide whether a transformation happened (presumably the word as typed).
    let original: String
    /// Gloss of the lemma; read from the `lemma_gloss` JSON key, may be empty.
    let lemmaGloss: String
    /// Total number of occurrences; can exceed `results.count`.
    let total: Int
    /// Occurrences returned in this reply.
    let results: [FindResultItem]

    /// JSON key mapping; only `lemmaGloss` differs from its property name.
    enum CodingKeys: String, CodingKey {
        case lemma
        case original
        case lemmaGloss = "lemma_gloss"
        case total
        case results
    }
}
91+
92+
// MARK: - FindResultItem
93+
/// One occurrence returned by the find worker.
struct FindResultItem: Codable, Identifiable {
    /// Reference string of the occurrence; also serves as the row identity.
    let ref: String
    /// Greek text of the occurrence.
    let greek: String
    /// French rendering; may be empty.
    let french: String
    /// Substrings of `greek` that should be emphasised.
    let highlights: [String]

    /// `Identifiable` conformance: the reference doubles as the identifier.
    var id: String {
        return ref
    }
}

macos/Sources/BibleApp/Views/ContentView.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ struct ContentView: View {
1515
}
1616
.tag(0)
1717

18-
SearchView()
18+
FindView()
1919
.tabItem {
20-
Label("Search", systemImage: "magnifyingglass")
20+
Label("Find", systemImage: "character.book.closed")
2121
}
2222
.tag(1)
2323
}
Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
import SwiftUI
2+
3+
/// Tab that finds occurrences of a Greek word.
///
/// The lookup is delegated to the Python script `find_worker.py`, started as a
/// child process; its standard output is decoded as a `FindResponse` and
/// rendered as a list of occurrences followed by a lemma/total summary.
struct FindView: View {
    @State private var searchText = ""
    @State private var limitText = "20"
    @State private var findResponse: FindResponse?
    @State private var errorMessage: String?
    @State private var isLoading = false

    @FocusState private var isFocused: Bool

    /// Holder for the bytes read from the worker's standard error on a helper queue.
    ///
    /// `@unchecked Sendable` is sound here because the single write happens
    /// inside a `DispatchGroup` and the single read happens after `wait()`.
    private final class OutputBuffer: @unchecked Sendable {
        var data = Data()
    }

    var body: some View {
        VStack(spacing: 0) {
            // Input Bar
            HStack(spacing: 12) {
                // Greek word field
                HStack {
                    Image(systemName: "character.book.closed")
                        .foregroundColor(.gray)
                    TextField("Greek word (e.g. ἀγαπάω)", text: $searchText)
                        .textFieldStyle(.plain)
                        .font(.title2)
                        .focused($isFocused)
                        .onSubmit {
                            performFind()
                        }
                }
                .frame(maxWidth: .infinity)

                // Limit field
                HStack {
                    Text("Limit:")
                        .font(.caption)
                        .foregroundColor(.secondary)
                    TextField("20", text: $limitText)
                        .textFieldStyle(.roundedBorder)
                        .font(.body)
                        .frame(width: 60)
                        .onSubmit {
                            performFind()
                        }
                }

                if isLoading {
                    ProgressView()
                        .scaleEffect(0.5)
                }
            }
            .padding()
            .background(Color(NSColor.controlBackgroundColor))

            Divider()

            // Results
            ScrollView {
                if let response = findResponse {
                    VStack(alignment: .leading, spacing: 12) {
                        // A notice can accompany a valid response (e.g. zero
                        // occurrences). It must be rendered in this branch,
                        // otherwise the response takes precedence and the
                        // notice is never seen.
                        if let notice = errorMessage {
                            Text(notice)
                                .foregroundColor(.red)
                        }

                        // Results list
                        ForEach(response.results) { result in
                            VStack(alignment: .leading, spacing: 4) {
                                // Reference
                                Text(result.ref)
                                    .font(.headline)
                                    .foregroundColor(.green)

                                // Greek text with highlighting
                                HighlightedText(text: result.greek, highlights: result.highlights)
                                    .font(.body)
                                    .textSelection(.enabled)

                                // French translation
                                if !result.french.isEmpty {
                                    Text("(TOB) \(result.french)")
                                        .font(.body)
                                        .foregroundColor(.cyan)
                                        .textSelection(.enabled)
                                }

                                Divider()
                                    .background(Color.gray.opacity(0.3))
                            }
                        }

                        // "... and X more" message
                        if response.total > response.results.count {
                            Text("... and \(response.total - response.results.count) more.")
                                .font(.body)
                                .foregroundColor(.secondary)
                                .padding(.vertical, 4)
                        }

                        // Summary footer
                        VStack(alignment: .leading, spacing: 4) {
                            Divider()
                                .background(Color.gray)
                                .padding(.vertical, 8)

                            Text(lemmaSummary(for: response))
                                .font(.headline)
                                .foregroundColor(.cyan)

                            Text("Total occurrences: \(response.total)")
                                .font(.headline)
                                .foregroundColor(.green)
                        }
                        .padding(.top, 8)
                    }
                    .padding()
                } else if let error = errorMessage {
                    Text(error)
                        .foregroundColor(.red)
                        .padding()
                } else {
                    VStack {
                        Image(systemName: "character.book.closed")
                            .font(.largeTitle)
                            .foregroundColor(.secondary.opacity(0.5))
                        Text("Find all occurrences of a Greek word")
                            .foregroundColor(.secondary)
                        Text("Supports lemmatization with OdyCy")
                            .font(.caption)
                            .foregroundColor(.secondary)
                    }
                    .padding(.top, 60)
                }
            }
        }
        .onAppear {
            isFocused = true
        }
    }

    /// Builds the footer line describing the lemma.
    ///
    /// Shows `original → lemma` when the two differ, and appends the gloss in
    /// parentheses when it is not empty.
    private func lemmaSummary(for response: FindResponse) -> String {
        var summary = "Lemma: "
        if response.original != response.lemma {
            // Separator keeps the two word forms from running together.
            summary += "\(response.original) → "
        }
        summary += response.lemma
        if !response.lemmaGloss.isEmpty {
            summary += " (\(response.lemmaGloss))"
        }
        return summary
    }

    /// Runs the find worker for the current input and publishes the outcome.
    ///
    /// Does nothing when the search field is empty or whitespace-only. The
    /// limit falls back to 20 when the limit field is not an integer. The
    /// process is run on a background queue; all state updates are dispatched
    /// to the main queue.
    func performFind() {
        let word = searchText.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !word.isEmpty else { return }

        isLoading = true
        errorMessage = nil
        findResponse = nil

        // Tolerate stray spaces around the number instead of silently using the default.
        let limit = Int(limitText.trimmingCharacters(in: .whitespacesAndNewlines)) ?? 20

        // Get project root from ServerManager
        let projectRoot = ServerManager.shared.serverPath
        let venvPython = "\(projectRoot)/.venv-spacy/bin/python3"
        let workerScript = "\(projectRoot)/src/application/workers/find_worker.py"

        DispatchQueue.global(qos: .userInitiated).async {
            let task = Process()
            task.executableURL = URL(fileURLWithPath: venvPython)
            task.arguments = [workerScript, word, "--limit", "\(limit)"]
            task.currentDirectoryPath = projectRoot

            // Separate pipes: anything the worker writes to stderr must not
            // end up inside the JSON document read from stdout.
            let stdoutPipe = Pipe()
            let stderrPipe = Pipe()
            task.standardOutput = stdoutPipe
            task.standardError = stderrPipe

            do {
                try task.run()
            } catch {
                DispatchQueue.main.async {
                    isLoading = false
                    errorMessage = "Failed to run worker: \(error.localizedDescription)"
                }
                return
            }

            // Drain both pipes BEFORE waiting for exit. A child that fills a
            // pipe buffer blocks on write, so waiting first can hang forever.
            // stderr is drained concurrently so neither stream can stall the other.
            let stderrBuffer = OutputBuffer()
            let stderrDrained = DispatchGroup()
            DispatchQueue.global(qos: .userInitiated).async(group: stderrDrained) {
                stderrBuffer.data = stderrPipe.fileHandleForReading.readDataToEndOfFile()
            }
            let output = stdoutPipe.fileHandleForReading.readDataToEndOfFile()
            stderrDrained.wait()
            task.waitUntilExit()

            let status = task.terminationStatus
            let diagnostics = stderrBuffer.data

            DispatchQueue.main.async {
                isLoading = false

                if status != 0 {
                    // Report everything the worker emitted, diagnostics first.
                    let errorText = String(data: diagnostics + output, encoding: .utf8) ?? "Unknown error"
                    errorMessage = "Worker failed: \(errorText)"
                    return
                }

                do {
                    let result = try JSONDecoder().decode(FindResponse.self, from: output)
                    findResponse = result
                    if result.total == 0 {
                        errorMessage = "No occurrences found for '\(word)'."
                    }
                } catch {
                    errorMessage = "Parsing error: \(error.localizedDescription)"
                    print("Raw data: \(String(data: output, encoding: .utf8) ?? "Bad data")")
                }
            }
        }
    }
}
212+
213+
// Helper view for highlighting Greek text
214+
/// Shows `text`, drawing every occurrence of each string in `highlights`
/// in bold red.
struct HighlightedText: View {
    let text: String
    let highlights: [String]

    var body: some View {
        if !highlights.isEmpty {
            // At least one term to emphasise: render the styled string.
            Text(attributedText())
        } else {
            // Nothing to emphasise: plain text is enough.
            Text(text)
        }
    }

    /// Returns `text` as an attributed string with all matches of every
    /// highlight term styled.
    private func attributedText() -> AttributedString {
        var styled = AttributedString(text)

        for term in highlights {
            // Window still to be scanned for this term; shrinks after each match.
            var remaining = styled.startIndex..<styled.endIndex

            while let match = styled[remaining].range(of: term) {
                styled[match].foregroundColor = .red
                styled[match].font = .body.bold()

                // A match that ends the string leaves nothing more to scan.
                guard match.upperBound < styled.endIndex else { break }
                remaining = match.upperBound..<styled.endIndex
            }
        }

        return styled
    }
}

0 commit comments

Comments
 (0)