Skip to content

Commit 3d2bde2

Browse files
committed
Audio adding to AnkiDroid works
1 parent 8fafb40 commit 3d2bde2

8 files changed

Lines changed: 232 additions & 62 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ xz2 = { version = "0.1" }
107107
zbus = { version = "5.5", default-features = false }
108108
zip = { version = "4.0" }
109109
android_logger = { version = "0.15.0"}
110+
sha2 = { version = "0.10"}
111+
hex = { version = "0.4" }
110112

111113
[workspace.metadata.cargo-shear]
112114
ignored = ["bzip2"]

crates/wordbase/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ tokio-util = { workspace = true, features = ["rt"] }
6161
tracing = { workspace = true }
6262
unicode-segmentation = { workspace = true }
6363
uniffi = { workspace = true, optional = true, features = ["tokio"] }
64-
# tracing-log = { workspace = true, optional = true }
64+
sha2 = { workspace = true }
65+
hex = { workspace = true }
6566

6667
derive_more = { workspace = true, features = [
6768
"debug",

crates/wordbase/src/anki.rs

Lines changed: 73 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use {
44
itertools::Itertools,
55
maud::html,
66
serde::Serialize,
7-
std::{collections::HashMap, fmt::Write as _, ops::Range},
7+
std::{collections::HashMap, ops::Range},
88
wordbase_api::{
99
DictionaryId, FrequencyValue, NormString, ProfileId, Record, RecordEntry, Term, dict,
1010
},
@@ -24,12 +24,12 @@ impl Engine {
2424

2525
let span_min = entries
2626
.iter()
27-
.map(|record| record.span_bytes.start)
27+
.map(|entry| entry.span_bytes.start)
2828
.min()
2929
.context("no records")?;
3030
let span_max = entries
3131
.iter()
32-
.map(|record| record.span_bytes.end)
32+
.map(|entry| entry.span_bytes.end)
3333
.max()
3434
.context("no records")?;
3535
let term_span = (usize::try_from(span_min).unwrap_or(usize::MAX))
@@ -42,32 +42,38 @@ impl Engine {
4242
.map_or("?", |dict| dict.meta.name.as_str())
4343
};
4444
let glossaries = glossaries(&entries);
45-
let fields = [
46-
("Expression", term_part(term.headword())),
47-
("ExpressionReading", term_part(term.reading())),
48-
("ExpressionFurigana", term_ruby_plain(term)),
49-
(
50-
"Sentence",
51-
sentence_cloze(sentence, term_span).unwrap_or_default(),
52-
),
53-
(
54-
"MainDefinition",
55-
glossaries.first().cloned().unwrap_or_default(),
56-
),
57-
("Glossary", all_glossaries(&glossaries)),
58-
("IsWordAndSentenceCard", String::new()),
59-
("IsClickCard", String::new()),
60-
("IsSentenceCard", "x".into()),
61-
("PitchPosition", pitch_positions(&entries)),
62-
("Frequency", frequency_list(&entries, dict_name)),
63-
("FreqSort", frequency_harmonic_mean(&entries)),
64-
];
6545

6646
Ok(TermNote {
67-
fields: fields
68-
.into_iter()
69-
.map(|(k, v)| (k.to_string(), v))
70-
.collect(),
47+
fields: [
48+
("Expression", term_part(term.headword())),
49+
("ExpressionReading", term_part(term.reading())),
50+
("ExpressionFurigana", term_ruby_plain(term)),
51+
(
52+
"Sentence",
53+
sentence_cloze(sentence, term_span).unwrap_or_default(),
54+
),
55+
// TODO: generate sentence furigana, like AJT does
56+
// this is kinda complicated though
57+
// I can't use AJT's code for this since it uses an incredibly copyleft license
58+
(
59+
"MainDefinition",
60+
glossaries.first().cloned().unwrap_or_default(),
61+
),
62+
("Glossary", all_glossaries(&glossaries)),
63+
("IsWordAndSentenceCard", String::new()),
64+
("IsClickCard", String::new()),
65+
("IsSentenceCard", "x".into()),
66+
("PitchPosition", pitch_positions(&entries)),
67+
("Frequency", frequency_list(&entries, dict_name)),
68+
("FreqSort", frequency_harmonic_mean(&entries)),
69+
]
70+
.into_iter()
71+
.map(|(k, v)| (k.to_string(), NoteField::String(v)))
72+
.chain(
73+
term_audio(&entries)
74+
.map(|audio| ("ExpressionAudio".to_string(), NoteField::Audio(audio))),
75+
)
76+
.collect::<HashMap<_, _>>(),
7177
})
7278
}
7379

@@ -102,10 +108,16 @@ impl Engine {
102108
}
103109
}
104110

105-
#[derive(Debug, Serialize)]
106111
#[cfg_attr(feature = "uniffi", derive(uniffi::Record))]
107112
pub struct TermNote {
108-
pub fields: HashMap<String, String>,
113+
pub fields: HashMap<String, NoteField>,
114+
}
115+
116+
#[derive(Debug, Serialize)]
117+
#[cfg_attr(feature = "uniffi", derive(uniffi::Enum))]
118+
pub enum NoteField {
119+
String(String),
120+
Audio(Vec<u8>),
109121
}
110122

111123
fn term_part(part: Option<&NormString>) -> String {
@@ -115,23 +127,39 @@ fn term_part(part: Option<&NormString>) -> String {
115127
fn term_ruby_plain(term: &Term) -> String {
116128
match term {
117129
Term::Full(headword, reading) => {
118-
let mut result = String::new();
119-
for (headword_part, reading_part) in lang::jpn::furigana_parts(headword, reading) {
120-
_ = write!(&mut result, "{headword_part}");
121-
if !reading_part.is_empty() {
122-
_ = write!(&mut result, "[{reading_part}]");
123-
}
124-
// Lapis uses a space to separate headword/reading part pairs
125-
// todo do this properly use this as ref: 落とし穴
126-
_ = write!(&mut result, " ");
127-
}
128-
result
130+
// Lapis does something a bit screwy with furigana.
131+
// "押し込む" -> "押[お]し 込[こ]む"
132+
// Notice:
133+
// - after kanji segments, there is "[{reading}]", and no space afterwards
134+
// - after kana segments, there is a space
135+
lang::jpn::furigana_parts(headword, reading)
136+
.map(|(headword_part, reading_part)| {
137+
if reading_part.is_empty() {
138+
format!("{headword_part} ")
139+
} else {
140+
format!("{headword_part}[{reading_part}]")
141+
}
142+
})
143+
.join("")
129144
}
130145
Term::Headword(headword) => headword.to_string(),
131146
Term::Reading(reading) => reading.to_string(),
132147
}
133148
}
134149

150+
fn term_audio(entries: &[&RecordEntry]) -> Option<Vec<u8>> {
151+
entries
152+
.iter()
153+
.find_map(|entry| match &entry.record {
154+
Record::YomichanAudioForvo(audio) => Some(&audio.audio),
155+
Record::YomichanAudioJpod(audio) => Some(&audio.audio),
156+
Record::YomichanAudioNhk16(audio) => Some(&audio.audio),
157+
Record::YomichanAudioShinmeikai8(audio) => Some(&audio.audio),
158+
_ => None,
159+
})
160+
.map(|audio| audio.data.to_vec())
161+
}
162+
135163
fn sentence_cloze(sentence: &str, term_span: Range<usize>) -> Option<String> {
136164
let cloze_prefix = sentence.get(..term_span.start)?;
137165
let cloze_body = sentence.get(term_span.clone())?;
@@ -176,7 +204,7 @@ fn all_glossaries(glossaries: &[String]) -> String {
176204
fn pitch_positions(entries: &[&RecordEntry]) -> String {
177205
entries
178206
.iter()
179-
.filter_map(|record| match &record.record {
207+
.filter_map(|entry| match &entry.record {
180208
Record::YomitanPitch(dict::yomitan::Pitch { position, .. }) => Some(*position),
181209
_ => None,
182210
})
@@ -194,11 +222,11 @@ fn frequency_list<'a>(
194222
) -> String {
195223
entries
196224
.iter()
197-
.filter_map(|record| match &record.record {
225+
.filter_map(|entry| match &entry.record {
198226
Record::YomitanFrequency(dict::yomitan::Frequency { value, display }) => {
199227
match (value, display) {
200-
(_, Some(display)) => Some((record, display.clone())),
201-
(Some(FrequencyValue::Rank(rank)), None) => Some((record, format!("{rank}"))),
228+
(_, Some(display)) => Some((entry, display.clone())),
229+
(Some(FrequencyValue::Rank(rank)), None) => Some((entry, format!("{rank}"))),
202230
_ => None,
203231
}
204232
}
@@ -217,7 +245,7 @@ fn frequency_harmonic_mean(entries: &[&RecordEntry]) -> String {
217245
harmonic_mean(
218246
entries
219247
.iter()
220-
.filter_map(|record| match &record.record {
248+
.filter_map(|entry| match &entry.record {
221249
Record::YomitanFrequency(dict::yomitan::Frequency {
222250
// TODO: how do we handle occurrences?
223251
value: Some(FrequencyValue::Rank(rank)),

crates/wordbase/src/deinflect/lindera.rs

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
1+
// TODO: cases to handle:
2+
// - ㌀ -> アパート
3+
// - 20日 -> 20日
4+
15
use {
26
super::{Deinflection, Deinflector},
37
anyhow::{Context as _, Result},
48
itertools::Itertools,
59
lindera::{
10+
character_filter::{
11+
BoxCharacterFilter,
12+
unicode_normalize::{UnicodeNormalizeCharacterFilter, UnicodeNormalizeKind},
13+
},
614
dictionary::{DictionaryKind, load_dictionary_from_kind},
715
mode::Mode,
816
segmenter::Segmenter,
@@ -30,7 +38,12 @@ impl Lindera {
3038
let dictionary = load_dictionary_from_kind(DictionaryKind::UniDic)
3139
.context("failed to load dictionary")?;
3240
let segmenter = Segmenter::new(Mode::Normal, dictionary, None);
33-
let tokenizer = Tokenizer::new(segmenter);
41+
42+
let mut tokenizer = Tokenizer::new(segmenter);
43+
tokenizer.append_character_filter(BoxCharacterFilter::from(
44+
UnicodeNormalizeCharacterFilter::new(UnicodeNormalizeKind::NFKC),
45+
));
46+
3447
Ok(Self {
3548
tokenizer,
3649
lookahead,
@@ -462,6 +475,32 @@ mod tests {
462475
(text, start),
463476
[Deinflection::new(start, text, "有り難う")],
464477
);
478+
479+
assert_deinflects(
480+
&deinflector,
481+
("20日", 0),
482+
[
483+
Deinflection::new(0, "20日", "20日"),
484+
Deinflection::new(0, "20日", "二零日"),
485+
Deinflection::new(0, "20", "20"),
486+
Deinflection::new(0, "20", "二零"),
487+
Deinflection::new(0, "2", "2"),
488+
Deinflection::new(0, "2", "二"),
489+
],
490+
);
491+
492+
assert_deinflects(
493+
&deinflector,
494+
("２０日", 0),
495+
[
496+
Deinflection::new(0, "20日", "20日"),
497+
Deinflection::new(0, "20日", "二零日"),
498+
Deinflection::new(0, "20", "20"),
499+
Deinflection::new(0, "20", "二零"),
500+
Deinflection::new(0, "2", "2"),
501+
Deinflection::new(0, "2", "二"),
502+
],
503+
);
465504
}
466505

467506
static TOKENIZER: LazyLock<Tokenizer> = LazyLock::new(|| {

wordbase-android/app/src/main/AndroidManifest.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@
4848
<category android:name="android.intent.category.DEFAULT" />
4949
</intent-filter>
5050
</activity>
51+
52+
<provider
53+
android:name=".NoteProvider"
54+
android:authorities="io.github.aecsocket.wordbase.note"
55+
android:exported="true"
56+
android:grantUriPermissions="true" />
5157
</application>
5258

5359
</manifest>
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package io.github.aecsocket.wordbase
2+
3+
import android.content.ContentProvider
4+
import android.content.ContentValues
5+
import android.net.Uri
6+
import android.os.ParcelFileDescriptor
7+
import android.util.Log
8+
import androidx.core.net.toUri
9+
import java.io.FileOutputStream
10+
11+
private const val TAG = "NoteProvider"
12+
13+
class NoteProvider : ContentProvider() {
14+
override fun onCreate() = true
15+
16+
override fun query(
17+
uri: Uri,
18+
projection: Array<out String?>?,
19+
selection: String?,
20+
selectionArgs: Array<out String?>?,
21+
sortOrder: String?
22+
) = null
23+
24+
override fun openFile(uri: Uri, mode: String): ParcelFileDescriptor? {
25+
Log.i(TAG, "Request to open note content $uri")
26+
val data = data ?: return null
27+
Log.i(TAG, "We have data, reading...")
28+
29+
val (read, write) = ParcelFileDescriptor.createPipe()
30+
Thread {
31+
write.use { write ->
32+
FileOutputStream(write.fileDescriptor).use { stream ->
33+
stream.write(data)
34+
}
35+
}
36+
Log.i(TAG, "All data written")
37+
}.start()
38+
Log.i(TAG, "Sent")
39+
return read
40+
}
41+
42+
override fun getType(uri: Uri) = null
43+
44+
override fun insert(
45+
uri: Uri,
46+
values: ContentValues?
47+
) = null
48+
49+
override fun delete(
50+
uri: Uri,
51+
selection: String?,
52+
selectionArgs: Array<out String?>?
53+
) = 0
54+
55+
override fun update(
56+
uri: Uri,
57+
values: ContentValues?,
58+
selection: String?,
59+
selectionArgs: Array<out String?>?
60+
) = 0
61+
62+
companion object {
63+
val uri = "content://io.github.aecsocket.wordbase.note".toUri()
64+
var data: ByteArray? = null
65+
}
66+
}

0 commit comments

Comments
 (0)