44 itertools:: Itertools ,
55 maud:: html,
66 serde:: Serialize ,
7- std:: { collections:: HashMap , fmt :: Write as _ , ops:: Range } ,
7+ std:: { collections:: HashMap , ops:: Range } ,
88 wordbase_api:: {
99 DictionaryId , FrequencyValue , NormString , ProfileId , Record , RecordEntry , Term , dict,
1010 } ,
@@ -24,12 +24,12 @@ impl Engine {
2424
2525 let span_min = entries
2626 . iter ( )
27- . map ( |record| record . span_bytes . start )
27+ . map ( |entry| entry . span_bytes . start )
2828 . min ( )
2929 . context ( "no records" ) ?;
3030 let span_max = entries
3131 . iter ( )
32- . map ( |record| record . span_bytes . end )
32+ . map ( |entry| entry . span_bytes . end )
3333 . max ( )
3434 . context ( "no records" ) ?;
3535 let term_span = ( usize:: try_from ( span_min) . unwrap_or ( usize:: MAX ) )
@@ -42,32 +42,38 @@ impl Engine {
4242 . map_or ( "?" , |dict| dict. meta . name . as_str ( ) )
4343 } ;
4444 let glossaries = glossaries ( & entries) ;
45- let fields = [
46- ( "Expression" , term_part ( term. headword ( ) ) ) ,
47- ( "ExpressionReading" , term_part ( term. reading ( ) ) ) ,
48- ( "ExpressionFurigana" , term_ruby_plain ( term) ) ,
49- (
50- "Sentence" ,
51- sentence_cloze ( sentence, term_span) . unwrap_or_default ( ) ,
52- ) ,
53- (
54- "MainDefinition" ,
55- glossaries. first ( ) . cloned ( ) . unwrap_or_default ( ) ,
56- ) ,
57- ( "Glossary" , all_glossaries ( & glossaries) ) ,
58- ( "IsWordAndSentenceCard" , String :: new ( ) ) ,
59- ( "IsClickCard" , String :: new ( ) ) ,
60- ( "IsSentenceCard" , "x" . into ( ) ) ,
61- ( "PitchPosition" , pitch_positions ( & entries) ) ,
62- ( "Frequency" , frequency_list ( & entries, dict_name) ) ,
63- ( "FreqSort" , frequency_harmonic_mean ( & entries) ) ,
64- ] ;
6545
6646 Ok ( TermNote {
67- fields : fields
68- . into_iter ( )
69- . map ( |( k, v) | ( k. to_string ( ) , v) )
70- . collect ( ) ,
47+ fields : [
48+ ( "Expression" , term_part ( term. headword ( ) ) ) ,
49+ ( "ExpressionReading" , term_part ( term. reading ( ) ) ) ,
50+ ( "ExpressionFurigana" , term_ruby_plain ( term) ) ,
51+ (
52+ "Sentence" ,
53+ sentence_cloze ( sentence, term_span) . unwrap_or_default ( ) ,
54+ ) ,
55+ // TODO: generate sentence furigana, like AJT does
56+ // this is kinda complicated though
57+ // I can't use AJT's code for this since it uses an incredibly copyleft license
58+ (
59+ "MainDefinition" ,
60+ glossaries. first ( ) . cloned ( ) . unwrap_or_default ( ) ,
61+ ) ,
62+ ( "Glossary" , all_glossaries ( & glossaries) ) ,
63+ ( "IsWordAndSentenceCard" , String :: new ( ) ) ,
64+ ( "IsClickCard" , String :: new ( ) ) ,
65+ ( "IsSentenceCard" , "x" . into ( ) ) ,
66+ ( "PitchPosition" , pitch_positions ( & entries) ) ,
67+ ( "Frequency" , frequency_list ( & entries, dict_name) ) ,
68+ ( "FreqSort" , frequency_harmonic_mean ( & entries) ) ,
69+ ]
70+ . into_iter ( )
71+ . map ( |( k, v) | ( k. to_string ( ) , NoteField :: String ( v) ) )
72+ . chain (
73+ term_audio ( & entries)
74+ . map ( |audio| ( "ExpressionAudio" . to_string ( ) , NoteField :: Audio ( audio) ) ) ,
75+ )
76+ . collect :: < HashMap < _ , _ > > ( ) ,
7177 } )
7278 }
7379
@@ -102,10 +108,16 @@ impl Engine {
102108 }
103109}
104110
105- #[ derive( Debug , Serialize ) ]
106111#[ cfg_attr( feature = "uniffi" , derive( uniffi:: Record ) ) ]
107112pub struct TermNote {
108- pub fields : HashMap < String , String > ,
113+ pub fields : HashMap < String , NoteField > ,
114+ }
115+
116+ #[ derive( Debug , Serialize ) ]
117+ #[ cfg_attr( feature = "uniffi" , derive( uniffi:: Enum ) ) ]
118+ pub enum NoteField {
119+ String ( String ) ,
120+ Audio ( Vec < u8 > ) ,
109121}
110122
111123fn term_part ( part : Option < & NormString > ) -> String {
@@ -115,23 +127,39 @@ fn term_part(part: Option<&NormString>) -> String {
115127fn term_ruby_plain ( term : & Term ) -> String {
116128 match term {
117129 Term :: Full ( headword, reading) => {
118- let mut result = String :: new ( ) ;
119- for ( headword_part, reading_part) in lang:: jpn:: furigana_parts ( headword, reading) {
120- _ = write ! ( & mut result, "{headword_part}" ) ;
121- if !reading_part. is_empty ( ) {
122- _ = write ! ( & mut result, "[{reading_part}]" ) ;
123- }
124- // Lapis uses a space to separate headword/reading part pairs
125- // todo do this properly use this as ref: 落とし穴
126- _ = write ! ( & mut result, " " ) ;
127- }
128- result
130+ // Lapis does something a bit screwy with furigana.
131+ // "押し込む" -> "押[お]し 込[こ]む"
132+ // Notice:
133+ // - after kanji segments, there is "[{reading}]", and no space afterwards
134+ // - after kana segments, there is a space
135+ lang:: jpn:: furigana_parts ( headword, reading)
136+ . map ( |( headword_part, reading_part) | {
137+ if reading_part. is_empty ( ) {
138+ format ! ( "{headword_part} " )
139+ } else {
140+ format ! ( "{headword_part}[{reading_part}]" )
141+ }
142+ } )
143+ . join ( "" )
129144 }
130145 Term :: Headword ( headword) => headword. to_string ( ) ,
131146 Term :: Reading ( reading) => reading. to_string ( ) ,
132147 }
133148}
134149
150+ fn term_audio ( entries : & [ & RecordEntry ] ) -> Option < Vec < u8 > > {
151+ entries
152+ . iter ( )
153+ . find_map ( |entry| match & entry. record {
154+ Record :: YomichanAudioForvo ( audio) => Some ( & audio. audio ) ,
155+ Record :: YomichanAudioJpod ( audio) => Some ( & audio. audio ) ,
156+ Record :: YomichanAudioNhk16 ( audio) => Some ( & audio. audio ) ,
157+ Record :: YomichanAudioShinmeikai8 ( audio) => Some ( & audio. audio ) ,
158+ _ => None ,
159+ } )
160+ . map ( |audio| audio. data . to_vec ( ) )
161+ }
162+
135163fn sentence_cloze ( sentence : & str , term_span : Range < usize > ) -> Option < String > {
136164 let cloze_prefix = sentence. get ( ..term_span. start ) ?;
137165 let cloze_body = sentence. get ( term_span. clone ( ) ) ?;
@@ -176,7 +204,7 @@ fn all_glossaries(glossaries: &[String]) -> String {
176204fn pitch_positions ( entries : & [ & RecordEntry ] ) -> String {
177205 entries
178206 . iter ( )
179- . filter_map ( |record | match & record . record {
207+ . filter_map ( |entry | match & entry . record {
180208 Record :: YomitanPitch ( dict:: yomitan:: Pitch { position, .. } ) => Some ( * position) ,
181209 _ => None ,
182210 } )
@@ -194,11 +222,11 @@ fn frequency_list<'a>(
194222) -> String {
195223 entries
196224 . iter ( )
197- . filter_map ( |record | match & record . record {
225+ . filter_map ( |entry | match & entry . record {
198226 Record :: YomitanFrequency ( dict:: yomitan:: Frequency { value, display } ) => {
199227 match ( value, display) {
200- ( _, Some ( display) ) => Some ( ( record , display. clone ( ) ) ) ,
201- ( Some ( FrequencyValue :: Rank ( rank) ) , None ) => Some ( ( record , format ! ( "{rank}" ) ) ) ,
228+ ( _, Some ( display) ) => Some ( ( entry , display. clone ( ) ) ) ,
229+ ( Some ( FrequencyValue :: Rank ( rank) ) , None ) => Some ( ( entry , format ! ( "{rank}" ) ) ) ,
202230 _ => None ,
203231 }
204232 }
@@ -217,7 +245,7 @@ fn frequency_harmonic_mean(entries: &[&RecordEntry]) -> String {
217245 harmonic_mean (
218246 entries
219247 . iter ( )
220- . filter_map ( |record | match & record . record {
248+ . filter_map ( |entry | match & entry . record {
221249 Record :: YomitanFrequency ( dict:: yomitan:: Frequency {
222250 // TODO: how do we handle occurrences?
223251 value : Some ( FrequencyValue :: Rank ( rank) ) ,
0 commit comments