diff --git a/.gitignore b/.gitignore index b727055..709c932 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ example/input_files/kanjidic2.xml example/input_files/pitch_accents.txt example/input_files/frequency_list.txt example/input_files/enamdict_utf-8 +example/input_files/mecab.zip # Temporary and output files example/temp_files/ diff --git a/example/sagase_dictionary_example.dart b/example/sagase_dictionary_example.dart index 1748e22..a3162dc 100644 --- a/example/sagase_dictionary_example.dart +++ b/example/sagase_dictionary_example.dart @@ -3,7 +3,7 @@ import 'dart:io'; import 'package:archive/archive_io.dart'; import 'package:drift/drift.dart'; import 'package:path/path.dart' as path; -import 'package:sagase_dictionary/src/database.dart'; +import 'package:sagase_dictionary/sagase_dictionary.dart'; import 'package:sagase_dictionary/src/dictionary_builder.dart'; void main() async { @@ -17,9 +17,6 @@ void main() async { Directory(tempFilesPath).createSync(recursive: true); String outputFilesPath = path.join(examplePath, 'output_files'); Directory(outputFilesPath).createSync(recursive: true); - if (File(path.join(outputFilesPath, 'dictionary.zip')).existsSync()) { - File(path.join(outputFilesPath, 'dictionary.zip')).deleteSync(); - } print('Creating dictionary'); @@ -66,17 +63,119 @@ void main() async { int kanjiCount = await database.kanjis.count().getSingle(); print('Kanji ${kanjiCount == 13108 ? 
"valid" : "INVALID"} - $kanjiCount'); - // Export database to file + // Export database without proper nouns to file + await _exportAndCompressDatabase( + database, + path.join(tempFilesPath, SagaseDictionaryConstants.dictionaryDatabaseFile), + path.join(outputFilesPath, SagaseDictionaryConstants.dictionaryZip), + ); + + // Add proper nouns to database + print('Adding proper nouns...'); + await DictionaryBuilder.createProperNounDictionary( + database, + File(path.join(inputFilesPath, 'enamdict_utf-8')).readAsStringSync(), + showProgress: true, + ); + + int properNounCount = await database.properNouns.count().getSingle(); + print('\nProper noun count - $properNounCount'); + + // Export database with proper nouns to file + await _exportAndCompressDatabase( + database, + path.join(tempFilesPath, SagaseDictionaryConstants.dictionaryDatabaseFile), + path.join( + outputFilesPath, + SagaseDictionaryConstants.dictionaryWithProperNounsZip, + ), + ); + + // Close database + await database.close(); + + // Open proper noun database + print('Creating proper noun dictionary...'); + final properNounDatabase = AppDatabase(); + await DictionaryBuilder.createProperNounDictionary( + properNounDatabase, + File(path.join(inputFilesPath, 'enamdict_utf-8')).readAsStringSync(), + showProgress: true, + ); + print(''); + + // Export proper noun database + await _exportAndCompressDatabase( + properNounDatabase, + path.join( + tempFilesPath, + SagaseDictionaryConstants.properNounDictionaryDatabaseFile, + ), + path.join( + outputFilesPath, + SagaseDictionaryConstants.properNounDictionaryZip, + ), + ); + + // Close proper noun database + await properNounDatabase.close(); + + // Create required assets tar + print("Creating required assets tar"); + final archive = Archive(); + + String dictionaryPath = path.join( + outputFilesPath, + SagaseDictionaryConstants.dictionaryZip, + ); + String mecabPath = path.join( + inputFilesPath, + SagaseDictionaryConstants.mecabZip, + ); + + final dictionaryBytes 
= await File(dictionaryPath).readAsBytes(); + final dictionaryArchiveFile = ArchiveFile( + SagaseDictionaryConstants.dictionaryZip, + dictionaryBytes.length, + dictionaryBytes); + archive.addFile(dictionaryArchiveFile); + + final mecabBytes = await File(mecabPath).readAsBytes(); + final mecabArchiveFile = ArchiveFile( + SagaseDictionaryConstants.mecabZip, + mecabBytes.length, + mecabBytes, + ); + archive.addFile(mecabArchiveFile); + + final encodedArchive = TarEncoder().encode(archive); + + File(path.join( + outputFilesPath, + SagaseDictionaryConstants.requiredAssetsTar, + )).writeAsBytesSync(encodedArchive); + + // Delete temp files + await Directory(tempFilesPath).delete(recursive: true); + + print('Done!'); +} + +Future _exportAndCompressDatabase( + AppDatabase database, + String dbFilePath, + String archiveFilePath, +) async { print('Exporting...'); - final file = File(path.join(tempFilesPath, 'dictionary.sqlite')); - if (file.existsSync()) file.deleteSync(); - await database.customStatement('VACUUM INTO ?', [file.path]); + final dbFile = File(dbFilePath); + if (dbFile.existsSync()) dbFile.deleteSync(); + await database.customStatement('VACUUM INTO ?', [dbFile.path]); // Compress the exported file print('Compressing...'); - final bytes = - await File(path.join(tempFilesPath, 'dictionary.sqlite')).readAsBytes(); - final archiveFile = ArchiveFile('dictionary.sqlite', bytes.length, bytes); + final bytes = dbFile.readAsBytesSync(); + final archiveFile = + ArchiveFile(dbFile.uri.pathSegments.last, bytes.length, bytes); final archive = Archive(); archive.addFile(archiveFile); final encodedArchive = @@ -85,12 +184,5 @@ void main() async { print('Compression did not work'); return; } - await File(path.join(outputFilesPath, 'dictionary.zip')) - .writeAsBytes(encodedArchive); - - // Close database and delete the temp files - await database.close(); - await Directory(tempFilesPath).delete(recursive: true); - - print('Done!'); + 
File(archiveFilePath).writeAsBytesSync(encodedArchive); } diff --git a/lib/sagase_dictionary.dart b/lib/sagase_dictionary.dart index 5e58cba..ce431a7 100644 --- a/lib/sagase_dictionary.dart +++ b/lib/sagase_dictionary.dart @@ -11,6 +11,7 @@ export 'src/datamodels/kanjis.dart' show Kanji, KanjiReading; export 'src/datamodels/my_dictionary_lists.dart' show MyDictionaryList; export 'src/datamodels/predefined_dictionary_lists.dart' show PredefinedDictionaryList; +export 'src/datamodels/proper_nouns.dart' show ProperNoun; export 'src/datamodels/spaced_repetition_datas.dart' show SpacedRepetitionData; export 'src/datamodels/vocabs.dart' show Vocab, VocabExample, VocabReference;
diff --git a/lib/src/dao/proper_nouns_dao.dart b/lib/src/dao/proper_nouns_dao.dart
new file mode 100644
index 0000000..0769b1f
--- /dev/null
+++ b/lib/src/dao/proper_nouns_dao.dart
@@ -0,0 +1,259 @@
+import 'package:drift/drift.dart';
+import 'package:kana_kit/kana_kit.dart';
+import 'package:sagase_dictionary/src/database.dart';
+import 'package:sagase_dictionary/src/datamodels/proper_nouns.dart';
+import 'package:sagase_dictionary/src/utils/string_utils.dart';
+
+part 'proper_nouns_dao.g.dart';
+
+/// Queries and bulk operations for the proper noun tables.
+@DriftAccessor(tables: [ProperNouns])
+class ProperNounsDao extends DatabaseAccessor<AppDatabase>
+    with _$ProperNounsDaoMixin {
+  /// Maximum number of rows returned by [search].
+  static const _searchLimit = 500;
+
+  final _kanaKit = const KanaKit().copyWithConfig(passRomaji: true);
+
+  ProperNounsDao(super.db);
+
+  /// Copies every proper noun and romaji word from the SQLite database file
+  /// at [path] into this database.
+  ///
+  /// Both tables are copied in one transaction. The source database is
+  /// detached even when the copy fails, so that a later call can attach a
+  /// database under the same alias again.
+  Future<void> importProperNouns(String path) async {
+    final properNouns = db.properNouns.actualTableName;
+    final romajiWords = db.properNounRomajiWords.actualTableName;
+
+    await db.customStatement('ATTACH DATABASE ? AS proper_noun_db', [path]);
+    try {
+      await db.transaction(() async {
+        // SELECT * copies columns by position, so the attached database must
+        // declare both tables with the same column order as this one.
+        await db.customStatement(
+          'INSERT INTO $properNouns '
+          'SELECT * FROM proper_noun_db.$properNouns',
+        );
+        await db.customStatement(
+          'INSERT INTO $romajiWords '
+          'SELECT * FROM proper_noun_db.$romajiWords',
+        );
+      });
+    } finally {
+      await db.customStatement('DETACH DATABASE proper_noun_db');
+    }
+  }
+
+  /// Deletes all proper nouns and all of their romaji words.
+  ///
+  /// Both deletes run in one transaction so a failure cannot leave only one
+  /// of the two tables cleared.
+  Future<void> deleteProperNouns() async {
+    await db.transaction(() async {
+      await db.delete(db.properNouns).go();
+      await db.delete(db.properNounRomajiWords).go();
+    });
+  }
+
+  /// Returns the proper nouns whose writing equals [text] or whose writing
+  /// search form equals the search form of [text].
+  Future<List<ProperNoun>> getByWriting(String text) async {
+    return (db.select(db.properNouns)
+          ..where((properNoun) => _writingMatches(properNoun, text)))
+        .get();
+  }
+
+  /// Returns the proper nouns whose reading equals [text] or whose reading
+  /// search form equals [text] converted to hiragana.
+  Future<List<ProperNoun>> getByReading(String text) async {
+    return (db.select(db.properNouns)
+          ..where((properNoun) => _readingMatches(properNoun, text)))
+        .get();
+  }
+
+  /// Returns the proper nouns that match both [writing] and [reading], using
+  /// the same rules as [getByWriting] and [getByReading].
+  Future<List<ProperNoun>> getByWritingAndReading(
+    String writing,
+    String reading,
+  ) {
+    return (db.select(db.properNouns)
+          ..where((properNoun) => Expression.and([
+                _writingMatches(properNoun, writing),
+                _readingMatches(properNoun, reading),
+              ])))
+        .get();
+  }
+
+  /// Searches for proper nouns starting with [text], returning at most
+  /// [_searchLimit] results.
+  ///
+  /// Romaji input is matched against the romaji columns and the romaji word
+  /// table; with several words, every word must match and the last word only
+  /// needs to match as a prefix. Kana input is matched against the reading
+  /// and any other input against the writing.
+  Future<List<ProperNoun>> search(String text) async {
+    // NOTE(review): RegExp.escape backslash-escapes regex metacharacters, but
+    // the result is only used in LIKE patterns below, where `%` and `_` are
+    // the wildcards. Confirm that this is the intended escaping.
+    final cleanedText = RegExp.escape(text).toLowerCase().removeDiacritics();
+
+    if (_kanaKit.isRomaji(cleanedText)) {
+      final words = cleanedText.splitWords();
+      // Without this guard `words.last` would throw for an empty word list.
+      if (words.isEmpty) return [];
+      return words.length == 1
+          ? _searchRomajiWord(cleanedText)
+          : _searchRomajiWords(words);
+    }
+    return _kanaKit.isKana(cleanedText)
+        ? _searchByReading(cleanedText)
+        : _searchByWriting(cleanedText);
+  }
+
+  /// Normalizes [text] for comparison with the writing search form column.
+  String _toWritingSearchForm(String text) {
+    return _kanaKit.toHiragana(text.toLowerCase().romajiToHalfWidth());
+  }
+
+  /// Builds the writing condition shared by the exact-match lookups.
+  Expression<bool> _writingMatches(
+    $ProperNounsTable properNoun,
+    String writing,
+  ) {
+    return Expression.or([
+      properNoun.writing.equals(writing),
+      properNoun.writingSearchForm.equals(_toWritingSearchForm(writing)),
+    ]);
+  }
+
+  /// Builds the reading condition shared by the exact-match lookups.
+  Expression<bool> _readingMatches(
+    $ProperNounsTable properNoun,
+    String reading,
+  ) {
+    return Expression.or([
+      properNoun.reading.equals(reading),
+      properNoun.readingSearchForm.equals(_kanaKit.toHiragana(reading)),
+    ]);
+  }
+
+  /// Prefix search for a single romaji [word].
+  Future<List<ProperNoun>> _searchRomajiWord(String word) {
+    final prefix = '$word%';
+    final romajiMatches =
+        db.properNouns.romaji.collate(Collate.noCase).like(prefix);
+    // Length of `romaji` when it matched, otherwise of the romaji reading.
+    final matchLength = db.properNouns.romaji.length.iif(
+      romajiMatches,
+      db.properNouns.readingRomaji.length,
+    );
+
+    return (db.select(db.properNouns).join([
+      leftOuterJoin(
+        db.properNounRomajiWords,
+        db.properNounRomajiWords.properNounId.equalsExp(db.properNouns.id),
+      ),
+    ])
+          ..where(Expression.or([
+            db.properNounRomajiWords.word.like(prefix),
+            db.properNouns.readingRomaji.like(prefix),
+            db.properNouns.readingRomajiSimplified.like(prefix),
+            romajiMatches,
+          ]))
+          ..orderBy([OrderingTerm.asc(matchLength)])
+          ..groupBy([db.properNouns.id])
+          ..limit(_searchLimit))
+        .map((row) => row.readTable(db.properNouns))
+        .get();
+  }
+
+  /// Search for several romaji [words]: all but the last word must equal a
+  /// romaji word of the proper noun, the last word only has to be a prefix.
+  Future<List<ProperNoun>> _searchRomajiWords(List<String> words) {
+    final romajiWord = db.properNounRomajiWords.word;
+    final lastWord = words.last;
+    final uniqueWords = words.toSet().toList();
+    // The last word stays in the exact-match list only when it also occurs
+    // earlier in the query.
+    final lastWordRepeats = words.where((w) => w == lastWord).length > 1;
+    final wordsExceptLast = lastWordRepeats
+        ? uniqueWords
+        : uniqueWords.sublist(0, uniqueWords.length - 1);
+    final lastWordPrefix = romajiWord.like('$lastWord%');
+    final shortestLastWordMatch =
+        romajiWord.length.min(filter: lastWordPrefix);
+
+    return (db.select(db.properNouns).join([
+      innerJoin(
+        db.properNounRomajiWords,
+        db.properNounRomajiWords.properNounId.equalsExp(db.properNouns.id),
+      ),
+    ])
+          ..addColumns([shortestLastWordMatch])
+          ..where(Expression.or([
+            romajiWord.isIn(wordsExceptLast),
+            lastWordPrefix,
+          ]))
+          ..orderBy([
+            OrderingTerm.asc(shortestLastWordMatch, nulls: NullsOrder.last),
+          ])
+          ..groupBy(
+            [db.properNouns.id],
+            having: Expression.and([
+              // Every exact word was matched by one of the joined rows ...
+              romajiWord
+                  .caseMatch(when: {
+                    for (final w in wordsExceptLast) Variable(w): Variable(w),
+                  })
+                  .count(distinct: true)
+                  .equals(wordsExceptLast.length),
+              // ... and at least one joined row starts with the last word.
+              CaseWhenExpression(cases: [
+                CaseWhen(lastWordPrefix, then: const Constant(0)),
+              ]).count().isBiggerOrEqualValue(1),
+            ]),
+          )
+          ..limit(_searchLimit))
+        .map((row) => row.readTable(db.properNouns))
+        .get();
+  }
+
+  /// Prefix search on the reading, used for kana [text].
+  Future<List<ProperNoun>> _searchByReading(String text) {
+    final prefix = '$text%';
+    final searchFormPrefix = _kanaKit.toHiragana(prefix);
+
+    return (db.select(db.properNouns)
+          ..where((properNoun) => Expression.or([
+                properNoun.reading.like(prefix),
+                properNoun.readingSearchForm.like(searchFormPrefix),
+              ]))
+          ..orderBy([
+            (properNoun) => OrderingTerm.asc(properNoun.reading.length),
+          ])
+          ..limit(_searchLimit))
+        .get();
+  }
+
+  /// Prefix search on the writing, used for text that is neither romaji nor
+  /// kana.
+  Future<List<ProperNoun>> _searchByWriting(String text) {
+    final prefix = '$text%';
+    final searchFormPrefix = _toWritingSearchForm(prefix);
+
+    return (db.select(db.properNouns)
+          ..where((properNoun) => Expression.or([
+                properNoun.writing.like(prefix),
+                properNoun.writingSearchForm.like(searchFormPrefix),
+              ]))
+          ..orderBy([
+            (properNoun) => OrderingTerm.asc(properNoun.writing.length),
+            (properNoun) => OrderingTerm.asc(properNoun.reading.length),
+          ])
+          ..limit(_searchLimit))
+        .get();
+  }
+}
diff --git a/lib/src/dao/proper_nouns_dao.g.dart b/lib/src/dao/proper_nouns_dao.g.dart new file mode 100644 index 0000000..518ed47 --- /dev/null +++ b/lib/src/dao/proper_nouns_dao.g.dart @@ -0,0 +1,8 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'proper_nouns_dao.dart'; + +// ignore_for_file: type=lint +mixin _$ProperNounsDaoMixin on DatabaseAccessor { + $ProperNounsTable get properNouns => attachedDatabase.properNouns; +} diff --git a/lib/src/database.dart b/lib/src/database.dart index 4e52d3b..8d88879 100644 --- a/lib/src/database.dart +++ b/lib/src/database.dart @@ -4,6 +4,7 @@ import 'package:sagase_dictionary/src/dao/flashcard_sets_dao.dart'; import 'package:sagase_dictionary/src/dao/kanjis_dao.dart'; import 'package:sagase_dictionary/src/dao/my_dictionary_lists_dao.dart'; import
'package:sagase_dictionary/src/dao/predefined_dictionary_lists_dao.dart'; +import 'package:sagase_dictionary/src/dao/proper_nouns_dao.dart'; import 'package:sagase_dictionary/src/dao/radicals_dao.dart'; import 'package:sagase_dictionary/src/dao/search_history_items_dao.dart'; import 'package:sagase_dictionary/src/dao/spaced_repetition_datas_dao.dart'; @@ -37,6 +38,7 @@ part 'database.g.dart'; MyDictionaryListItems, PredefinedDictionaryLists, ProperNouns, + ProperNounRomajiWords, Radicals, SearchHistoryItems, SpacedRepetitionDatas, @@ -59,6 +61,7 @@ part 'database.g.dart'; KanjisDao, MyDictionaryListsDao, PredefinedDictionaryListsDao, + ProperNounsDao, RadicalsDao, SearchHistoryItemsDao, SpacedRepetitionDatasDao, diff --git a/lib/src/database.g.dart b/lib/src/database.g.dart index d53dcfa..7e4c7db 100644 --- a/lib/src/database.g.dart +++ b/lib/src/database.g.dart @@ -3835,11 +3835,11 @@ class $ProperNounsTable extends ProperNouns type: DriftSqlType.string, requiredDuringInsert: true); static const VerificationMeta _typesMeta = const VerificationMeta('types'); @override - late final GeneratedColumnWithTypeConverter?, String> - types = GeneratedColumn('types', aliasedName, true, - type: DriftSqlType.string, requiredDuringInsert: false) - .withConverter?>( - $ProperNounsTable.$convertertypesn); + late final GeneratedColumnWithTypeConverter, String> + types = GeneratedColumn('types', aliasedName, false, + type: DriftSqlType.string, requiredDuringInsert: true) + .withConverter>( + $ProperNounsTable.$convertertypes); @override List get $columns => [ id, @@ -3935,9 +3935,9 @@ class $ProperNounsTable extends ProperNouns data['${effectivePrefix}reading_romaji_simplified']), romaji: attachedDatabase.typeMapping .read(DriftSqlType.string, data['${effectivePrefix}romaji'])!, - types: $ProperNounsTable.$convertertypesn.fromSql(attachedDatabase + types: $ProperNounsTable.$convertertypes.fromSql(attachedDatabase .typeMapping - .read(DriftSqlType.string, 
data['${effectivePrefix}types'])), + .read(DriftSqlType.string, data['${effectivePrefix}types'])!), ); } @@ -3948,174 +3948,6 @@ class $ProperNounsTable extends ProperNouns static TypeConverter, String> $convertertypes = const ProperNounTypeConverter(); - static TypeConverter?, String?> $convertertypesn = - NullAwareTypeConverter.wrap($convertertypes); -} - -class ProperNoun extends DataClass implements Insertable { - final int id; - final String? writing; - final String? writingSearchForm; - final String reading; - final String? readingSearchForm; - final String readingRomaji; - final String? readingRomajiSimplified; - final String romaji; - final List? types; - const ProperNoun( - {required this.id, - this.writing, - this.writingSearchForm, - required this.reading, - this.readingSearchForm, - required this.readingRomaji, - this.readingRomajiSimplified, - required this.romaji, - this.types}); - @override - Map toColumns(bool nullToAbsent) { - final map = {}; - map['id'] = Variable(id); - if (!nullToAbsent || writing != null) { - map['writing'] = Variable(writing); - } - if (!nullToAbsent || writingSearchForm != null) { - map['writing_search_form'] = Variable(writingSearchForm); - } - map['reading'] = Variable(reading); - if (!nullToAbsent || readingSearchForm != null) { - map['reading_search_form'] = Variable(readingSearchForm); - } - map['reading_romaji'] = Variable(readingRomaji); - if (!nullToAbsent || readingRomajiSimplified != null) { - map['reading_romaji_simplified'] = - Variable(readingRomajiSimplified); - } - map['romaji'] = Variable(romaji); - if (!nullToAbsent || types != null) { - map['types'] = - Variable($ProperNounsTable.$convertertypesn.toSql(types)); - } - return map; - } - - ProperNounsCompanion toCompanion(bool nullToAbsent) { - return ProperNounsCompanion( - id: Value(id), - writing: writing == null && nullToAbsent - ? const Value.absent() - : Value(writing), - writingSearchForm: writingSearchForm == null && nullToAbsent - ? 
const Value.absent() - : Value(writingSearchForm), - reading: Value(reading), - readingSearchForm: readingSearchForm == null && nullToAbsent - ? const Value.absent() - : Value(readingSearchForm), - readingRomaji: Value(readingRomaji), - readingRomajiSimplified: readingRomajiSimplified == null && nullToAbsent - ? const Value.absent() - : Value(readingRomajiSimplified), - romaji: Value(romaji), - types: - types == null && nullToAbsent ? const Value.absent() : Value(types), - ); - } - - factory ProperNoun.fromJson(Map json, - {ValueSerializer? serializer}) { - serializer ??= driftRuntimeOptions.defaultSerializer; - return ProperNoun( - id: serializer.fromJson(json['id']), - writing: serializer.fromJson(json['writing']), - writingSearchForm: - serializer.fromJson(json['writingSearchForm']), - reading: serializer.fromJson(json['reading']), - readingSearchForm: - serializer.fromJson(json['readingSearchForm']), - readingRomaji: serializer.fromJson(json['readingRomaji']), - readingRomajiSimplified: - serializer.fromJson(json['readingRomajiSimplified']), - romaji: serializer.fromJson(json['romaji']), - types: serializer.fromJson?>(json['types']), - ); - } - @override - Map toJson({ValueSerializer? serializer}) { - serializer ??= driftRuntimeOptions.defaultSerializer; - return { - 'id': serializer.toJson(id), - 'writing': serializer.toJson(writing), - 'writingSearchForm': serializer.toJson(writingSearchForm), - 'reading': serializer.toJson(reading), - 'readingSearchForm': serializer.toJson(readingSearchForm), - 'readingRomaji': serializer.toJson(readingRomaji), - 'readingRomajiSimplified': - serializer.toJson(readingRomajiSimplified), - 'romaji': serializer.toJson(romaji), - 'types': serializer.toJson?>(types), - }; - } - - ProperNoun copyWith( - {int? id, - Value writing = const Value.absent(), - Value writingSearchForm = const Value.absent(), - String? reading, - Value readingSearchForm = const Value.absent(), - String? 
readingRomaji, - Value readingRomajiSimplified = const Value.absent(), - String? romaji, - Value?> types = const Value.absent()}) => - ProperNoun( - id: id ?? this.id, - writing: writing.present ? writing.value : this.writing, - writingSearchForm: writingSearchForm.present - ? writingSearchForm.value - : this.writingSearchForm, - reading: reading ?? this.reading, - readingSearchForm: readingSearchForm.present - ? readingSearchForm.value - : this.readingSearchForm, - readingRomaji: readingRomaji ?? this.readingRomaji, - readingRomajiSimplified: readingRomajiSimplified.present - ? readingRomajiSimplified.value - : this.readingRomajiSimplified, - romaji: romaji ?? this.romaji, - types: types.present ? types.value : this.types, - ); - @override - String toString() { - return (StringBuffer('ProperNoun(') - ..write('id: $id, ') - ..write('writing: $writing, ') - ..write('writingSearchForm: $writingSearchForm, ') - ..write('reading: $reading, ') - ..write('readingSearchForm: $readingSearchForm, ') - ..write('readingRomaji: $readingRomaji, ') - ..write('readingRomajiSimplified: $readingRomajiSimplified, ') - ..write('romaji: $romaji, ') - ..write('types: $types') - ..write(')')) - .toString(); - } - - @override - int get hashCode => Object.hash(id, writing, writingSearchForm, reading, - readingSearchForm, readingRomaji, readingRomajiSimplified, romaji, types); - @override - bool operator ==(Object other) => - identical(this, other) || - (other is ProperNoun && - other.id == this.id && - other.writing == this.writing && - other.writingSearchForm == this.writingSearchForm && - other.reading == this.reading && - other.readingSearchForm == this.readingSearchForm && - other.readingRomaji == this.readingRomaji && - other.readingRomajiSimplified == this.readingRomajiSimplified && - other.romaji == this.romaji && - other.types == this.types); } class ProperNounsCompanion extends UpdateCompanion { @@ -4127,7 +3959,7 @@ class ProperNounsCompanion extends UpdateCompanion { final 
Value readingRomaji; final Value readingRomajiSimplified; final Value romaji; - final Value?> types; + final Value> types; const ProperNounsCompanion({ this.id = const Value.absent(), this.writing = const Value.absent(), @@ -4148,10 +3980,11 @@ class ProperNounsCompanion extends UpdateCompanion { required String readingRomaji, this.readingRomajiSimplified = const Value.absent(), required String romaji, - this.types = const Value.absent(), + required List types, }) : reading = Value(reading), readingRomaji = Value(readingRomaji), - romaji = Value(romaji); + romaji = Value(romaji), + types = Value(types); static Insertable custom({ Expression? id, Expression? writing, @@ -4186,7 +4019,7 @@ class ProperNounsCompanion extends UpdateCompanion { Value? readingRomaji, Value? readingRomajiSimplified, Value? romaji, - Value?>? types}) { + Value>? types}) { return ProperNounsCompanion( id: id ?? this.id, writing: writing ?? this.writing, @@ -4231,7 +4064,7 @@ class ProperNounsCompanion extends UpdateCompanion { } if (types.present) { map['types'] = Variable( - $ProperNounsTable.$convertertypesn.toSql(types.value)); + $ProperNounsTable.$convertertypes.toSql(types.value)); } return map; } @@ -4253,6 +4086,219 @@ class ProperNounsCompanion extends UpdateCompanion { } } +class $ProperNounRomajiWordsTable extends ProperNounRomajiWords + with TableInfo<$ProperNounRomajiWordsTable, ProperNounRomajiWord> { + @override + final GeneratedDatabase attachedDatabase; + final String? 
_alias; + $ProperNounRomajiWordsTable(this.attachedDatabase, [this._alias]); + static const VerificationMeta _idMeta = const VerificationMeta('id'); + @override + late final GeneratedColumn id = GeneratedColumn( + 'id', aliasedName, false, + hasAutoIncrement: true, + type: DriftSqlType.int, + requiredDuringInsert: false, + defaultConstraints: + GeneratedColumn.constraintIsAlways('PRIMARY KEY AUTOINCREMENT')); + static const VerificationMeta _wordMeta = const VerificationMeta('word'); + @override + late final GeneratedColumn word = GeneratedColumn( + 'word', aliasedName, false, + type: DriftSqlType.string, requiredDuringInsert: true); + static const VerificationMeta _properNounIdMeta = + const VerificationMeta('properNounId'); + @override + late final GeneratedColumn properNounId = GeneratedColumn( + 'proper_noun_id', aliasedName, false, + type: DriftSqlType.int, requiredDuringInsert: true); + @override + List get $columns => [id, word, properNounId]; + @override + String get aliasedName => _alias ?? actualTableName; + @override + String get actualTableName => $name; + static const String $name = 'proper_noun_romaji_words'; + @override + VerificationContext validateIntegrity( + Insertable instance, + {bool isInserting = false}) { + final context = VerificationContext(); + final data = instance.toColumns(true); + if (data.containsKey('id')) { + context.handle(_idMeta, id.isAcceptableOrUnknown(data['id']!, _idMeta)); + } + if (data.containsKey('word')) { + context.handle( + _wordMeta, word.isAcceptableOrUnknown(data['word']!, _wordMeta)); + } else if (isInserting) { + context.missing(_wordMeta); + } + if (data.containsKey('proper_noun_id')) { + context.handle( + _properNounIdMeta, + properNounId.isAcceptableOrUnknown( + data['proper_noun_id']!, _properNounIdMeta)); + } else if (isInserting) { + context.missing(_properNounIdMeta); + } + return context; + } + + @override + Set get $primaryKey => {id}; + @override + ProperNounRomajiWord map(Map data, {String? 
tablePrefix}) { + final effectivePrefix = tablePrefix != null ? '$tablePrefix.' : ''; + return ProperNounRomajiWord( + id: attachedDatabase.typeMapping + .read(DriftSqlType.int, data['${effectivePrefix}id'])!, + word: attachedDatabase.typeMapping + .read(DriftSqlType.string, data['${effectivePrefix}word'])!, + properNounId: attachedDatabase.typeMapping + .read(DriftSqlType.int, data['${effectivePrefix}proper_noun_id'])!, + ); + } + + @override + $ProperNounRomajiWordsTable createAlias(String alias) { + return $ProperNounRomajiWordsTable(attachedDatabase, alias); + } +} + +class ProperNounRomajiWord extends DataClass + implements Insertable { + final int id; + final String word; + final int properNounId; + const ProperNounRomajiWord( + {required this.id, required this.word, required this.properNounId}); + @override + Map toColumns(bool nullToAbsent) { + final map = {}; + map['id'] = Variable(id); + map['word'] = Variable(word); + map['proper_noun_id'] = Variable(properNounId); + return map; + } + + ProperNounRomajiWordsCompanion toCompanion(bool nullToAbsent) { + return ProperNounRomajiWordsCompanion( + id: Value(id), + word: Value(word), + properNounId: Value(properNounId), + ); + } + + factory ProperNounRomajiWord.fromJson(Map json, + {ValueSerializer? serializer}) { + serializer ??= driftRuntimeOptions.defaultSerializer; + return ProperNounRomajiWord( + id: serializer.fromJson(json['id']), + word: serializer.fromJson(json['word']), + properNounId: serializer.fromJson(json['properNounId']), + ); + } + @override + Map toJson({ValueSerializer? serializer}) { + serializer ??= driftRuntimeOptions.defaultSerializer; + return { + 'id': serializer.toJson(id), + 'word': serializer.toJson(word), + 'properNounId': serializer.toJson(properNounId), + }; + } + + ProperNounRomajiWord copyWith({int? id, String? word, int? properNounId}) => + ProperNounRomajiWord( + id: id ?? this.id, + word: word ?? this.word, + properNounId: properNounId ?? 
this.properNounId, + ); + @override + String toString() { + return (StringBuffer('ProperNounRomajiWord(') + ..write('id: $id, ') + ..write('word: $word, ') + ..write('properNounId: $properNounId') + ..write(')')) + .toString(); + } + + @override + int get hashCode => Object.hash(id, word, properNounId); + @override + bool operator ==(Object other) => + identical(this, other) || + (other is ProperNounRomajiWord && + other.id == this.id && + other.word == this.word && + other.properNounId == this.properNounId); +} + +class ProperNounRomajiWordsCompanion + extends UpdateCompanion { + final Value id; + final Value word; + final Value properNounId; + const ProperNounRomajiWordsCompanion({ + this.id = const Value.absent(), + this.word = const Value.absent(), + this.properNounId = const Value.absent(), + }); + ProperNounRomajiWordsCompanion.insert({ + this.id = const Value.absent(), + required String word, + required int properNounId, + }) : word = Value(word), + properNounId = Value(properNounId); + static Insertable custom({ + Expression? id, + Expression? word, + Expression? properNounId, + }) { + return RawValuesInsertable({ + if (id != null) 'id': id, + if (word != null) 'word': word, + if (properNounId != null) 'proper_noun_id': properNounId, + }); + } + + ProperNounRomajiWordsCompanion copyWith( + {Value? id, Value? word, Value? properNounId}) { + return ProperNounRomajiWordsCompanion( + id: id ?? this.id, + word: word ?? this.word, + properNounId: properNounId ?? 
this.properNounId, + ); + } + + @override + Map toColumns(bool nullToAbsent) { + final map = {}; + if (id.present) { + map['id'] = Variable(id.value); + } + if (word.present) { + map['word'] = Variable(word.value); + } + if (properNounId.present) { + map['proper_noun_id'] = Variable(properNounId.value); + } + return map; + } + + @override + String toString() { + return (StringBuffer('ProperNounRomajiWordsCompanion(') + ..write('id: $id, ') + ..write('word: $word, ') + ..write('properNounId: $properNounId') + ..write(')')) + .toString(); + } +} + class $PredefinedDictionaryListsTable extends PredefinedDictionaryLists with TableInfo<$PredefinedDictionaryListsTable, PredefinedDictionaryList> { @override @@ -5605,13 +5651,16 @@ abstract class _$AppDatabase extends GeneratedDatabase { late final Index uKRadicalsRadical = Index('UK_radicals_radical', 'CREATE UNIQUE INDEX UK_radicals_radical ON radicals (radical)'); late final $ProperNounsTable properNouns = $ProperNounsTable(this); + late final $ProperNounRomajiWordsTable properNounRomajiWords = + $ProperNounRomajiWordsTable(this); late final Index iXProperNounsReading = Index('IX_proper_nouns_reading', 'CREATE INDEX IX_proper_nouns_reading ON proper_nouns (reading)'); late final Index iXProperNounsReadingRomaji = Index( 'IX_proper_nouns_reading_romaji', 'CREATE INDEX IX_proper_nouns_reading_romaji ON proper_nouns (reading_romaji)'); - late final Index iXProperNounsRomaji = Index('IX_proper_nouns_romaji', - 'CREATE INDEX IX_proper_nouns_romaji ON proper_nouns (romaji)'); + late final Index iXProperNounRomajiWordsWord = Index( + 'IX_proper_noun_romaji_words_word', + 'CREATE INDEX IX_proper_noun_romaji_words_word ON proper_noun_romaji_words (word)'); late final $PredefinedDictionaryListsTable predefinedDictionaryLists = $PredefinedDictionaryListsTable(this); late final $MyDictionaryListsTable myDictionaryLists = @@ -5641,6 +5690,8 @@ abstract class _$AppDatabase extends GeneratedDatabase { late final Index 
iXProperNounsReadingRomajiSimplified = Index( 'IX_proper_nouns_reading_romaji_simplified', 'CREATE INDEX IX_proper_nouns_reading_romaji_simplified ON proper_nouns (reading_romaji_simplified) WHERE reading_romaji_simplified IS NOT NULL'); + late final Index iXProperNounsRomaji = Index('IX_proper_nouns_romaji', + 'CREATE INDEX IX_proper_nouns_romaji ON proper_nouns (romaji COLLATE NOCASE) WHERE romaji'); late final Index iXMyDictionaryListItemsVocabId = Index( 'IX_my_dictionary_list_items_vocab_id', 'CREATE INDEX IX_my_dictionary_list_items_vocab_id ON my_dictionary_list_items (vocab_id) WHERE vocab_id != 0'); @@ -5662,6 +5713,8 @@ abstract class _$AppDatabase extends GeneratedDatabase { MyDictionaryListsDao(this as AppDatabase); late final PredefinedDictionaryListsDao predefinedDictionaryListsDao = PredefinedDictionaryListsDao(this as AppDatabase); + late final ProperNounsDao properNounsDao = + ProperNounsDao(this as AppDatabase); late final RadicalsDao radicalsDao = RadicalsDao(this as AppDatabase); late final SearchHistoryItemsDao searchHistoryItemsDao = SearchHistoryItemsDao(this as AppDatabase); @@ -5701,9 +5754,10 @@ abstract class _$AppDatabase extends GeneratedDatabase { radicals, uKRadicalsRadical, properNouns, + properNounRomajiWords, iXProperNounsReading, iXProperNounsReadingRomaji, - iXProperNounsRomaji, + iXProperNounRomajiWordsWord, predefinedDictionaryLists, myDictionaryLists, myDictionaryListItems, @@ -5716,6 +5770,7 @@ abstract class _$AppDatabase extends GeneratedDatabase { iXProperNounsWritingSearchForm, iXProperNounsReadingSearchForm, iXProperNounsReadingRomajiSimplified, + iXProperNounsRomaji, iXMyDictionaryListItemsVocabId, iXMyDictionaryListItemsKanjiId, iXKanjiReadingsReadingSearchForm, @@ -6917,207 +6972,109 @@ class $$RadicalsTableOrderingComposer ColumnOrderings(column, joinBuilders: joinBuilders)); } -typedef $$ProperNounsTableInsertCompanionBuilder = ProperNounsCompanion - Function({ +typedef 
$$ProperNounRomajiWordsTableInsertCompanionBuilder + = ProperNounRomajiWordsCompanion Function({ Value id, - Value writing, - Value writingSearchForm, - required String reading, - Value readingSearchForm, - required String readingRomaji, - Value readingRomajiSimplified, - required String romaji, - Value?> types, + required String word, + required int properNounId, }); -typedef $$ProperNounsTableUpdateCompanionBuilder = ProperNounsCompanion - Function({ +typedef $$ProperNounRomajiWordsTableUpdateCompanionBuilder + = ProperNounRomajiWordsCompanion Function({ Value id, - Value writing, - Value writingSearchForm, - Value reading, - Value readingSearchForm, - Value readingRomaji, - Value readingRomajiSimplified, - Value romaji, - Value?> types, + Value word, + Value properNounId, }); -class $$ProperNounsTableTableManager extends RootTableManager< +class $$ProperNounRomajiWordsTableTableManager extends RootTableManager< _$AppDatabase, - $ProperNounsTable, - ProperNoun, - $$ProperNounsTableFilterComposer, - $$ProperNounsTableOrderingComposer, - $$ProperNounsTableProcessedTableManager, - $$ProperNounsTableInsertCompanionBuilder, - $$ProperNounsTableUpdateCompanionBuilder> { - $$ProperNounsTableTableManager(_$AppDatabase db, $ProperNounsTable table) + $ProperNounRomajiWordsTable, + ProperNounRomajiWord, + $$ProperNounRomajiWordsTableFilterComposer, + $$ProperNounRomajiWordsTableOrderingComposer, + $$ProperNounRomajiWordsTableProcessedTableManager, + $$ProperNounRomajiWordsTableInsertCompanionBuilder, + $$ProperNounRomajiWordsTableUpdateCompanionBuilder> { + $$ProperNounRomajiWordsTableTableManager( + _$AppDatabase db, $ProperNounRomajiWordsTable table) : super(TableManagerState( db: db, table: table, - filteringComposer: - $$ProperNounsTableFilterComposer(ComposerState(db, table)), - orderingComposer: - $$ProperNounsTableOrderingComposer(ComposerState(db, table)), + filteringComposer: $$ProperNounRomajiWordsTableFilterComposer( + ComposerState(db, table)), + 
orderingComposer: $$ProperNounRomajiWordsTableOrderingComposer( + ComposerState(db, table)), getChildManagerBuilder: (p) => - $$ProperNounsTableProcessedTableManager(p), + $$ProperNounRomajiWordsTableProcessedTableManager(p), getUpdateCompanionBuilder: ({ Value id = const Value.absent(), - Value writing = const Value.absent(), - Value writingSearchForm = const Value.absent(), - Value reading = const Value.absent(), - Value readingSearchForm = const Value.absent(), - Value readingRomaji = const Value.absent(), - Value readingRomajiSimplified = const Value.absent(), - Value romaji = const Value.absent(), - Value?> types = const Value.absent(), + Value word = const Value.absent(), + Value properNounId = const Value.absent(), }) => - ProperNounsCompanion( + ProperNounRomajiWordsCompanion( id: id, - writing: writing, - writingSearchForm: writingSearchForm, - reading: reading, - readingSearchForm: readingSearchForm, - readingRomaji: readingRomaji, - readingRomajiSimplified: readingRomajiSimplified, - romaji: romaji, - types: types, + word: word, + properNounId: properNounId, ), getInsertCompanionBuilder: ({ Value id = const Value.absent(), - Value writing = const Value.absent(), - Value writingSearchForm = const Value.absent(), - required String reading, - Value readingSearchForm = const Value.absent(), - required String readingRomaji, - Value readingRomajiSimplified = const Value.absent(), - required String romaji, - Value?> types = const Value.absent(), + required String word, + required int properNounId, }) => - ProperNounsCompanion.insert( + ProperNounRomajiWordsCompanion.insert( id: id, - writing: writing, - writingSearchForm: writingSearchForm, - reading: reading, - readingSearchForm: readingSearchForm, - readingRomaji: readingRomaji, - readingRomajiSimplified: readingRomajiSimplified, - romaji: romaji, - types: types, + word: word, + properNounId: properNounId, ), )); } -class $$ProperNounsTableProcessedTableManager extends ProcessedTableManager< - _$AppDatabase, 
- $ProperNounsTable, - ProperNoun, - $$ProperNounsTableFilterComposer, - $$ProperNounsTableOrderingComposer, - $$ProperNounsTableProcessedTableManager, - $$ProperNounsTableInsertCompanionBuilder, - $$ProperNounsTableUpdateCompanionBuilder> { - $$ProperNounsTableProcessedTableManager(super.$state); +class $$ProperNounRomajiWordsTableProcessedTableManager + extends ProcessedTableManager< + _$AppDatabase, + $ProperNounRomajiWordsTable, + ProperNounRomajiWord, + $$ProperNounRomajiWordsTableFilterComposer, + $$ProperNounRomajiWordsTableOrderingComposer, + $$ProperNounRomajiWordsTableProcessedTableManager, + $$ProperNounRomajiWordsTableInsertCompanionBuilder, + $$ProperNounRomajiWordsTableUpdateCompanionBuilder> { + $$ProperNounRomajiWordsTableProcessedTableManager(super.$state); } -class $$ProperNounsTableFilterComposer - extends FilterComposer<_$AppDatabase, $ProperNounsTable> { - $$ProperNounsTableFilterComposer(super.$state); +class $$ProperNounRomajiWordsTableFilterComposer + extends FilterComposer<_$AppDatabase, $ProperNounRomajiWordsTable> { + $$ProperNounRomajiWordsTableFilterComposer(super.$state); ColumnFilters get id => $state.composableBuilder( column: $state.table.id, builder: (column, joinBuilders) => ColumnFilters(column, joinBuilders: joinBuilders)); - ColumnFilters get writing => $state.composableBuilder( - column: $state.table.writing, - builder: (column, joinBuilders) => - ColumnFilters(column, joinBuilders: joinBuilders)); - - ColumnFilters get writingSearchForm => $state.composableBuilder( - column: $state.table.writingSearchForm, - builder: (column, joinBuilders) => - ColumnFilters(column, joinBuilders: joinBuilders)); - - ColumnFilters get reading => $state.composableBuilder( - column: $state.table.reading, - builder: (column, joinBuilders) => - ColumnFilters(column, joinBuilders: joinBuilders)); - - ColumnFilters get readingSearchForm => $state.composableBuilder( - column: $state.table.readingSearchForm, - builder: (column, joinBuilders) => - 
ColumnFilters(column, joinBuilders: joinBuilders)); - - ColumnFilters get readingRomaji => $state.composableBuilder( - column: $state.table.readingRomaji, - builder: (column, joinBuilders) => - ColumnFilters(column, joinBuilders: joinBuilders)); - - ColumnFilters get readingRomajiSimplified => $state.composableBuilder( - column: $state.table.readingRomajiSimplified, + ColumnFilters get word => $state.composableBuilder( + column: $state.table.word, builder: (column, joinBuilders) => ColumnFilters(column, joinBuilders: joinBuilders)); - ColumnFilters get romaji => $state.composableBuilder( - column: $state.table.romaji, + ColumnFilters get properNounId => $state.composableBuilder( + column: $state.table.properNounId, builder: (column, joinBuilders) => ColumnFilters(column, joinBuilders: joinBuilders)); - - ColumnWithTypeConverterFilters?, List, - String> - get types => $state.composableBuilder( - column: $state.table.types, - builder: (column, joinBuilders) => ColumnWithTypeConverterFilters( - column, - joinBuilders: joinBuilders)); } -class $$ProperNounsTableOrderingComposer - extends OrderingComposer<_$AppDatabase, $ProperNounsTable> { - $$ProperNounsTableOrderingComposer(super.$state); +class $$ProperNounRomajiWordsTableOrderingComposer + extends OrderingComposer<_$AppDatabase, $ProperNounRomajiWordsTable> { + $$ProperNounRomajiWordsTableOrderingComposer(super.$state); ColumnOrderings get id => $state.composableBuilder( column: $state.table.id, builder: (column, joinBuilders) => ColumnOrderings(column, joinBuilders: joinBuilders)); - ColumnOrderings get writing => $state.composableBuilder( - column: $state.table.writing, - builder: (column, joinBuilders) => - ColumnOrderings(column, joinBuilders: joinBuilders)); - - ColumnOrderings get writingSearchForm => $state.composableBuilder( - column: $state.table.writingSearchForm, - builder: (column, joinBuilders) => - ColumnOrderings(column, joinBuilders: joinBuilders)); - - ColumnOrderings get reading => 
$state.composableBuilder( - column: $state.table.reading, - builder: (column, joinBuilders) => - ColumnOrderings(column, joinBuilders: joinBuilders)); - - ColumnOrderings get readingSearchForm => $state.composableBuilder( - column: $state.table.readingSearchForm, - builder: (column, joinBuilders) => - ColumnOrderings(column, joinBuilders: joinBuilders)); - - ColumnOrderings get readingRomaji => $state.composableBuilder( - column: $state.table.readingRomaji, - builder: (column, joinBuilders) => - ColumnOrderings(column, joinBuilders: joinBuilders)); - - ColumnOrderings get readingRomajiSimplified => - $state.composableBuilder( - column: $state.table.readingRomajiSimplified, - builder: (column, joinBuilders) => - ColumnOrderings(column, joinBuilders: joinBuilders)); - - ColumnOrderings get romaji => $state.composableBuilder( - column: $state.table.romaji, + ColumnOrderings get word => $state.composableBuilder( + column: $state.table.word, builder: (column, joinBuilders) => ColumnOrderings(column, joinBuilders: joinBuilders)); - ColumnOrderings get types => $state.composableBuilder( - column: $state.table.types, + ColumnOrderings get properNounId => $state.composableBuilder( + column: $state.table.properNounId, builder: (column, joinBuilders) => ColumnOrderings(column, joinBuilders: joinBuilders)); } @@ -7358,8 +7315,8 @@ class _$AppDatabaseManager { $$SearchHistoryItemsTableTableManager(_db, _db.searchHistoryItems); $$RadicalsTableTableManager get radicals => $$RadicalsTableTableManager(_db, _db.radicals); - $$ProperNounsTableTableManager get properNouns => - $$ProperNounsTableTableManager(_db, _db.properNouns); + $$ProperNounRomajiWordsTableTableManager get properNounRomajiWords => + $$ProperNounRomajiWordsTableTableManager(_db, _db.properNounRomajiWords); $$MyDictionaryListItemsTableTableManager get myDictionaryListItems => $$MyDictionaryListItemsTableTableManager(_db, _db.myDictionaryListItems); $$DictionaryInfosTableTableManager get dictionaryInfos => diff --git 
a/lib/src/datamodels/japanese_text_token.dart b/lib/src/datamodels/japanese_text_token.dart index 8cbc2d9..42306bb 100644 --- a/lib/src/datamodels/japanese_text_token.dart +++ b/lib/src/datamodels/japanese_text_token.dart @@ -1,4 +1,4 @@ -import 'package:sagase_dictionary/src/datamodels/vocabs.dart'; +import 'package:sagase_dictionary/src/datamodels/dictionary_item.dart'; import 'package:sagase_dictionary/src/utils/enums.dart'; class JapaneseTextToken { @@ -8,7 +8,7 @@ class JapaneseTextToken { final List rubyTextPairs; List? trailing; PartOfSpeech? pos; - List? associatedVocab; + List? associatedDictionaryItems; JapaneseTextToken({ required this.original, @@ -17,7 +17,7 @@ class JapaneseTextToken { required this.rubyTextPairs, this.trailing, this.pos, - this.associatedVocab, + this.associatedDictionaryItems, }); } diff --git a/lib/src/datamodels/proper_nouns.dart b/lib/src/datamodels/proper_nouns.dart index a4cee0e..9edefca 100644 --- a/lib/src/datamodels/proper_nouns.dart +++ b/lib/src/datamodels/proper_nouns.dart @@ -1,11 +1,12 @@ import 'dart:convert'; import 'package:drift/drift.dart'; +import 'package:sagase_dictionary/src/datamodels/dictionary_item.dart'; import 'package:sagase_dictionary/src/utils/enums.dart'; +@UseRowClass(ProperNoun) @TableIndex(name: 'IX_proper_nouns_reading', columns: {#reading}) @TableIndex(name: 'IX_proper_nouns_reading_romaji', columns: {#readingRomaji}) -@TableIndex(name: 'IX_proper_nouns_romaji', columns: {#romaji}) class ProperNouns extends Table { IntColumn get id => integer().autoIncrement()(); @@ -17,8 +18,31 @@ class ProperNouns extends Table { TextColumn get readingRomajiSimplified => text().nullable()(); TextColumn get romaji => text()(); - TextColumn get types => - text().map(const ProperNounTypeConverter()).nullable()(); + TextColumn get types => text().map(const ProperNounTypeConverter())(); +} + +class ProperNoun extends DictionaryItem { + final String? writing; + final String? 
writingSearchForm; + final String reading; + final String? readingSearchForm; + final String readingRomaji; + final String? readingRomajiSimplified; + final String romaji; + + final List types; + + ProperNoun({ + required super.id, + required this.writing, + required this.writingSearchForm, + required this.reading, + required this.readingSearchForm, + required this.readingRomaji, + required this.readingRomajiSimplified, + required this.romaji, + required this.types, + }); } class ProperNounTypeConverter @@ -37,3 +61,10 @@ class ProperNounTypeConverter return jsonEncode(value.map((e) => e.index).toList()); } } + +@TableIndex(name: 'IX_proper_noun_romaji_words_word', columns: {#word}) +class ProperNounRomajiWords extends Table { + IntColumn get id => integer().autoIncrement()(); + TextColumn get word => text()(); + IntColumn get properNounId => integer()(); +} diff --git a/lib/src/datamodels/proper_nouns.drift b/lib/src/datamodels/proper_nouns.drift index 2fdc23e..98f0435 100644 --- a/lib/src/datamodels/proper_nouns.drift +++ b/lib/src/datamodels/proper_nouns.drift @@ -11,3 +11,6 @@ ON proper_nouns(reading_search_form) WHERE reading_search_form IS NOT NULL; CREATE INDEX IX_proper_nouns_reading_romaji_simplified ON proper_nouns(reading_romaji_simplified) WHERE reading_romaji_simplified IS NOT NULL; + +CREATE INDEX IX_proper_nouns_romaji +ON proper_nouns(romaji COLLATE NOCASE) WHERE romaji; diff --git a/lib/src/dictionary_builder.dart b/lib/src/dictionary_builder.dart index 3fa5763..4a5a6ee 100644 --- a/lib/src/dictionary_builder.dart +++ b/lib/src/dictionary_builder.dart @@ -2317,11 +2317,12 @@ class DictionaryBuilder { bool showProgress = false, }) async { if (showProgress) { - stdout.write('\nProper noun progress 0%'); + stdout.write('Proper noun progress 0%'); } final lines = enamdict.split('\n'); List properNouns = []; + List properNounRomajiWords = []; double progress = 0; for (int i = 0; i < lines.length; i++) { if (showProgress) { @@ -2378,8 +2379,21 @@ class 
DictionaryBuilder { line = line.substring(typesString.length + 4); String romaji = line.substring(0, line.length - 1); + // If romaji contains multiple words or was changed by removing diacritics add them to romaji words + final words = + romaji.toLowerCase().removeDiacritics().splitWords().toSet().toList(); + if (words.isNotEmpty && words[0] != romaji.toLowerCase()) { + for (final word in words) { + properNounRomajiWords.add(ProperNounRomajiWordsCompanion( + word: Value(word), + properNounId: Value(i), + )); + } + } + properNouns.add( ProperNounsCompanion( + id: Value(i), writing: Value.absentIfNull(writing), writingSearchForm: Value.absentIfNull(writingSearchForm), reading: Value(reading), @@ -2394,6 +2408,7 @@ class DictionaryBuilder { await db.batch((batch) { batch.insertAll(db.properNouns, properNouns); + batch.insertAll(db.properNounRomajiWords, properNounRomajiWords); }); } diff --git a/lib/src/utils/constants.dart b/lib/src/utils/constants.dart index 4350be3..6c454ed 100644 --- a/lib/src/utils/constants.dart +++ b/lib/src/utils/constants.dart @@ -32,6 +32,17 @@ class SagaseDictionaryConstants { static const int dictionaryListIdKenteiLevelPre1 = 28; static const int dictionaryListIdKenteiLevel1 = 29; + static const dictionaryDatabaseFile = 'dictionary.sqlite'; + static const properNounDictionaryDatabaseFile = + 'proper_noun_dictionary.sqlite'; + + static const requiredAssetsTar = 'required_assets.tar'; + static const dictionaryZip = 'dictionary.zip'; + static const dictionaryWithProperNounsZip = + 'dictionary_with_proper_nouns.zip'; + static const properNounDictionaryZip = 'proper_noun_dictionary.zip'; + static const mecabZip = 'mecab.zip'; + static const backupDictionaryVersion = 'dictionary_version'; static const backupTimestamp = 'timestamp'; static const backupMyDictionaryLists = 'my_dictionary_lists'; diff --git a/lib/src/utils/string_utils.dart b/lib/src/utils/string_utils.dart index 1979654..1407732 100644 --- a/lib/src/utils/string_utils.dart +++ 
b/lib/src/utils/string_utils.dart @@ -1,4 +1,5 @@ import 'dart:math'; +import 'package:diacritic/diacritic.dart' as diacritic; extension JapaneseTextHelpers on String { static const fullWidthRegExp = r'([\uff01-\uff5e])'; @@ -41,4 +42,8 @@ extension JapaneseTextHelpers on String { List splitWords() { return splitWordsRegExp.allMatches(this).map((e) => e[0]!).toList(); } + + String removeDiacritics() { + return diacritic.removeDiacritics(this); + } } diff --git a/pubspec.yaml b/pubspec.yaml index 9ec05da..c7ddcfc 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -7,7 +7,7 @@ environment: sdk: '>=3.0.5 <4.0.0' dependencies: - drift: ^2.18.0 + drift: 2.18.0 path: ^1.9.0 xml: ^6.4.2 meta: ^1.10.0 @@ -15,12 +15,14 @@ dependencies: kana_kit: # TODO change to main one after merge git: url: https://github.com/Moseco/kana_kit + ref: 'f79e782' + diacritic: ^0.1.5 dev_dependencies: lints: ^4.0.0 test: ^1.24.9 - drift_dev: ^2.18.0 + drift_dev: 2.18.0 build_runner: any async: ^2.11.0 diff --git a/test/common.dart b/test/common.dart index 794ab24..061519f 100644 --- a/test/common.dart +++ b/test/common.dart @@ -1456,3 +1456,10 @@ const shortKanjiListData = '''{ "kentei_level_1": [] } '''; + +const shortEnamdict = '''さくら /(f) Sakura/ +たなかさくら /(h) Tanaka Sakura/ +東京 [とうきょう] /(p,s) Tokyo/ +ヴィナス /(u) Venus/ +宇宙機構 [うちゅうきかん] /(o) Japanese Aerospace Exploration Agency (JAXA) (formerly NASDA)/ +安倍晋三 [あべしんぞう] /(h) Shinzō Abe (1954.9.21-2022.7.8; Prime Minister of Japan 2006-2007 and 2012-2020)/'''; diff --git a/test/dao/proper_nouns_dao_test.dart b/test/dao/proper_nouns_dao_test.dart new file mode 100644 index 0000000..e8f0929 --- /dev/null +++ b/test/dao/proper_nouns_dao_test.dart @@ -0,0 +1,244 @@ +import 'dart:io'; + +import 'package:drift/drift.dart' as drift; +import 'package:path/path.dart' as path; +import 'package:sagase_dictionary/src/database.dart'; +import 'package:sagase_dictionary/src/dictionary_builder.dart'; +import 'package:test/test.dart'; + +import '../common.dart'; + 
+void main() { + group('ProperNounsDaoTest', () { + late AppDatabase database; + + setUp(() async { + // Create basic database with data + database = AppDatabase(); + + // Add proper nouns + await DictionaryBuilder.createProperNounDictionary( + database, + shortEnamdict, + ); + }); + + tearDown(() async { + await database.close(); + }); + + test('importProperNouns', () async { + // Create temp directory and database file + final tempDirectory = + Directory.systemTemp.createTempSync('proper_nouns_test.'); + final properNounDatabaseFile = + File(path.join(tempDirectory.path, 'proper_noun_database.sqlite')); + + // Export the database + await database + .customStatement('VACUUM INTO ?', [properNounDatabaseFile.path]); + await database.close(); + + // Open a new database + final db = AppDatabase(); + + expect(await db.properNouns.count().getSingle(), 0); + + // Import proper noun database + await db.properNounsDao.importProperNouns(properNounDatabaseFile.path); + + expect(await db.properNouns.count().getSingle(), 6); + + // Cleanup + await db.close(); + tempDirectory.deleteSync(recursive: true); + }); + + group('getByWriting', () { + test('Matching writing', () async { + final results = await database.properNounsDao.getByWriting('東京'); + expect(results.length, 1); + expect(results[0].writing, '東京'); + }); + + test('Missing writing', () async { + final results = await database.properNounsDao.getByWriting('大阪'); + expect(results.length, 0); + }); + }); + + group('getByReading', () { + test('Matching reading', () async { + final results = await database.properNounsDao.getByReading('とうきょう'); + expect(results.length, 1); + expect(results[0].writing, '東京'); + }); + + test('Missing reading', () async { + final results = await database.properNounsDao.getByReading('おおさか'); + expect(results.length, 0); + }); + }); + + group('getByWritingReading', () { + test('Matching writing and reading', () async { + final results = await database.properNounsDao.getByWritingAndReading( + '東京', 
+ 'とうきょう', + ); + expect(results.length, 1); + expect(results[0].writing, '東京'); + }); + + test('Missing writing with matching reading', () async { + final results = await database.properNounsDao.getByWritingAndReading( + '東', + 'とうきょう', + ); + expect(results.length, 0); + }); + + test('Matching writing with missing reading', () async { + final results = await database.properNounsDao.getByWritingAndReading( + '東京', + 'ときょ', + ); + expect(results.length, 0); + }); + + test('Missing writing and reading', () async { + final results = await database.properNounsDao.getByWritingAndReading( + '大阪', + 'おおさか', + ); + expect(results.length, 0); + }); + }); + + group('search', () { + test('Writing complete', () async { + final results = await database.properNounsDao.search('東京'); + expect(results.length, 1); + expect(results[0].writing, '東京'); + }); + + test('Writing partial', () async { + final results = await database.properNounsDao.search('東'); + expect(results.length, 1); + expect(results[0].writing, '東京'); + }); + + test('Reading complete', () async { + var results = await database.properNounsDao.search('さくら'); + expect(results.length, 1); + expect(results[0].reading, 'さくら'); + + results = await database.properNounsDao.search('ヴィナス'); + expect(results.length, 1); + expect(results[0].reading, 'ヴィナス'); + }); + + test('Reading partial', () async { + final results = await database.properNounsDao.search('さく'); + expect(results.length, 1); + expect(results[0].reading, 'さくら'); + }); + + test('Reading romaji complete', () async { + final results = await database.properNounsDao.search('toukyou'); + expect(results.length, 1); + expect(results[0].writing, '東京'); + }); + + test('Reading romaji partial', () async { + final results = await database.properNounsDao.search('touky'); + expect(results.length, 1); + expect(results[0].writing, '東京'); + }); + + test('Romaji complete', () async { + var results = await database.properNounsDao.search('Sakura'); + expect(results.length, 2); + 
expect(results[0].reading, 'さくら'); + expect(results[1].reading, 'たなかさくら'); + + results = await database.properNounsDao.search('sakura'); + expect(results.length, 2); + expect(results[0].reading, 'さくら'); + expect(results[1].reading, 'たなかさくら'); + + results = await database.properNounsDao.search('Venus'); + expect(results.length, 1); + expect(results[0].reading, 'ヴィナス'); + + results = await database.properNounsDao.search('venus'); + expect(results.length, 1); + expect(results[0].reading, 'ヴィナス'); + + results = await database.properNounsDao.search('jaxa'); + expect(results.length, 1); + expect(results[0].reading, 'うちゅうきかん'); + }); + + test('Romaji partial', () async { + var results = await database.properNounsDao.search('Saku'); + expect(results.length, 2); + expect(results[0].reading, 'さくら'); + expect(results[1].reading, 'たなかさくら'); + + results = await database.properNounsDao.search('saku'); + expect(results.length, 2); + expect(results[0].reading, 'さくら'); + expect(results[1].reading, 'たなかさくら'); + + results = await database.properNounsDao.search('Ven'); + expect(results.length, 1); + expect(results[0].reading, 'ヴィナス'); + + results = await database.properNounsDao.search('ven'); + expect(results.length, 1); + expect(results[0].reading, 'ヴィナス'); + + results = await database.properNounsDao.search('jax'); + expect(results.length, 1); + expect(results[0].reading, 'うちゅうきかん'); + }); + + test('Romaji multiple words complete', () async { + var results = await database.properNounsDao.search('Sakura Tanaka'); + expect(results.length, 1); + expect(results[0].reading, 'たなかさくら'); + + results = await database.properNounsDao.search('Tanaka Sakura'); + expect(results.length, 1); + expect(results[0].reading, 'たなかさくら'); + + results = await database.properNounsDao.search('Japanese Aerospace'); + expect(results.length, 1); + expect(results[0].reading, 'うちゅうきかん'); + + results = await database.properNounsDao.search('Shinzo Abe'); + expect(results.length, 1); + expect(results[0].reading, 
'あべしんぞう'); + + results = await database.properNounsDao.search('Shinzō Abe'); + expect(results.length, 1); + expect(results[0].reading, 'あべしんぞう'); + }); + + test('Romaji multiple words partial', () async { + var results = await database.properNounsDao.search('Sakura Tana'); + expect(results.length, 1); + expect(results[0].reading, 'たなかさくら'); + + results = await database.properNounsDao.search('Tanaka Saku'); + expect(results.length, 1); + expect(results[0].reading, 'たなかさくら'); + + results = await database.properNounsDao.search('Japanese Aerosp'); + expect(results.length, 1); + expect(results[0].reading, 'うちゅうきかん'); + }); + }); + }); +} diff --git a/test/dictionary_builder_test.dart b/test/dictionary_builder_test.dart index 117911e..c774682 100644 --- a/test/dictionary_builder_test.dart +++ b/test/dictionary_builder_test.dart @@ -11,13 +11,9 @@ void main() { group('DictionaryBuilderTest', () { late AppDatabase database; - setUp(() => database = AppDatabase()); + setUpAll(() async { + database = AppDatabase(); - tearDown(() async { - await database.close(); - }); - - test('Dictionary creation with short version', () async { await DictionaryBuilder.createDictionary( database, shortJMdict, @@ -31,6 +27,17 @@ void main() { shortFrequencyListData, ); + await DictionaryBuilder.createProperNounDictionary( + database, + shortEnamdict, + ); + }); + + tearDownAll(() async { + await database.close(); + }); + + test('Vocab', () async { // Dictionary info final dictionaryInfo = await database.dictionaryInfosDao.get(); expect(dictionaryInfo!.id, 0); @@ -512,7 +519,9 @@ void main() { expect(vocab9.definitions[0].antonyms!.length, 1); expect(vocab9.definitions[0].antonyms![0].ids, null); expect(vocab9.definitions[0].antonyms![0].text, '活語'); + }); + test('Radicals', () async { // Radicals final radical1 = await database.radicalsDao.get('一'); expect(radical1.radical, '一'); @@ -547,7 +556,9 @@ void main() { expect(radical3.importance, null); expect(radical3.variants, null); 
expect(radical3.variantOf, '乙'); + }); + test('Kanji', () async { // Kanji final kanji1 = await database.kanjisDao.getKanji('亜'); expect(kanji1.kanji, '亜'); @@ -758,7 +769,9 @@ void main() { expect(kanji4.nanori![5].readingSearchForm, null); expect(kanji4.nanori![5].readingRomaji, 'yuku'); expect(kanji4.nanori![5].readingRomajiSimplified, null); + }); + test('Predefined dictionary lists', () async { // Predefined dictionary lists final n5List = await database.predefinedDictionaryListsDao .get(SagaseDictionaryConstants.dictionaryListIdJlptVocabN5); @@ -770,5 +783,122 @@ void main() { expect(jouyouList.kanji.length, 1); expect(jouyouList.kanji[0], '亜'.kanjiCodePoint()); }); + + test('Proper nouns', () async { + // Proper nouns + final properNouns = await database.select(database.properNouns).get(); + + expect(properNouns[0].writing, null); + expect(properNouns[0].writingSearchForm, null); + expect(properNouns[0].reading, 'さくら'); + expect(properNouns[0].readingSearchForm, null); + expect(properNouns[0].readingRomaji, 'sakura'); + expect(properNouns[0].readingRomajiSimplified, null); + expect(properNouns[0].romaji, 'Sakura'); + expect(properNouns[0].types, [ProperNounType.femaleName]); + var properNounRomajiWords = + await (database.select(database.properNounRomajiWords) + ..where((word) => word.properNounId.equals(properNouns[0].id))) + .get(); + expect(properNounRomajiWords.length, 0); + + expect(properNouns[1].writing, null); + expect(properNouns[1].writingSearchForm, null); + expect(properNouns[1].reading, 'たなかさくら'); + expect(properNouns[1].readingSearchForm, null); + expect(properNouns[1].readingRomaji, 'tanakasakura'); + expect(properNouns[1].readingRomajiSimplified, null); + expect(properNouns[1].romaji, 'Tanaka Sakura'); + expect(properNouns[1].types, [ProperNounType.fullName]); + properNounRomajiWords = + await (database.select(database.properNounRomajiWords) + ..where((word) => word.properNounId.equals(properNouns[1].id))) + .get(); + 
expect(properNounRomajiWords.length, 2); + expect(properNounRomajiWords[0].word, 'tanaka'); + expect(properNounRomajiWords[1].word, 'sakura'); + + expect(properNouns[2].writing, '東京'); + expect(properNouns[2].writingSearchForm, null); + expect(properNouns[2].reading, 'とうきょう'); + expect(properNouns[2].readingSearchForm, null); + expect(properNouns[2].readingRomaji, 'toukyou'); + expect(properNouns[2].readingRomajiSimplified, 'tokyo'); + expect(properNouns[2].romaji, 'Tokyo'); + expect(properNouns[2].types, [ + ProperNounType.placeName, + ProperNounType.surname, + ]); + properNounRomajiWords = + await (database.select(database.properNounRomajiWords) + ..where((word) => word.properNounId.equals(properNouns[2].id))) + .get(); + expect(properNounRomajiWords.length, 0); + + expect(properNouns[3].writing, null); + expect(properNouns[3].writingSearchForm, null); + expect(properNouns[3].reading, 'ヴィナス'); + expect(properNouns[3].readingSearchForm, 'ゔぃなす'); + expect(properNouns[3].readingRomaji, 'vyinasu'); + expect(properNouns[3].readingRomajiSimplified, null); + expect(properNouns[3].romaji, 'Venus'); + expect(properNouns[3].types, [ProperNounType.personName]); + properNounRomajiWords = + await (database.select(database.properNounRomajiWords) + ..where((word) => word.properNounId.equals(properNouns[3].id))) + .get(); + expect(properNounRomajiWords.length, 0); + + expect(properNouns[4].writing, '宇宙機構'); + expect(properNouns[4].writingSearchForm, null); + expect(properNouns[4].reading, 'うちゅうきかん'); + expect(properNouns[4].readingSearchForm, null); + expect(properNouns[4].readingRomaji, 'uchuukikan'); + expect(properNouns[4].readingRomajiSimplified, 'uchukikan'); + expect( + properNouns[4].romaji, + 'Japanese Aerospace Exploration Agency (JAXA) (formerly NASDA)', + ); + expect(properNouns[4].types, [ProperNounType.organization]); + properNounRomajiWords = + await (database.select(database.properNounRomajiWords) + ..where((word) => word.properNounId.equals(properNouns[4].id))) + 
.get(); + expect(properNounRomajiWords.length, 7); + expect(properNounRomajiWords[0].word, 'japanese'); + expect(properNounRomajiWords[1].word, 'aerospace'); + expect(properNounRomajiWords[2].word, 'exploration'); + expect(properNounRomajiWords[3].word, 'agency'); + expect(properNounRomajiWords[4].word, 'jaxa'); + expect(properNounRomajiWords[5].word, 'formerly'); + expect(properNounRomajiWords[6].word, 'nasda'); + + expect(properNouns[5].writing, '安倍晋三'); + expect(properNouns[5].writingSearchForm, null); + expect(properNouns[5].reading, 'あべしんぞう'); + expect(properNouns[5].readingSearchForm, null); + expect(properNouns[5].readingRomaji, 'abeshinzou'); + expect(properNouns[5].readingRomajiSimplified, 'abeshinzo'); + expect( + properNouns[5].romaji, + 'Shinzō Abe (1954.9.21-2022.7.8; Prime Minister of Japan 2006-2007 and 2012-2020)', + ); + expect(properNouns[5].types, [ProperNounType.fullName]); + properNounRomajiWords = + await (database.select(database.properNounRomajiWords) + ..where((word) => word.properNounId.equals(properNouns[5].id))) + .get(); + expect(properNounRomajiWords.length, 10); + expect(properNounRomajiWords[0].word, 'shinzo'); + expect(properNounRomajiWords[1].word, 'abe'); + expect(properNounRomajiWords[2].word, '1954.9.21-2022.7.8'); + expect(properNounRomajiWords[3].word, 'prime'); + expect(properNounRomajiWords[4].word, 'minister'); + expect(properNounRomajiWords[5].word, 'of'); + expect(properNounRomajiWords[6].word, 'japan'); + expect(properNounRomajiWords[7].word, '2006-2007'); + expect(properNounRomajiWords[8].word, 'and'); + expect(properNounRomajiWords[9].word, '2012-2020'); + }); }); }