From fc70f76e2e04832b6934169ac7754dee43a1d428 Mon Sep 17 00:00:00 2001
From: Sadra Barikbin
Date: Sun, 4 Feb 2024 09:27:33 +0330
Subject: [PATCH] Do the improvement

---
 src/ccutil/tessdatamanager.cpp    | 47 ++++++++++++++++++++++++++++++-
 src/ccutil/tessdatamanager.h      |  2 ++
 src/training/combine_tessdata.cpp | 18 ++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/src/ccutil/tessdatamanager.cpp b/src/ccutil/tessdatamanager.cpp
index 8ab26506c5..03ae5d27f2 100644
--- a/src/ccutil/tessdatamanager.cpp
+++ b/src/ccutil/tessdatamanager.cpp
@@ -85,6 +85,45 @@ bool TessdataManager::LoadArchiveFile(const char *filename) {
   }
   return result;
 }
+
+// Writes every non-empty component into a zip archive at filename, naming
+// each member "<filename>.<component suffix>". Returns true only when the
+// archive was opened, every member was written and the archive was closed
+// without error.
+bool TessdataManager::SaveArchiveFile(const char *filename) const {
+  archive *a = archive_write_new();
+  if (a == nullptr) {
+    return false;
+  }
+  archive_entry *ae = archive_entry_new();
+  bool result = ae != nullptr && archive_write_set_format_zip(a) == ARCHIVE_OK &&
+                archive_write_open_filename(a, filename) == ARCHIVE_OK;
+  if (result) {
+    std::string filename_str = filename;
+    filename_str += ".";
+    for (unsigned i = 0; i < TESSDATA_NUM_ENTRIES && result; ++i) {
+      if (entries_[i].empty()) {
+        continue;
+      }
+      // Reset the shared entry so no metadata carries over between members.
+      archive_entry_clear(ae);
+      archive_entry_set_pathname(ae, (filename_str + kTessdataFileSuffixes[i]).c_str());
+      archive_entry_set_filetype(ae, AE_IFREG);
+      archive_entry_set_perm(ae, 0644); // Octal rw-r--r--; decimal 333 was 0515.
+      archive_entry_set_size(ae, entries_[i].size());
+      // Warnings still produce a usable member; anything worse aborts the save.
+      result = archive_write_header(a, ae) >= ARCHIVE_WARN &&
+               archive_write_data(a, entries_[i].data(), entries_[i].size()) >= 0;
+    }
+    // Closing finishes the archive, so a failure here invalidates the file.
+    result = archive_write_close(a) == ARCHIVE_OK && result;
+  }
+  if (ae != nullptr) {
+    archive_entry_free(ae);
+  }
+  archive_write_free(a);
+  return result;
+}
 #endif
 
 bool TessdataManager::Init(const char *data_file_name) {
@@ -162,12 +201,18 @@ void TessdataManager::OverwriteEntry(TessdataType type, const char *data, int si
 
 // Saves to the given filename.
 bool TessdataManager::SaveFile(const char *filename, FileWriter writer) const {
-  // TODO: This method supports only the proprietary file format.
+  // Without a custom writer and with libarchive available, a zip archive is
+  // written; every other case uses the proprietary serialized format.
   ASSERT_HOST(is_loaded_);
+#if defined(HAVE_LIBARCHIVE)
+  if (writer == nullptr) {
+    return SaveArchiveFile(filename);
+  }
+#endif
   std::vector<char> data;
   Serialize(&data);
   if (writer == nullptr) {
     return SaveDataToFile(data, filename);
   } else {
     return (*writer)(data, filename);
   }
diff --git a/src/ccutil/tessdatamanager.h b/src/ccutil/tessdatamanager.h
index 01bfebe6ef..500ba9ada6 100644
--- a/src/ccutil/tessdatamanager.h
+++ b/src/ccutil/tessdatamanager.h
@@ -223,6 +223,8 @@ class TESS_API TessdataManager {
 private:
   // Use libarchive.
   bool LoadArchiveFile(const char *filename);
+  // Writes all non-empty components to filename as a zip archive.
+  bool SaveArchiveFile(const char *filename) const;
 
   /**
    * Fills type with TessdataType of the tessdata component represented by the
diff --git a/src/training/combine_tessdata.cpp b/src/training/combine_tessdata.cpp
index 6c452f7676..b3448cc9fe 100644
--- a/src/training/combine_tessdata.cpp
+++ b/src/training/combine_tessdata.cpp
@@ -219,6 +219,20 @@ int main(int argc, char **argv) {
       tprintf("Failed to write modified traineddata:%s!\n", argv[2]);
       return EXIT_FAILURE;
     }
+  } else if (argc == 3 && strcmp(argv[1], "-t") == 0) {
+#if defined(HAVE_LIBARCHIVE)
+    if (!tm.Init(argv[2])) {
+      tprintf("Failed to read %s\n", argv[2]);
+      return EXIT_FAILURE;
+    }
+    if (!tm.SaveFile(argv[2], nullptr)) {
+      tprintf("Failed to transform traineddata:%s!\n", argv[2]);
+      return EXIT_FAILURE;
+    }
+#else
+    tprintf("Failed to load libarchive. Is tesseract compiled with libarchive support?\n");
+    return EXIT_FAILURE;
+#endif
   } else if (argc == 3 && strcmp(argv[1], "-d") == 0) {
     return list_components(tm, argv[2]);
   } else if (argc == 3 && strcmp(argv[1], "-l") == 0) {
@@ -272,6 +286,10 @@ int main(int argc, char **argv) {
         "Usage for compacting LSTM component to int:\n"
         " %s -c traineddata_file\n",
         argv[0]);
+    printf(
+        "Usage for transforming the proprietary .traineddata file to a zip archive:\n"
+        " %s -t traineddata_file\n",
+        argv[0]);
     return EXIT_FAILURE;
   }
   tm.Directory();