Skip to content

Commit

Permalink
wvware progress
Browse files Browse the repository at this point in the history
  • Loading branch information
andiwand committed Sep 28, 2024
1 parent 43be0ce commit f683779
Show file tree
Hide file tree
Showing 9 changed files with 131 additions and 52 deletions.
4 changes: 4 additions & 0 deletions src/odr/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ DocumentMeta DocumentFile::document_meta() const {

Document DocumentFile::document() const { return Document(m_impl->document()); }

/// Returns the shared pointer to the internal document file implementation
/// held by this wrapper, so callers can inspect the concrete decoder type.
std::shared_ptr<internal::abstract::DocumentFile> DocumentFile::impl() const {
return m_impl;
}

/// Wraps an internal PDF file implementation. The pointer is first copied
/// into the DecodedFile base and then moved into `m_impl`, so both refer to
/// the same object.
PdfFile::PdfFile(std::shared_ptr<internal::abstract::PdfFile> impl)
: DecodedFile(impl), m_impl{std::move(impl)} {}

Expand Down
4 changes: 3 additions & 1 deletion src/odr/file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ enum class FileLocation {
enum class DecoderEngine {
odr,
poppler,
wv_ware,
wvware,
};

/// @brief Preference for decoding files.
Expand Down Expand Up @@ -267,6 +267,8 @@ class DocumentFile final : public DecodedFile {

[[nodiscard]] Document document() const;

[[nodiscard]] std::shared_ptr<internal::abstract::DocumentFile> impl() const;

private:
std::shared_ptr<internal::abstract::DocumentFile> m_impl;
};
Expand Down
46 changes: 33 additions & 13 deletions src/odr/html.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include <odr/internal/html/pdf2htmlEX_wrapper.hpp>
#include <odr/internal/html/pdf_file.hpp>
#include <odr/internal/html/text_file.hpp>
#include <odr/internal/html/wvWare_wrapper.hpp>
#include <odr/internal/oldms_wvware/wvware_oldms_file.hpp>
#include <odr/internal/pdf_poppler/poppler_pdf_file.hpp>

#include <filesystem>
Expand Down Expand Up @@ -75,11 +77,9 @@ Html html::translate(const DecodedFile &decoded_file,
} else if (decoded_file.is_image_file()) {
return translate(decoded_file.image_file(), output_path, config);
} else if (decoded_file.is_archive_file()) {
return translate(decoded_file.archive_file().archive(), output_path,
config);
return translate(decoded_file.archive_file(), output_path, config);
} else if (decoded_file.is_document_file()) {
return translate(decoded_file.document_file().document(), output_path,
config);
return translate(decoded_file.document_file(), output_path, config);
} else if (decoded_file.is_pdf_file()) {
return translate(decoded_file.pdf_file(), output_path, config);
}
Expand All @@ -99,17 +99,24 @@ Html html::translate(const ImageFile &image_file,
return internal::html::translate_image_file(image_file, output_path, config);
}

Html html::translate(const Archive &archive, const std::string &output_path,
const HtmlConfig &config) {
fs::create_directories(output_path);
return internal::html::translate_filesystem(
FileType::unknown, archive.filesystem(), output_path, config);
// Translates an archive file to HTML by forwarding the archive obtained from
// `archive_file.archive()` to the `translate` overload that accepts it.
Html html::translate(const ArchiveFile &archive_file,
const std::string &output_path, const HtmlConfig &config) {
return translate(archive_file.archive(), output_path, config);
}

Html html::translate(const Document &document, const std::string &output_path,
const HtmlConfig &config) {
fs::create_directories(output_path);
return internal::html::translate_document(document, output_path, config);
// Translates a document file to HTML.
//
// When the file's implementation is a `WvWareLegacyMicrosoftFile`, the output
// directory is created and the wvWare translation is used. Every other
// implementation is translated through its `Document`.
Html html::translate(const DocumentFile &document_file,
                     const std::string &output_path, const HtmlConfig &config) {
  const auto impl = document_file.impl();
  const auto wvware_file =
      std::dynamic_pointer_cast<internal::WvWareLegacyMicrosoftFile>(impl);

  // Not backed by wvWare: fall back to the generic document translation.
  if (wvware_file == nullptr) {
    return translate(document_file.document(), output_path, config);
  }

  fs::create_directories(output_path);
  return internal::html::translate_wvware_oldms_file(*wvware_file, output_path,
                                                     config);
}

Html html::translate(const PdfFile &pdf_file, const std::string &output_path,
Expand All @@ -126,6 +133,19 @@ Html html::translate(const PdfFile &pdf_file, const std::string &output_path,
return internal::html::translate_pdf_file(pdf_file, output_path, config);
}

// Translates an archive to HTML: creates the `output_path` directories, then
// translates the archive's filesystem, passing `FileType::unknown` as the
// file type.
Html html::translate(const Archive &archive, const std::string &output_path,
const HtmlConfig &config) {
fs::create_directories(output_path);
return internal::html::translate_filesystem(
FileType::unknown, archive.filesystem(), output_path, config);
}

// Translates a document to HTML: creates the `output_path` directories, then
// delegates to the internal document translation.
Html html::translate(const Document &document, const std::string &output_path,
const HtmlConfig &config) {
fs::create_directories(output_path);
return internal::html::translate_document(document, output_path, config);
}

void html::edit(const Document &document, const char *diff) {
auto json = nlohmann::json::parse(diff);
for (const auto &[key, value] : json["modifiedText"].items()) {
Expand Down
27 changes: 22 additions & 5 deletions src/odr/html.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,20 +144,20 @@ Html translate(const ImageFile &image_file, const std::string &output_path,
const HtmlConfig &config);
/// @brief Translates an archive to HTML.
///
/// @param archive Archive to translate.
/// @param archive_file Archive file to translate.
/// @param output_path Path to save the HTML output.
/// @param config Configuration for the HTML output.
/// @return HTML output.
Html translate(const Archive &archive, const std::string &output_path,
Html translate(const ArchiveFile &archive_file, const std::string &output_path,
const HtmlConfig &config);
/// @brief Translates a document to HTML.
///
/// @param document Document to translate.
/// @param document_file Document file to translate.
/// @param output_path Path to save the HTML output.
/// @param config Configuration for the HTML output.
/// @return HTML output.
Html translate(const Document &document, const std::string &output_path,
const HtmlConfig &config);
Html translate(const DocumentFile &document_file,
const std::string &output_path, const HtmlConfig &config);
/// @brief Translates a PDF file to HTML.
///
/// @param pdf_file PDF file to translate.
Expand All @@ -167,6 +167,23 @@ Html translate(const Document &document, const std::string &output_path,
Html translate(const PdfFile &pdf_file, const std::string &output_path,
const HtmlConfig &config);

/// @brief Translates an archive to HTML.
///
/// @param archive Archive to translate.
/// @param output_path Path to save the HTML output.
/// @param config Configuration for the HTML output.
/// @return HTML output.
Html translate(const Archive &archive, const std::string &output_path,
const HtmlConfig &config);
/// @brief Translates a document to HTML.
///
/// @param document Document to translate.
/// @param output_path Path to save the HTML output.
/// @param config Configuration for the HTML output.
/// @return HTML output.
Html translate(const Document &document, const std::string &output_path,
const HtmlConfig &config);

/// @brief Edits a document with a diff.
///
/// @note The diff is generated by our JavaScript code in the browser.
Expand Down
47 changes: 35 additions & 12 deletions src/odr/internal/oldms_wvware/wvware_oldms_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,43 @@
#include <memory>
#include <utility>

#include <gsf/gsf-input-memory.h>
#include <gsf/gsf-input-stdio.h>
#include <wv/wv.h>

namespace odr::internal {

/// @brief Opens a legacy Microsoft file that lives on disk.
///
/// Creates a stdio-backed `GsfInput` for the file's disk path, stores it in
/// `m_gsf_input` and then calls `open()`.
///
/// @param file Disk file to decode.
/// @throws std::runtime_error if the `GsfInput` cannot be created.
WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile(
    std::shared_ptr<common::DiskFile> file)
    : m_file{std::move(file)} {
  GError *error = nullptr;

  m_gsf_input =
      gsf_input_stdio_new(m_file->disk_path()->string().c_str(), &error);

  if (m_gsf_input == nullptr) {
    // Per the GLib error convention a failing call may allocate `error` and
    // the caller owns it; release it before unwinding so it does not leak.
    // `g_clear_error` is a no-op when `error` is still null.
    g_clear_error(&error);
    throw std::runtime_error("gsf_input_stdio_new failed");
  }

  open();
}

/// @brief Opens a legacy Microsoft file that is held in memory.
///
/// Creates a memory-backed `GsfInput` over the file's data, stores it in
/// `m_gsf_input` and then calls `open()`.
///
/// @param file Memory file to decode.
/// @throws std::runtime_error if the `GsfInput` cannot be created.
WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile(
    std::shared_ptr<common::MemoryFile> file)
    : m_file{std::move(file)} {
  // The last argument (`needs_free`) is false: libgsf must not free the
  // buffer. NOTE(review): this relies on `m_file` keeping the buffer alive for
  // as long as `m_gsf_input` is used - confirm against MemoryFile.
  m_gsf_input = gsf_input_memory_new(
      reinterpret_cast<const guint8 *>(m_file->memory_data()),
      static_cast<gsf_off_t>(m_file->size()), false);

  // Mirror the disk-file constructor: never hand a null input to the parser.
  if (m_gsf_input == nullptr) {
    throw std::runtime_error("gsf_input_memory_new failed");
  }

  open();
}

/// @brief Releases the wvWare parser state held in `m_ps` via `wvOLEFree`.
///
/// NOTE(review): `m_gsf_input` is not released explicitly here; presumably
/// `wvOLEFree` drops the reference taken over by the parser - confirm, since
/// an extra unref would double-free and a missing one would leak.
WvWareLegacyMicrosoftFile::~WvWareLegacyMicrosoftFile() { wvOLEFree(&m_ps); }

void WvWareLegacyMicrosoftFile::open() {
wvInit();
char *path = const_cast<char *>(m_file->disk_path()->string().c_str());
int ret = wvInitParser(&m_ps, path);

int ret = wvInitParser_gsf(&m_ps, m_gsf_input);

// check if password is required
if ((ret & 0x8000) != 0) {
Expand All @@ -27,7 +54,7 @@ WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile(
ret = 0;
}
} else {
m_encryption_state = EncryptionState::decrypted;
m_encryption_state = EncryptionState::not_encrypted;
}

if (ret != 0) {
Expand All @@ -36,32 +63,28 @@ WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile(
}
}

WvWareLegacyMicrosoftFile::~WvWareLegacyMicrosoftFile() { wvOLEFree(&m_ps); }

/// @brief Returns the underlying file this decoder was constructed with.
std::shared_ptr<abstract::File>
WvWareLegacyMicrosoftFile::file() const noexcept {
return m_file;
}

FileType WvWareLegacyMicrosoftFile::file_type() const noexcept {
return {}; // TODO
return FileType::legacy_word_document;
}

FileMeta WvWareLegacyMicrosoftFile::file_meta() const noexcept {
return {}; // TODO
return {file_type(), password_encrypted(), document_meta()};
}

DecoderEngine WvWareLegacyMicrosoftFile::decoder_engine() const noexcept {
return DecoderEngine::wv_ware;
return DecoderEngine::wvware;
}

DocumentType WvWareLegacyMicrosoftFile::document_type() const {
return {}; // TODO
return DocumentType::text;
}

DocumentMeta WvWareLegacyMicrosoftFile::document_meta() const {
return {}; // TODO
}
DocumentMeta WvWareLegacyMicrosoftFile::document_meta() const { return {}; }

bool WvWareLegacyMicrosoftFile::password_encrypted() const noexcept {
return m_encryption_state == EncryptionState::encrypted ||
Expand Down
7 changes: 6 additions & 1 deletion src/odr/internal/oldms_wvware/wvware_oldms_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@

namespace odr::internal::common {
class DiskFile;
class MemoryFile;
} // namespace odr::internal::common

namespace odr::internal {

class WvWareLegacyMicrosoftFile final : public abstract::DocumentFile {
public:
explicit WvWareLegacyMicrosoftFile(std::shared_ptr<common::DiskFile> file);
explicit WvWareLegacyMicrosoftFile(std::shared_ptr<common::MemoryFile> file);
~WvWareLegacyMicrosoftFile() final;

[[nodiscard]] std::shared_ptr<abstract::File> file() const noexcept final;
Expand All @@ -40,12 +42,15 @@ class WvWareLegacyMicrosoftFile final : public abstract::DocumentFile {
[[nodiscard]] wvParseStruct &parse_struct() const;

private:
std::shared_ptr<common::DiskFile> m_file;
std::shared_ptr<abstract::File> m_file;
GsfInput *m_gsf_input{};

EncryptionState m_encryption_state{EncryptionState::unknown};

wvParseStruct m_ps{};
int m_encryption_flag{};

void open();
};

} // namespace odr::internal
Expand Down
34 changes: 20 additions & 14 deletions src/odr/internal/open_strategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <odr/internal/magic.hpp>
#include <odr/internal/odf/odf_file.hpp>
#include <odr/internal/oldms/oldms_file.hpp>
#include <odr/internal/oldms_wvware/wvware_oldms_file.hpp>
#include <odr/internal/ooxml/ooxml_file.hpp>
#include <odr/internal/pdf/pdf_file.hpp>
#include <odr/internal/pdf_poppler/poppler_pdf_file.hpp>
Expand Down Expand Up @@ -101,10 +102,8 @@ open_strategy::engines(const std::shared_ptr<abstract::File> &file,

result.push_back(DecoderEngine::odr);

if (as == FileType::legacy_word_document ||
as == FileType::legacy_powerpoint_presentation ||
as == FileType::legacy_excel_worksheets) {
result.push_back(DecoderEngine::wv_ware);
if (as == FileType::legacy_word_document) {
result.push_back(DecoderEngine::wvware);
}

if (as == FileType::portable_document_format) {
Expand Down Expand Up @@ -251,6 +250,15 @@ open_strategy::open_file(std::shared_ptr<abstract::File> file, FileType as,
}
return nullptr;
}
if (with == DecoderEngine::wvware) {
try {
auto memory_file = std::make_shared<common::MemoryFile>(*file);
return std::make_unique<odr::internal::WvWareLegacyMicrosoftFile>(
std::move(memory_file));
} catch (...) {
}
return nullptr;
}
return nullptr;
}

Expand Down Expand Up @@ -363,11 +371,10 @@ open_strategy::open_file(std::shared_ptr<abstract::File> file, FileType as,
std::unique_ptr<abstract::DecodedFile>
open_strategy::open_file(std::shared_ptr<abstract::File> file,
const DecodePreference &preference) {
std::vector<FileType> probe_types =
preference.as_file_type.has_value()
? std::vector{*preference.as_file_type}
: preference.file_type_priority;
{
std::vector<FileType> probe_types;
if (preference.as_file_type.has_value()) {
probe_types.push_back(*preference.as_file_type);
} else {
std::vector<FileType> detected_types = types(file);
probe_types.insert(probe_types.end(), detected_types.begin(),
detected_types.end());
Expand All @@ -376,11 +383,10 @@ open_strategy::open_file(std::shared_ptr<abstract::File> file,
}

for (FileType as : probe_types) {
std::vector<DecoderEngine> probe_engines =
preference.with_engine.has_value()
? std::vector{*preference.with_engine}
: preference.engine_priority;
{
std::vector<DecoderEngine> probe_engines;
if (preference.with_engine.has_value()) {
probe_engines.push_back(*preference.with_engine);
} else {
std::vector<DecoderEngine> detected_engines = engines(file, as);
probe_engines.insert(probe_engines.end(), detected_engines.begin(),
detected_engines.end());
Expand Down
8 changes: 4 additions & 4 deletions src/odr/open_document_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ std::string OpenDocumentReader::engine_to_string(const DecoderEngine engine) {
return "odr";
} else if (engine == DecoderEngine::poppler) {
return "poppler";
} else if (engine == DecoderEngine::wv_ware) {
return "wv_ware";
} else if (engine == DecoderEngine::wvware) {
return "wvware";
}
throw UnknownDecoderEngine();
}
Expand All @@ -180,8 +180,8 @@ DecoderEngine OpenDocumentReader::engine_by_name(const std::string &name) {
return DecoderEngine::odr;
} else if (name == "poppler") {
return DecoderEngine::poppler;
} else if (name == "wv_ware") {
return DecoderEngine::wv_ware;
} else if (name == "wvware") {
return DecoderEngine::wvware;
}
throw UnknownDecoderEngine();
}
Expand Down
6 changes: 4 additions & 2 deletions test/src/html_output_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ using HtmlOutputTests = ::testing::TestWithParam<TestParams>;
TEST_P(HtmlOutputTests, html_meta) {
const TestParams &params = GetParam();
const TestFile &test_file = params.test_file;
const std::string &test_file_path = params.path;
const DecoderEngine engine = params.engine;
const std::string &test_repo = params.test_repo;
const std::string &output_path = params.output_path;
Expand All @@ -48,7 +47,6 @@ TEST_P(HtmlOutputTests, html_meta) {

// these files cannot be opened
if (util::string::ends_with(test_file.short_path, ".sxw") ||
(test_file.type == FileType::legacy_word_document) ||
(test_file.type == FileType::legacy_powerpoint_presentation) ||
(test_file.type == FileType::legacy_excel_worksheets) ||
(test_file.type == FileType::word_perfect) ||
Expand Down Expand Up @@ -187,6 +185,10 @@ std::vector<TestParams> list_test_params() {
if (test_file.type == FileType::portable_document_format) {
engines.push_back(DecoderEngine::poppler);
}
if (test_file.type == FileType::legacy_word_document) {
engines.clear();
engines.push_back(DecoderEngine::wvware);
}

for (const DecoderEngine engine : engines) {
params.push_back(create_test_params(test_file, engine));
Expand Down

0 comments on commit f683779

Please sign in to comment.