diff --git a/package.json b/package.json
index a6d9da2e..6566f0a8 100644
--- a/package.json
+++ b/package.json
@@ -17,7 +17,8 @@
   "bin": {
     "wgd": "src/wikigdrive.sh",
     "wikigdrive": "src/wikigdrive.sh",
-    "wikigdrivectl": "src/wikigdrivectl.sh"
+    "wikigdrivectl": "src/wikigdrivectl.sh",
+    "odt2md": "src/odt2md.sh"
   },
   "main": "src/cli/wikigdrive.ts",
   "scripts": {
diff --git a/src/cli/odt2md.ts b/src/cli/odt2md.ts
new file mode 100644
index 00000000..f60d5fc6
--- /dev/null
+++ b/src/cli/odt2md.ts
@@ -0,0 +1,101 @@
+'use strict';
+
+import path from 'path';
+import minimist from 'minimist';
+import {fileURLToPath} from 'url';
+import {Buffer} from 'buffer';
+import fs from 'fs';
+
+import {OdtProcessor} from '../odt/OdtProcessor.js';
+import {UnMarshaller} from '../odt/UnMarshaller.js';
+import {DocumentContent, DocumentStyles, LIBREOFFICE_CLASSES} from '../odt/LibreOffice.js';
+import {OdtToMarkdown} from '../odt/OdtToMarkdown.js';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+process.env.GIT_SHA = process.env.GIT_SHA || 'dev';
+
+/** Prints the package name, version and the two supported invocation forms. */
+async function usage() {
+  const pkg = JSON.parse(new TextDecoder().decode(fs.readFileSync(path.resolve(__dirname, '..', '..', 'package.json'))));
+
+  const commandUsage = 'echo "test" | odt2md\n\nor\n\nodt2md filename.odt';
+
+  console.log(
+    `${pkg.name} version: ${pkg.version}, ${process.env.GIT_SHA}\n\nUsage:\n${commandUsage.trim()}\n`);
+}
+
+/** Reads stdin until EOF and returns everything received as one buffer. */
+async function readStdin(): Promise<Buffer> {
+  const chunks: Buffer[] = [];
+  for await (const chunk of process.stdin) {
+    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+  }
+  return Buffer.concat(chunks);
+}
+
+/**
+ * Converts an ODT document to markdown and prints it on stdout.
+ *
+ * The document is taken from the first positional argument when one is given,
+ * otherwise from piped stdin. Stdin is read to EOF instead of being abandoned
+ * after a fixed delay, so slow or large pipes are not truncated.
+ */
+async function main() {
+  const argv = minimist(process.argv.slice(2));
+
+  if (argv.h || argv.help) {
+    await usage();
+    process.exit(1);
+  }
+
+  let input: Buffer;
+  if (argv._.length > 0) {
+    input = fs.readFileSync(path.resolve(process.cwd(), String(argv._[0])));
+  } else if (!process.stdin.isTTY) {
+    input = await readStdin();
+  } else {
+    // Interactive terminal and no file name: there is nothing to convert.
+    await usage();
+    process.exit(1);
+  }
+
+  if (input.length === 0) {
+    console.error('No input');
+    process.exit(1);
+  }
+
+  const processor = new OdtProcessor();
+  await processor.loadFromBuffer(input);
+  if (!processor.getContentXml()) {
+    throw Error('No odt processed');
+  }
+
+  const parser = new UnMarshaller(LIBREOFFICE_CLASSES, 'DocumentContent');
+  const document: DocumentContent = parser.unmarshal(processor.getContentXml());
+  if (!document) {
+    throw Error('No document unmarshalled');
+  }
+  const parserStyles = new UnMarshaller(LIBREOFFICE_CLASSES, 'DocumentStyles');
+  const styles: DocumentStyles = parserStyles.unmarshal(processor.getStylesXml());
+  if (!styles) {
+    throw Error('No styles unmarshalled');
+  }
+  const converter = new OdtToMarkdown(document, styles, processor.getFileNameMap(), processor.getXmlMap());
+  const markdown = await converter.convert();
+  console.log(markdown);
+}
+
+try {
+  await main();
+  process.exit(0);
+} catch (err) {
+  if (err.isUsageError) {
+    console.error(err.message);
+    await usage();
+  } else {
+    console.error(err);
+  }
+  process.exit(1);
+}
diff --git a/src/containers/transform/TaskLocalFileTransform.ts b/src/containers/transform/TaskLocalFileTransform.ts
index f65a51bf..23214da0 100644
--- a/src/containers/transform/TaskLocalFileTransform.ts
+++ b/src/containers/transform/TaskLocalFileTransform.ts
@@ -139,8 +139,8 @@ export class TaskLocalFileTransform extends QueueTask {
     const picturesDirAbsolute = destinationPath + '/' + this.realFileName.replace(/.md$/, '.assets/');
 
     if (SINGLE_THREADED_TRANSFORM) {
-      const processor = new OdtProcessor(odtPath, true);
-      await processor.load();
+      const processor = new OdtProcessor(true);
+      await processor.load(odtPath);
       await processor.unzipAssets(destinationPath, this.realFileName);
       const content = processor.getContentXml();
       const stylesXml = processor.getStylesXml();
diff --git a/src/odt/OdtProcessor.ts b/src/odt/OdtProcessor.ts
index 340a7bb8..61e04123 100644
--- a/src/odt/OdtProcessor.ts
+++ b/src/odt/OdtProcessor.ts
@@ -18,17 +18,23 @@ export class OdtProcessor {
   private fileNameMap: { [name: string]: string };
   private xmlMap: { [name: string]: string };
 
-  constructor(private odtPath: string, private contentAddressable = false) {
+  constructor(private contentAddressable = false) {
     this.fileNameMap = {};
     this.xmlMap = {};
   }
 
-  async load() {
-    if (!fs.existsSync(this.odtPath)) {
+  /** Loads the ODT archive at odtPath; does nothing when the file does not exist. */
+  async load(odtPath: string) {
+    if (!fs.existsSync(odtPath)) {
       return;
     }
+    const input: Buffer = fs.readFileSync(odtPath);
+    await this.loadFromBuffer(input);
+  }
+
+  /** Loads an ODT archive that is already in memory (e.g. read from stdin). */
+  async loadFromBuffer(input: Buffer): Promise<void> {
     const jsZip = new JSZip();
-    const input: Buffer = fs.readFileSync(this.odtPath);
     const zip = await jsZip.loadAsync(input);
 
     this.files = zip.folder('').files;
diff --git a/src/odt/executeOdtToMarkdown.ts b/src/odt/executeOdtToMarkdown.ts
index 030b0c24..2d6c71c4 100644
--- a/src/odt/executeOdtToMarkdown.ts
+++ b/src/odt/executeOdtToMarkdown.ts
@@ -8,8 +8,8 @@ import {generateDocumentFrontMatter} from '../containers/transform/frontmatters/
 import {OdtProcessor} from './OdtProcessor.ts';
 
 export async function executeOdtToMarkdown(workerData) {
-  const processor = new OdtProcessor(workerData.odtPath, true);
-  await processor.load();
+  const processor = new OdtProcessor(true);
+  await processor.load(workerData.odtPath);
   await processor.unzipAssets(workerData.destinationPath, workerData.realFileName);
   const content = processor.getContentXml();
   const stylesXml = processor.getStylesXml();
diff --git a/src/odt2md.sh b/src/odt2md.sh
new file mode 100755
index 00000000..fa36eaea
--- /dev/null
+++ b/src/odt2md.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+FULL_PATH="$(readlink -f "${BASH_SOURCE[0]}")"
+MAIN_DIR="$(dirname "$FULL_PATH")/.."
+
+# Flags handed to node itself; --inspect is prepended below when requested.
+NODE_ARGS=(--no-warnings --enable-source-maps --experimental-specifier-resolution=node --loader ts-node/esm)
+# Arguments forwarded to the CLI, kept as an array so spaces in file names survive.
+POSITIONAL_ARGS=()
+
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --inspect)
+      NODE_ARGS=(--inspect "${NODE_ARGS[@]}")
+      shift # past argument
+      ;;
+    *)
+      POSITIONAL_ARGS+=("$1") # save positional arg
+      shift # past argument
+      ;;
+  esac
+done
+
+exec /usr/bin/env node "${NODE_ARGS[@]}" "$MAIN_DIR/src/cli/odt2md.ts" "${POSITIONAL_ARGS[@]}"
diff --git a/test/odt/OdtLoad.test.ts b/test/odt/OdtLoad.test.ts
index 359b2076..b1c55386 100644
--- a/test/odt/OdtLoad.test.ts
+++ b/test/odt/OdtLoad.test.ts
@@ -16,8 +16,8 @@ describe('OdtLoad', () => {
   it('test content.xml transform to object', async () => {
     const fileSystem = new FileContentService(__dirname);
     const odtPath = fileSystem.getRealPath() + '/' + 'example_document.odt';
-    const processor = new OdtProcessor(odtPath);
-    await processor.load();
+    const processor = new OdtProcessor();
+    await processor.load(odtPath);
 
     const content = processor.getContentXml();
 
diff --git a/test/odt_md/Issues.test.ts b/test/odt_md/Issues.test.ts
index 696eae47..3ee032fb 100644
--- a/test/odt_md/Issues.test.ts
+++ b/test/odt_md/Issues.test.ts
@@ -64,8 +64,8 @@ describe('MarkDownTransformTest', () => {
 async function transformOdt(id: string) {
   const folder = new FileContentService(__dirname);
   const odtPath = folder.getRealPath() + '/' + id + '.odt';
-  const processor = new OdtProcessor(odtPath);
-  await processor.load();
+  const processor = new OdtProcessor();
+  await processor.load(odtPath);
   if (!processor.getContentXml()) {
     throw Error('No odt processed');
   }
diff --git a/test/odt_md/MarkDownTransform.test.ts b/test/odt_md/MarkDownTransform.test.ts
index 68820712..0b407d35 100644
--- a/test/odt_md/MarkDownTransform.test.ts
+++ b/test/odt_md/MarkDownTransform.test.ts
@@ -1,7 +1,7 @@
 import {assert} from 'chai';
 import fs from 'fs';
 
-import {compareTexts, createTmpDir} from '../utils.ts';
+import {compareTexts} from '../utils.ts';
 import {OdtToMarkdown} from '../../src/odt/OdtToMarkdown.ts';
 import {DocumentContent, DocumentStyles, LIBREOFFICE_CLASSES} from '../../src/odt/LibreOffice.ts';
 import {UnMarshaller} from '../../src/odt/UnMarshaller.ts';
@@ -135,8 +135,8 @@ describe('MarkDownTransformTest', () => {
 async function transformOdt(id: string) {
   const folder = new FileContentService(__dirname);
   const odtPath = folder.getRealPath() + '/' + id + '.odt';
-  const processor = new OdtProcessor(odtPath);
-  await processor.load();
+  const processor = new OdtProcessor();
+  await processor.load(odtPath);
   if (!processor.getContentXml()) {
     throw Error('No odt processed');
   }
diff --git a/test/odt_md/RewriteRules.test.ts b/test/odt_md/RewriteRules.test.ts
index 1ae71d24..6aa80374 100644
--- a/test/odt_md/RewriteRules.test.ts
+++ b/test/odt_md/RewriteRules.test.ts
@@ -42,8 +42,8 @@ describe('RewriteRulesTest', () => {
 async function transformOdt(id: string) {
   const folder = new FileContentService(__dirname);
   const odtPath = folder.getRealPath() + '/' + id + '.odt';
-  const processor = new OdtProcessor(odtPath);
-  await processor.load();
+  const processor = new OdtProcessor();
+  await processor.load(odtPath);
   if (!processor.getContentXml()) {
     throw Error('No odt processed');
   }