Skip to content

Commit

Permalink
OldDiffable: More accurate export
Browse files Browse the repository at this point in the history
Version 0.0.8 did not support WIVU morphology yet. While there is a
separate StrippedDiffable option to strip it, also strip it in
OldDiffable.

When exporting for 0.0.2 or older, make sure not to export mixed grammar
information: remove RMAC or Source Indices when their count does not
match the number of Strongs entries. When a grammar tag no longer
contains any grammar information, replace it with an extra attribute.
  • Loading branch information
schierlm committed May 20, 2024
1 parent 329e3dd commit eee21c1
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 16 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ both for import and for export:
- **RoundtripXML**: Useful for interchange of modules with converters written
in other programming languages (that prefer XML binding to plaintext parsing)

Note that the **Diffable** format got new features in v0.0.8 as well as in v0.0.9, which
Note that the **Diffable** format got new features in v0.0.3, v0.0.8 and v0.0.9, which
are backwards compatible but *not* forwards compatible. Use the **OldDiffable** format
to export Bibles in v0.0.9 so that v0.0.8 or older versions are guaranteed to be able
to read them.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public Collection<Module<ExportFormat>> getExportFormats() {
List<Module<ExportFormat>> result = new ArrayList<ModuleRegistry.Module<ExportFormat>>();
result.add(new Module<ExportFormat>("Validate", "Validate bible for inconsistencies", Validate.HELP_TEXT, Validate.class));
result.add(new Module<ExportFormat>("StrippedDiffable", "Like Diffable, but with features stripped.", StrippedDiffable.HELP_TEXT, StrippedDiffable.class));
result.add(new Module<ExportFormat>("OldDiffable", "Export bibles to Diffable format of v0.0.8 or v0.0.7 (or older).", OldDiffable.HELP_TEXT, OldDiffable.class));
result.add(new Module<ExportFormat>("OldDiffable", "Export bibles to Diffable format of v0.0.8 or v0.0.7 or v0.0.2.", OldDiffable.HELP_TEXT, OldDiffable.class));
result.add(new Module<ExportFormat>("ZefaniaXMLMyBible", "Zefania XML - well known bible format (with MyBible optimizations).", ZefaniaXMLMyBible.HELP_TEXT, ZefaniaXMLMyBible.class));
result.add(new Module<ExportFormat>("ZefDicMyBible", "Zefania Dictionary exporter for MyBible.", ZefDicMyBible.HELP_TEXT, ZefDicMyBible.class));
result.add(new Module<ExportFormat>("MobiPocket", "MobiPocket ebook format (predecessor of Kindle's format)", MobiPocket.HELP_TEXT, MobiPocket.class));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,51 +6,56 @@
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import biblemulticonverter.data.Bible;
import biblemulticonverter.data.Book;
import biblemulticonverter.data.BookID;
import biblemulticonverter.data.Chapter;
import biblemulticonverter.data.Utils;
import biblemulticonverter.data.FormattedText.ExtraAttributePriority;
import biblemulticonverter.data.FormattedText.FormattingInstructionKind;
import biblemulticonverter.data.FormattedText.LineBreakKind;
import biblemulticonverter.data.FormattedText.RawHTMLMode;
import biblemulticonverter.data.FormattedText.Visitor;
import biblemulticonverter.format.StrippedDiffable.Feature;
import biblemulticonverter.data.Verse;

public class OldDiffable implements ExportFormat {

public static final String[] HELP_TEXT = {
"Export bibles to Diffable format of v0.0.8 or v0.0.7 (or older).",
"Export bibles to Diffable format of v0.0.8 or v0.0.7 or v0.0.2.",
"",
"Usage: OldDiffable [-older] <OutputFile>",
"Usage: OldDiffable [-older|-oldest] <OutputFile>",
"",
"When -older switch is not given, export to v0.0.8 format"
"When -oldest switch is given, export to v0.0.2 format;",
"When -older switch is given, export to v0.0.7 format;",
"When neither switch is given, export to v0.0.8 format."
};

private static final String MAGIC = "BibleMultiConverter-1.0 Title: ";

@Override
public void doExport(Bible bible, String... exportArgs) throws Exception {
boolean olderFormat = exportArgs[0].equals("-older");
File exportFile = new File(exportArgs[olderFormat ? 1 : 0]);
int format = exportArgs[0].equals("-oldest") ? 2 : exportArgs[0].equals("-older") ? 7 : 8;
File exportFile = new File(exportArgs[format < 8 ? 1 : 0]);
try (Writer w = new OutputStreamWriter(new FileOutputStream(exportFile), StandardCharsets.UTF_8)) {
doExport(bible, w, olderFormat);
doExport(bible, w, format);
}
}

private void doExport(Bible bible, Writer w, boolean olderFormat) throws IOException {
private void doExport(Bible bible, Writer w, int format) throws IOException {
w.write(MAGIC + bible.getName() + "\n");
for (Book book : bible.getBooks()) {
w.write(book.getAbbr() + " = " + book.getId().getOsisID() + "\t" + book.getShortName() + "\t" + book.getLongName() + "\n");
int chapterNumber = 0;
for (Chapter ch : book.getChapters()) {
chapterNumber++;
if (ch.getProlog() != null) {
ch.getProlog().accept(new OldDiffableVisitor(w, book.getAbbr() + " " + chapterNumber + " ", olderFormat));
ch.getProlog().accept(new OldDiffableVisitor(w, book.getAbbr() + " " + chapterNumber + " ", format));
}
for (Verse v : ch.getVerses()) {
v.accept(new OldDiffableVisitor(w, book.getAbbr() + " " + chapterNumber + ":" + v.getNumber() + " ", olderFormat));
v.accept(new OldDiffableVisitor(w, book.getAbbr() + " " + chapterNumber + ":" + v.getNumber() + " ", format));
}
}
}
Expand All @@ -61,13 +66,13 @@ private static class OldDiffableVisitor implements Visitor<IOException> {
private final String linePrefix;
private final OldDiffableVisitor childVisitor;
private boolean startNewLine = false, inMainContent = false;
private boolean olderFormat;
private int format;

private OldDiffableVisitor(Writer w, String linePrefix, boolean olderFormat) throws IOException {
private OldDiffableVisitor(Writer w, String linePrefix, int format) throws IOException {
this.w = w;
this.linePrefix = linePrefix;
this.olderFormat = olderFormat;
childVisitor = linePrefix == null ? this : new OldDiffableVisitor(w, null, olderFormat);
this.format = format;
childVisitor = linePrefix == null ? this : new OldDiffableVisitor(w, null, format);
if (linePrefix != null)
w.write(linePrefix);
}
Expand Down Expand Up @@ -144,6 +149,37 @@ public void visitLineBreak(LineBreakKind kind) throws IOException {

@Override
public Visitor<IOException> visitGrammarInformation(char[] strongsPrefixes, int[] strongs, String[] rmac, int[] sourceIndices) throws IOException {
if (rmac != null) {
boolean changed = false;
for (int i = 0; i < rmac.length; i++) {
if (rmac[i].matches(Utils.WIVU_REGEX)) {
rmac[i] = null;
changed = true;
}
}
if (changed) {
System.out.println("WARNING: Dropping WIVU morphology");
rmac = Arrays.asList(rmac).stream().filter(r -> r != null).toArray(String[]::new);
if (rmac.length == 0) {
rmac = null;
}
}
}
if (format == 2) {
int neededLength = strongs == null ? -1 : strongs.length;
if (rmac != null && rmac.length != neededLength) {
System.out.println("WARNING: Dropping RMAC for oldest format as it does not match Strongs length");
rmac = null;
}
if (sourceIndices != null && sourceIndices.length != neededLength) {
System.out.println("WARNING: Dropping Source Indices for oldest format as they does not match Strongs length");
sourceIndices = null;
}
}
if (strongs == null && rmac == null && sourceIndices == null) {
System.out.println("WARNING: Dropping grammar tag without attributes");
return visitExtraAttribute(ExtraAttributePriority.KEEP_CONTENT, "olddiffable", "grammar", "empty");
}
checkLine();
w.write("<grammar strong=\"");
if (strongs != null) {
Expand All @@ -154,7 +190,7 @@ public Visitor<IOException> visitGrammarInformation(char[] strongsPrefixes, int[
}
}
if (strongsPrefixes != null) {
if (olderFormat) {
if (format < 8) {
System.out.println("WARNING: Dropping strongs prefixes for older format");
} else {
w.write("\" strongpfx=\"");
Expand Down

0 comments on commit eee21c1

Please sign in to comment.