Skip to content

Commit

Permalink
Support one file per chapter for USFM/USX
Browse files Browse the repository at this point in the history
  • Loading branch information
schierlm committed Nov 1, 2024
1 parent 5bfd084 commit f7a9ea0
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import biblemulticonverter.data.Bible;
import biblemulticonverter.data.Book;
Expand Down Expand Up @@ -498,17 +501,35 @@ protected List<ParatextBook> doImportAllBooks(File inputFile) throws Exception {
System.setProperty(name, props.getProperty(name));
}
}
Map<ParatextID, List<ParatextBook>> seenBooks = new EnumMap<>(ParatextID.class);
for (File file : inputFile.listFiles()) {
if (file.getName().equals("biblemulticonverter.properties"))
continue;
try {
ParatextBook book = doImportBook(file);
if (book != null)
if (book != null) {
seenBooks.computeIfAbsent(book.getId(), x -> new ArrayList<>()).add(book);
result.add(book);
}
} catch (Exception ex) {
throw new RuntimeException("Failed parsing " + file.getName(), ex);
}
}
for (List<ParatextBook> booksPerID : seenBooks.values()) {
if (booksPerID.size() > 1) {
Map<ParatextBook, Integer> firstChap = new HashMap<>();
for (ParatextBook book : booksPerID) {
firstChap.put(book, book.getContent().stream().filter(x -> x instanceof ChapterStart).mapToInt(x -> ((ChapterStart) x).getChapter()).min().orElse(0));
}
booksPerID.sort(Comparator.comparing(bk -> firstChap.get(bk)));
ParatextBook firstBook = booksPerID.remove(0);
for (ParatextBook nextBook : booksPerID) {
result.remove(nextBook);
firstBook.getContent().addAll(nextBook.getContent());
firstBook.getAttributes().putAll(nextBook.getAttributes());
}
}
}
Map<ParatextID, Integer> bookOrder = new EnumMap<>(ParatextID.class);
String bookOrderProperty = System.getProperty("biblemulticonverter.paratext.bookorder");
if (bookOrderProperty != null && !bookOrderProperty.isEmpty()) {
Expand All @@ -527,6 +548,7 @@ protected List<ParatextBook> doImportAllBooks(File inputFile) throws Exception {
return result;
}


/**
 * Imports a single book from one file of the input directory.
 *
 * @param inputFile
 *            the file to parse
 * @return the parsed book, or {@code null} if the file does not yield a book
 *         (the directory importer skips {@code null} results)
 * @throws Exception
 *             if the file cannot be parsed; the directory importer wraps
 *             this in a {@link RuntimeException} naming the failing file
 */
protected abstract ParatextBook doImportBook(File inputFile) throws Exception;

@Override
Expand Down Expand Up @@ -615,8 +637,54 @@ public void doExportBooks(List<ParatextBook> books, String... exportArgs) throws
String namePattern = exportArgs[1];
for (ParatextBook book : books) {
String name = namePattern.replace("#", book.getId().getNumber()).replace("*", book.getId().getIdentifier());
doExportBook(book, new File(baseDir, name));
if (name.contains("?")) {
List<ParatextBookContentPart> remainingContent = new ArrayList<>();
for (int i = 1; i < book.getContent().size(); i++) {
if (book.getContent().get(i) instanceof ChapterStart) {
List<ParatextBookContentPart> rest = book.getContent().subList(i, book.getContent().size());
remainingContent.addAll(rest);
rest.clear();
break;
}
}
exportChapterBook(book, baseDir, name);
while (!remainingContent.isEmpty()) {
ParatextBook restBook = new ParatextBook(book.getId(), book.getBibleName());
for (int i = 1; i < remainingContent.size(); i++) {
if (remainingContent.get(i) instanceof ChapterStart) {
List<ParatextBookContentPart> start = remainingContent.subList(0, i);
restBook.getContent().addAll(start);
start.clear();
break;
}
}
if (restBook.getContent().isEmpty()) {
restBook.getContent().addAll(remainingContent);
remainingContent.clear();
}
exportChapterBook(restBook, baseDir, name);
}
} else {
doExportBook(book, new File(baseDir, name));
}
}
}

/**
 * Exports a (partial) book to a file whose name is derived from a pattern
 * containing a chapter placeholder.
 *
 * The first run of consecutive {@code ?} characters in {@code name} is
 * replaced by the chapter number, left-padded with zeroes to at least the
 * length of that run. The chapter number is taken from the first content
 * part of the book if that part is a {@link ChapterStart}; otherwise 0 is
 * used. The actual writing is delegated to {@code doExportBook}.
 *
 * @param book
 *            the book (or slice of a book) to export
 * @param baseDir
 *            directory the output file is created in
 * @param name
 *            file name pattern; the caller is expected to pass a pattern
 *            that contains at least one {@code ?}
 * @throws Exception
 *             if exporting the book fails
 */
private void exportChapterBook(ParatextBook book, File baseDir, String name) throws Exception {
	// Find the first run of '?' placeholders; its length is the minimum number of digits.
	final int runStart = name.indexOf('?');
	int runEnd = runStart + 1;
	while (runEnd < name.length() && name.charAt(runEnd) == '?') {
		runEnd++;
	}
	final int minDigits = runEnd - runStart;

	// A leading ChapterStart determines the chapter number; anything else counts as chapter 0.
	int chapterNumber = 0;
	if (!book.getContent().isEmpty()) {
		Object firstPart = book.getContent().get(0);
		if (firstPart instanceof ChapterStart) {
			chapterNumber = ((ChapterStart) firstPart).getChapter();
		}
	}

	// Assemble the file name: text before the run, zero-padded number, text after the run.
	final String digits = String.valueOf(chapterNumber);
	final StringBuilder fileName = new StringBuilder(name.substring(0, runStart));
	for (int missing = minDigits - digits.length(); missing > 0; missing--) {
		fileName.append('0');
	}
	fileName.append(digits).append(name.substring(runEnd));

	doExportBook(book, new File(baseDir, fileName.toString()));
}

/**
 * Exports a single book to the given output file.
 *
 * The visible callers pass a file located in the export base directory
 * whose name was built from the user-supplied file name pattern. When the
 * pattern splits by chapter, {@code book} may hold only part of a book's
 * content.
 *
 * @param book
 *            the book (or chapter slice of a book) to write
 * @param outFile
 *            the file to write to
 * @throws Exception
 *             if the export fails
 */
protected abstract void doExportBook(ParatextBook book, File outFile) throws Exception;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ public class USFM extends AbstractParatextFormat {
"Point the importer to a directory that contains the .usfm files.",
"",
"When exporting, you need to give a file name pattern. You can use # for ",
"the book number and * for the book name."
"the book number and * for the book name. Use ? to split by chapters, for ",
"the chapter number; ?? or ??? to force leading zeroes."
};

public static final Set<String> KNOWN_CHARACTER_TAGS = new HashSet<>(Arrays.asList("f", "fe", "x", "ef"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ public class USX extends AbstractUSXFormat<ParaStyle, CharStyle> {
"Point the importer to a directory that contains the .usx version 2 files.",
"",
"When exporting, you need to give a file name pattern. You can use # for ",
"the book number and * for the book name."
"the book number and * for the book name. Use ? to split by chapters, for ",
"the chapter number; ?? or ??? to force leading zeroes."
};

private static final Set<AutoClosingFormattingKind> USX_2_AUTO_CLOSING_FORMATTING_KINDS = AutoClosingFormattingKind.allForVersion(Version.V2_2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ public class USX3 extends AbstractUSXFormat<ParaStyle, CharStyle> {
"Point the importer to a directory that contains the .usx version 3 files.",
"",
"When exporting, you need to give a file name pattern. You can use # for ",
"the book number and * for the book name."
"the book number and * for the book name. Use ? to split by chapters, for ",
"the chapter number; ?? or ??? to force leading zeroes."
};

private Map<NoteStyle, ParatextCharacterContent.FootnoteXrefKind> NOTE_STYLE_MAP = new EnumMap<>(NoteStyle.class);
Expand Down

0 comments on commit f7a9ea0

Please sign in to comment.