Skip to content

Commit

Permalink
fix: Replace dollar character by appropriate HTML entity, otherwise i…
Browse files Browse the repository at this point in the history
…t's treated as special RegExp character

Refs: #42
Signed-off-by: Dzmitry Yurtsevich <dzmitry.yurtsevich@sbb.ch>
  • Loading branch information
yurtsevich-sbb committed Jun 27, 2024
1 parent 2dd641d commit 0f80249
Showing 1 changed file with 5 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ public HtmlProcessor(FileResourceProvider fileResourceProvider, LocalizationSett
}

public String processHtmlForPDF(@NotNull String html, @NotNull ExportParams exportParams, @NotNull List<String> selectedRoleEnumValues) {
// Replace all dollar characters in the HTML document before applying any regular expressions, as the dollar sign has a special meaning there
html = html.replaceAll("\\$", "&dollar;");

html = removePd4mlTags(html);
html = html.replace("/ria/images/enums/", "/icons/default/enums/");
html = html.replace("<p><br></p>", "<br/>");
Expand Down Expand Up @@ -559,13 +562,13 @@ private String removePd4mlTags(@NotNull String html) {
@SuppressWarnings({"java:S5869", "java:S6019"})
String properTableHeads(@NotNull String html) {
// Searches for all subsequent table rows (<tr>-tags) inside <tbody> which contain <th>-tags
// followed by a row which doesn't contain <th>.
// followed by a row which doesn't contain <th> (or closing </tbody> tag).
// There are 2 groups in this regexp, first one is unnamed, containing <tbody> and <tr>-tags containing <th>-tags,
// second one is named ("header") and contains those <tr>-tags which include <th>-tags. The regexp ends
// with a positive lookahead "(?=(<tr|</tbody))" which doesn't take part in replacement.
// The sense in this regexp is to find <tr>-tags containing <th>-tags and move it from <tbody> into <thead>,
// for table headers to repeat on each page.
Pattern pattern = Pattern.compile("(<tbody>[^<]*(?<header><tr>[^<]*<th[\\s|\\S]*?))(?=<tr)");
Pattern pattern = Pattern.compile("(<tbody>[^<]*(?<header><tr>[^<]*<th[\\s|\\S]*?))(?=(<tr|</tbody))");
Matcher matcher = pattern.matcher(html);

StringBuilder buf = new StringBuilder();
Expand Down

0 comments on commit 0f80249

Please sign in to comment.