Skip to content

Commit

Permalink
Handle case-insensitive document declaration in the XML parser
Browse files Browse the repository at this point in the history
Currently we are strict about the document declaration for files that are parsed by the XML parser: the document declaration must be in upper case. Going forward, support document declarations regardless of their case.

We want to do this to allow the XML parser to parse HTML files, which aren't as strict in their formatting.
  • Loading branch information
lkerford committed Oct 10, 2024
1 parent fb0c122 commit c2039e0
Show file tree
Hide file tree
Showing 11 changed files with 245 additions and 209 deletions.
2 changes: 1 addition & 1 deletion rewrite-xml/src/main/antlr/XMLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ DTD_CLOSE : '>' -> popMode ;
DTD_SUBSET_OPEN : '[' -> pushMode(INSIDE_DTD_SUBSET) ;
DTD_S : S -> skip ;

DOCTYPE : 'DOCTYPE' ;
DOCTYPE options { caseInsensitive = true; } : 'DOCTYPE' ;

DTD_NAME : Name -> type(Name) ;
DTD_STRING : STRING -> type(STRING) ;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ public Xml.DocTypeDecl visitDoctypedecl(XMLParser.DoctypedeclContext ctx) {
prefix,
Markers.EMPTY,
name,
ctx.DOCTYPE().getText(),
externalId,
internalSubset == null ? emptyList() : internalSubset,
externalSubsets,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ public Xml visitCharData(Xml.CharData charData, PrintOutputCapture<P> p) {
@Override
public Xml visitDocTypeDecl(Xml.DocTypeDecl docTypeDecl, PrintOutputCapture<P> p) {
beforeSyntax(docTypeDecl, p);
p.append("<!DOCTYPE");
p.append("<!" + docTypeDecl.getDocumentDeclaration());
visit(docTypeDecl.getName(), p);
visit(docTypeDecl.getExternalId(), p);
visit(docTypeDecl.getInternalSubset(), p);
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ STRING=35
Name=36
'?'=8
'<'=10
'DOCTYPE'=17
'/>'=29
'%@'=31
'%'=32
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ null
null
null
null
'DOCTYPE'
null
null
null
null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,8 @@ private static String[] makeRuleNames() {
private static String[] makeLiteralNames() {
return new String[] {
null, null, null, null, null, null, null, null, "'?'", null, "'<'", null,
null, null, null, null, null, "'DOCTYPE'", null, null, null, null, null,
null, null, null, null, null, null, "'/>'", null, "'%@'", "'%'", "'/'",
"'='"
null, null, null, null, null, null, null, null, null, null, null, null,
null, null, null, null, null, "'/>'", null, "'%@'", "'%'", "'/'", "'='"
};
}
private static final String[] _LITERAL_NAMES = makeLiteralNames();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ STRING=35
Name=36
'?'=8
'<'=10
'DOCTYPE'=17
'/>'=29
'%@'=31
'%'=32
Expand Down
20 changes: 19 additions & 1 deletion rewrite-xml/src/main/java/org/openrewrite/xml/tree/Xml.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package org.openrewrite.xml.tree;

import com.fasterxml.jackson.annotation.JsonCreator;
import lombok.*;
import lombok.experimental.FieldDefaults;
import org.apache.commons.text.StringEscapeUtils;
Expand All @@ -32,7 +33,10 @@
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.*;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Collectors;

import static java.util.Collections.emptyList;
Expand Down Expand Up @@ -651,6 +655,7 @@ public String toString() {
@Value
@EqualsAndHashCode(callSuper = false, onlyExplicitlyIncluded = true)
@With
@AllArgsConstructor(onConstructor_ = {@JsonCreator})
class DocTypeDecl implements Xml, Misc {
@EqualsAndHashCode.Include
UUID id;
Expand All @@ -669,6 +674,7 @@ public String getPrefix() {

Markers markers;
Ident name;
String documentDeclaration;

@Nullable
Ident externalId;
Expand All @@ -683,6 +689,18 @@ public String getPrefix() {
*/
String beforeTagDelimiterPrefix;

/**
 * Overload that takes no {@code documentDeclaration} argument. It delegates to the
 * full constructor and passes the upper-case literal {@code "DOCTYPE"} for that field.
 *
 * NOTE(review): presumably kept so callers written before the documentDeclaration
 * field existed continue to compile — confirm against call sites.
 */
public DocTypeDecl(UUID id, String prefix, Markers markers, Ident name, Ident externalId, List<Ident> internalSubset, ExternalSubsets externalSubsets, String beforeTagDelimiterPrefix) {
this(id,
prefix,
markers,
name,
// Default keyword text used when the caller does not supply one.
"DOCTYPE",
externalId,
internalSubset,
externalSubsets,
beforeTagDelimiterPrefix);
}

@Value
@EqualsAndHashCode(callSuper = false, onlyExplicitlyIncluded = true)
@With
Expand Down
17 changes: 17 additions & 0 deletions rewrite-xml/src/test/java/org/openrewrite/xml/XmlParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,23 @@ void jsp() {
);
}

// Runs an HTML document whose declaration is written in lower case
// (<!doctype html>) through rewriteRun as an XML source.
// NOTE(review): presumably rewriteRun with only a "before" source checks that the
// input parses and prints back unchanged — confirm against the test harness.
@Test
void lowerCaseDocType() {
rewriteRun(
xml(
//language=html
"""
<!doctype html>
<html lang="en">
<body>
<h2><s:property value="messageStore.message" /></h2>
</body>
</html>
"""
)
);
}

@Issue("https://github.com/openrewrite/rewrite/issues/2189")
@Test
void specialCharacters() {
Expand Down

0 comments on commit c2039e0

Please sign in to comment.