Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix all XPathMatcherTest TODO's + handle nested elements with the same name #4532

Merged
merged 3 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 47 additions & 30 deletions rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,14 @@ public class XPathMatcher {
private final boolean startsWithSlash;
private final boolean startsWithDoubleSlash;
private final String[] parts;
private final long tagMatchingParts;

public XPathMatcher(String expression) {
this.expression = expression;
startsWithSlash = expression.startsWith("/");
startsWithDoubleSlash = expression.startsWith("//");
parts = splitOnXPathSeparator(expression.substring(startsWithDoubleSlash ? 2 : startsWithSlash ? 1 : 0));
tagMatchingParts = Arrays.stream(parts).filter(part -> !part.isEmpty() && !part.startsWith("@")).count();
}

private String[] splitOnXPathSeparator(String input) {
Expand Down Expand Up @@ -92,18 +94,8 @@ public boolean matches(Cursor cursor) {
if (index < 0) {
return false;
}
if (part.startsWith("@")) { // is attribute selector
partWithCondition = part;
tagForCondition = i > 0 ? path.get(i - 1) : path.get(i);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using i straight up in path.get() was completely wrong since path is reversed while parts is not – pathIndex should always be used.

Note that if a part starts with @, it MUST be the last part – I wanted to check it here, but then I realized all ifs actually result in the same code (and conditions matching sub-elements were not handled…).

} else { // is element selector
if (part.charAt(index + 1) == '@') { // is Attribute condition
partWithCondition = part;
tagForCondition = path.get(i);
} else if (part.contains("(") && part.contains(")")) { // is function condition
partWithCondition = part;
tagForCondition = path.get(i);
}
}
partWithCondition = part;
tagForCondition = path.get(pathIndex);
} else if (i < path.size() && i > 0 && parts[i - 1].endsWith("]")) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t understand the purpose of this if. All tests still pass if I remove it, but I’m not sure whether I’m missing an edge case or it’s really unneeded.

String partBefore = parts[i - 1];
int index = partBefore.indexOf("[");
Expand All @@ -117,6 +109,7 @@ public boolean matches(Cursor cursor) {
}
} else if (part.endsWith(")")) { // is xpath method
// TODO: implement other xpath methods
throw new UnsupportedOperationException("XPath methods are not supported");
}

String partName;
Expand Down Expand Up @@ -164,36 +157,42 @@ public boolean matches(Cursor cursor) {
}
}

return startsWithSlash || path.size() - pathIndex <= 1;
// we have matched the whole XPath, and it does not start with the root
return true;
} else {
Collections.reverse(path);

// Deal with the two forward slashes in the expression; works, but I'm not proud of it.
if (expression.contains("//") && !expression.contains("://") && Arrays.stream(parts).anyMatch(StringUtils::isBlank)) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing :// does not seem to break anything, but fixes XPaths with both // and a namespace URL condition.

I don’t think the first condition is still relevant either; however, the isBlank check is inconsistent with the indexOf("") on the next line. I think it should use isEmpty(), as I did to compute tagMatchingParts.

Note that the first condition is insufficient on its own because of URLs again – or actually any value that could contain double slashes in a condition.

if (expression.contains("//") && Arrays.stream(parts).anyMatch(StringUtils::isBlank)) {
int blankPartIndex = Arrays.asList(parts).indexOf("");
int doubleSlashIndex = expression.indexOf("//");

if (path.size() > blankPartIndex && path.size() >= parts.length - 1) {
String newExpression;
if (Objects.equals(path.get(blankPartIndex).getName(), parts[blankPartIndex + 1])) {
newExpression = String.format(
"%s/%s",
expression.substring(0, doubleSlashIndex),
expression.substring(doubleSlashIndex + 2)
);
} else {
newExpression = String.format(
"%s/%s/%s",
expression.substring(0, doubleSlashIndex),
path.get(blankPartIndex).getName(),
expression.substring(doubleSlashIndex + 2)
);
if (path.size() > blankPartIndex && path.size() >= tagMatchingParts) {
Xml.Tag blankPartTag = path.get(blankPartIndex);
String part = parts[blankPartIndex + 1];
Matcher matcher = ELEMENT_WITH_CONDITION_PATTERN.matcher(part);
if (matcher.matches() ?
matchesElementWithConditionFunction(matcher, blankPartTag, cursor) != null :
Objects.equals(blankPartTag.getName(), part)) {
if (matchesWithoutDoubleSlashesAt(cursor, doubleSlashIndex)) {
return true;
}
// fall-through: maybe we can skip this element and match further down
}
String newExpression = String.format(
// the // here allows to skip several levels of nested elements
"%s/%s//%s",
expression.substring(0, doubleSlashIndex),
blankPartTag.getName(),
expression.substring(doubleSlashIndex + 2)
);
return new XPathMatcher(newExpression).matches(cursor);
} else if (path.size() == tagMatchingParts) {
return matchesWithoutDoubleSlashesAt(cursor, doubleSlashIndex);
}
}

if (parts.length > path.size() + 1) {
if (tagMatchingParts > path.size()) {
return false;
}

Expand Down Expand Up @@ -235,6 +234,24 @@ public boolean matches(Cursor cursor) {
}
}

/**
 * Matches the cursor against a copy of this expression in which the two characters
 * starting at {@code doubleSlashIndex} are replaced by a single {@code /}.
 *
 * @param cursor           the cursor to match
 * @param doubleSlashIndex index in {@code expression} of the two-character span to collapse
 * @return the result of matching the rewritten expression with a new {@link XPathMatcher}
 */
private boolean matchesWithoutDoubleSlashesAt(Cursor cursor, int doubleSlashIndex) {
    String beforeSeparator = expression.substring(0, doubleSlashIndex);
    String afterSeparator = expression.substring(doubleSlashIndex + 2);
    XPathMatcher collapsed = new XPathMatcher(beforeSeparator + "/" + afterSeparator);
    return collapsed.matches(cursor);
}

/**
* Checks that the given {@code tag} matches the XPath part represented by {@code matcher}.
*
* @param matcher an XPath part matcher for {@link #ELEMENT_WITH_CONDITION_PATTERN}
* @param tag a tag to match
* @param cursor the cursor we are trying to match
* @return the element name specified before the condition of the part
* (either {@code tag.getName()}, {@code "*"} or an attribute name) or {@code null} if the tag did not match
*/
private @Nullable String matchesElementWithConditionFunction(Matcher matcher, Xml.Tag tag, Cursor cursor) {
boolean isAttributeElement = matcher.group(1) != null;
String element = matcher.group(2);
Expand Down
87 changes: 73 additions & 14 deletions rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,13 @@ void matchAbsolute() {
assertThat(match("/dependencies/dependency", xmlDoc)).isTrue();
assertThat(match("/dependencies/*/artifactId", xmlDoc)).isTrue();
assertThat(match("/dependencies/*", xmlDoc)).isTrue();
assertThat(match("/dependencies//dependency", xmlDoc)).isTrue();
assertThat(match("/dependencies//dependency//groupId", xmlDoc)).isTrue();

// negative matches
assertThat(match("/dependencies/dne", xmlDoc)).isFalse();
assertThat(match("/dependencies//dne", xmlDoc)).isFalse();
assertThat(match("/dependencies//dependency//dne", xmlDoc)).isFalse();
}

@Test
Expand All @@ -127,6 +133,17 @@ void matchAbsoluteAttribute() {
assertThat(match("/dependencies/dependency/artifactId/@scope", xmlDoc)).isTrue();
assertThat(match("/dependencies/dependency/artifactId/@*", xmlDoc)).isTrue();
assertThat(match("/dependencies/dependency/groupId/@*", xmlDoc)).isFalse();
assertThat(match("/dependencies//dependency//@scope", xmlDoc)).isTrue();
assertThat(match("/dependencies//dependency//artifactId//@scope", xmlDoc)).isTrue();
assertThat(match("/dependencies//dependency//@*", xmlDoc)).isTrue();
assertThat(match("/dependencies//dependency//artifactId//@*", xmlDoc)).isTrue();

// negative matches
assertThat(match("/dependencies/dependency/artifactId/@dne", xmlDoc)).isFalse();
assertThat(match("/dependencies/dependency/artifactId/@dne", xmlDoc)).isFalse();
assertThat(match("/dependencies//dependency//@dne", xmlDoc)).isFalse();
assertThat(match("/dependencies//dependency//artifactId//@dne", xmlDoc)).isFalse();

}

@Test
Expand All @@ -136,8 +153,6 @@ void matchRelative() {
assertThat(match("//dependency", xmlDoc)).isTrue();
assertThat(match("dependency/*", xmlDoc)).isTrue();
assertThat(match("dne", xmlDoc)).isFalse();
assertThat(match("/dependencies//dependency", xmlDoc)).isTrue();
assertThat(match("/dependencies//dependency/groupId", xmlDoc)).isTrue();
}

@Test
Expand All @@ -147,6 +162,51 @@ void matchRelativeAttribute() {
assertThat(match("//dependency/artifactId/@scope", xmlDoc)).isTrue();
}

/**
 * Exercises XPath matching against a document in which an {@code <element>} tag is nested
 * inside another {@code <element>} tag, for expressions with no leading slash, with a
 * leading {@code //}, and with a {@code //} in the middle of an absolute path.
 */
@Test
void matchNestedElementsWithSameName() {
// Fixture: <root> contains an outer <element foo="bar"> whose children are an inner
// <element foo="bar"> (holding <test>) and a self-closing <element qux="quux"/>.
var xml = new XmlParser().parse(
"""
<?xml version="1.0" encoding="UTF-8"?>
<root>
<element foo="bar">
<element foo="bar">
<test>auie</test>
</element>
<element qux="quux" />
</element>
</root>
"""
).toList().get(0);

// NOTE(review): match(...) is a helper not shown in this hunk; presumably it reports
// whether the expression matches anywhere in the document — confirm against the test class.

// no / at start
assertThat(match("element/test", xml)).isTrue();
assertThat(match("element[@foo='bar']/test", xml)).isTrue();
// no element in the fixture carries foo='baz'
assertThat(match("element[@foo='baz']/test", xml)).isFalse();
// @qux is present only on the self-closing nested <element>
assertThat(match("element/@qux", xml)).isTrue();
// "dne" is a tag name that does not exist in the fixture
assertThat(match("dne[@foo='bar']/test", xml)).isFalse();

// // at start
assertThat(match("//element/test", xml)).isTrue();
assertThat(match("//element[@foo='bar']/test", xml)).isTrue();
assertThat(match("//element[@foo='baz']/test", xml)).isFalse();
assertThat(match("//element/@qux", xml)).isTrue();
assertThat(match("//dne[@foo='bar']/test", xml)).isFalse();

// TODO // in the middle without / (or with //) at start (not currently supported)
// assertThat(match("root//element/test", xml)).isTrue();
// assertThat(match("root//element[@foo='bar']/test", xml)).isTrue();
// assertThat(match("root//element[@foo='baz']/test", xml)).isFalse();
// assertThat(match("root//element/@qux", xml)).isTrue();
// assertThat(match("root//dne[@foo='bar']/test", xml)).isFalse();

// // in the middle with / at start
// Same five expectations as the groups above, now anchored at /root.
assertThat(match("/root//element/test", xml)).isTrue();
assertThat(match("/root//element[@foo='bar']/test", xml)).isTrue();
assertThat(match("/root//element[@foo='baz']/test", xml)).isFalse();
assertThat(match("/root//element/@qux", xml)).isTrue();
assertThat(match("/root//dne[@foo='bar']/test", xml)).isFalse();
}

@Test
void matchPom() {
assertThat(match("/project/build/plugins/plugin/configuration/source",
Expand All @@ -163,8 +223,10 @@ void matchPom() {
pomXml1)).isTrue();
assertThat(match("/project/build//plugins/plugin/configuration/source",
pomXml2)).isTrue();
// assertThat(match("/project/build//plugin/configuration/source", pomXml2)).isTrue(); // TODO: seems parser only handles // up to 1 level
// assertThat(match("/project//configuration/source", pomXml2)).isTrue(); // TODO: was already failing previously
// skip 2+ levels with //
assertThat(match("/project/build//plugin/configuration/source", pomXml2)).isTrue();
assertThat(match("/project//configuration/source", pomXml2)).isTrue();
assertThat(match("/project//plugin//source", pomXml2)).isTrue();
}

private final SourceFile attributeXml = new XmlParser().parse(
Expand Down Expand Up @@ -203,7 +265,7 @@ void wildcards() {
// relative xpath with wildcard element
assertThat(match("//*[@foo='bar']", attributeXml)).isTrue();
assertThat(match("//*[@foo='baz']", attributeXml)).isFalse();
// assertThat(match("//*[foo='bar']", attributeXml)).isFalse(); // TODO: fix relative xpath with condition
assertThat(match("//*[foo='bar']", attributeXml)).isFalse();
assertThat(match("//*[foo='baz']", attributeXml)).isTrue();
}

Expand All @@ -220,7 +282,7 @@ void relativePathsWithConditions() {
</root>
"""
).toList().get(0);
// assertThat(match("//element1[foo='bar']", xml)).isFalse(); // TODO: fix - was already failing before * changes
assertThat(match("//element1[foo='bar']", xml)).isFalse();
assertThat(match("//element1[foo='baz']", xml)).isTrue();
assertThat(match("//element1[@foo='bar']", xml)).isTrue();
assertThat(match("//element1[foo='baz']/test", xml)).isTrue();
Expand Down Expand Up @@ -275,8 +337,7 @@ void matchLocalNameFunctionCondition() {
assertThat(match("//ns2:element2[local-name()='element2']", namespacedXml)).isTrue();
assertThat(match("//dne[local-name()='dne']", namespacedXml)).isFalse();

// TODO: fix mid-path // with condition
// assertThat(match("/root//element1[local-name()='element1']", namespacedXml)).isTrue();
assertThat(match("/root//element1[local-name()='element1']", namespacedXml)).isTrue();
}

@Test
Expand All @@ -300,9 +361,8 @@ void matchAttributeCondition() {
assertThat(match("//element1[@*='dne']", namespacedXml)).isFalse();
assertThat(match("/root/element1[@*='content3']", namespacedXml)).isTrue();
assertThat(match("/root/element1[@*='dne']", namespacedXml)).isFalse();
// TODO: fix mid-path // match with condition
// assertThat(match("/root//element1[@*='content3']", namespacedXml)).isTrue();
// assertThat(match("/root//element1[@*='dne']", namespacedXml)).isFalse();
assertThat(match("/root//element1[@*='content3']", namespacedXml)).isTrue();
assertThat(match("/root//element1[@*='dne']", namespacedXml)).isFalse();
}

@Test
Expand All @@ -327,9 +387,8 @@ void matchAttributeElement() {
assertThat(match("/root/parent/element3/@ns3:attr[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue();
assertThat(match("//element3/@ns3:attr[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue();

// TODO: fix mid-path // match with attribute element
// assertThat(match("/root//element1/@*", namespacedXml)).isTrue();
// assertThat(match("/root//element1/@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue();
assertThat(match("/root//element1/@*", namespacedXml)).isTrue();
assertThat(match("/root//element1/@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue();
}

@Test
Expand Down