From 3300d1591c513ed95191d6f8cc16efc21ef737f3 Mon Sep 17 00:00:00 2001
From: Fabian Meumertzheim <fabian@meumertzhe.im>
Date: Thu, 19 Sep 2024 21:57:35 +0200
Subject: [PATCH] [7.4.0] Allow unquoted canonical repository names with
 `query` (#23675)

Unquoted `query` words that start with `@@` are no longer broken on `+`,
which allows the usage of unquoted canonical labels in expressions. In
the very rare case of a shorthand form of a canonical label referring to
a WORKSPACE repo (which contains no `+`), the `+` can be separated by a
space to get the original behavior (e.g. `@@foo+bar` no longer means
`@@foo//:foo + //bar:bar`, but `@@foo +bar` does).

Closes #23600.

PiperOrigin-RevId: 673763980
Change-Id: Ie5509647b4e04e2f72b908d609781c79cf7b06d4

Fixes #23601
---
 site/en/query/language.md                     |  4 ++-
 .../build/lib/query2/engine/Lexer.java        | 25 ++++++++++++++-----
 .../build/lib/query2/engine/LexerTest.java    | 24 ++++++++++++++++++
 3 files changed, 46 insertions(+), 7 deletions(-)
diff --git a/site/en/query/language.md b/site/en/query/language.md
index 55c3fb2b4bdc55..d015d36da6b557 100644
--- a/site/en/query/language.md
+++ b/site/en/query/language.md
@@ -86,7 +86,9 @@ tokens:
   hyphen, underscore, colon, dollar sign, tilde, left square brace, right square
   brace). However, unquoted words may not start with a hyphen `-` or asterisk `*`
   even though relative [target names](/concepts/labels#target-names) may start
-  with those characters.
+  with those characters. As a special rule meant to simplify the handling of
+  labels referring to external repositories, unquoted words that start with
+  `@@` may contain `+` characters.
 
   Unquoted words also may not include the characters plus sign `+` or equals
   sign `=`, even though those characters are permitted in target names. When
diff --git a/src/main/java/com/google/devtools/build/lib/query2/engine/Lexer.java b/src/main/java/com/google/devtools/build/lib/query2/engine/Lexer.java
index 279258273259e4..3fa3e24de3580d 100644
--- a/src/main/java/com/google/devtools/build/lib/query2/engine/Lexer.java
+++ b/src/main/java/com/google/devtools/build/lib/query2/engine/Lexer.java
@@ -168,8 +168,10 @@ private TokenKind getTokenKindForWord(String word) {
     return kind == null ? TokenKind.WORD : kind;
   }
 
-  private String scanWord() {
+  private String scanWord(char firstChar) {
     int oldPos = pos - 1;
+    boolean startsWithDoubleAt =
+        firstChar == '@' && pos < input.length() && input.charAt(pos) == '@';
     while (pos < input.length()) {
       switch (input.charAt(pos)) {
         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
@@ -192,6 +194,17 @@ private String scanWord() {
         case ']':
           pos++;
           break;
+        case '+':
+          if (startsWithDoubleAt) {
+            // Allow unquoted canonical labels such as
+            // @@rules_jvm_external++maven+maven//:bar, but still parse @foo+@bar as two separate
+            // labels (here @foo refers to the @foo//:foo target).
+            // If @@foo+bar is intended to mean @@foo + bar, it can be written as such with spaces.
+            pos++;
+          } else {
+            return bufferSlice(oldPos, pos);
+          }
+          break;
        default:
           return bufferSlice(oldPos, pos);
       }
@@ -202,13 +215,13 @@ private String scanWord() {
   /**
    * Scans a word or keyword.
    *
-   * ON ENTRY: 'pos' is 1 + the index of the first char in the word.
-   * ON EXIT: 'pos' is 1 + the index of the last char in the word.
+   * <p>ON ENTRY: 'pos' is 1 + the index of the first char in the word. ON EXIT: 'pos' is 1 + the
+   * index of the last char in the word.
    *
    * @return the word or keyword token.
    */
-  private Token wordOrKeyword() {
-    String word = scanWord();
+  private Token wordOrKeyword(char firstChar) {
+    String word = scanWord(firstChar);
     TokenKind kind = getTokenKindForWord(word);
     return kind == TokenKind.WORD ? new Token(word) : new Token(kind);
   }
@@ -260,7 +273,7 @@ private void tokenize() throws QuerySyntaxException {
         break;
       }
       default: {
-        addToken(wordOrKeyword());
+            addToken(wordOrKeyword(c));
         break;
       } // default
       } // switch
diff --git a/src/test/java/com/google/devtools/build/lib/query2/engine/LexerTest.java b/src/test/java/com/google/devtools/build/lib/query2/engine/LexerTest.java
index 604a1ac6f2da26..3988e3659c8073 100644
--- a/src/test/java/com/google/devtools/build/lib/query2/engine/LexerTest.java
+++ b/src/test/java/com/google/devtools/build/lib/query2/engine/LexerTest.java
@@ -141,4 +141,28 @@ public void testOperatorWithUnquotedExprWithSpecialCharacters() throws QuerySynt
     assertThat(tokens[6].kind).isEqualTo(Lexer.TokenKind.WORD);
     assertThat(tokens[7].kind).isEqualTo(Lexer.TokenKind.RPAREN);
   }
+
+  @Test
+  public void testUnquotedCanonicalLabels() throws QuerySyntaxException {
+    Lexer.Token[] tokens =
+        scan("somepath(@foo+@bar+//baz+@@foo +bar,  @@rules_jvm_external++maven+maven//:bar)");
+    assertThat(asString(tokens))
+        .isEqualTo(
+            "somepath ( @foo + @bar + //baz + @@foo + bar , @@rules_jvm_external++maven+maven//:bar"
+                + " ) EOF");
+    assertThat(tokens[0].kind).isEqualTo(Lexer.TokenKind.WORD);
+    assertThat(tokens[1].kind).isEqualTo(Lexer.TokenKind.LPAREN);
+    assertThat(tokens[2].kind).isEqualTo(Lexer.TokenKind.WORD);
+    assertThat(tokens[3].kind).isEqualTo(Lexer.TokenKind.PLUS);
+    assertThat(tokens[4].kind).isEqualTo(Lexer.TokenKind.WORD);
+    assertThat(tokens[5].kind).isEqualTo(Lexer.TokenKind.PLUS);
+    assertThat(tokens[6].kind).isEqualTo(Lexer.TokenKind.WORD);
+    assertThat(tokens[7].kind).isEqualTo(Lexer.TokenKind.PLUS);
+    assertThat(tokens[8].kind).isEqualTo(Lexer.TokenKind.WORD);
+    assertThat(tokens[9].kind).isEqualTo(Lexer.TokenKind.PLUS);
+    assertThat(tokens[10].kind).isEqualTo(Lexer.TokenKind.WORD);
+    assertThat(tokens[11].kind).isEqualTo(Lexer.TokenKind.COMMA);
+    assertThat(tokens[12].kind).isEqualTo(Lexer.TokenKind.WORD);
+    assertThat(tokens[13].kind).isEqualTo(Lexer.TokenKind.RPAREN);
+  }
 }