Skip to content

Commit

Permalink
Merge pull request #283 from traceon/backticks-ident-space-fix
Browse files (browse the repository at this point in the history)
Fix for IDENT token parsing when substituting escape sequences
  • Loading branch information
Enmk authored Apr 14, 2020
2 parents f0fa5dc + f08d34f commit 4ed271e
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 21 deletions.
60 changes: 39 additions & 21 deletions driver/escaping/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,41 +164,59 @@ Token Lexer::NextToken() {
has_slash = false;
}

return Token {Token::INVALID, StringView(st, cur_ - st)};
return Token {Token::INVALID, StringView(st, cur_)};
}

default: {
const char * st = cur_;

if (*cur_ == '`') {
bool inside_quotes = true;
for (++cur_; cur_ < end_; ++cur_) {
if (isalpha(*cur_) || *cur_ == '_' || *cur_ == '`') {
bool has_dot = false;
bool has_backtick = false;

while (cur_ < end_) {
if (*cur_ == '`') {
inside_quotes = !inside_quotes;
if (cur_ < end_ && *(cur_ + 1) == '.') {
++cur_;
continue;
} else if (!inside_quotes)
return Token {Token::IDENT, StringView(st, ++cur_)};
if (cur_ < end_)
++cur_;
has_backtick = true;
bool found_closing_backtick = false;

for (++cur_; cur_ < end_; ++cur_) {
if (*cur_ == '`') {
found_closing_backtick = true;
++cur_;
break;
}
}

if (!found_closing_backtick) {
return Token {Token::INVALID, StringView(st, cur_)};
}
}
else if (isalpha(*cur_) || *cur_ == '_') {
for (++cur_; cur_ < end_; ++cur_) {
if (!isalpha(*cur_) && !isdigit(*cur_) && *cur_ != '_') {
break;
}
}
}
if (!isalpha(*cur_) && !isdigit(*cur_) && *cur_ != '_' && *cur_ != '.') {
else {
return Token {Token::INVALID, StringView(st, cur_)};
}
}

break;
}

if (isalpha(*cur_) || *cur_ == '_') {
for (++cur_; cur_ < end_; ++cur_) {
if (!isalpha(*cur_) && !isdigit(*cur_) && *cur_ != '_' && *cur_ != '.') {
if (cur_ < end_ && *cur_ == '.') {
has_dot = true;
++cur_;
}
else {
break;
}
}

return Token {LookupIdent(to_upper(StringView(st, cur_))), StringView(st, cur_)};
if (has_dot || has_backtick) {
return Token {Token::IDENT, StringView(st, cur_)};
}
else {
return Token {LookupIdent(to_upper(StringView(st, cur_))), StringView(st, cur_)};
}
}

if (isdigit(*cur_) || *cur_ == '.' || *cur_ == '-') {
Expand Down
69 changes: 69 additions & 0 deletions driver/test/escape_sequences_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,71 @@

#include <gtest/gtest.h>

// A bare, unquoted identifier used as the CONVERT argument: the escape
// sequence is expected to be rewritten to toInt64(abc).
TEST(EscapeSequencesCase, ParseIdent1) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(abc, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM(toInt64(abc))");
}

// A backtick-quoted identifier used as the CONVERT argument: the backticks
// are expected to be carried over verbatim into the toInt64(...) call.
TEST(EscapeSequencesCase, ParseIdent2) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(`abc`, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM(toInt64(`abc`))");
}

// A backtick-quoted identifier containing a leading digit, spaces and '$':
// the whole quoted text is expected to survive as a single argument.
TEST(EscapeSequencesCase, ParseIdent3) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(`0 a b $ c`, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM(toInt64(`0 a b $ c`))");
}

// Dotted identifier: unquoted part followed by a backtick-quoted part
// (abc.`...`). The full dotted name is expected to be kept intact.
TEST(EscapeSequencesCase, ParseIdent4) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(abc.`0 a b $ c`, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM(toInt64(abc.`0 a b $ c`))");
}

// Dotted identifier: backtick-quoted part followed by an unquoted part
// (`...`.abc). The full dotted name is expected to be kept intact.
TEST(EscapeSequencesCase, ParseIdent5) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(`0 a b $ c`.abc, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM(toInt64(`0 a b $ c`.abc))");
}

// Three-part dotted identifier with the quoted part in the middle
// (abc.`...`.abc). The full dotted name is expected to be kept intact.
TEST(EscapeSequencesCase, ParseIdent6) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(abc.`0 a b $ c`.abc, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM(toInt64(abc.`0 a b $ c`.abc))");
}

// Three-part dotted identifier with quoted parts on both ends
// (`...`.abc.`...`). The full dotted name is expected to be kept intact.
TEST(EscapeSequencesCase, ParseIdent7) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(`0 a b $ c`.abc.`0 a b $ c`, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM(toInt64(`0 a b $ c`.abc.`0 a b $ c`))");
}

// Negative case: the same "0 a b $ c" text WITHOUT backticks. The query is
// expected to come back unchanged, escape sequence included.
TEST(EscapeSequencesCase, ParseIdentX1) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(0 a b $ c, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM({fn CONVERT(0 a b $ c, SQL_BIGINT)})");
}

// Negative case: unquoted identifier with a leading dot (.abc). The query is
// expected to come back unchanged, escape sequence included.
TEST(EscapeSequencesCase, ParseIdentX2) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(.abc, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM({fn CONVERT(.abc, SQL_BIGINT)})");
}

// Negative case: backtick-quoted identifier with a leading dot (.`abc`). The
// query is expected to come back unchanged, escape sequence included.
TEST(EscapeSequencesCase, ParseIdentX3) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(.`abc`, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM({fn CONVERT(.`abc`, SQL_BIGINT)})");
}

// Negative case: unquoted identifier with a trailing dot (abc.). The query is
// expected to come back unchanged, escape sequence included.
TEST(EscapeSequencesCase, ParseIdentX4) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(abc., SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM({fn CONVERT(abc., SQL_BIGINT)})");
}

// Negative case: backtick-quoted identifier with a trailing dot (`abc`.). The
// query is expected to come back unchanged, escape sequence included.
TEST(EscapeSequencesCase, ParseIdentX5) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(`abc`., SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM({fn CONVERT(`abc`., SQL_BIGINT)})");
}

// Negative case: two consecutive dots between identifier parts (abc..abc).
// The query is expected to come back unchanged, escape sequence included.
TEST(EscapeSequencesCase, ParseIdentX6) {
    const auto rewritten = replaceEscapeSequences("SELECT SUM({fn CONVERT(abc..abc, SQL_BIGINT)})");
    ASSERT_EQ(rewritten, "SELECT SUM({fn CONVERT(abc..abc, SQL_BIGINT)})");
}

// CONVERT of an integer literal to SQL_BIGINT is expected to become
// toInt64(1).
TEST(EscapeSequencesCase, ParseConvert1) {
    const auto rewritten = replaceEscapeSequences("SELECT {fn CONVERT(1, SQL_BIGINT)}");
    ASSERT_EQ(rewritten, "SELECT toInt64(1)");
}
Expand Down Expand Up @@ -52,6 +117,10 @@ TEST(EscapeSequencesCase, ParseRound) {
ASSERT_EQ(replaceEscapeSequences("SELECT {fn ROUND(1.1 + 2.4, 1)}"), "SELECT round(1.1 + 2.4, 1)");
}

// {fn FLOOR(...)} is expected to become floor(...) with the argument list
// (an arithmetic expression plus a second argument) passed through as-is.
TEST(EscapeSequencesCase, ParseFloor) {
    const auto rewritten = replaceEscapeSequences("SELECT {fn FLOOR(1.1 + 2.4, 1)}");
    ASSERT_EQ(rewritten, "SELECT floor(1.1 + 2.4, 1)");
}

// {fn POWER(...)} is expected to become pow(...); the first argument is a
// dotted identifier whose two parts are both backtick-quoted.
TEST(EscapeSequencesCase, ParsePower) {
    const auto rewritten = replaceEscapeSequences("SELECT {fn POWER(`f_g38d`.`hsf_thkd_wect_fxge`,2)}");
    ASSERT_EQ(rewritten, "SELECT pow(`f_g38d`.`hsf_thkd_wect_fxge`,2)");
}
Expand Down

0 comments on commit 4ed271e

Please sign in to comment.