diff --git a/CHANGELOG.md b/CHANGELOG.md index 25dc4e7ece2..0b162187a32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ - Minor: Improve appearance of reply button. (#5491) - Minor: Introduce HTTP API for plugins. (#5383, #5492, #5494) - Minor: Support more Firefox variants for incognito link opening. (#5503) +- Minor: Links can now have prefixes and suffixes such as parentheses. (#5486) - Bugfix: Fixed tab move animation occasionally failing to start after closing a tab. (#5426) - Bugfix: If a network request errors with 200 OK, Qt's error code is now reported instead of the HTTP status. (#5378) - Bugfix: Fixed restricted users usernames not being clickable. (#5405) diff --git a/benchmarks/src/LinkParser.cpp b/benchmarks/src/LinkParser.cpp index 3a331242845..b38e15de3e6 100644 --- a/benchmarks/src/LinkParser.cpp +++ b/benchmarks/src/LinkParser.cpp @@ -5,8 +5,6 @@ #include #include -#include - using namespace chatterino; const QString INPUT = QStringLiteral( @@ -14,8 +12,9 @@ const QString INPUT = QStringLiteral( "(or 2.4.2 if its out) " "https://github.com/Chatterino/chatterino2/releases/tag/nightly-build " "AlienPls https://www.youtube.com/watch?v=ELBBiBDcWc0 " - "127.0.3 aaaa xd 256.256.256.256 AsdQwe xd 127.0.0.1 https://. https://.be " - "https://a http://a.b https://a.be ftp://xdd.com " + "127.0.3 aaaa xd 256.256.256.256 AsdQwe xd 127.0.0.1 https://. " + "*https://.be " + "https://a: http://a.b (https://a.be) ftp://xdd.com " "this is a text lol . ://foo.com //aa.de :/foo.de xd.XDDDDDD "); static void BM_LinkParsing(benchmark::State &state) @@ -24,15 +23,15 @@ static void BM_LinkParsing(benchmark::State &state) // Make sure the TLDs are loaded { - benchmark::DoNotOptimize(LinkParser("xd.com").result()); + benchmark::DoNotOptimize(linkparser::parse("xd.com")); } for (auto _ : state) { - for (auto word : words) + for (const auto &word : words) { - LinkParser parser(word); - benchmark::DoNotOptimize(parser.result()); + auto parsed = linkparser::parse(word); + benchmark::DoNotOptimize(parsed); } } } diff --git a/src/common/LinkParser.cpp b/src/common/LinkParser.cpp index d64abed31cf..ecc8fb75169 100644 --- a/src/common/LinkParser.cpp +++ b/src/common/LinkParser.cpp @@ -113,19 +113,58 @@ bool startsWithPort(QStringView string) return true; } +/// @brief Strips ignored characters off @a source +/// +/// As per https://github.github.com/gfm/#autolinks-extension-: +/// +/// '<', '*', '_', '~', and '(' are ignored at the beginning +/// '>', '?', '!', '.', ',', ':', '*', '~', and ')' are ignored at the end +/// +/// A difference to GFM is that the source isn't scanned for parentheses and '_' +/// isn't a valid suffix. +void strip(QStringView &source) +{ + while (!source.isEmpty()) + { + auto c = source.first(); + if (c == u'<' || c == u'*' || c == u'_' || c == u'~' || c == u'(') + { + source = source.mid(1); + continue; + } + break; + } + + while (!source.isEmpty()) + { + auto c = source.last(); + if (c == u'>' || c == u'?' || c == u'!' || c == u'.' || c == u',' || + c == u':' || c == u'*' || c == u'~' || c == u')') + { + source.chop(1); + continue; + } + break; + } +} + } // namespace -namespace chatterino { +namespace chatterino::linkparser { -LinkParser::LinkParser(const QString &unparsedString) +std::optional parse(const QString &source) noexcept { - ParsedLink result; + std::optional result; // This is not implemented with a regex to increase performance. - QStringView remaining(unparsedString); - QStringView protocol(remaining); + + QStringView link{source}; + strip(link); + + QStringView remaining = link; + QStringView protocol; // Check protocol for https?:// - if (remaining.startsWith(QStringLiteral("http"), Qt::CaseInsensitive) && + if (remaining.startsWith(u"http", Qt::CaseInsensitive) && remaining.length() >= 4 + 3 + 1) // 'http' + '://' + [any] { // optimistic view assuming there's a protocol (http or https) @@ -136,11 +175,11 @@ LinkParser::LinkParser(const QString &unparsedString) withProto = withProto.mid(1); } - if (withProto.startsWith(QStringLiteral("://"))) + if (withProto.startsWith(u"://")) { // there's really a protocol => consume it remaining = withProto.mid(3); - result.protocol = {protocol.begin(), remaining.begin()}; + protocol = {link.begin(), remaining.begin()}; } } @@ -161,7 +200,7 @@ LinkParser::LinkParser(const QString &unparsedString) { if (lastWasDot) // no double dots .. { - return; + return result; } lastDotPos = i; lastWasDot = true; @@ -181,7 +220,7 @@ LinkParser::LinkParser(const QString &unparsedString) if (!startsWithPort(remaining)) { - return; + return result; } break; @@ -198,23 +237,22 @@ LinkParser::LinkParser(const QString &unparsedString) if (lastWasDot || lastDotPos <= 0) { - return; + return result; } // check host/tld if ((nDots == 3 && isValidIpv4(host)) || isValidTld(host.mid(lastDotPos + 1))) { - result.host = host; - result.rest = rest; - result.source = unparsedString; - this->result_ = std::move(result); + result = Parsed{ + .protocol = protocol, + .host = host, + .rest = rest, + .link = link, + }; } -} -const std::optional &LinkParser::result() const -{ - return this->result_; + return result; } -} // namespace chatterino +} // namespace chatterino::linkparser diff --git a/src/common/LinkParser.hpp b/src/common/LinkParser.hpp index 2ef1183181c..12976b25d06 100644 --- a/src/common/LinkParser.hpp +++ b/src/common/LinkParser.hpp @@ -4,43 +4,126 @@ #include -namespace chatterino { +namespace chatterino::linkparser { -struct ParsedLink { +/// @brief Represents a parsed link +/// +/// A parsed link is represented as views over the source string for its +/// different segments. In this simplified model, a link consists of an optional +/// @a protocol, a mandatory @a host and an optional @a rest. These segments are +/// always next to eachother in the input string, however together, they don't +/// span the whole input as it could contain prefixes or suffixes. +/// +/// Prefixes and suffixes are almost identical to the ones in GitHub Flavored +/// Markdown (GFM - https://github.github.com/gfm/#autolinks-extension-). +/// The main differences are that '_' isn't a valid suffix and parentheses +/// aren't counted (e.g. "(a.com/(foo)! would result in "a.com/(foo"). +/// Matching is done case insensitive (e.g. "HTTp://a.com" would be valid). +/// +/// A @a protocol can either be empty, "http://", or "https://". +/// A @a host can either be an IPv4 address or a hostname. The hostname must end +/// in a valid top level domain. Otherwise, there are no restrictions on it. +/// The @a rest can start with an optional port followed by either a '/', '?', +/// or '#'. +/// +/// @b Example +/// +/// ```text +/// (https://wiki.chatterino.com/Help/#overview) +/// ▏▏proto ▕ host ▏ rest ▏▏ +/// ▏▏ link ▏▏ +/// ▏ source ▏ +/// ``` +struct Parsed { /// The parsed protocol of the link. Can be empty. /// + /// ```text /// https://www.forsen.tv/commands - /// ^------^ + /// ▏╌╌╌╌╌╌▕ + /// + /// www.forsen.tv/commands + /// (empty) + /// ``` QStringView protocol; /// The parsed host of the link. Can not be empty. /// + /// ```text /// https://www.forsen.tv/commands - /// ^-----------^ + /// ▏╌╌╌╌╌╌╌╌╌╌╌▕ + /// ``` QStringView host; /// The remainder of the link. Can be empty. /// + /// ```text /// https://www.forsen.tv/commands - /// ^-------^ + /// ▏╌╌╌╌╌╌╌▕ + /// + /// https://www.forsen.tv + /// (empty) + /// ``` QStringView rest; - /// The original unparsed link. + /// The matched link. Can not be empty. /// - /// https://www.forsen.tv/commands - /// ^----------------------------^ - QString source; -}; + /// ```text + /// (https://www.forsen.tv/commands) + /// ▏╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌▕ + /// ``` + QStringView link; + + /// Checks if the parsed link contains a prefix + bool hasPrefix(const QString &source) const noexcept + { + return this->link.begin() != source.begin(); + } -class LinkParser -{ -public: - explicit LinkParser(const QString &unparsedString); + /// The prefix before the parsed link inside @a source. May be empty. + /// + /// ```text + /// (https://www.forsen.tv/commands) + /// ^ + /// + /// https://www.forsen.tv/commands + /// (empty) + /// ``` + QStringView prefix(const QString &source) const noexcept + { + return {source.data(), this->link.begin()}; + } - const std::optional &result() const; + /// Checks if the parsed link contains a suffix + bool hasSuffix(const QString &source) const noexcept + { + return this->link.end() != source.end(); + } -private: - std::optional result_{}; + /// The suffix after the parsed link inside @a source. May be empty. + /// + /// ```text + /// (https://www.forsen.tv/commands) + /// ^ + /// + /// https://www.forsen.tv/commands + /// (empty) + /// ``` + QStringView suffix(const QString &source) const noexcept + { + return { + this->link.begin() + this->link.size(), + source.data() + source.length(), + }; + } }; -} // namespace chatterino +/// @brief Parses a link from @a source into its segments +/// +/// If no link is contained in @a source, `std::nullopt` will be returned. +/// The returned value is valid as long as @a source exists, as it contains +/// views into @a source. +/// +/// For the accepted links, see Parsed. +std::optional parse(const QString &source) noexcept; + +} // namespace chatterino::linkparser diff --git a/src/controllers/commands/builtin/twitch/SendWhisper.cpp b/src/controllers/commands/builtin/twitch/SendWhisper.cpp index aa30e2f152a..483f72853a4 100644 --- a/src/controllers/commands/builtin/twitch/SendWhisper.cpp +++ b/src/controllers/commands/builtin/twitch/SendWhisper.cpp @@ -152,10 +152,10 @@ bool appendWhisperMessageWordsLocally(const QStringList &words) void operator()(const QString &string, MessageBuilder &b) const { - LinkParser parser(string); - if (parser.result()) + auto link = linkparser::parse(string); + if (link) { - b.addLink(*parser.result()); + b.addLink(*link, string); } else { diff --git a/src/messages/MessageBuilder.cpp b/src/messages/MessageBuilder.cpp index 64af873b9f9..c7959da93d5 100644 --- a/src/messages/MessageBuilder.cpp +++ b/src/messages/MessageBuilder.cpp @@ -94,10 +94,10 @@ MessageBuilder::MessageBuilder(SystemMessageTag, const QString &text, text.split(QRegularExpression("\\s"), Qt::SkipEmptyParts); for (const auto &word : textFragments) { - LinkParser parser(word); - if (parser.result()) + auto link = linkparser::parse(word); + if (link) { - this->addLink(*parser.result()); + this->addLink(*link, word); continue; } @@ -637,30 +637,45 @@ std::unique_ptr MessageBuilder::releaseBack() return ptr; } -void MessageBuilder::addLink(const ParsedLink &parsedLink) +void MessageBuilder::addLink(const linkparser::Parsed &parsedLink, + const QString &source) { QString lowercaseLinkString; - QString origLink = parsedLink.source; + QString origLink = parsedLink.link.toString(); QString fullUrl; if (parsedLink.protocol.isNull()) { - fullUrl = QStringLiteral("http://") + parsedLink.source; + fullUrl = QStringLiteral("http://") + origLink; } else { lowercaseLinkString += parsedLink.protocol; - fullUrl = parsedLink.source; + fullUrl = origLink; } lowercaseLinkString += parsedLink.host.toString().toLower(); lowercaseLinkString += parsedLink.rest; auto textColor = MessageColor(MessageColor::Link); + + if (parsedLink.hasPrefix(source)) + { + this->emplace(parsedLink.prefix(source).toString(), + MessageElementFlag::Text, this->textColor_) + ->setTrailingSpace(false); + } auto *el = this->emplace( LinkElement::Parsed{.lowercase = lowercaseLinkString, .original = origLink}, fullUrl, MessageElementFlag::Text, textColor); + if (parsedLink.hasSuffix(source)) + { + el->setTrailingSpace(false); + this->emplace(parsedLink.suffix(source).toString(), + MessageElementFlag::Text, this->textColor_); + } + getIApp()->getLinkResolver()->resolve(el->linkInfo()); } @@ -676,20 +691,18 @@ void MessageBuilder::addIrcMessageText(const QString &text) int fg = -1; int bg = -1; - for (const auto &word : words) + for (const auto &string : words) { - if (word.isEmpty()) + if (string.isEmpty()) { continue; } - auto string = QString(word); - // Actually just text - LinkParser parser(string); - if (parser.result()) + auto link = linkparser::parse(string); + if (link) { - this->addLink(*parser.result()); + this->addLink(*link, string); continue; } @@ -772,15 +785,13 @@ void MessageBuilder::addTextOrEmoji(EmotePtr emote) this->emplace(emote, MessageElementFlag::EmojiAll); } -void MessageBuilder::addTextOrEmoji(const QString &string_) +void MessageBuilder::addTextOrEmoji(const QString &string) { - auto string = QString(string_); - // Actually just text - LinkParser linkParser(string); - if (linkParser.result()) + auto link = linkparser::parse(string); + if (link) { - this->addLink(*linkParser.result()); + this->addLink(*link, string); return; } diff --git a/src/messages/MessageBuilder.hpp b/src/messages/MessageBuilder.hpp index 63b9fd8cb95..89de9921260 100644 --- a/src/messages/MessageBuilder.hpp +++ b/src/messages/MessageBuilder.hpp @@ -24,7 +24,9 @@ class TextElement; struct Emote; using EmotePtr = std::shared_ptr; -struct ParsedLink; +namespace linkparser { + struct Parsed; +} // namespace linkparser struct SystemMessageTag { }; @@ -112,7 +114,7 @@ class MessageBuilder std::weak_ptr weakOf(); void append(std::unique_ptr element); - void addLink(const ParsedLink &parsedLink); + void addLink(const linkparser::Parsed &parsedLink, const QString &source); /** * Adds the text, applies irc colors, adds links, diff --git a/src/messages/search/LinkPredicate.cpp b/src/messages/search/LinkPredicate.cpp index 8430f84ba11..4c4def2aecc 100644 --- a/src/messages/search/LinkPredicate.cpp +++ b/src/messages/search/LinkPredicate.cpp @@ -14,7 +14,7 @@ bool LinkPredicate::appliesToImpl(const Message &message) { for (const auto &word : message.messageText.split(' ', Qt::SkipEmptyParts)) { - if (LinkParser(word).result()) + if (linkparser::parse(word).has_value()) { return true; } diff --git a/src/providers/twitch/TwitchMessageBuilder.cpp b/src/providers/twitch/TwitchMessageBuilder.cpp index fe7e449909b..a5fa340b702 100644 --- a/src/providers/twitch/TwitchMessageBuilder.cpp +++ b/src/providers/twitch/TwitchMessageBuilder.cpp @@ -733,12 +733,12 @@ void TwitchMessageBuilder::addTextOrEmoji(const QString &string_) } // Actually just text - LinkParser parsed(string); + auto link = linkparser::parse(string); auto textColor = this->textColor_; - if (parsed.result()) + if (link) { - this->addLink(*parsed.result()); + this->addLink(*link, string); return; } diff --git a/tests/src/LinkParser.cpp b/tests/src/LinkParser.cpp index 9d964ce1542..746486c9711 100644 --- a/tests/src/LinkParser.cpp +++ b/tests/src/LinkParser.cpp @@ -8,21 +8,52 @@ using namespace chatterino; struct Case { + // -Wmissing-field-initializers complains otherwise + // NOLINTBEGIN(readability-redundant-member-init) QString protocol{}; QString host{}; QString rest{}; + // NOLINTEND(readability-redundant-member-init) void check() const { - auto input = this->protocol + this->host + this->rest; - LinkParser p(input); - ASSERT_TRUE(p.result().has_value()) << input; - - const auto &r = *p.result(); - ASSERT_EQ(r.source, input); - ASSERT_EQ(r.protocol, this->protocol) << this->protocol; - ASSERT_EQ(r.host, this->host) << this->host; - ASSERT_EQ(r.rest, this->rest) << this->rest; + QStringList prefixes{ + "", "_", "__", "<", "<<", "<_<", "(((", "<*_~(", "**", "~~", + }; + QStringList suffixes{ + "", ">", "?", "!", ".", ",", ":", + "*", "~", ">>", "?!.", "~~,*!?", "**", + }; + + for (const auto &prefix : prefixes) + { + for (const auto &suffix : suffixes) + { + checkSingle(prefix, suffix); + } + } + } + + void checkSingle(const QString &prefix, const QString &suffix) const + { + auto link = this->protocol + this->host + this->rest; + auto input = prefix + link + suffix; + auto p = linkparser::parse(input); + ASSERT_TRUE(p.has_value()) << input; + + if (!p) + { + return; + } + + ASSERT_EQ(p->link, link); + ASSERT_EQ(p->protocol, this->protocol); + ASSERT_EQ(p->host, this->host); + ASSERT_EQ(p->rest, this->rest); + ASSERT_EQ(p->prefix(input), prefix); + ASSERT_EQ(p->suffix(input), suffix); + ASSERT_EQ(p->hasPrefix(input), !prefix.isEmpty()); + ASSERT_EQ(p->hasSuffix(input), !suffix.isEmpty()); } }; @@ -56,6 +87,10 @@ TEST(LinkParser, parseDomainLinks) {"", "https.cat"}, {"", "httpsd.cat"}, {"", "http.cat", "/200"}, + {"", "http.cat", "/200("}, + {"", "a.com", "?("}, + {"", "a.com", "#("}, + {"", "a.com", "/__my_user__"}, // test case-insensitiveness {"HtTpS://", "127.0.0.1.CoM"}, {"HTTP://", "XD.CHATTERINO.COM", "/#?FOO"}, @@ -82,7 +117,6 @@ TEST(LinkParser, parseIpv4Links) {"", "1.1.1.1"}, {"", "001.001.01.1"}, {"", "123.246.87.0"}, - {"", "196.168.0.1", ":"}, {"", "196.168.4.2", "/foo"}, {"", "196.168.4.2", "?foo"}, {"http://", "196.168.4.0", "#foo"}, @@ -122,12 +156,24 @@ TEST(LinkParser, doesntParseInvalidIpv4Links) "255.256.255.255", "255.255.256.255", "255.255.255.256", + ":127.0.0.1", + ">1.2.3.4", + "?196.162.8.1", + "!196.162.8.1", + ".196.162.8.1", + ",196.162.8.1", + ":196.162.8.1", + "+196.162.8.1", + "196.162.8.1<", + "196.162.8.1(())", + "196.162.8.1(", + "196.162.8.1(!", }; for (const auto &input : inputs) { - LinkParser p(input); - ASSERT_FALSE(p.result().has_value()) << input; + auto p = linkparser::parse(input); + ASSERT_FALSE(p.has_value()) << input; } } @@ -166,11 +212,23 @@ TEST(LinkParser, doesntParseInvalidLinks) "http:/cat.com", "http:/cat.com", "https:/cat.com", + "chatterino.com-", + "<<>>", + ">><<", + "a.com>><<", + "~~a.com()", + "https://chatterino.com>", + // invalid characters are still accepted (see #4769) + // "chatterino.com>", + // "", }; for (const auto &input : inputs) { - LinkParser p(input); - ASSERT_FALSE(p.result().has_value()) << input; + auto p = linkparser::parse(input); + ASSERT_FALSE(p.has_value()) << input; } }