Skip to content

Commit

Permalink
chore: improve link parser and its tests a bit (#5522)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nerixyz authored Jul 23, 2024
1 parent a0b70b8 commit a2cbe63
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
34 changes: 18 additions & 16 deletions src/common/LinkParser.cpp
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
#define QT_NO_CAST_FROM_ASCII // avoids unexpected implicit casts
#include "common/LinkParser.hpp"

#include "util/QCompareCaseInsensitive.hpp"

#include <QFile>
#include <QSet>
#include <QString>
#include <QStringView>
#include <QTextStream>

#include <set>

namespace {

QSet<QString> &tlds()
using namespace chatterino;

using TldSet = std::set<QString, QCompareCaseInsensitive>;

TldSet &tlds()
{
static QSet<QString> tlds = [] {
static TldSet tlds = [] {
QFile file(QStringLiteral(":/tlds.txt"));
file.open(QFile::ReadOnly);
QTextStream stream(&file);
Expand All @@ -21,19 +28,12 @@ QSet<QString> &tlds()
#else
stream.setCodec("UTF-8");
#endif
int safetyMax = 20000;

QSet<QString> set;
TldSet set;

while (!stream.atEnd())
{
auto line = stream.readLine();
set.insert(line);

if (safetyMax-- == 0)
{
break;
}
set.emplace(stream.readLine());
}

return set;
Expand All @@ -43,7 +43,7 @@ QSet<QString> &tlds()

bool isValidTld(QStringView tld)
{
return tlds().contains(tld.toString().toLower());
return tlds().contains(tld);
}

bool isValidIpv4(QStringView host)
Expand Down Expand Up @@ -166,6 +166,8 @@ namespace chatterino::linkparser {

std::optional<Parsed> parse(const QString &source) noexcept
{
using SizeType = QString::size_type;

std::optional<Parsed> result;
// This is not implemented with a regex to increase performance.

Expand Down Expand Up @@ -201,11 +203,11 @@ std::optional<Parsed> parse(const QString &source) noexcept
QStringView host = remaining;
QStringView rest;
bool lastWasDot = true;
int lastDotPos = -1;
int nDots = 0;
SizeType lastDotPos = -1;
SizeType nDots = 0;

// Extract the host
for (int i = 0; i < remaining.size(); i++)
for (SizeType i = 0; i < remaining.size(); i++)
{
char16_t currentChar = remaining[i].unicode();
if (currentChar == u'.')
Expand Down
11 changes: 11 additions & 0 deletions tests/src/LinkParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ TEST(LinkParser, parseDomainLinks)
{"", "chatterino.com", ":80"},
{"", "wiki.chatterino.com", ":80"},
{"", "wiki.chatterino.com", ":80/foo/bar"},
{"", "wiki.chatterino.com", ":80?foo"},
{"", "wiki.chatterino.com", ":80#foo"},
{"", "wiki.chatterino.com", "/:80?foo/bar"},
{"", "wiki.chatterino.com", "/127.0.0.1"},
{"", "a.b.c.chatterino.com"},
Expand Down Expand Up @@ -156,6 +158,7 @@ TEST(LinkParser, parseIpv4Links)
TEST(LinkParser, doesntParseInvalidIpv4Links)
{
const QStringList inputs = {
"196.162.a.1",
// U+0660 - in category "number digits"
QStringLiteral("٠.٠.٠.٠"),
"https://127.0.0.",
Expand Down Expand Up @@ -186,6 +189,10 @@ TEST(LinkParser, doesntParseInvalidIpv4Links)
"196.162.8.1(",
"196.162.8.1(!",
"127.1.1;.com",
"127.0.-.1",
"127...",
"1.1.1.",
"1.1.1.:80",
};

for (const auto &input : inputs)
Expand Down Expand Up @@ -223,6 +230,10 @@ TEST(LinkParser, doesntParseInvalidLinks)
"https://pn./",
"pn./",
"pn.",
"pn.:80",
"pn./foo",
"pn.#foo",
"pn.?foo",
"http/chatterino.com",
"http/wiki.chatterino.com",
"http:cat.com",
Expand Down

0 comments on commit a2cbe63

Please sign in to comment.