Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reddit slash links #32

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ some extensions or allowing some deviations from the specification.

* With the flag `MD_FLAG_NOINDENTEDCODEBLOCKS`, indented code blocks are
disabled.

* With the flag `MD_FLAG_REDDITAUTOLINKS`, Reddit subreddit and user links
such as `r/test` or `/u/me` are detected and rendered as links.


## Input/Output Encoding
Expand Down
7 changes: 7 additions & 0 deletions md2html/md2html.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ static const option cmdline_options[] = {
{ "version", 'v', 'v', OPTION_ARG_NONE },
{ "commonmark", 0, 'c', OPTION_ARG_NONE },
{ "github", 0, 'g', OPTION_ARG_NONE },
{ "reddit", 0, 'r', OPTION_ARG_NONE },
{ "fverbatim-entities", 0, 'E', OPTION_ARG_NONE },
{ "fpermissive-atx-headers", 0, 'A', OPTION_ARG_NONE },
{ "fpermissive-url-autolinks", 0, 'U', OPTION_ARG_NONE },
Expand All @@ -208,6 +209,7 @@ static const option cmdline_options[] = {
{ "fcollapse-whitespace", 0, 'W', OPTION_ARG_NONE },
{ "ftables", 0, 'T', OPTION_ARG_NONE },
{ "fstrikethrough", 0, 'S', OPTION_ARG_NONE },
{ "freddit-autolinks", 0, 'R', OPTION_ARG_NONE },
{ 0 }
};

Expand All @@ -229,6 +231,7 @@ usage(void)
"(note these are equivalent to some combinations of flags below)\n"
" --commonmark CommonMark (this is default)\n"
" --github Github Flavored Markdown\n"
" --reddit Reddit's dialect of Markdown\n"
"\n"
"Markdown extension options:\n"
" --fcollapse-whitespace\n"
Expand All @@ -253,6 +256,8 @@ usage(void)
" --fno-html-spans\n"
" Disable raw HTML spans\n"
" --fno-html Same as --fno-html-blocks --fno-html-spans\n"
" --freddit-autolinks\n"
" Enable Reddit autolinks of the form /u/x, /r/x, u/x, r/x\n"
" --ftables Enable tables\n"
" --fstrikethrough Enable strikethrough spans\n"
);
Expand Down Expand Up @@ -288,6 +293,7 @@ cmdline_callback(int opt, char const* value, void* data)

case 'c': parser_flags = MD_DIALECT_COMMONMARK; break;
case 'g': parser_flags = MD_DIALECT_GITHUB; break;
case 'r': parser_flags = MD_DIALECT_REDDIT; break;

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a minor detail, but please swap 'r' and 'R' so that there is some consistency: Lower case for dialect options, upper case for extensions.

case 'E': renderer_flags |= MD_RENDER_FLAG_VERBATIM_ENTITIES; break;
case 'A': parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
Expand All @@ -302,6 +308,7 @@ cmdline_callback(int opt, char const* value, void* data)
case 'V': parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break;
case 'T': parser_flags |= MD_FLAG_TABLES; break;
case 'S': parser_flags |= MD_FLAG_STRIKETHROUGH; break;
case 'R': parser_flags |= MD_FLAG_REDDITAUTOLINKS; break;

default:
fprintf(stderr, "Illegal option: %s\n", value);
Expand Down
18 changes: 18 additions & 0 deletions md2html/render_html.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,22 @@ render_open_a_span(MD_RENDER_HTML* r, const MD_SPAN_A_DETAIL* det)
RENDER_LITERAL(r, "\">");
}

/* Open the anchor element for a Reddit slash link (MD_REDDIT_SLASH_LINK).
 *
 * Output is the literal prefix <a href="https://www.reddit.com/ followed by
 * "r/" when det->type is MD_REDDIT_SUBREDDIT (or "u/" for any other type),
 * then det->size characters of det->name, and finally the closing quote and
 * bracket of the opening tag. The matching </a> is emitted by the caller
 * when the span is left.
 */
static void
render_reddit_link(MD_RENDER_HTML* r, const MD_REDDIT_SLASH_DETAIL* det)
{
    RENDER_LITERAL(r, "<a href=\"https://www.reddit.com/");

    /* Path segment selecting the kind of Reddit page. */
    switch(det->type) {
        case MD_REDDIT_SUBREDDIT:   RENDER_LITERAL(r, "r/"); break;
        default:                    RENDER_LITERAL(r, "u/"); break;
    }

    render_text(r, det->name, det->size);
    RENDER_LITERAL(r, "\">");
}

static void
render_open_img_span(MD_RENDER_HTML* r, const MD_SPAN_IMG_DETAIL* det)
{
Expand Down Expand Up @@ -413,6 +429,7 @@ enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
case MD_SPAN_CODE: RENDER_LITERAL(r, "<code>"); break;
case MD_SPAN_DEL: RENDER_LITERAL(r, "<del>"); break;
case MD_REDDIT_SLASH_LINK: render_reddit_link(r, (MD_REDDIT_SLASH_DETAIL*)detail); break;
}

return 0;
Expand All @@ -438,6 +455,7 @@ leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
case MD_SPAN_IMG: /*noop, handled above*/ break;
case MD_SPAN_CODE: RENDER_LITERAL(r, "</code>"); break;
case MD_SPAN_DEL: RENDER_LITERAL(r, "</del>"); break;
case MD_REDDIT_SLASH_LINK: RENDER_LITERAL(r, "</a>"); break;
}

return 0;
Expand Down
94 changes: 82 additions & 12 deletions md4c/md4c.c
Original file line number Diff line number Diff line change
Expand Up @@ -2696,7 +2696,8 @@ md_build_mark_char_map(MD_CTX* ctx)
ctx->mark_char_map['!'] = 1;
ctx->mark_char_map[']'] = 1;
ctx->mark_char_map['\0'] = 1;

if (ctx->r.flags & MD_FLAG_REDDITAUTOLINKS)
ctx->mark_char_map['/'] = 1;
if(ctx->r.flags & MD_FLAG_STRIKETHROUGH)
ctx->mark_char_map['~'] = 1;

Expand Down Expand Up @@ -2912,11 +2913,48 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
/* Push a dummy as a reserve for a closer. */
PUSH_MARK('D', off, off, 0);
}

off++;
continue;
}

/* A potential permissive Reddit autolink */
if(ch == _T('/')) {
if(line->beg + 1 <= off && (CH(off - 1) == 'u' || CH(off - 1) == 'r') &&
(line->beg + 1 == off ||
(CH(off - 2) != '/' && (ISUNICODEPUNCTBEFORE(off - 1) || ISUNICODEWHITESPACE(off - 2)))) &&
line->end > off + 1 && ISALNUM(off + 1))
{
OFF index = off + 2;
while (index <= line->end)
{
if (!(ISALNUM(index) || (CH(index) == '_')))
break;
index++;
}
/* u/something or r/something */
PUSH_MARK('/', off - 1, index, MD_MARK_RESOLVED);
off = index;
}
else if (line->end > off + 3 && ((CH(off + 2) == '/') && (CH(off + 1) == 'u' || CH(off + 1) == 'r') &&
ISALNUM(off + 3)))
{
OFF index = off + 4;
while (index <= line->end)
{
if (!(ISALNUM(index) || (CH(index) == '_')))
break;
index++;
}
PUSH_MARK('/', off, index, MD_MARK_RESOLVED);
off = index;
}
else
{
off++;
}
continue;
}

/* A potential permissive URL autolink. */
if(ch == _T(':')) {
static struct {
Expand Down Expand Up @@ -3605,6 +3643,9 @@ md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
OFF off = opener->end;
int seen_dot = FALSE;
int seen_underscore_or_hyphen[2] = { FALSE, FALSE };
if (opener->end == opener->beg) {
opener->ch = '/';
}

/* Check for domain. */
while(off < ctx->size) {
Expand Down Expand Up @@ -3711,7 +3752,6 @@ md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
closer->end = end;
md_resolve_range(ctx, NULL, mark_index, closer_index);
}

static inline void
md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
int mark_beg, int mark_end, const CHAR* mark_chars)
Expand Down Expand Up @@ -3752,6 +3792,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
case '_': md_analyze_underscore(ctx, i); break;
case '~': md_analyze_tilde(ctx, i); break;
case '.': /* Pass through. */
case '/': /* Pass through */
case ':': md_analyze_permissive_url_autolink(ctx, i); break;
case '@': md_analyze_permissive_email_autolink(ctx, i); break;
}
Expand Down Expand Up @@ -3970,22 +4011,51 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
const MD_MARK* closer = &ctx->marks[opener->next];
const CHAR* dest = STR(opener->end);
SZ dest_size = closer->beg - opener->end;

if(opener->ch == '@' || opener->ch == '.') {
if (opener->ch == '@' || opener->ch == '.') {
dest_size += 7;
MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
memcpy(ctx->buffer,
(opener->ch == '@' ? _T("mailto:") : _T("http://")),
7 * sizeof(CHAR));
memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
(opener->ch == '@' ? _T("mailto:") : _T("http://")),
7 * sizeof(CHAR));
memcpy(ctx->buffer + 7, dest, (dest_size - 7) * sizeof(CHAR));
dest = ctx->buffer;
}

MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER), MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
break;
}

case '/': /* Permissive Reddit autolinks */
{
MD_REDDIT_SLASH_DETAIL det;
if (CH(mark->beg) == '/')
{
det.name = ctx->text + mark->beg + 3;
det.size = mark->end - mark->beg - 3;
if (CH(mark->beg + 1) == 'r')
{
det.type = MD_REDDIT_SUBREDDIT;
}
else
{
det.type = MD_REDDIT_USER;
}
}
else // u/something or r/something instead of /r/something
{
det.name = ctx->text + mark->beg + 2;
det.size = mark->end - mark->beg - 2;
if (CH(mark->beg) == 'r')
{
det.type = MD_REDDIT_SUBREDDIT;
}
else
{
det.type = MD_REDDIT_USER;
}
}
MD_ENTER_SPAN(MD_REDDIT_SLASH_LINK, &det);
MD_TEXT(text_type, STR(mark->beg), mark->end - mark->beg);
MD_LEAVE_SPAN(MD_REDDIT_SLASH_LINK, &det);
}break;
case '&': /* Entity. */
MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
break;
Expand Down
19 changes: 15 additions & 4 deletions md4c/md4c.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ typedef enum MD_SPANTYPE {
/* <del>...</del>
* Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
*/
MD_SPAN_DEL
MD_SPAN_DEL,
MD_REDDIT_SLASH_LINK
} MD_SPANTYPE;

/* Text is the actual textual contents of span. */
Expand Down Expand Up @@ -172,8 +173,10 @@ typedef enum MD_TEXTTYPE {
* The text contains verbatim '\n' for the new lines. */
MD_TEXT_HTML
} MD_TEXTTYPE;


/* Kind of Reddit page a slash link (MD_REDDIT_SLASH_LINK) points to. */
typedef enum MD_REDDIT_SLASH_TYPE {
    MD_REDDIT_USER = 0,     /* User link: u/name or /u/name */
    MD_REDDIT_SUBREDDIT     /* Subreddit link: r/name or /r/name */
} MD_REDDIT_SLASH_TYPE;
/* Alignment enumeration. */
typedef enum MD_ALIGN {
MD_ALIGN_DEFAULT = 0, /* When unspecified. */
Expand Down Expand Up @@ -245,7 +248,12 @@ typedef struct MD_SPAN_A_DETAIL {
MD_ATTRIBUTE href;
MD_ATTRIBUTE title;
} MD_SPAN_A_DETAIL;

/* Detailed info for MD_REDDIT_SLASH_LINK. */
typedef struct MD_REDDIT_SLASH_DETAIL {
    /* Whether the link targets a user or a subreddit. */
    MD_REDDIT_SLASH_TYPE type;

    /* Number of characters in name. Kept as a full unsigned so that names
     * longer than 255 characters are not silently truncated modulo 256. */
    unsigned size;

    /* Start of the user/subreddit name, i.e. the text after the "r/", "u/",
     * "/r/" or "/u/" prefix. It points into the parsed input, so it is
     * read-only and must be used together with size rather than relying on
     * a terminator. */
    const MD_CHAR* name;
} MD_REDDIT_SLASH_DETAIL;
/* Detailed info for MD_SPAN_IMG. */
typedef struct MD_SPAN_IMG_DETAIL {
MD_ATTRIBUTE src;
Expand All @@ -271,6 +279,8 @@ typedef struct MD_SPAN_IMG_DETAIL {

#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
#define MD_FLAG_REDDITAUTOLINKS 0x8000 /* Enable Reddit autolinks */


/* Convenient sets of flags corresponding to well-known Markdown dialects.
* Note we may only support subset of features of the referred dialect.
Expand All @@ -279,6 +289,7 @@ typedef struct MD_SPAN_IMG_DETAIL {
*/
#define MD_DIALECT_COMMONMARK 0
#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH)
#define MD_DIALECT_REDDIT (MD_FLAG_PERMISSIVEATXHEADERS | MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_NOHTML | MD_FLAG_REDDITAUTOLINKS | MD_FLAG_STRIKETHROUGH)

/* Renderer structure.
*/
Expand Down
4 changes: 4 additions & 0 deletions scripts/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ echo
echo "Strikethrough extension:"
$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/strikethrough.txt" -p "$PROGRAM --fstrikethrough"

echo
echo "Reddit autolinks extension:"
$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/reddit-autolinks.txt" -p "$PROGRAM --freddit-autolinks"

echo
echo "Pathological input:"
$PYTHON "$TEST_DIR/pathological_tests.py" -p "$PROGRAM"
22 changes: 22 additions & 0 deletions test/reddit-autolinks.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

# Reddit Autolinks

With the flag `MD_FLAG_REDDITAUTOLINKS`, MD4C enables an extension for recognizing Reddit usernames and subreddits.

For reference, here is snudown's (Reddit's markdown parser) implementation [link](https://github.com/reddit/snudown/blob/master/src/autolink.c).

Reddit autolinks may or may not be prefixed by a forward slash: r/test and /r/test both work. A link ends at the first character that is neither alphanumeric nor an underscore; the underscore is the only non-alphanumeric character allowed in a name.

```````````````````````````````` example
Here is a link to /r/askreddit by u/me.
.
<p>Here is a link to <a href="https://www.reddit.com/r/askreddit">/r/askreddit</a> by <a href="https://www.reddit.com/u/me">u/me</a>.</p>
````````````````````````````````

A link can show up in the middle of the word if prefixed by a forward slash (you don't need a whitespace before).

```````````````````````````````` example
World/r/news vs /r/worldnew;s
.
<p>World<a href="https://www.reddit.com/r/news">/r/news</a> vs <a href="https://www.reddit.com/r/worldnew">/r/worldnew</a>;s</p>
````````````````````````````````