diff --git a/README.md b/README.md index 6a77a338..93ad5bcf 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,8 @@ some extensions or allowing some deviations from the specification. * With the flag `MD_FLAG_NOINDENTEDCODEBLOCKS`, indented code blocks are disabled. + + * With the flag `MD_FLAG_REDDITAUTOLINKS`, Reddit subreddit and user links such as r/test or /u/me are detected ## Input/Output Encoding diff --git a/md2html/md2html.c b/md2html/md2html.c index f9a5548a..32c7ea65 100644 --- a/md2html/md2html.c +++ b/md2html/md2html.c @@ -195,6 +195,7 @@ static const option cmdline_options[] = { { "version", 'v', 'v', OPTION_ARG_NONE }, { "commonmark", 0, 'c', OPTION_ARG_NONE }, { "github", 0, 'g', OPTION_ARG_NONE }, + { "reddit", 0, 'r', OPTION_ARG_NONE }, { "fverbatim-entities", 0, 'E', OPTION_ARG_NONE }, { "fpermissive-atx-headers", 0, 'A', OPTION_ARG_NONE }, { "fpermissive-url-autolinks", 0, 'U', OPTION_ARG_NONE }, @@ -208,6 +209,7 @@ static const option cmdline_options[] = { { "fcollapse-whitespace", 0, 'W', OPTION_ARG_NONE }, { "ftables", 0, 'T', OPTION_ARG_NONE }, { "fstrikethrough", 0, 'S', OPTION_ARG_NONE }, + { "freddit-autolinks", 0, 'R', OPTION_ARG_NONE }, { 0 } }; @@ -229,6 +231,7 @@ usage(void) "(note these are equivalent to some combinations of flags below)\n" " --commonmark CommonMark (this is default)\n" " --github Github Flavored Markdown\n" + " --reddit Reddit's dialect of Markdown\n" "\n" "Markdown extension options:\n" " --fcollapse-whitespace\n" @@ -253,6 +256,8 @@ usage(void) " --fno-html-spans\n" " Disable raw HTML spans\n" " --fno-html Same as --fno-html-blocks --fno-html-spans\n" + " --freddit-autolinks\n" + " Enable Reddit autolinks of the form /u/x, /r/x, u/x, r/x\n" " --ftables Enable tables\n" " --fstrikethrough Enable strikethrough spans\n" ); @@ -288,6 +293,7 @@ cmdline_callback(int opt, char const* value, void* data) case 'c': parser_flags = MD_DIALECT_COMMONMARK; break; case 'g': parser_flags = MD_DIALECT_GITHUB; break; + case 
'r': parser_flags = MD_DIALECT_REDDIT; break; case 'E': renderer_flags |= MD_RENDER_FLAG_VERBATIM_ENTITIES; break; case 'A': parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break; @@ -302,6 +308,7 @@ cmdline_callback(int opt, char const* value, void* data) case 'V': parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break; case 'T': parser_flags |= MD_FLAG_TABLES; break; case 'S': parser_flags |= MD_FLAG_STRIKETHROUGH; break; + case 'R': parser_flags |= MD_FLAG_REDDITAUTOLINKS; break; default: fprintf(stderr, "Illegal option: %s\n", value); diff --git a/md2html/render_html.c b/md2html/render_html.c index 8f0b22c0..3ca58833 100644 --- a/md2html/render_html.c +++ b/md2html/render_html.c @@ -310,6 +310,22 @@ render_open_a_span(MD_RENDER_HTML* r, const MD_SPAN_A_DETAIL* det) RENDER_LITERAL(r, "\">"); } +static void +render_reddit_link(MD_RENDER_HTML* r, const MD_REDDIT_SLASH_DETAIL* det) +{ + RENDER_LITERAL(r, "type == MD_REDDIT_SUBREDDIT) + { + RENDER_LITERAL(r, "r/"); + } + else + { + RENDER_LITERAL(r, "u/"); + } + render_text(r, det->name, det->size); + RENDER_LITERAL(r, "\">"); +} + static void render_open_img_span(MD_RENDER_HTML* r, const MD_SPAN_IMG_DETAIL* det) { @@ -413,6 +429,7 @@ enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break; case MD_SPAN_CODE: RENDER_LITERAL(r, ""); break; case MD_SPAN_DEL: RENDER_LITERAL(r, ""); break; + case MD_REDDIT_SLASH_LINK: render_reddit_link(r, (MD_REDDIT_SLASH_DETAIL*)detail); break; } return 0; @@ -438,6 +455,7 @@ leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) case MD_SPAN_IMG: /*noop, handled above*/ break; case MD_SPAN_CODE: RENDER_LITERAL(r, ""); break; case MD_SPAN_DEL: RENDER_LITERAL(r, ""); break; + case MD_REDDIT_SLASH_LINK: RENDER_LITERAL(r, ""); break; } return 0; diff --git a/md4c/md4c.c b/md4c/md4c.c index 52efc7a7..6f9bebd5 100644 --- a/md4c/md4c.c +++ b/md4c/md4c.c @@ -2696,7 +2696,8 @@ 
md_build_mark_char_map(MD_CTX* ctx) ctx->mark_char_map['!'] = 1; ctx->mark_char_map[']'] = 1; ctx->mark_char_map['\0'] = 1; - + if (ctx->r.flags & MD_FLAG_REDDITAUTOLINKS) + ctx->mark_char_map['/'] = 1; if(ctx->r.flags & MD_FLAG_STRIKETHROUGH) ctx->mark_char_map['~'] = 1; @@ -2912,11 +2913,48 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) /* Push a dummy as a reserve for a closer. */ PUSH_MARK('D', off, off, 0); } - off++; continue; } + /* A potential permissive Reddit autolink */ + if(ch == _T('/')) { + if(line->beg + 1 <= off && (CH(off - 1) == 'u' || CH(off - 1) == 'r') && + (line->beg + 1 == off || + (CH(off - 2) != '/' && (ISUNICODEPUNCTBEFORE(off - 1) || ISUNICODEWHITESPACE(off - 2)))) && + line->end > off + 1 && ISALNUM(off + 1)) + { + OFF index = off + 2; + while (index <= line->end) + { + if (!(ISALNUM(index) || (CH(index) == '_'))) + break; + index++; + } + /* u/something or r/something */ + PUSH_MARK('/', off - 1, index, MD_MARK_RESOLVED); + off = index; + } + else if (line->end > off + 3 && ((CH(off + 2) == '/') && (CH(off + 1) == 'u' || CH(off + 1) == 'r') && + ISALNUM(off + 3))) + { + OFF index = off + 4; + while (index <= line->end) + { + if (!(ISALNUM(index) || (CH(index) == '_'))) + break; + index++; + } + PUSH_MARK('/', off, index, MD_MARK_RESOLVED); + off = index; + } + else + { + off++; + } + continue; + } + /* A potential permissive URL autolink. */ if(ch == _T(':')) { static struct { @@ -3605,6 +3643,9 @@ md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index) OFF off = opener->end; int seen_dot = FALSE; int seen_underscore_or_hyphen[2] = { FALSE, FALSE }; + if (opener->end == opener->beg) { + opener->ch = '/'; + } /* Check for domain. 
*/ while(off < ctx->size) { @@ -3711,7 +3752,6 @@ md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index) closer->end = end; md_resolve_range(ctx, NULL, mark_index, closer_index); } - static inline void md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int mark_beg, int mark_end, const CHAR* mark_chars) @@ -3752,6 +3792,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, case '_': md_analyze_underscore(ctx, i); break; case '~': md_analyze_tilde(ctx, i); break; case '.': /* Pass through. */ + case '/': /* Pass through */ case ':': md_analyze_permissive_url_autolink(ctx, i); break; case '@': md_analyze_permissive_email_autolink(ctx, i); break; } @@ -3970,22 +4011,51 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) const MD_MARK* closer = &ctx->marks[opener->next]; const CHAR* dest = STR(opener->end); SZ dest_size = closer->beg - opener->end; - - if(opener->ch == '@' || opener->ch == '.') { + if (opener->ch == '@' || opener->ch == '.') { dest_size += 7; MD_TEMP_BUFFER(dest_size * sizeof(CHAR)); memcpy(ctx->buffer, - (opener->ch == '@' ? _T("mailto:") : _T("http://")), - 7 * sizeof(CHAR)); - memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR)); + (opener->ch == '@' ? 
_T("mailto:") : _T("http://")), + 7 * sizeof(CHAR)); + memcpy(ctx->buffer + 7, dest, (dest_size - 7) * sizeof(CHAR)); dest = ctx->buffer; } - - MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER), - MD_SPAN_A, dest, dest_size, TRUE, NULL, 0)); + MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER), MD_SPAN_A, dest, dest_size, TRUE, NULL, 0)); break; } - + case '/': /* Permissive Reddit autolinks */ + { + MD_REDDIT_SLASH_DETAIL det; + if (CH(mark->beg) == '/') + { + det.name = ctx->text + mark->beg + 3; + det.size = mark->end - mark->beg - 3; + if (CH(mark->beg + 1) == 'r') + { + det.type = MD_REDDIT_SUBREDDIT; + } + else + { + det.type = MD_REDDIT_USER; + } + } + else // u/something or r/something instead of /r/something + { + det.name = ctx->text + mark->beg + 2; + det.size = mark->end - mark->beg - 2; + if (CH(mark->beg) == 'r') + { + det.type = MD_REDDIT_SUBREDDIT; + } + else + { + det.type = MD_REDDIT_USER; + } + } + MD_ENTER_SPAN(MD_REDDIT_SLASH_LINK, &det); + MD_TEXT(text_type, STR(mark->beg), mark->end - mark->beg); + MD_LEAVE_SPAN(MD_REDDIT_SLASH_LINK, &det); + }break; case '&': /* Entity. */ MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg); break; diff --git a/md4c/md4c.h b/md4c/md4c.h index c68527be..aef3bdef 100644 --- a/md4c/md4c.h +++ b/md4c/md4c.h @@ -131,7 +131,8 @@ typedef enum MD_SPANTYPE { /* ... * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled. */ - MD_SPAN_DEL + MD_SPAN_DEL, + MD_REDDIT_SLASH_LINK } MD_SPANTYPE; /* Text is the actual textual contents of span. */ @@ -172,8 +173,10 @@ typedef enum MD_TEXTTYPE { * The text contains verbatim '\n' for the new lines. */ MD_TEXT_HTML } MD_TEXTTYPE; - - +typedef enum MD_REDDIT_SLASH_TYPE +{ + MD_REDDIT_USER, MD_REDDIT_SUBREDDIT +} MD_REDDIT_SLASH_TYPE; /* Alignment enumeration. */ typedef enum MD_ALIGN { MD_ALIGN_DEFAULT = 0, /* When unspecified. 
*/ @@ -245,7 +248,12 @@ typedef struct MD_SPAN_A_DETAIL { MD_ATTRIBUTE href; MD_ATTRIBUTE title; } MD_SPAN_A_DETAIL; - +typedef struct MD_REDDIT_SLASH_DETAIL +{ + MD_REDDIT_SLASH_TYPE type; //whether it's a user or subreddit + unsigned char size; + MD_CHAR * name; +}MD_REDDIT_SLASH_DETAIL; /* Detailed info for MD_SPAN_IMG. */ typedef struct MD_SPAN_IMG_DETAIL { MD_ATTRIBUTE src; @@ -271,6 +279,8 @@ typedef struct MD_SPAN_IMG_DETAIL { #define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS) #define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS) +#define MD_FLAG_REDDITAUTOLINKS 0x8000 /* Enable Reddit autolinks */ + /* Convenient sets of flags corresponding to well-known Markdown dialects. * Note we may only support subset of features of the referred dialect. @@ -279,6 +289,7 @@ typedef struct MD_SPAN_IMG_DETAIL { */ #define MD_DIALECT_COMMONMARK 0 #define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH) +#define MD_DIALECT_REDDIT (MD_FLAG_PERMISSIVEATXHEADERS | MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_NOHTML | MD_FLAG_REDDITAUTOLINKS | MD_FLAG_STRIKETHROUGH) /* Renderer structure. 
*/ diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh index cc62111b..632119ae 100755 --- a/scripts/run-tests.sh +++ b/scripts/run-tests.sh @@ -54,6 +54,10 @@ echo echo "Strikethrough extension:" $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/strikethrough.txt" -p "$PROGRAM --fstrikethrough" +echo +echo "Reddit autolinks extension:" +$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/reddit-autolinks.txt" -p "$PROGRAM --freddit-autolinks" + echo echo "Pathological input:" $PYTHON "$TEST_DIR/pathological_tests.py" -p "$PROGRAM" diff --git a/test/reddit-autolinks.txt b/test/reddit-autolinks.txt new file mode 100644 index 00000000..996e3199 --- /dev/null +++ b/test/reddit-autolinks.txt @@ -0,0 +1,22 @@ + +# Reddit Autolinks + +With the flag `MD_FLAG_REDDITAUTOLINKS`, MD4C enables an extension for recognizing Reddit usernames and subreddits. + +For reference, here is snudown's (Reddit's markdown parser) implementation [link](https://github.com/reddit/snudown/blob/master/src/autolink.c). + +Reddit autolinks can be prefixed by a forward slash or not: r/test and /r/test both work. A link ends when a non-alphanumeric character is hit. The only such character allowed is an underscore. + +```````````````````````````````` example +Here is a link to /r/askreddit by u/me. +. +

Here is a link to /r/askreddit by u/me.

+```````````````````````````````` + +A link can show up in the middle of the word if prefixed by a forward slash (you don't need a whitespace before). + +```````````````````````````````` example +World/r/news vs /r/worldnew;s +. +

World/r/news vs /r/worldnew;s

+```````````````````````````````` \ No newline at end of file