diff --git a/src/md4c-html.c b/src/md4c-html.c
index 4dbba9aa..45a5ead1 100644
--- a/src/md4c-html.c
+++ b/src/md4c-html.c
@@ -47,8 +47,8 @@
-typedef struct MD_HTML_tag MD_HTML;
-struct MD_HTML_tag {
+typedef struct MD_HTML MD_HTML;
+struct MD_HTML {
void (*process_output)(const MD_CHAR*, MD_SIZE, void*);
void* userdata;
unsigned flags;
@@ -70,89 +70,11 @@ struct MD_HTML_tag {
#define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
-static inline void
-render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
-{
- r->process_output(text, size, r->userdata);
-}
-
/* Keep this as a macro. Most compiler should then be smart enough to replace
* the strlen() call with a compile-time constant if the string is a C literal. */
#define RENDER_VERBATIM(r, verbatim) \
- render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
-
-
-static void
-render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
-{
- MD_OFFSET beg = 0;
- MD_OFFSET off = 0;
-
- /* Some characters need to be escaped in normal HTML text. */
- #define NEED_HTML_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)
-
- while(1) {
- /* Optimization: Use some loop unrolling. */
- while(off + 3 < size && !NEED_HTML_ESC(data[off+0]) && !NEED_HTML_ESC(data[off+1])
- && !NEED_HTML_ESC(data[off+2]) && !NEED_HTML_ESC(data[off+3]))
- off += 4;
- while(off < size && !NEED_HTML_ESC(data[off]))
- off++;
-
- if(off > beg)
- render_verbatim(r, data + beg, off - beg);
-
- if(off < size) {
- switch(data[off]) {
- case '&': RENDER_VERBATIM(r, "&"); break;
- case '<': RENDER_VERBATIM(r, "<"); break;
- case '>': RENDER_VERBATIM(r, ">"); break;
- case '"': RENDER_VERBATIM(r, """); break;
- }
- off++;
- } else {
- break;
- }
- beg = off;
- }
-}
-
-static void
-render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
-{
- static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
- MD_OFFSET beg = 0;
- MD_OFFSET off = 0;
-
- /* Some characters need to be escaped in URL attributes. */
- #define NEED_URL_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)
-
- while(1) {
- while(off < size && !NEED_URL_ESC(data[off]))
- off++;
- if(off > beg)
- render_verbatim(r, data + beg, off - beg);
-
- if(off < size) {
- char hex[3];
-
- switch(data[off]) {
- case '&': RENDER_VERBATIM(r, "&"); break;
- default:
- hex[0] = '%';
- hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf];
- hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf];
- render_verbatim(r, hex, 3);
- break;
- }
- off++;
- } else {
- break;
- }
+ md_html_output_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
- beg = off;
- }
-}
static unsigned
hex_val(char ch)
@@ -207,7 +129,7 @@ render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size,
void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
{
if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) {
- render_verbatim(r, text, size);
+ md_html_output_verbatim(r, text, size);
return;
}
@@ -258,7 +180,7 @@ render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
const MD_CHAR* text = attr->text + off;
switch(type) {
- case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_verbatim); break;
+ case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, md_html_output_verbatim); break;
case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break;
default: fn_append(r, text, size); break;
}
@@ -302,7 +224,7 @@ render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
/* If known, output the HTML 5 attribute class="language-LANGNAME". */
if(det->lang.text != NULL) {
RENDER_VERBATIM(r, " class=\"language-");
- render_attribute(r, &det->lang, render_html_escaped);
+ render_attribute(r, &det->lang, md_html_output_escaped);
RENDER_VERBATIM(r, "\"");
}
@@ -327,11 +249,11 @@ static void
render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
{
RENDER_VERBATIM(r, "href, render_url_escaped);
+ render_attribute(r, &det->href, md_html_output_url_escaped);
if(det->title.text != NULL) {
RENDER_VERBATIM(r, "\" title=\"");
- render_attribute(r, &det->title, render_html_escaped);
+ render_attribute(r, &det->title, md_html_output_escaped);
}
RENDER_VERBATIM(r, "\">");
@@ -341,7 +263,7 @@ static void
render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
{
RENDER_VERBATIM(r, "src, render_url_escaped);
+ render_attribute(r, &det->src, md_html_output_url_escaped);
RENDER_VERBATIM(r, "\" alt=\"");
}
@@ -351,7 +273,7 @@ render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
{
if(det->title.text != NULL) {
RENDER_VERBATIM(r, "\" title=\"");
- render_attribute(r, &det->title, render_html_escaped);
+ render_attribute(r, &det->title, md_html_output_escaped);
}
RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\" />" : "\">");
@@ -361,18 +283,85 @@ static void
render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
{
RENDER_VERBATIM(r, "target, render_html_escaped);
+ render_attribute(r, &det->target, md_html_output_escaped);
RENDER_VERBATIM(r, "\">");
}
+/* Initialize the renderer state: store the output callback, its userdata and
+ * the renderer flags, reset the image nesting counter and build the per-byte
+ * escape map consulted by NEED_HTML_ESC() and NEED_URL_ESC(). */
+static void
+md_html_init(MD_HTML* mh, void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
+             void* userdata, unsigned renderer_flags)
+{
+    int i;
+
+    mh->process_output = process_output;
+    mh->userdata = userdata;
+    mh->flags = renderer_flags;
+    mh->image_nesting_level = 0;
+
+    /* Build map of characters which need escaping. */
+    for(i = 0; i < 256; i++) {
+        unsigned char ch = (unsigned char) i;
+
+        /* The structure may come from malloc() or from an uninitialized
+         * automatic variable, so clear the entry before OR-ing flags in. */
+        mh->escape_map[i] = 0;
+
+        if(strchr("\"&<>", ch) != NULL)
+            mh->escape_map[i] |= NEED_HTML_ESC_FLAG;
+
+        if(!ISALNUM(ch) && strchr("~-_.+!*(),%#@?=;:/,+$", ch) == NULL)
+            mh->escape_map[i] |= NEED_URL_ESC_FLAG;
+    }
+}
+
/**************************************
*** HTML renderer implementation ***
**************************************/
-static int
-enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
+int
+md_html(const MD_CHAR* input, MD_SIZE input_size,
+ void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
+ void* userdata, unsigned parser_flags, unsigned renderer_flags)
+{ /* One-shot conversion: parse the input with the standard HTML callbacks and return md_parse()'s result. */
+ MD_HTML mh; /* Renderer state for this one call; it lives on the stack. */
+ MD_PARSER_v2 parser = {
+ 0, /* NOTE(review): the example in md4c-html.h sets abi_version = 2 -- confirm 0 is intended here. */
+ parser_flags,
+ md_html_enter_block,
+ md_html_leave_block,
+ md_html_enter_span,
+ md_html_leave_span,
+ md_html_text,
+ md_html_debug_log,
+ NULL
+ };
+ /* Store the output callback, userdata and flags, and build the escape map. */
+ md_html_init(&mh, process_output, userdata, renderer_flags);
+
+ /* For compatibility with old apps. */
+ if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM)
+ parser.flags |= MD_FLAG_SKIPBOM;
+ /* &mh is passed as userdata; each standard callback casts it back to MD_HTML*. */
+ return md_parse(input, input_size, (MD_PARSER*) &parser, (void*) &mh);
+}
+
+/* Allocate a converter structure on the heap and initialize it with the
+ * given output callback, userdata and renderer flags.
+ *
+ * Returns NULL when the allocation fails; otherwise the caller owns the
+ * result and releases it with md_html_destroy().
+ *
+ * NOTE(review): relies on malloc() being declared (<stdlib.h>) -- confirm the
+ * header is included at the top of this file. */
+MD_HTML*
+md_html_create(void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
+               void* userdata, unsigned renderer_flags)
+{
+    MD_HTML* renderer = (MD_HTML*) malloc(sizeof(MD_HTML));
+
+    if(renderer == NULL)
+        return NULL;
+
+    md_html_init(renderer, process_output, userdata, renderer_flags);
+    return renderer;
+}
+
+void
+md_html_destroy(MD_HTML* mh)
+{
+ free(mh); /* Releases the converter structure; free(NULL) is a no-op, so a NULL mh is harmless. */
+}
+
+int
+md_html_enter_block(int type, void* detail, void* userdata)
{
static const MD_CHAR* head[6] = { "", "", "", "", "", "" };
MD_HTML* r = (MD_HTML*) userdata;
@@ -399,8 +388,8 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
return 0;
}
-static int
-leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
+int
+md_html_leave_block(int type, void* detail, void* userdata)
{
static const MD_CHAR* head[6] = { "
\n", "\n", "\n", "\n", "\n", "\n" };
MD_HTML* r = (MD_HTML*) userdata;
@@ -427,8 +416,8 @@ leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
return 0;
}
-static int
-enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
+int
+md_html_enter_span(int type, void* detail, void* userdata)
{
MD_HTML* r = (MD_HTML*) userdata;
int inside_img = (r->image_nesting_level > 0);
@@ -468,8 +457,8 @@ enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
return 0;
}
-static int
-leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
+int
+md_html_leave_span(int type, void* detail, void* userdata)
{
MD_HTML* r = (MD_HTML*) userdata;
@@ -494,69 +483,108 @@ leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
return 0;
}
-static int
-text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
+/* Text callback for md_parse(): writes one run of text to the output, either
+ * verbatim, entity-aware or HTML-escaped depending on the text type.
+ * userdata must be the MD_HTML renderer state. Always returns 0. */
+int
+md_html_text(int type, const MD_CHAR* text, MD_SIZE size, void* userdata)
 {
     MD_HTML* r = (MD_HTML*) userdata;
     switch(type) {
-        case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, render_verbatim); break;
+        case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, md_html_output_verbatim); break;
+        /* While image_nesting_level is non-zero, line breaks are written as a single space. */
         case MD_TEXT_BR:        RENDER_VERBATIM(r, (r->image_nesting_level == 0
                                         ? ((r->flags & MD_HTML_FLAG_XHTML) ? "<br />\n" : "<br>\n")
                                         : " "));
                                 break;
         case MD_TEXT_SOFTBR:    RENDER_VERBATIM(r, (r->image_nesting_level == 0 ? "\n" : " ")); break;
-        case MD_TEXT_HTML:      render_verbatim(r, text, size); break;
-        case MD_TEXT_ENTITY:    render_entity(r, text, size, render_html_escaped); break;
-        default:                render_html_escaped(r, text, size); break;
+        case MD_TEXT_HTML:      md_html_output_verbatim(r, text, size); break;
+        case MD_TEXT_ENTITY:    render_entity(r, text, size, md_html_output_escaped); break;
+        default:                md_html_output_escaped(r, text, size); break;
     }
     return 0;
 }
-static void
-debug_log_callback(const char* msg, void* userdata)
+void
+md_html_debug_log(const char* msg, void* userdata)
{
MD_HTML* r = (MD_HTML*) userdata;
if(r->flags & MD_HTML_FLAG_DEBUG) /* Stay silent unless the renderer was given MD_HTML_FLAG_DEBUG. */
fprintf(stderr, "MD4C: %s\n", msg); /* Diagnostics go to stderr, not through process_output(). */
}
-int
-md_html(const MD_CHAR* input, MD_SIZE input_size,
- void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
- void* userdata, unsigned parser_flags, unsigned renderer_flags)
+void
+md_html_output_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
{
- MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
- int i;
+ r->process_output(text, size, r->userdata); /* Hand the bytes to the output callback unchanged, no escaping. */
+}
- MD_PARSER parser = {
- 0,
- parser_flags,
- enter_block_callback,
- leave_block_callback,
- enter_span_callback,
- leave_span_callback,
- text_callback,
- debug_log_callback,
- NULL
- };
+/* Write size bytes of data to the output, replacing the characters flagged
+ * with NEED_HTML_ESC_FLAG in the escape map ('&', '<', '>' and '"') by the
+ * corresponding HTML character references. */
+void
+md_html_output_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
+{
+    MD_OFFSET beg = 0;      /* Start of the pending run of bytes needing no escape. */
+    MD_OFFSET off = 0;      /* Current scan position. */
-    /* Build map of characters which need escaping. */
-    for(i = 0; i < 256; i++) {
-        unsigned char ch = (unsigned char) i;
+    /* Some characters need to be escaped in normal HTML text. */
+    #define NEED_HTML_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)
-        if(strchr("\"&<>", ch) != NULL)
-            render.escape_map[i] |= NEED_HTML_ESC_FLAG;
+    while(1) {
+        /* Optimization: Use some loop unrolling. */
+        while(off + 3 < size && !NEED_HTML_ESC(data[off+0]) && !NEED_HTML_ESC(data[off+1])
+                             && !NEED_HTML_ESC(data[off+2]) && !NEED_HTML_ESC(data[off+3]))
+            off += 4;
+        while(off < size && !NEED_HTML_ESC(data[off]))
+            off++;
-        if(!ISALNUM(ch) && strchr("~-_.+!*(),%#@?=;:/,+$", ch) == NULL)
-            render.escape_map[i] |= NEED_URL_ESC_FLAG;
+        /* Flush the whole run of bytes which need no escaping in one call. */
+        if(off > beg)
+            md_html_output_verbatim(r, data + beg, off - beg);
+
+        if(off < size) {
+            switch(data[off]) {
+                case '&':   RENDER_VERBATIM(r, "&amp;"); break;
+                case '<':   RENDER_VERBATIM(r, "&lt;"); break;
+                case '>':   RENDER_VERBATIM(r, "&gt;"); break;
+                case '"':   RENDER_VERBATIM(r, "&quot;"); break;
+            }
+            off++;
+        } else {
+            break;
+        }
+        beg = off;
     }
+}
- /* For compatibility with old apps. */
- if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM)
- parser.flags |= MD_FLAG_SKIPBOM;
+/* Write size bytes of data to the output for use inside a URL attribute.
+ * Bytes flagged with NEED_URL_ESC_FLAG in the escape map are percent-encoded
+ * as %XX with upper-case hex digits, except '&' which is written as the HTML
+ * character reference. */
+void
+md_html_output_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
+{
+    static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
+    MD_OFFSET beg = 0;      /* Start of the pending run of bytes needing no escape. */
+    MD_OFFSET off = 0;      /* Current scan position. */
-    return md_parse(input, input_size, &parser, (void*) &render);
-}
+    /* Some characters need to be escaped in URL attributes. */
+    #define NEED_URL_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)
+    while(1) {
+        while(off < size && !NEED_URL_ESC(data[off]))
+            off++;
+        /* Flush the whole run of bytes which need no escaping in one call. */
+        if(off > beg)
+            md_html_output_verbatim(r, data + beg, off - beg);
+
+        if(off < size) {
+            char hex[3];
+
+            switch(data[off]) {
+                case '&':   RENDER_VERBATIM(r, "&amp;"); break;
+                default:
+                    /* The 0xf mask keeps each nibble correct even if the
+                     * char was sign-extended by the conversion. */
+                    hex[0] = '%';
+                    hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf];
+                    hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf];
+                    md_html_output_verbatim(r, hex, 3);
+                    break;
+            }
+            off++;
+        } else {
+            break;
+        }
+
+        beg = off;
+    }
+}
diff --git a/src/md4c-html.h b/src/md4c-html.h
index 15adcb1b..84001d3d 100644
--- a/src/md4c-html.h
+++ b/src/md4c-html.h
@@ -40,11 +40,14 @@
#define MD_HTML_FLAG_XHTML 0x0008
-/* Render Markdown into HTML.
+/* Simple do-it-all function for converting Markdown to HTML.
*
* Note only contents of <body> tag is generated. Caller must generate
* HTML header/footer manually before/after calling md_html().
*
+ * For more control over the conversion (e.g. to customize the output), you may
+ * use the more fine-grained API below.
+ *
* Params input and input_size specify the Markdown input.
* Callback process_output() gets called with chunks of HTML output.
* (Typical implementation may just output the bytes to a file or append to
@@ -61,6 +64,80 @@ int md_html(const MD_CHAR* input, MD_SIZE input_size,
void* userdata, unsigned parser_flags, unsigned renderer_flags);
+/* The functions below provide finer-grained building blocks, which allow an
+ * application to e.g. customize how (some) Markdown syntax constructions are
+ * converted into HTML.
+ *
+ * The call to md_html() above is morally equivalent to this code:
+ *
+ * ``` C
+ * #include "md4c.h"
+ * #include "md4c-html.h"
+ *
+ * int
+ * md_html(const MD_CHAR* input, MD_SIZE input_size,
+ * void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
+ * void* userdata, unsigned parser_flags, unsigned renderer_flags)
+ * {
+ * MD_HTML* mh;
+ * MD_PARSER_v2 p;
+ * int ret;
+ *
+ * mh = md_html_create(process_output, userdata, renderer_flags);
+ * if(mh == NULL)
+ * return -1;
+ *
+ * memset(&p, 0, sizeof(p));
+ * p.abi_version = 2;
+ * p.flags = parser_flags;
+ * p.enter_block = md_html_enter_block;
+ * p.leave_block = md_html_leave_block;
+ * p.enter_span = md_html_enter_span;
+ * p.leave_span = md_html_leave_span;
+ * p.text = md_html_text;
+ * p.debug_log = md_html_debug_log;
+ *
+ * ret = md_parse(input, input_size, (MD_PARSER*) &p, (void*) mh);
+ *
+ * md_html_destroy(mh);
+ * return ret;
+ * }
+ * ```
+ *
+ * This allows an application to implement its own callbacks for md_parse()
+ * which provide custom output e.g. for some block and/or span types, and
+ * call the standard callbacks for block/span types it does not want to
+ * customize.
+ */
+
+/* An opaque structure representing the Markdown-to-HTML converter. */
+typedef struct MD_HTML MD_HTML;
+
+/* Create/destroy the Markdown-to-HTML converter structure. */
+MD_HTML* md_html_create(void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
+ void* userdata, unsigned renderer_flags);
+void md_html_destroy(MD_HTML* mh);
+
+/* Standard HTML callbacks for MD_PARSER.
+ *
+ * (Application can use its own callback and use these functions as "fallback"
+ * for stuff it does not want to customize. In such case the application is
+ * responsible for propagating MD_HTML* returned from md_html_create() as
+ * userdata to these standard callbacks.)
+ */
+int md_html_enter_block(int block_type, void* detail, void* userdata);
+int md_html_leave_block(int block_type, void* detail, void* userdata);
+int md_html_enter_span(int span_type, void* detail, void* userdata);
+int md_html_leave_span(int span_type, void* detail, void* userdata);
+int md_html_text(int text_type, const MD_CHAR* text, MD_SIZE size, void* userdata);
+void md_html_debug_log(const char* msg, void* userdata);
+
+/* Functions to call from custom md_parse() callbacks, to make an output.
+ *
+ * md_html_output_verbatim() writes the text unchanged,
+ * md_html_output_escaped() escapes characters which are special in HTML text,
+ * and md_html_output_url_escaped() escapes characters which need escaping in
+ * URL attributes. */
+void md_html_output_verbatim(MD_HTML* mh, const MD_CHAR* text, MD_SIZE size);
+void md_html_output_escaped(MD_HTML* mh, const MD_CHAR* text, MD_SIZE size);
+void md_html_output_url_escaped(MD_HTML* mh, const MD_CHAR* text, MD_SIZE size);
+
+
#ifdef __cplusplus
} /* extern "C" { */
#endif