diff --git a/judge/management/commands/import_polygon_package.py b/judge/management/commands/import_polygon_package.py
index 29f91bea4..41829f546 100644
--- a/judge/management/commands/import_polygon_package.py
+++ b/judge/management/commands/import_polygon_package.py
@@ -23,14 +23,26 @@
from judge.utils.problem_data import ProblemDataCompiler
from judge.views.widgets import django_uploader
-PANDOC_FILTER = """
-function normalize_quote(text)
+PANDOC_FILTER = r"""
+local function normalize_quote(text) -- replaces the four curly quotes below with ASCII quotes
-- These four quotes are disallowed characters.
-- See DMOJ_PROBLEM_STATEMENT_DISALLOWED_CHARACTERS
- text = text:gsub('\\u{2018}', "'") -- left single quote
- text = text:gsub('\\u{2019}', "'") -- right single quote
- text = text:gsub('\\u{201C}', '"') -- left double quote
- text = text:gsub('\\u{201D}', '"') -- right double quote
+ text = text:gsub('\u{2018}', "'") -- left single quote
+ text = text:gsub('\u{2019}', "'") -- right single quote
+ text = text:gsub('\u{201C}', '"') -- left double quote
+ text = text:gsub('\u{201D}', '"') -- right double quote
+ return text -- only the string; the match counts returned by gsub were dropped by the assignments
+end
+
+local function escape_html_content(text)
+ -- Escape HTML (& < >), Markdown (* _) and math-delimiter ($ ~) characters; returns the escaped string
+ text = text:gsub('&', '&amp;') -- must be first, otherwise the entities added below would be re-escaped
+ text = text:gsub('<', "&lt;")
+ text = text:gsub('>', "&gt;")
+ text = text:gsub('%*', '\\*') -- '*' is a pattern quantifier, so match it explicitly as %*
+ text = text:gsub('_', '\\_')
+ text = text:gsub('%$', '&#36;') -- numeric entity; a '%' followed by '$' in the replacement is an error in Lua 5.2+
+ text = text:gsub('~', '&#126;') -- numeric entity, so the character is no longer a literal '~'
return text
end
@@ -42,13 +54,14 @@
function Image(el)
-- And blank lines before and after the image for caption to work
- return {pandoc.RawInline('markdown', '\\n\\n'), el, pandoc.RawInline('markdown', '\\n\\n')}
+ return {pandoc.RawInline('markdown', '\n\n'), el, pandoc.RawInline('markdown', '\n\n')} -- image wrapped by two raw newlines on each side
end
function Code(el)
- -- Normalize quotes
- el.text = normalize_quote(el.text)
- return el
+ -- Normalize quotes and render similar to Codeforces: the escaped text goes inside a raw inline <code> element
+ local text = normalize_quote(el.text)
+ text = escape_html_content(text) -- the text is emitted as raw HTML below, so it must be escaped first
+ return pandoc.RawInline('html', '<code>' .. text .. '</code>')
end
function CodeBlock(el)
@@ -75,25 +88,35 @@
end
function Str(el)
- -- en dash and em dash would still show up correctly if we don't escape them,
+ -- Normalize quotes before splitting, so the pieces built below come from the normalized text
+ el.text = normalize_quote(el.text)
+
+ -- en dash/em dash/non-breaking space would still show up correctly if we don't escape them,
-- but they would be hardly noticeable while editing.
local res = {}
local part = ''
for c in el.text:gmatch(utf8.charpattern) do
- if c == '\\u{2013}' then
+ if c == '\u{2013}' then
-- en dash
if part ~= '' then
table.insert(res, pandoc.Str(part))
part = ''
end
table.insert(res, pandoc.RawInline('html', '&ndash;'))
- elseif c == '\\u{2014}' then
+ elseif c == '\u{2014}' then
-- em dash
if part ~= '' then
table.insert(res, pandoc.Str(part))
part = ''
end
table.insert(res, pandoc.RawInline('html', '&mdash;'))
+ elseif c == '\u{00A0}' then
+ -- Non-breaking space: flush the pending text, then emit the entity as raw HTML
+ if part ~= '' then
+ table.insert(res, pandoc.Str(part))
+ part = ''
+ end
+ table.insert(res, pandoc.RawInline('html', '&nbsp;'))
else
part = part .. c
end
@@ -102,9 +125,6 @@
table.insert(res, pandoc.Str(part))
end
- -- Normalize quotes
- el.text = normalize_quote(el.text)
-
return res
end
@@ -156,16 +176,37 @@
"""
+# Polygon uses some custom macros: https://polygon.codeforces.com/docs/statements-tex-manual
+# For example, \bf is deprecated in modern LaTeX, but Polygon treats it the same as \textbf
+# and recommends writing \bf{...} instead of \textbf{...} for brevity.
+# The same applies to \it, \tt and \t.
+# We just redefine them to their modern counterparts; pandoc_tex_to_markdown() prepends these definitions to its input.
+# Note that this would break {\bf abcd}, but AFAIK Polygon never recommends that so it's fine.
+TEX_MACROS = r"""
+\renewcommand{\bf}{\textbf}
+\renewcommand{\it}{\textit}
+\renewcommand{\tt}{\texttt}
+\renewcommand{\t}{\texttt}
+"""
+
+
def pandoc_tex_to_markdown(tex):
- tmp_dir = tempfile.TemporaryDirectory()
- with open(os.path.join(tmp_dir.name, 'temp.tex'), 'w') as f:
- f.write(tex)
- with open(os.path.join(tmp_dir.name, 'filter.lua'), 'w') as f:
- f.write(PANDOC_FILTER)
- subprocess.run(['pandoc', '--lua-filter=filter.lua', '-t', 'gfm', '-o', 'temp.md', 'temp.tex'], cwd=tmp_dir.name)
- with open(os.path.join(tmp_dir.name, 'temp.md'), 'r') as f:
- md = f.read()
- tmp_dir.cleanup()
+ tex = TEX_MACROS + tex  # prepend the \bf/\it/\tt/\t redefinitions to the input
+ with tempfile.TemporaryDirectory() as tmp_dir:  # the directory is removed when this block exits, even on error
+ with open(os.path.join(tmp_dir, 'temp.tex'), 'w', encoding='utf-8') as f:
+ f.write(tex)
+
+ with open(os.path.join(tmp_dir, 'filter.lua'), 'w', encoding='utf-8') as f:
+ f.write(PANDOC_FILTER)
+
+ subprocess.run(
+ ['pandoc', '--lua-filter=filter.lua', '-t', 'gfm', '-o', 'temp.md', 'temp.tex'],  # file names are relative to cwd
+ cwd=tmp_dir,
+ check=True,  # raise subprocess.CalledProcessError if pandoc exits with a non-zero status
+ )
+
+ with open(os.path.join(tmp_dir, 'temp.md'), 'r', encoding='utf-8') as f:
+ md = f.read()  # read the converted Markdown before the temporary directory is deleted
return md