Add Markdown (text/markdown) support to the blob parsers.

NOTE(review): confirm that MarkdownParser is exported by
langchain_community.document_loaders.parsers in the pinned release; if it is
not, app.parsing will fail at import time.
NOTE(review): the expected mimetype list in test_parsing.py appears to be kept
in sorted order, so "text/markdown" is inserted between "text/html" and
"text/plain" rather than appended.

diff --git a/backend/app/parsing.py b/backend/app/parsing.py
index 7f719a3d..e18b98ba 100644
--- a/backend/app/parsing.py
+++ b/backend/app/parsing.py
@@ -1,5 +1,9 @@
 """Module contains logic for parsing binary blobs into text."""
-from langchain_community.document_loaders.parsers import BS4HTMLParser, PDFMinerParser
+from langchain_community.document_loaders.parsers import (
+    BS4HTMLParser,
+    MarkdownParser,
+    PDFMinerParser,
+)
 from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
 from langchain_community.document_loaders.parsers.msword import MsWordParser
 from langchain_community.document_loaders.parsers.txt import TextParser
@@ -7,6 +11,7 @@
 HANDLERS = {
     "application/pdf": PDFMinerParser(),
     "text/plain": TextParser(),
+    "text/markdown": MarkdownParser(),
     "text/html": BS4HTMLParser(),
     "application/msword": MsWordParser(),
     "application/vnd.openxmlformats-officedocument.wordprocessingml.document": (
diff --git a/backend/tests/unit_tests/agent_executor/test_parsing.py b/backend/tests/unit_tests/agent_executor/test_parsing.py
index b4a9ee5a..dae3462f 100644
--- a/backend/tests/unit_tests/agent_executor/test_parsing.py
+++ b/backend/tests/unit_tests/agent_executor/test_parsing.py
@@ -15,6 +15,7 @@ def test_list_of_supported_mimetypes() -> None:
         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
         "text/html",
+        "text/markdown",
         "text/plain",
     ]
 
 
diff --git a/backend/tests/unit_tests/agent_executor/test_upload.py b/backend/tests/unit_tests/agent_executor/test_upload.py
index e239ef02..e238c3f8 100644
--- a/backend/tests/unit_tests/agent_executor/test_upload.py
+++ b/backend/tests/unit_tests/agent_executor/test_upload.py
@@ -46,4 +46,5 @@ def test_mimetype_guessing() -> None:
         "sample.pdf": "application/pdf",
         "sample.rtf": "application/rtf",
         "sample.txt": "text/plain",
+        "sample.md": "text/markdown",
     } == name_to_mime
diff --git a/backend/tests/unit_tests/fixtures/sample.md b/backend/tests/unit_tests/fixtures/sample.md
new file mode 100644
index 00000000..77128cfd
--- /dev/null
+++ b/backend/tests/unit_tests/fixtures/sample.md
@@ -0,0 +1,7 @@
+# 🦜️ LangChain
+
+## Heading 2
+
+### Heading 3
+
+#### Heading 4