Skip to content

Commit

Permalink
Add Markdown as a supported RAG MIME type
Browse files Browse the repository at this point in the history
  • Loading branch information
P. Taylor Goetz committed May 25, 2024
1 parent b85eb8d commit cdd9459
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 1 deletion.
3 changes: 2 additions & 1 deletion backend/app/parsing.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Module contains logic for parsing binary blobs into text."""
from langchain_community.document_loaders.parsers import BS4HTMLParser, PDFMinerParser
from langchain_community.document_loaders.parsers import BS4HTMLParser, PDFMinerParser, MarkdownParser
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
from langchain_community.document_loaders.parsers.msword import MsWordParser
from langchain_community.document_loaders.parsers.txt import TextParser

HANDLERS = {
"application/pdf": PDFMinerParser(),
"text/plain": TextParser(),
"text/markdown": MarkdownParser(),
"text/html": BS4HTMLParser(),
"application/msword": MsWordParser(),
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": (
Expand Down
1 change: 1 addition & 0 deletions backend/tests/unit_tests/agent_executor/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def test_list_of_supported_mimetypes() -> None:
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"text/html",
"text/plain",
"text/markdown",
]


Expand Down
1 change: 1 addition & 0 deletions backend/tests/unit_tests/agent_executor/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,5 @@ def test_mimetype_guessing() -> None:
"sample.pdf": "application/pdf",
"sample.rtf": "application/rtf",
"sample.txt": "text/plain",
"sample.md": "text/markdown",
} == name_to_mime
7 changes: 7 additions & 0 deletions backend/tests/unit_tests/fixtures/sample.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# 🦜️ LangChain

## Heading 2

### Heading 3

#### Heading 4

0 comments on commit cdd9459

Please sign in to comment.