Skip to content

Commit

Permalink
Improved testing code
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ak-Gautam committed Jan 22, 2024
1 parent 2ee1c24 commit 1bec723
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions invoice-extractor/donut_cord/base_cord_v2.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -102,19 +102,19 @@ def is_pdf(data):

# Testing block
# NOTE(review): this is a rendered diff hunk, not runnable code. The +/- markers
# and the indentation were lost in extraction, so the old and new variants of each
# changed line appear back to back. The commit header reports 8 additions and
# 8 deletions, which is consistent with reading each adjacent pair below as
# "old variant first, new variant second" -- confirm against the actual commit.
if __name__ == "__main__":
# Presumably the pre-commit variant: PDF path commented out, image path pointing
# at a local Windows checkout.
#pdf_file_path = "/content/ex_sa_inv.pdf" # Replace with your PDF file path
image_file_path = "C:/Users/aksha/Desktop/temp_index/indexify-extractors/invoice-extractor/donut_cord/ex_inv_th.png" # Replace with your image file path
# Presumably the post-commit variant: both sample paths active under /content.
pdf_file_path = "/content/sa_inv_lst.pdf" # Replace with your PDF file path
image_file_path = "/content/ex_sa_inv.jpg" # Replace with your image file path

# One extractor instance is shared by the PDF run and the image run below.
extractor = DonutBaseV2()

# Presumably the pre-commit variant: the PDF extraction run was disabled.
# with open(pdf_file_path, "rb") as file:
# pdf_data = file.read()
# pdf_content = Content(data=pdf_data)
# pdf_results = extractor.extract([pdf_content], SimpleInvoiceParserInputParams())
# print("PDF Extraction Results:", pdf_results)
# Presumably the post-commit variant: the PDF run is enabled. It reads the file as
# raw bytes, wraps them in Content, passes a one-element list to extract() with
# default-constructed SimpleInvoiceParserInputParams, and prints the result.
with open(pdf_file_path, "rb") as file:
pdf_data = file.read()
pdf_content = Content(data=pdf_data)
pdf_results = extractor.extract([pdf_content], SimpleInvoiceParserInputParams())
print("PDF Extraction Results:", pdf_results)

# Image run: same read -> Content -> extract -> print sequence as the PDF run.
with open(image_file_path, "rb") as file:
image_data = file.read()
# Presumably the pre-commit variant: content_type passed explicitly.
image_content = Content(data=image_data, content_type='image/png') # Include content_type here
# Presumably the post-commit variant: content_type omitted, only the bytes are passed.
image_content = Content(data=image_data)
image_results = extractor.extract([image_content], SimpleInvoiceParserInputParams())
print("Image Extraction Results:", image_results)

0 comments on commit 1bec723

Please sign in to comment.