Skip to content

Commit

Permalink
Merge pull request #34 from robertknight/e2e-python
Browse files Browse the repository at this point in the history
Add Python script to run end-to-end tests
  • Loading branch information
robertknight authored Feb 28, 2024
2 parents 5de95fc + 48454e8 commit d52f6da
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 8 deletions.
9 changes: 1 addition & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
TMPDIR := $(or $(RUNNER_TEMP),/tmp)

.PHONY: build
build:
cargo build
Expand All @@ -26,12 +24,7 @@ test:

.PHONY: test-e2e
test-e2e:
# Simple test case
cargo run --release -p ocrs-cli ocrs-cli/test-data/why-rust.png -o $(TMPDIR)/why-rust.txt
diff --ignore-space-change -u $(TMPDIR)/why-rust.txt ocrs-cli/test-data/why-rust.expected.txt
# Long lines
cargo run --release -p ocrs-cli ocrs-cli/test-data/polar-bears.png -o $(TMPDIR)/polar-bears.txt
diff --ignore-space-change -u $(TMPDIR)/polar-bears.txt ocrs-cli/test-data/polar-bears.expected.txt
python tools/test-e2e.py ocrs-cli/test-data/

.PHONY: wasm
wasm:
Expand Down
88 changes: 88 additions & 0 deletions tools/test-e2e.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python

from argparse import ArgumentParser
import re
import os
from subprocess import run
import sys
import time


def build_ocrs() -> None:
    """Build the `ocrs-cli` package in release mode with cargo.

    Build output is not captured, so cargo writes directly to this
    process's stdout/stderr.

    Raises:
        subprocess.CalledProcessError: If cargo exits with a non-zero status.
        FileNotFoundError: If the `cargo` executable cannot be found.
    """
    # The command is fixed, so pass it as an argument list and skip the
    # intermediate shell entirely.
    run(["cargo", "build", "--release", "-p", "ocrs-cli"], check=True)


def extract_text(image_path: str, ocrs_bin: str = "target/release/ocrs") -> str:
    """Extract text from an image using ocrs.

    Args:
        image_path: Path of the image file passed to ocrs as its only
            argument.
        ocrs_bin: Path of the ocrs executable. The default is the release
            build output, resolved relative to the current working directory.

    Returns:
        Everything the process wrote to stdout, decoded as text.

    Raises:
        subprocess.CalledProcessError: If the process exits with a non-zero
            status. Its captured stderr is echoed to this process's stderr
            first, since the exception message alone does not include it.
    """
    result = run(
        # We run the binary directly here rather than use `cargo run` as it
        # is slightly faster.
        [ocrs_bin, image_path],
        text=True,
        capture_output=True,
    )
    if result.returncode != 0:
        # stderr was captured above; surface it so the failure is diagnosable.
        sys.stderr.write(result.stderr)
        result.check_returncode()
    return result.stdout


# Regex matching file names that end in one of the recognized image
# extensions (case-sensitive).
IMAGE_PAT = r"\.(jpeg|jpg|png|webp)$"


def run_tests(test_case_dir: str) -> bool:
    """
    Compare extracted text for image files against expectations.

    Each image file in `test_case_dir` is expected to have an accompanying
    "{image_name}.expected.txt" file. An image whose expectation file is
    missing is reported and counted as a failure; the remaining images are
    still tested. Leading and trailing whitespace is ignored when comparing.

    Returns True if all test cases passed.
    """
    import difflib

    # `os.listdir` returns entries in arbitrary order; sort so that runs are
    # reproducible and the progress output is stable.
    image_filenames = sorted(
        path for path in os.listdir(test_case_dir) if re.search(IMAGE_PAT, path)
    )
    total = len(image_filenames)

    print(f"Testing {total} images...")

    errors = 0
    for i, fname in enumerate(image_filenames, start=1):
        basename = os.path.splitext(fname)[0]
        expected_path = os.path.join(test_case_dir, f"{basename}.expected.txt")
        try:
            with open(expected_path) as fp:
                expected_text = fp.read().strip()
        except FileNotFoundError:
            print(f"[{i}/{total}] Missing expectation file {expected_path}")
            errors += 1
            continue

        # Flush so the progress line is visible while ocrs is still running,
        # not only once the trailing newline is printed.
        print(f"[{i}/{total}] Testing {fname}", end="", flush=True)
        start = time.perf_counter()
        text = extract_text(os.path.join(test_case_dir, fname)).strip()
        elapsed = time.perf_counter() - start
        print(f" ({elapsed:0.2f}s)")

        if text != expected_text:
            print(f"Actual vs expected mismatch for {fname}")
            # Show what differs so a failure can be diagnosed from the log.
            diff_lines = difflib.unified_diff(
                expected_text.splitlines(),
                text.splitlines(),
                fromfile=expected_path,
                tofile="actual",
                lineterm="",
            )
            print("\n".join(diff_lines))
            errors += 1

    if errors != 0:
        print(f"{errors} tests failed")

    return errors == 0


def main() -> int:
    """Parse the command line, build ocrs and run the end-to-end tests.

    Returns:
        The process exit status: 0 if every test case passed, 1 otherwise.
        An invalid `dir` argument exits through argparse's usage error
        instead of returning.
    """
    parser = ArgumentParser(
        description="""
Run end-to-end tests of ocrs.
Runs ocrs on a set of image files and compares the extracted text with
expectations in `{imagename}.expected.txt` files.
"""
    )
    parser.add_argument("dir", help="Directory containing test images and expected outputs")
    args = parser.parse_args()

    # Fail fast on a bad path instead of discovering it after the build.
    if not os.path.isdir(args.dir):
        parser.error(f"not a directory: {args.dir}")

    print("Building ocrs...")
    build_ocrs()
    passed = run_tests(args.dir)

    return 0 if passed else 1


# Only run when executed as a script, so loading the module has no side
# effects.
if __name__ == "__main__":
    sys.exit(main())

0 comments on commit d52f6da

Please sign in to comment.