#!/usr/bin/env python
"""
Run end-to-end tests of ocrs.

Runs ocrs on a set of image files and compares the extracted text with
expectations in `{imagename}.expected.txt` files.
"""

# Provenance: this script was added by commit
# 48454e8af91ed7a6166b130fb6639c98c9424219 (Robert Knight,
# Wed, 28 Feb 2024 02:02:48 +0000), "Add Python script to run end-to-end tests".
#
# From the commit message: this enables adding new test cases just by adding
# new images + expected text output in `ocrs-cli/test-data/`. Also the Python
# script can more easily include extra info such as the runtime of each test.
#
# The same commit removed the `TMPDIR` variable and the per-image
# `cargo run ... -o` / `diff --ignore-space-change -u` steps from the
# Makefile's `test-e2e` target and replaced them with:
#
#     python tools/test-e2e.py ocrs-cli/test-data/

from argparse import ArgumentParser
import difflib
import os
import re
from subprocess import CalledProcessError, run
import sys
import time


def build_ocrs() -> None:
    """
    Build the ocrs CLI in release mode with cargo.

    Raises `subprocess.CalledProcessError` if the build fails.
    """
    # An argument list (no shell) avoids quoting issues and an extra process.
    run(["cargo", "build", "--release", "-p", "ocrs-cli"], check=True)


def extract_text(image_path: str) -> str:
    """
    Extract text from an image using ocrs.

    Returns everything ocrs wrote to stdout. Raises
    `subprocess.CalledProcessError` if ocrs exits with a non-zero status.
    """
    result = run(
        # We run the binary directly here rather than use `cargo run` as it
        # is slightly faster.
        ["target/release/ocrs", image_path],
        check=True,
        text=True,
        # Decode explicitly so results do not depend on the platform locale.
        encoding="utf-8",
        capture_output=True,
    )
    return result.stdout


# Filenames matching this pattern are treated as test images.
IMAGE_PAT = r"\.(jpeg|jpg|png|webp)$"


def run_tests(test_case_dir: str) -> bool:
    """
    Compare extracted text for image files against expectations.

    Each image file in `test_case_dir` is expected to have an accompanying
    "{image_name}.expected.txt" file. Leading and trailing whitespace is
    ignored when comparing. A missing expectation file, or a non-zero exit
    from ocrs, is reported as a failure of that test case and the remaining
    cases still run.

    Returns True if all test cases passed.
    """
    # `os.listdir` returns entries in arbitrary order; sort so that runs are
    # reproducible and the progress output is stable.
    image_filenames = sorted(
        name for name in os.listdir(test_case_dir) if re.search(IMAGE_PAT, name)
    )
    total = len(image_filenames)

    print(f"Testing {total} images...")

    errors = 0
    for i, fname in enumerate(image_filenames, start=1):
        basename = os.path.splitext(fname)[0]
        expected_path = os.path.join(test_case_dir, f"{basename}.expected.txt")
        try:
            with open(expected_path, encoding="utf-8") as fp:
                expected_text = fp.read().strip()
        except FileNotFoundError:
            print(f"[{i}/{total}] Missing expected output {expected_path} for {fname}")
            errors += 1
            continue

        # Flush so the progress line is visible while ocrs is still running.
        print(f"[{i}/{total}] Testing {fname}", end="", flush=True)
        start = time.perf_counter()
        try:
            text = extract_text(os.path.join(test_case_dir, fname)).strip()
        except CalledProcessError as err:
            print(f" (ocrs exited with status {err.returncode})")
            if err.stderr:
                print(err.stderr.rstrip())
            errors += 1
            continue
        elapsed = time.perf_counter() - start
        print(f" ({elapsed:0.2f}s)")

        if text != expected_text:
            print(f"Actual vs expected mismatch for {fname}")
            # Show what differs, not just that something does.
            diff = difflib.unified_diff(
                expected_text.splitlines(),
                text.splitlines(),
                fromfile=expected_path,
                tofile=f"ocrs output for {fname}",
                lineterm="",
            )
            print("\n".join(diff))
            errors += 1

    if errors != 0:
        print(f"{errors} tests failed")

    return errors == 0


def main() -> int:
    """
    Parse arguments, build ocrs and run the tests.

    Returns the process exit status: 0 if all tests passed, 1 otherwise.
    """
    parser = ArgumentParser(
        description="""
Run end-to-end tests of ocrs.

Runs ocrs on a set of image files and compares the extracted text with
expectations in `{imagename}.expected.txt` files.
"""
    )
    parser.add_argument(
        "dir", help="Directory containing test images and expected outputs"
    )
    args = parser.parse_args()

    # Flush so this line appears before cargo's own output when piped.
    print("Building ocrs...", flush=True)
    build_ocrs()
    passed = run_tests(args.dir)

    return 0 if passed else 1


if __name__ == "__main__":
    sys.exit(main())