Skip to content

Commit

Permalink
Implement flexible parameters and add static constructor
Browse files Browse the repository at this point in the history
  • Loading branch information
mishagp committed Dec 19, 2023
1 parent e78c490 commit 1c9f3a7
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 8 deletions.
37 changes: 31 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,40 @@ on your platform.
use mishagp\OCRmyPDF\OCRmyPDF;

//Return file path of outputted, OCRed PDF
echo (new OCRmyPDF('document.pdf'))->run();
echo OCRmyPDF::make('document.pdf')->run();

//Return file contents of outputted, OCRed PDF
echo (new OCRmyPDF('scannedImage.png'))->setOutputPDFPath(null)->run();
echo OCRmyPDF::make('scannedImage.png')->setOutputPDFPath(null)->run();
```

## API

_This section is a work-in-progress._
### setParam

Define invocation parameters for `ocrmypdf`. See `ocrmypdf --help` for a list of available parameters.

> [!IMPORTANT]
> Parameters configured via `setParam` take precedence over any other parameters or configuration set elsewhere.
```php
use mishagp\OCRmyPDF\OCRmyPDF;

//Passing a single parameter with a value
OCRmyPDF::make('document_zh-CN.pdf')
->setParam('-l', 'chi_sim')
->run();

//Passing a single parameter without a value
OCRmyPDF::make('document_withBackground.pdf')
->setParam('--remove-background')
->run();

//Passing multiple parameters
OCRmyPDF::make('document_withoutAttribution.pdf')
->setParam('--title', 'Lorem Ipsum')
->setParam('--keywords', 'Lorem,Ipsum,dolor,sit,amet')
->run();
```

### setInputData

Expand All @@ -51,7 +76,7 @@ imagepng($img, null, 0);
$size = ob_get_length();
$data = ob_get_clean();

echo (new OCRmyPDF())
OCRmyPDF::make()
->setInputData($data, $size)
->run();
```
Expand All @@ -62,7 +87,7 @@ Specify a writable path where `ocrmypdf` should generate output PDF.

```php
use mishagp\OCRmyPDF\OCRmyPDF;
echo (new OCRmyPDF('document.pdf'))
OCRmyPDF::make('document.pdf')
->setOutputPDFPath('/outputDir/ocr_document.pdf')
->run();
```
Expand All @@ -73,7 +98,7 @@ Define a custom location of the `ocrmypdf` executable, if by any reason it is no

```php
use mishagp\OCRmyPDF\OCRmyPDF;
echo (new OCRmyPDF('document.pdf'))
OCRmyPDF::make('document.pdf')
->setExecutable('/path/to/ocrmypdf')
->run();
```
Expand Down
21 changes: 20 additions & 1 deletion src/Command.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ public function __construct(
public ?string $inputFilePath = null,
public ?string $outputPDFPath = null,
public ?string $tempDir = null,
public ?int $threadLimit = null
public ?int $threadLimit = null,
public array $parameters = []
)
{
}
Expand Down Expand Up @@ -65,7 +66,25 @@ public function __toString(): string
$cmd = [];

$cmd[] = self::escape($this->executable);

if ($this->threadLimit) $cmd[] = "--jobs=$this->threadLimit";

foreach ($this->parameters as $key => $value) {
if ($value !== true) {
$paramKeyValue = $key;
$paramKeyValue .= "='";
if (is_array($value)) {
$paramKeyValue .= join(',', $value);
} else {
$paramKeyValue .= $value;
}
$paramKeyValue .= "'";
$cmd[] = $paramKeyValue;
} else {
$cmd[] = $key;
}
}

$cmd[] = $this->useFileAsInput ? self::escape((string)$this->inputFilePath) : "-";
$cmd[] = $this->useFileAsOutput ? self::escape($this->getOutputPDFPath()) : "-";

Expand Down
21 changes: 20 additions & 1 deletion src/OCRmyPDF.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ public function __construct(string $inputFile = null, Command $command = null)
$this->setInputFile("$inputFile");
}

/**
 * Static constructor: builds a new instance so calls can be chained without
 * wrapping `new OCRmyPDF(...)` in parentheses.
 *
 * Both arguments are forwarded unchanged to the class constructor.
 *
 * @param string|null $inputFile Input file argument for the constructor, or null for none.
 * @param Command|null $command Command argument for the constructor, or null for none.
 * @return self The newly created instance.
 */
public static function make(?string $inputFile = null, ?Command $command = null): self
{
    // Explicit nullable types: relying on a `= null` default to make a type nullable is deprecated in PHP 8.4.
    return new self($inputFile, $command);
}

/**
* @param string $filePath
* @return bool
Expand Down Expand Up @@ -105,7 +110,7 @@ public function run(): string
);
}

$process = new Process("$this->command");
$process = new Process((string)$this->command);

if (!$this->command->useFileAsInput) {
$process->write(
Expand Down Expand Up @@ -156,6 +161,7 @@ public function setInputData(string $inputData, int $inputDataSize): OCRmyPDF

/**
* @return void
* @throws NoWritePermissionsException
*/
private function cleanTempFiles(): void
{
Expand Down Expand Up @@ -194,4 +200,17 @@ public function setOutputPDFPath(string|null $outputPDFPath): self
}
return $this;
}

/**
 * Register an invocation parameter to pass to `ocrmypdf`.
 *
 * The value is stored on the underlying command's `parameters` array keyed by
 * `$param`, so setting the same parameter twice replaces the earlier value.
 * When `$value` is null (or omitted) the parameter is stored as `true`.
 *
 * @param string $param Parameter name; must begin with `-` (this also covers `--`).
 * @param string|string[]|null $value Parameter value(s), or null for a parameter without a value.
 * @return self This instance, for method chaining.
 * @throws InvalidArgumentException When `$param` does not start with `-`.
 */
public function setParam(string $param, null|string|array $value = null): self
{
    // One leading-dash check is enough: every `--long` option also starts with `-`.
    if (!str_starts_with($param, '-')) {
        throw new InvalidArgumentException("Parameter $param must start with a - or --");
    }

    $this->command->parameters[$param] = $value ?? true;

    return $this;
}
}
55 changes: 55 additions & 0 deletions tests/E2E/OCRmyPDFParsesParametersTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

namespace mishagp\OCRmyPDF\Tests\E2E;

use mishagp\OCRmyPDF\OCRmyPDF;
use mishagp\OCRmyPDF\OCRmyPDFException;
use mishagp\OCRmyPDF\UnsuccessfulCommandException;
use PHPUnit\Framework\TestCase;

/**
 * End-to-end tests checking that parameters registered through
 * OCRmyPDF::setParam() reach the `ocrmypdf` invocation.
 */
class OCRmyPDFParsesParametersTest extends TestCase
{
    /**
     * Files created while a test ran; they are removed again in tearDown().
     *
     * @var string[]
     */
    private array $tempFiles = [];

    /**
     * Delete every file registered during the test so repeated runs do not
     * accumulate leftovers in the system temp directory.
     */
    protected function tearDown(): void
    {
        foreach ($this->tempFiles as $path) {
            if (is_file($path)) {
                unlink($path);
            }
        }
        $this->tempFiles = [];

        parent::tearDown();
    }

    /**
     * Build the absolute path of a fixture inside the `examples` directory
     * that sits next to this test file.
     */
    private function exampleFile(string $fileName): string
    {
        return __DIR__ . DIRECTORY_SEPARATOR . "examples" . DIRECTORY_SEPARATOR . $fileName;
    }

    /**
     * Produce a unique `.pdf` path in the system temp directory and register
     * it (and the placeholder used to reserve the name) for cleanup.
     */
    private function newOutputPDFPath(): string
    {
        // tempnam() creates an empty placeholder file to reserve a unique name.
        // Only its basename is reused, so the placeholder itself must be removed later.
        $placeholder = (string)tempnam(sys_get_temp_dir(), 'ocr_');
        $outputPath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . basename($placeholder) . ".pdf";

        if ($placeholder !== '') {
            $this->tempFiles[] = $placeholder;
        }
        $this->tempFiles[] = $outputPath;

        return $outputPath;
    }

    /**
     * A parameter with a value (`--title`) must still yield an output PDF.
     *
     * @throws OCRmyPDFException
     * @throws UnsuccessfulCommandException
     */
    public function testProcess_en_US_doc1_SetTitleParam(): void
    {
        $instance = OCRmyPDF::make($this->exampleFile("en_US_doc1.pdf"))
            ->setOutputPDFPath($this->newOutputPDFPath())
            ->setParam('--title', "Lorem Ipsum");

        $outputPath = $instance->run();
        // Register the returned path as well, in case it differs from the requested one.
        $this->tempFiles[] = $outputPath;

        $this->assertFileExists($outputPath);
        $this->assertFileIsReadable($outputPath);
        $this->assertFileIsWritable($outputPath);
    }

    /**
     * A parameter without a value that `ocrmypdf` does not recognise must
     * surface as an UnsuccessfulCommandException.
     *
     * @throws OCRmyPDFException
     * @throws UnsuccessfulCommandException
     */
    public function testProcess_en_US_doc1_SetInvalidParam(): void
    {
        $this->expectException(UnsuccessfulCommandException::class);

        $instance = OCRmyPDF::make($this->exampleFile("en_US_doc1.pdf"))
            ->setOutputPDFPath($this->newOutputPDFPath())
            ->setParam('--this-is-not-a-valid-param');

        $instance->run();
    }
}

0 comments on commit 1c9f3a7

Please sign in to comment.