Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid unnecessary copies when preparing detection inputs #57

Merged
merged 2 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions ocrs/src/detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use rten_tensor::prelude::*;
use rten_tensor::{NdTensor, NdTensorView, Tensor};

use crate::preprocess::BLACK_VALUE;
use crate::tensor_util::IntoCow;

/// Parameters that control post-processing of text detection model outputs.
#[derive(Clone, Debug, PartialEq)]
Expand Down Expand Up @@ -165,22 +166,28 @@ impl TextDetector {
// inputs, within some limits.
let pad_bottom = (in_height as i32 - img_height as i32).max(0);
let pad_right = (in_width as i32 - img_width as i32).max(0);
let grey_img = if pad_bottom > 0 || pad_right > 0 {
let pads = &[0, 0, 0, 0, 0, 0, pad_bottom, pad_right];
image.pad(pads.into(), BLACK_VALUE)?
} else {
image.as_dyn().to_tensor()
};
let image = (pad_bottom > 0 || pad_right > 0)
.then(|| {
let pads = &[0, 0, 0, 0, 0, 0, pad_bottom, pad_right];
image.pad(pads.into(), BLACK_VALUE)
})
.transpose()?
.map(|t| t.into_cow())
.unwrap_or(image.into_dyn().into_cow());

// Resize images to the text detection model's input size.
let resized_grey_img = grey_img.resize_image([in_height, in_width])?;
let image = (image.size(2) != in_height || image.size(3) != in_width)
.then(|| image.resize_image([in_height, in_width]))
.transpose()?
.map(|t| t.into_cow())
.unwrap_or(image);

// Run text detection model to compute a probability mask indicating whether
// each pixel is part of a text word or not.
let text_mask: Tensor<f32> = self
.model
.run_one(
(&resized_grey_img).into(),
image.view().into(),
if debug {
Some(RunOptions {
timing: true,
Expand Down
2 changes: 2 additions & 0 deletions ocrs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ mod log;
mod preprocess;
mod recognition;

mod tensor_util;

#[cfg(test)]
mod test_util;

Expand Down
37 changes: 37 additions & 0 deletions ocrs/src/tensor_util.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use std::borrow::Cow;

use rten_tensor::prelude::*;
use rten_tensor::{MutLayout, TensorBase};

/// Convert an owned tensor or view into one which uses a [Cow] for storage.
///
/// This is useful for code that wants to conditionally copy a tensor, as this
/// trait can be used to convert either an owned copy or view to the same type.
///
/// Implementors choose the resulting tensor type through the [`IntoCow::Cow`]
/// associated type, so a borrowed view and an owned tensor can both be
/// converted into one common `Cow`-backed type and stored in one variable.
pub trait IntoCow {
/// The `Cow`-backed tensor type produced by [`IntoCow::into_cow`].
type Cow;

/// Consume `self` and return the equivalent tensor with `Cow` storage.
fn into_cow(self) -> Self::Cow;
}

/// Borrowed views become `Cow`-backed tensors that keep borrowing the view's
/// elements (`Cow::Borrowed`).
impl<'a, T, L: MutLayout> IntoCow for TensorBase<T, &'a [T], L>
where
    [T]: ToOwned,
{
    type Cow = TensorBase<T, Cow<'a, [T]>, L>;

    fn into_cow(self) -> Self::Cow {
        // Wrap the view's underlying slice without taking ownership of it.
        let borrowed: Cow<'a, [T]> = Cow::Borrowed(self.non_contiguous_data());

        // NOTE(review): only the shape is forwarded to `from_data`, not the
        // view's strides — presumably callers only pass contiguous views;
        // confirm against call sites.
        TensorBase::from_data(self.shape(), borrowed)
    }
}

/// Owned tensors become `Cow`-backed tensors that own their buffer
/// (`Cow::Owned`), which is why the result can use the `'static` lifetime.
impl<T: Clone + 'static, L: MutLayout> IntoCow for TensorBase<T, Vec<T>, L>
where
    [T]: ToOwned<Owned = Vec<T>>,
{
    type Cow = TensorBase<T, Cow<'static, [T]>, L>;

    fn into_cow(self) -> Self::Cow {
        // The layout has to be cloned first: `into_data` consumes `self`, so
        // the shape can no longer be read from the tensor afterwards.
        let saved_layout = self.layout().clone();
        let owned: Cow<'static, [T]> = Cow::Owned(self.into_data());

        TensorBase::from_data(saved_layout.shape(), owned)
    }
}
Loading