Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package ocrs for openSUSE:Factory checked in at 2024-05-20 18:12:53 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ocrs (Old) and /work/SRC/openSUSE:Factory/.ocrs.new.1880 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ocrs" Mon May 20 18:12:53 2024 rev:2 rq:1175075 version:0.7.0 Changes: -------- --- /work/SRC/openSUSE:Factory/ocrs/ocrs.changes 2024-05-16 17:17:03.320010571 +0200 +++ /work/SRC/openSUSE:Factory/.ocrs.new.1880/ocrs.changes 2024-05-20 18:14:24.640417779 +0200 @@ -1,0 +2,10 @@ +Sun May 19 03:49:44 UTC 2024 - Muhammad Akbar Yanuar Mantari <[email protected]> + +- Update to version 0.7.0 + * Updated rten to v0.9.0: This brings a simpler API for loading + models from disk (Model::load_file) and improves performance. + * Updated image crate. This includes a much faster JPEG decoder. + * Re-designed the API for loading images to be easier to use and + more efficient. + +------------------------------------------------------------------- Old: ---- ocrs-ocrs-v0.6.0.tar.gz New: ---- ocrs-ocrs-v0.7.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ocrs.spec ++++++ --- /var/tmp/diff_new_pack.nS9bGJ/_old 2024-05-20 18:14:26.500485395 +0200 +++ /var/tmp/diff_new_pack.nS9bGJ/_new 2024-05-20 18:14:26.504485540 +0200 @@ -22,7 +22,7 @@ %bcond_without test %endif Name: ocrs -Version: 0.6.0 +Version: 0.7.0 Release: 0 Summary: A modern OCR engine written in Rust License: Apache-2.0 AND MIT ++++++ ocrs-ocrs-v0.6.0.tar.gz -> ocrs-ocrs-v0.7.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/.github/dependabot.yml new/ocrs-ocrs-v0.7.0/.github/dependabot.yml --- old/ocrs-ocrs-v0.6.0/.github/dependabot.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/ocrs-ocrs-v0.7.0/.github/dependabot.yml 2024-05-16 10:41:41.000000000 +0200 @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "monthly" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/.github/workflows/ci.yml new/ocrs-ocrs-v0.7.0/.github/workflows/ci.yml --- old/ocrs-ocrs-v0.6.0/.github/workflows/ci.yml 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/.github/workflows/ci.yml 2024-05-16 10:41:41.000000000 +0200 @@ -12,11 +12,14 @@ uses: actions/checkout@v3 - name: Setup rust run: rustup target add wasm32-unknown-unknown + if: ${{ matrix.os == 'ubuntu-latest' }} - name: Cache uses: actions/cache@v3 with: path: | ~/.cargo/bin/ + ~/.cargo/.crates.toml + ~/.cargo/.crates2.json ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ @@ -24,10 +27,12 @@ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - name: Install wasm-bindgen run: cargo install wasm-bindgen-cli --version 0.2.89 + if: ${{ matrix.os == 'ubuntu-latest' }} - name: Build run: cargo build - name: WASM build run: make wasm + if: ${{ matrix.os == 'ubuntu-latest' }} - name: Test run: make test - name: Lint diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/CHANGELOG.md new/ocrs-ocrs-v0.7.0/CHANGELOG.md --- old/ocrs-ocrs-v0.6.0/CHANGELOG.md 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/CHANGELOG.md 2024-05-16 10:41:41.000000000 +0200 @@ -5,6 +5,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.7.0] - 2024-05-16 + +### Breaking changes + +The APIs for loading models and images have changed in this release to make them +more efficient and easier to use. See the updated +[hello_ocr](https://github.com/robertknight/ocrs/blob/main/ocrs/examples/hello_ocr.rs) +example. + +### Changes + + - Updated rten to v0.9.0. This brings a simpler API for loading models from + disk (`Model::load_file`) and improves performance + (https://github.com/robertknight/ocrs/pull/76) + + - Updated image crate. This includes a much faster JPEG decoder + (https://github.com/robertknight/ocrs/pull/58) + + - Re-designed the API for loading images to be easier to use and more + efficient (https://github.com/robertknight/ocrs/pull/56). + ## [0.6.0] - 2024-04-29 - Updated rten to v0.8.0. This fixes a crash on x86-64 CPUs that don't support diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/Cargo.lock new/ocrs-ocrs-v0.7.0/Cargo.lock --- old/ocrs-ocrs-v0.6.0/Cargo.lock 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/Cargo.lock 2024-05-16 10:41:41.000000000 +0200 @@ -22,9 +22,9 @@ [[package]] name = "base64" -version = "0.21.5" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bitflags" @@ -51,6 +51,12 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + +[[package]] name = "cc" version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -66,12 +72,6 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "color_quant" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" - -[[package]] name = "crc32fast" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -119,12 +119,9 @@ [[package]] name = "fastrand" -version = "1.9.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "fdeflate" @@ -196,26 +193,27 @@ [[package]] name = "image" -version = "0.24.7" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f3dfdbdd72063086ff443e297b61695500514b1e41095b6fb9a5ab48a70a711" +checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11" dependencies = [ "bytemuck", "byteorder", - "color_quant", - "jpeg-decoder", - "num-rational", + "image-webp", "num-traits", "png", + "zune-core", + "zune-jpeg", ] [[package]] -name = "instant" -version = "0.1.12" +name = "image-webp" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +checksum = "d730b085583c4d789dfd07fdcf185be59501666a90c97c40162b37e4fdad272d" dependencies = [ - "cfg-if", + "byteorder-lite", + "thiserror", ] [[package]] @@ -225,15 +223,6 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] -name = "jpeg-decoder" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc0000e42512c92e31c2252315bda326620a4e034105e900c98ec492fa077b3e" -dependencies = [ - "rayon", -] - -[[package]] name = "lexopt" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -268,27 +257,6 @@ ] [[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] name = "num-traits" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -299,22 +267,23 @@ [[package]] name = "ocrs" -version = "0.6.0" +version = "0.7.0" dependencies = [ "anyhow", "fastrand", + "image", "lexopt", "rayon", "rten", - "rten-imageio", "rten-imageproc", "rten-tensor", + "thiserror", "wasm-bindgen", ] [[package]] name = "ocrs-cli" -version = "0.6.0" +version = "0.7.0" dependencies = [ "anyhow", "home", @@ -375,9 +344,9 @@ [[package]] name = "rayon" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -385,9 +354,9 @@ [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -409,9 +378,9 @@ [[package]] name = "rten" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed8c84990cfa2d35011d40e0a8f5ad6d1a877dd80f513f04a2a070445cdd82f2" +checksum = "cb9d6d80601e57cab46f477955be6e3be1a4c92ed0aebb3376e1f19d24e83bb1" dependencies = [ "flatbuffers", "libm", @@ -424,39 +393,28 @@ ] [[package]] -name = "rten-imageio" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2cf8a71d80e033c9549a5cfd46353c792017525390130f9e0b5be33bf017e18" -dependencies = [ - "image", - "png", - "rten-tensor", -] - -[[package]] name = "rten-imageproc" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d26fd4e8299e8c9b37affb04836a6d1ac67fee62a157a7b06b3cdc9d9b66e40" +checksum = "529fdef25f8232ebb08fb6cfc785ec97a7fb268bebc4895e36e8750e2bbeaa51" dependencies = [ "rten-tensor", ] [[package]] name = "rten-tensor" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d2541dfaf69014c2e730f8386fc9647ddc0c3381b1fe21ce1640f0ed4f74357" +checksum = "ffa78180a98337a43163e9da8f202120e9ae3b82366cccfb05a5a854e48cd581" dependencies = [ "smallvec", ] [[package]] name = "rten-vecmath" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc89d64420a5b7a7d74e3b5cc9424029a2ce86906cdaed50491c44e6f1a090f8" +checksum = "495f48d459768d61ca37b418f79ac7aac3a707024c79fa49a14dd2c1ad8a2c0e" [[package]] name = "rustc-hash" @@ -475,23 +433,32 @@ [[package]] name = "rustls" -version = "0.21.10" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", "ring", + "rustls-pki-types", "rustls-webpki", - "sct", + "subtle", + "zeroize", ] [[package]] +name = "rustls-pki-types" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "beb461507cee2c2ff151784c52762cf4d9ff6a61f3e80968600ed24fa837fa54" + +[[package]] name = "rustls-webpki" -version = "0.101.7" +version = "0.102.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +checksum = "f3bce581c0dd41bce533ce695a1437fa16a7ab5ac3ccfa99fe1a620a7885eabf" dependencies = [ "ring", + "rustls-pki-types", "untrusted", ] @@ -502,16 +469,6 @@ checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] name = "semver" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -539,9 +496,9 @@ [[package]] name = "serde_json" -version = "1.0.110" +version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fbd975230bada99c8bb618e0c365c2eefa219158d5c6c29610fd09ff1833257" +checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" dependencies = [ "itoa", "ryu", @@ -567,6 +524,12 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] name = "syn" version = "2.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -578,6 +541,26 @@ ] [[package]] +name = "thiserror" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] name = "tinyvec" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -621,15 +604,16 @@ [[package]] name = "ureq" -version = "2.9.1" +version = "2.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cdd25c339e200129fe4de81451814e5228c9b771d57378817d6117cc2b3f97" +checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" dependencies = [ "base64", "flate2", "log", "once_cell", "rustls", + "rustls-pki-types", "rustls-webpki", "url", "webpki-roots", @@ -708,9 +692,12 @@ [[package]] name = "webpki-roots" -version = "0.25.3" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" +checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" +dependencies = [ + "rustls-pki-types", +] [[package]] name = "windows-sys" @@ -843,3 +830,24 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "zeroize" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" + +[[package]] +name = "zune-core" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" + +[[package]] +name = "zune-jpeg" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec866b44a2a1fd6133d363f073ca1b179f438f99e7e5bfb1e33f7181facfe448" +dependencies = [ + "zune-core", +] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs/Cargo.toml new/ocrs-ocrs-v0.7.0/ocrs/Cargo.toml --- old/ocrs-ocrs-v0.6.0/ocrs/Cargo.toml 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs/Cargo.toml 2024-05-16 10:41:41.000000000 +0200 @@ -1,6 +1,6 @@ [package] name = "ocrs" -version = "0.6.0" +version = "0.7.0" edition = "2021" authors = ["Robert Knight"] description = "OCR engine" @@ -10,10 +10,11 @@ [dependencies] anyhow = "1.0.80" -rayon = "1.7.0" -rten = { version = "0.8.0" } -rten-imageproc = { version = "0.8.0" } -rten-tensor = { version = "0.8.0" } +rayon = "1.10.0" +rten = { version = "0.9.0" } +rten-imageproc = { version = "0.9.0" } +rten-tensor = { version = "0.9.0" } +thiserror = "1.0.59" [target.'cfg(target_arch = "wasm32")'.dependencies] # nb. When changing this, make sure the version of wasm-bindgen-cli installed @@ -21,9 +22,13 @@ wasm-bindgen = "0.2.89" [dev-dependencies] -fastrand = "1.9.0" +fastrand = "2.1.0" +image = { version = "0.25.1", default-features = false, features = ["png", "jpeg", "webp"] } lexopt = "0.3.0" -rten-imageio = { version = "0.8.0" } [lib] crate-type = ["lib", "cdylib"] + +[features] +# Use AVX-512 instructions if available. Requires nightly Rust. +avx512 = ["rten/avx512"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs/examples/hello_ocr.rs new/ocrs-ocrs-v0.7.0/ocrs/examples/hello_ocr.rs --- old/ocrs-ocrs-v0.6.0/ocrs/examples/hello_ocr.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs/examples/hello_ocr.rs 2024-05-16 10:41:41.000000000 +0200 @@ -1,11 +1,10 @@ use std::collections::VecDeque; use std::error::Error; -use std::fs; use std::path::PathBuf; -use ocrs::{OcrEngine, OcrEngineParams}; +use ocrs::{ImageSource, OcrEngine, OcrEngineParams}; use rten::Model; -use rten_imageio::read_image; +#[allow(unused)] use rten_tensor::prelude::*; struct Args { @@ -37,22 +36,22 @@ Ok(Args { image }) } -/// Read a file from a path that is relative to the crate root. -fn read_file(path: &str) -> Result<Vec<u8>, std::io::Error> { +/// Given a file path relative to the crate root, return the absolute path. +fn file_path(path: &str) -> PathBuf { let mut abs_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); abs_path.push(path); - fs::read(abs_path) + abs_path } fn main() -> Result<(), Box<dyn Error>> { let args = parse_args()?; // Use the `download-models.sh` script to download the models. - let detection_model_data = read_file("examples/text-detection.rten")?; - let rec_model_data = read_file("examples/text-recognition.rten")?; + let detection_model_path = file_path("examples/text-detection.rten"); + let rec_model_path = file_path("examples/text-recognition.rten"); - let detection_model = Model::load(&detection_model_data)?; - let recognition_model = Model::load(&rec_model_data)?; + let detection_model = Model::load_file(detection_model_path)?; + let recognition_model = Model::load_file(rec_model_path)?; let engine = OcrEngine::new(OcrEngineParams { detection_model: Some(detection_model), @@ -60,13 +59,14 @@ ..Default::default() })?; - // Read image using image-rs library and convert to a - // (channels, height, width) tensor with f32 values in [0, 1]. - let image = read_image(&args.image)?; + // Read image using image-rs library, and convert to RGB if not already + // in that format. + let img = image::open(&args.image).map(|image| image.into_rgb8())?; // Apply standard image pre-processing expected by this library (convert // to greyscale, map range to [-0.5, 0.5]). - let ocr_input = engine.prepare_input(image.view())?; + let img_source = ImageSource::from_bytes(img.as_raw(), img.dimensions())?; + let ocr_input = engine.prepare_input(img_source)?; // Detect and recognize text. If you only need the text and don't need any // layout information, you can also use `engine.get_text(&ocr_input)`, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs/src/detection.rs new/ocrs-ocrs-v0.7.0/ocrs/src/detection.rs --- old/ocrs-ocrs-v0.6.0/ocrs/src/detection.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs/src/detection.rs 2024-05-16 10:41:41.000000000 +0200 @@ -38,7 +38,7 @@ /// Find the minimum-area oriented rectangles containing each connected /// component in the binary mask `mask`. fn find_connected_component_rects( - mask: NdTensorView<i32, 2>, + mask: NdTensorView<bool, 2>, expand_dist: f32, min_area: f32, ) -> Vec<RotatedRect> { @@ -112,13 +112,7 @@ debug: bool, ) -> anyhow::Result<Vec<RotatedRect>> { let text_mask = self.detect_text_pixels(image, debug)?; - let binary_mask = text_mask.map(|prob| { - if *prob > self.params.text_threshold { - 1i32 - } else { - 0 - } - }); + let binary_mask = text_mask.map(|prob| *prob > self.params.text_threshold); // Distance to expand bounding boxes by. This is useful when the model is // trained to assign a positive label to pixels in a smaller area than the @@ -165,22 +159,28 @@ // inputs, within some limits. let pad_bottom = (in_height as i32 - img_height as i32).max(0); let pad_right = (in_width as i32 - img_width as i32).max(0); - let grey_img = if pad_bottom > 0 || pad_right > 0 { - let pads = &[0, 0, 0, 0, 0, 0, pad_bottom, pad_right]; - image.pad(pads.into(), BLACK_VALUE)? - } else { - image.as_dyn().to_tensor() - }; + let image = (pad_bottom > 0 || pad_right > 0) + .then(|| { + let pads = &[0, 0, 0, 0, 0, 0, pad_bottom, pad_right]; + image.pad(pads.into(), BLACK_VALUE) + }) + .transpose()? + .map(|t| t.into_cow()) + .unwrap_or(image.as_dyn().as_cow()); // Resize images to the text detection model's input size. - let resized_grey_img = grey_img.resize_image([in_height, in_width])?; + let image = (image.size(2) != in_height || image.size(3) != in_width) + .then(|| image.resize_image([in_height, in_width])) + .transpose()? + .map(|t| t.into_cow()) + .unwrap_or(image); // Run text detection model to compute a probability mask indicating whether // each pixel is part of a text word or not. let text_mask: Tensor<f32> = self .model .run_one( - (&resized_grey_img).into(), + image.view().into(), if debug { Some(RunOptions { timing: true, @@ -235,7 +235,7 @@ // Expand `r` because `fill_rect` does not set points along the // right/bottom boundary. let expanded = r.adjust_tlbr(0, 0, 1, 1); - fill_rect(mask.view_mut(), expanded, 1); + fill_rect(mask.view_mut(), expanded, true); } let min_area = 100.; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs/src/layout_analysis.rs new/ocrs-ocrs-v0.7.0/ocrs/src/layout_analysis.rs --- old/ocrs-ocrs-v0.6.0/ocrs/src/layout_analysis.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs/src/layout_analysis.rs 2024-05-16 10:41:41.000000000 +0200 @@ -325,7 +325,7 @@ .map(|r| RotatedRect::from_rect(r.to_f32())) .collect(); - let rng = fastrand::Rng::with_seed(1234); + let mut rng = fastrand::Rng::with_seed(1234); rng.shuffle(&mut words); let lines = find_text_lines(&words); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs/src/lib.rs new/ocrs-ocrs-v0.7.0/ocrs/src/lib.rs --- old/ocrs-ocrs-v0.6.0/ocrs/src/lib.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs/src/lib.rs 2024-05-16 10:41:41.000000000 +0200 @@ -2,7 +2,7 @@ use rten::Model; use rten_imageproc::RotatedRect; use rten_tensor::prelude::*; -use rten_tensor::{NdTensor, NdTensorView}; +use rten_tensor::NdTensor; mod detection; mod geom_util; @@ -24,6 +24,7 @@ use preprocess::prepare_image; use recognition::{RecognitionOpt, TextRecognizer}; +pub use preprocess::{DimOrder, ImagePixels, ImageSource, ImageSourceError}; pub use recognition::DecodeMethod; pub use text_items::{TextChar, TextItem, TextLine, TextWord}; @@ -81,10 +82,7 @@ } /// Preprocess an image for use with other methods of the engine. - /// - /// The input `image` should be a CHW tensor with values in the range 0-1 - /// and either 1 (grey), 3 (RGB) or 4 (RGBA) channels. - pub fn prepare_input(&self, image: NdTensorView<f32, 3>) -> anyhow::Result<OcrInput> { + pub fn prepare_input(&self, image: ImageSource) -> anyhow::Result<OcrInput> { Ok(OcrInput { image: prepare_image(image), }) @@ -214,7 +212,7 @@ use rten_tensor::prelude::*; use rten_tensor::{NdTensor, Tensor}; - use super::{OcrEngine, OcrEngineParams}; + use super::{DimOrder, ImageSource, OcrEngine, OcrEngineParams}; /// Generate a dummy CHW input image for OCR processing. /// @@ -268,7 +266,7 @@ ); let model_data = mb.finish(); - Model::load(&model_data).unwrap() + Model::load(model_data).unwrap() } /// Create a fake text recognition model. @@ -328,7 +326,7 @@ mb.add_output(transpose_out); let model_data = mb.finish(); - Model::load(&model_data).unwrap() + Model::load(model_data).unwrap() } /// Return expected word locations for an image generated by @@ -357,7 +355,7 @@ recognition_model: None, ..Default::default() })?; - let input = engine.prepare_input(image.view())?; + let input = engine.prepare_input(ImageSource::from_tensor(image.view(), DimOrder::Chw)?)?; let [chans, height, width] = input.image.shape(); assert_eq!(chans, 1); @@ -376,7 +374,7 @@ recognition_model: None, ..Default::default() })?; - let input = engine.prepare_input(image.view())?; + let input = engine.prepare_input(ImageSource::from_tensor(image.view(), DimOrder::Chw)?)?; let words = engine.detect_words(&input)?; assert_eq!(words.len(), n_words); @@ -418,7 +416,7 @@ recognition_model: Some(fake_recognition_model()), ..Default::default() })?; - let input = engine.prepare_input(image.view())?; + let input = engine.prepare_input(ImageSource::from_tensor(image.view(), DimOrder::Chw)?)?; // Create a dummy input line with a single word which fills the image. let mut line_regions: Vec<Vec<RotatedRect>> = Vec::new(); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs/src/preprocess.rs new/ocrs-ocrs-v0.7.0/ocrs/src/preprocess.rs --- old/ocrs-ocrs-v0.6.0/ocrs/src/preprocess.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs/src/preprocess.rs 2024-05-16 10:41:41.000000000 +0200 @@ -1,31 +1,179 @@ +use std::fmt::Debug; + use rten_tensor::prelude::*; use rten_tensor::{NdTensor, NdTensorView}; +use thiserror::Error; + +/// View of an image's pixels, in either (height, width, channels) or (channels, +/// height, width) order. +pub enum ImagePixels<'a> { + /// Pixel values in the range [0, 1] + Floats(NdTensorView<'a, f32, 3>), + /// Pixel values in the range [0, 255] + Bytes(NdTensorView<'a, u8, 3>), +} + +impl<'a> From<NdTensorView<'a, f32, 3>> for ImagePixels<'a> { + fn from(value: NdTensorView<'a, f32, 3>) -> Self { + ImagePixels::Floats(value) + } +} + +impl<'a> From<NdTensorView<'a, u8, 3>> for ImagePixels<'a> { + fn from(value: NdTensorView<'a, u8, 3>) -> Self { + ImagePixels::Bytes(value) + } +} + +impl<'a> ImagePixels<'a> { + fn shape(&self) -> [usize; 3] { + match self { + ImagePixels::Floats(f) => f.shape(), + ImagePixels::Bytes(b) => b.shape(), + } + } + + /// Return the pixel value at an index as a value in [0, 1]. + fn pixel_as_f32(&self, index: [usize; 3]) -> f32 { + match self { + ImagePixels::Floats(f) => f[index], + ImagePixels::Bytes(b) => b[index] as f32 / 255., + } + } +} + +/// Errors that can occur when creating an [ImageSource]. +#[derive(Error, Clone, Debug, PartialEq)] +pub enum ImageSourceError { + /// The image channel count is not 1 (greyscale), 3 (RGB) or 4 (RGBA). + #[error("channel count is not 1, 3 or 4")] + UnsupportedChannelCount, + /// The image data length is not a multiple of the channel size. + #[error("data length is not a multiple of `width * height`")] + InvalidDataLength, +} + +/// Specifies the order in which pixels are laid out in an image tensor. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum DimOrder { + /// Channels last order. This is the order used by the + /// [image](https://github.com/image-rs/image) crate and HTML Canvas APIs. + Hwc, + /// Channels first order. This is the order used by many machine-learning + /// libraries for image tensors. + Chw, +} + +/// View of an image, for use with +/// [OcrEngine::prepare_input](crate::OcrEngine::prepare_input). +pub struct ImageSource<'a> { + data: ImagePixels<'a>, + order: DimOrder, +} + +impl<'a> ImageSource<'a> { + /// Create an image source from a buffer of pixels in HWC order. + /// + /// An image loaded using the `image` crate can be converted to an + /// [ImageSource] using: + /// + /// ```no_run + /// use ocrs::ImageSource; + /// + /// # fn main() -> Result<(), Box<dyn std::error::Error>> { + /// let image = image::open("image.jpg")?.into_rgb8(); + /// let img_source = ImageSource::from_bytes(image.as_raw(), image.dimensions())?; + /// # Ok(()) + /// # } + /// ``` + pub fn from_bytes( + bytes: &'a [u8], + dimensions: (u32, u32), + ) -> Result<ImageSource<'a>, ImageSourceError> { + let (width, height) = dimensions; + let channel_len = (width * height) as usize; + + if channel_len == 0 { + return Err(ImageSourceError::UnsupportedChannelCount); + } + + if bytes.len() % channel_len != 0 { + return Err(ImageSourceError::InvalidDataLength); + } + + let channels = bytes.len() / channel_len; + Self::from_tensor( + NdTensorView::from_data([height as usize, width as usize, channels], bytes), + DimOrder::Hwc, + ) + } + + /// Create an image source from a tensor of bytes (`u8`) or floats (`f32`), + /// in either channels-first (CHW) or channels-last (HWC) order. + pub fn from_tensor<T>( + data: NdTensorView<'a, T, 3>, + order: DimOrder, + ) -> Result<ImageSource<'a>, ImageSourceError> + where + NdTensorView<'a, T, 3>: Into<ImagePixels<'a>>, + { + let channels = match order { + DimOrder::Hwc => data.size(2), + DimOrder::Chw => data.size(0), + }; + match channels { + 1 | 3 | 4 => Ok(ImageSource { + data: data.into(), + order, + }), + _ => Err(ImageSourceError::UnsupportedChannelCount), + } + } + + /// Return the shape of the image as a `[channels, height, width]` array. + pub(crate) fn shape(&self) -> [usize; 3] { + let shape = self.data.shape(); + + match self.order { + DimOrder::Chw => shape, + DimOrder::Hwc => [shape[2], shape[0], shape[1]], + } + } + + /// Return the pixel from a given channel and spatial coordinate, as a + /// float in [0, 1]. + pub(crate) fn get_pixel(&self, channel: usize, y: usize, x: usize) -> f32 { + let index = match self.order { + DimOrder::Chw => [channel, y, x], + DimOrder::Hwc => [y, x, channel], + }; + self.data.pixel_as_f32(index) + } +} /// The value used to represent fully black pixels in OCR input images /// prepared by [prepare_image]. pub const BLACK_VALUE: f32 = -0.5; -/// Convert a CHW image into a greyscale image. +/// Prepare an image for use with text detection and recognition models. +/// +/// This involves: /// -/// This function is intended to approximately match torchvision's RGB => -/// greyscale conversion when using `torchvision.io.read_image(path, +/// - Converting the pixels to floats +/// - Converting the color format to greyscale +/// - Adding a bias ([BLACK_VALUE]) to the greyscale value +/// +/// The greyscale conversion is intended to approximately match torchvision's +/// RGB => greyscale conversion when using `torchvision.io.read_image(path, /// ImageReadMode.GRAY)`, which is used when training models with greyscale /// inputs. torchvision internally uses libpng's `png_set_rgb_to_gray`. -/// -/// `normalize_pixel` is a function applied to each greyscale pixel value before -/// it is written into the output tensor. -fn greyscale_image<F: Fn(f32) -> f32>( - img: NdTensorView<f32, 3>, - normalize_pixel: F, -) -> NdTensor<f32, 3> { +pub fn prepare_image(img: ImageSource) -> NdTensor<f32, 3> { let [chans, height, width] = img.shape(); assert!( matches!(chans, 1 | 3 | 4), "expected greyscale, RGB or RGBA input image" ); - let mut output = NdTensor::zeros([1, height, width]); - let used_chans = chans.min(3); // For RGBA images, only RGB channels are used let chan_weights: &[f32] = if chans == 1 { &[1.] @@ -35,24 +183,140 @@ &[0.299, 0.587, 0.114] }; - let mut out_lum_chan = output.slice_mut([0]); - + // Ideally we would use `NdTensor::from_fn` here, but explicit loops are + // currently faster. + let mut grey_img = NdTensor::uninit([height, width]); for y in 0..height { for x in 0..width { - let mut pixel = 0.; - for c in 0..used_chans { - pixel += img[[c, y, x]] * chan_weights[c]; + let mut pixel = BLACK_VALUE; + for (chan, weight) in (0..used_chans).zip(chan_weights) { + pixel += img.get_pixel(chan, y, x) * weight } - out_lum_chan[[y, x]] = normalize_pixel(pixel); + grey_img[[y, x]].write(pixel); } } - output + // Safety: We initialized all the pixels. + unsafe { grey_img.assume_init().into_shape([1, height, width]) } } -/// Prepare an image for use with text detection and recognition models. -/// -/// This converts an input CHW image with values in the range 0-1 to a greyscale -/// image with values in the range `BLACK_VALUE` to `BLACK_VALUE + 1`. -pub fn prepare_image(image: NdTensorView<f32, 3>) -> NdTensor<f32, 3> { - greyscale_image(image, |pixel| pixel + BLACK_VALUE) +#[cfg(test)] +mod tests { + use rten_tensor::prelude::*; + use rten_tensor::NdTensor; + + use super::{DimOrder, ImageSource, ImageSourceError}; + + #[test] + fn test_image_source_from_bytes() { + struct Case { + len: usize, + width: u32, + height: u32, + error: Option<ImageSourceError>, + } + + let cases = [ + Case { + len: 100, + width: 10, + height: 10, + error: None, + }, + Case { + len: 50, + width: 10, + height: 10, + error: Some(ImageSourceError::InvalidDataLength), + }, + Case { + len: 8 * 8 * 2, + width: 8, + height: 8, + error: Some(ImageSourceError::UnsupportedChannelCount), + }, + Case { + len: 0, + width: 0, + height: 10, + error: Some(ImageSourceError::UnsupportedChannelCount), + }, + ]; + + for Case { + len, + width, + height, + error, + } in cases + { + let data: Vec<u8> = (0u8..len as u8).collect(); + let source = ImageSource::from_bytes(&data, (width, height)); + assert_eq!(source.as_ref().err(), error.as_ref()); + + if let Ok(source) = source { + let channels = len as usize / (width * height) as usize; + let tensor = + NdTensor::from_data([height as usize, width as usize, channels], data.clone()); + + assert_eq!(source.shape(), tensor.permuted([2, 0, 1]).shape()); + assert_eq!(source.get_pixel(0, 2, 3), tensor[[2, 3, 0]] as f32 / 255.,); + } + } + } + + #[test] + fn test_image_source_from_data() { + struct Case { + shape: [usize; 3], + error: Option<ImageSourceError>, + order: DimOrder, + } + + let cases = [ + Case { + shape: [1, 5, 5], + error: None, + order: DimOrder::Chw, + }, + Case { + shape: [1, 5, 5], + error: Some(ImageSourceError::UnsupportedChannelCount), + order: DimOrder::Hwc, + }, + Case { + shape: [0, 5, 5], + error: Some(ImageSourceError::UnsupportedChannelCount), + order: DimOrder::Chw, + }, + ]; + + for Case { + shape, + error, + order, + } in cases + { + let len: usize = shape.iter().product(); + let tensor = NdTensor::<u8, 1>::arange(0, len as u8, None).into_shape(shape); + let source = ImageSource::from_tensor(tensor.view(), order); + assert_eq!(source.as_ref().err(), error.as_ref()); + + if let Ok(source) = source { + assert_eq!( + source.shape(), + match order { + DimOrder::Chw => tensor.shape(), + DimOrder::Hwc => tensor.permuted([2, 0, 1]).shape(), + } + ); + assert_eq!( + source.get_pixel(0, 2, 3), + match order { + DimOrder::Chw => tensor[[0, 2, 3]] as f32 / 255., + DimOrder::Hwc => tensor[[2, 3, 0]] as f32 / 255., + } + ); + } + } + } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs/src/wasm_api.rs new/ocrs-ocrs-v0.7.0/ocrs/src/wasm_api.rs --- old/ocrs-ocrs-v0.6.0/ocrs/src/wasm_api.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs/src/wasm_api.rs 2024-05-16 10:41:41.000000000 +0200 @@ -1,18 +1,16 @@ use wasm_bindgen::prelude::*; use rten::ops; -use rten::{Model, OpRegistry}; +use rten::{Model, ModelOptions, OpRegistry}; use rten_imageproc::{min_area_rect, BoundingRect, PointF}; use rten_tensor::prelude::*; -use rten_tensor::NdTensorView; -use crate::{OcrEngine as BaseOcrEngine, OcrEngineParams, OcrInput, TextItem}; +use crate::{ImageSource, OcrEngine as BaseOcrEngine, OcrEngineParams, OcrInput, TextItem}; /// Options for constructing an [OcrEngine]. #[wasm_bindgen] pub struct OcrEngineInit { - op_registry: OpRegistry, detection_model: Option<Model>, recognition_model: Option<Model>, } @@ -27,6 +25,13 @@ impl OcrEngineInit { #[wasm_bindgen(constructor)] pub fn new() -> OcrEngineInit { + OcrEngineInit { + detection_model: None, + recognition_model: None, + } + } + + fn op_registry() -> OpRegistry { let mut reg = OpRegistry::new(); // Register all the operators the OCR models currently use. @@ -51,25 +56,25 @@ reg.register_op::<ops::Transpose>(); reg.register_op::<ops::Unsqueeze>(); - OcrEngineInit { - op_registry: reg, - detection_model: None, - recognition_model: None, - } + reg } /// Load a model for text detection. #[wasm_bindgen(js_name = setDetectionModel)] - pub fn set_detection_model(&mut self, data: &[u8]) -> Result<(), String> { - let model = Model::load_with_ops(data, &self.op_registry).map_err(|e| e.to_string())?; + pub fn set_detection_model(&mut self, data: Vec<u8>) -> Result<(), String> { + let model = ModelOptions::with_ops(Self::op_registry()) + .load(data) + .map_err(|e| e.to_string())?; self.detection_model = Some(model); Ok(()) } /// Load a model for text recognition. #[wasm_bindgen(js_name = setRecognitionModel)] - pub fn set_recognition_model(&mut self, data: &[u8]) -> Result<(), String> { - let model = Model::load_with_ops(data, &self.op_registry).map_err(|e| e.to_string())?; + pub fn set_recognition_model(&mut self, data: Vec<u8>) -> Result<(), String> { + let model = ModelOptions::with_ops(Self::op_registry()) + .load(data) + .map_err(|e| e.to_string())?; self.recognition_model = Some(model); Ok(()) } @@ -93,7 +98,6 @@ let OcrEngineInit { detection_model, recognition_model, - op_registry: _op_registry, } = init; let engine = BaseOcrEngine::new(OcrEngineParams { detection_model, @@ -112,24 +116,11 @@ /// API. Supported channel combinations are RGB and RGBA. The number of /// channels is inferred from the length of `data`. #[wasm_bindgen(js_name = loadImage)] - pub fn load_image(&self, width: usize, height: usize, data: &[u8]) -> Result<Image, String> { - let pixels_per_chan = height * width; - let channels = data.len() / pixels_per_chan; - - if ![1, 3, 4].contains(&channels) { - return Err("expected channel count to be 1, 3 or 4".to_string()); - } - - let shape = [height, width, channels]; - if data.len() < shape.iter().product() { - return Err("incorrect data length for image size and channel count".to_string()); - } - - let tensor = NdTensorView::from_data(shape, data) - .permuted([2, 0, 1]) // HWC => CHW - .map(|x| (*x as f32) / 255.); + pub fn load_image(&self, width: u32, height: u32, data: &[u8]) -> Result<Image, String> { + let image_source = + ImageSource::from_bytes(data, (width, height)).map_err(|err| err.to_string())?; self.engine - .prepare_input(tensor.view()) + .prepare_input(image_source) .map(|input| Image { input }) .map_err(|e| e.to_string()) } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs-cli/Cargo.toml new/ocrs-ocrs-v0.7.0/ocrs-cli/Cargo.toml --- old/ocrs-ocrs-v0.6.0/ocrs-cli/Cargo.toml 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs-cli/Cargo.toml 2024-05-16 10:41:41.000000000 +0200 @@ -1,6 +1,6 @@ [package] name = "ocrs-cli" -version = "0.6.0" +version = "0.7.0" edition = "2021" authors = ["Robert Knight"] description = "OCR CLI tool for extracting text from images" @@ -9,19 +9,23 @@ repository = "https://github.com/robertknight/ocrs" [dependencies] -image = { version = "0.24.6", default-features = false, features = ["png", "jpeg", "jpeg_rayon", "webp"] } +image = { version = "0.25.1", default-features = false, features = ["png", "jpeg", "webp"] } png = "0.17.6" -serde_json = "1.0.91" -rten = { version = "0.8.0" } -rten-imageproc = { version = "0.8.0" } -rten-tensor = { version = "0.8.0" } -ocrs = { path = "../ocrs", version = "0.6.0" } +serde_json = "1.0.116" +rten = { version = "0.9.0" } +rten-imageproc = { version = "0.9.0" } +rten-tensor = { version = "0.9.0" } +ocrs = { path = "../ocrs", version = "0.7.0" } lexopt = "0.3.0" -ureq = "2.7.1" +ureq = "2.9.7" url = "2.4.0" home = "0.5.9" anyhow = "1.0.79" +[features] +# Use AVX-512 instructions if available. Requires nightly Rust. +avx512 = ["rten/avx512"] + [[bin]] name = "ocrs" path = "src/main.rs" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs-cli/src/main.rs new/ocrs-ocrs-v0.7.0/ocrs-cli/src/main.rs --- old/ocrs-ocrs-v0.6.0/ocrs-cli/src/main.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs-cli/src/main.rs 2024-05-16 10:41:41.000000000 +0200 @@ -4,7 +4,7 @@ use std::io::BufWriter; use anyhow::{anyhow, Context}; -use ocrs::{DecodeMethod, OcrEngine, OcrEngineParams, OcrInput}; +use ocrs::{DecodeMethod, DimOrder, ImageSource, OcrEngine, OcrEngineParams, OcrInput}; use rten_imageproc::RotatedRect; use rten_tensor::prelude::*; use rten_tensor::{NdTensor, NdTensorView}; @@ -17,26 +17,6 @@ GeneratePngArgs, OutputFormat, }; -/// Read an image from `path` into a CHW tensor. -fn read_image(path: &str) -> anyhow::Result<NdTensor<f32, 3>> { - let input_img = image::open(path)?; - let input_img = input_img.into_rgb8(); - - let (width, height) = input_img.dimensions(); - - let in_chans = 3; - let mut float_img = NdTensor::zeros([in_chans, height as usize, width as usize]); - for c in 0..in_chans { - let mut chan_img = float_img.slice_mut([c]); - for y in 0..height { - for x in 0..width { - chan_img[[y as usize, x as usize]] = input_img.get_pixel(x, y)[c] as f32 / 255.0 - } - } - } - Ok(float_img) -} - /// Write a CHW image to a PNG file in `path`. fn write_image(path: &str, img: NdTensorView<f32, 3>) -> anyhow::Result<()> { let img_width = img.size(2); @@ -293,10 +273,7 @@ ) })?; - // Read image into CHW tensor. - let color_img = read_image(&args.image) - .with_context(|| format!("Failed to read image from {}", &args.image))?; - + // Initialize OCR engine. let engine = OcrEngine::new(OcrEngineParams { detection_model: Some(detection_model), recognition_model: Some(recognition_model), @@ -308,7 +285,23 @@ }, })?; - let ocr_input = engine.prepare_input(color_img.view())?; + // Read image into HWC tensor. + let color_img: NdTensor<u8, 3> = image::open(&args.image) + .map(|image| { + let image = image.into_rgb8(); + let (width, height) = image.dimensions(); + let in_chans = 3; + NdTensor::from_data( + [height as usize, width as usize, in_chans], + image.into_vec(), + ) + }) + .with_context(|| format!("Failed to read image from {}", &args.image))?; + + // Preprocess image for use with OCR engine. + let color_img_source = ImageSource::from_tensor(color_img.view(), DimOrder::Hwc)?; + let ocr_input = engine.prepare_input(color_img_source)?; + if args.text_map || args.text_mask { let text_map = engine.detect_text_pixels(&ocr_input)?; let [height, width] = text_map.shape(); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs-cli/src/models.rs new/ocrs-ocrs-v0.7.0/ocrs-cli/src/models.rs --- old/ocrs-ocrs-v0.6.0/ocrs-cli/src/models.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs-cli/src/models.rs 2024-05-16 10:41:41.000000000 +0200 @@ -87,7 +87,6 @@ ModelSource::Url(url) => download_file(url, None)?, ModelSource::Path(path) => path.into(), }; - let model_bytes = fs::read(model_path)?; - let model = Model::load(&model_bytes)?; + let model = Model::load_file(model_path)?; Ok(model) } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocrs-ocrs-v0.6.0/ocrs-cli/src/output.rs new/ocrs-ocrs-v0.7.0/ocrs-cli/src/output.rs --- old/ocrs-ocrs-v0.6.0/ocrs-cli/src/output.rs 2024-04-29 23:31:34.000000000 +0200 +++ new/ocrs-ocrs-v0.7.0/ocrs-cli/src/output.rs 2024-05-16 10:41:41.000000000 +0200 @@ -102,8 +102,8 @@ /// Arguments for [generate_annotated_png]. pub struct GeneratePngArgs<'a> { - /// Input image as a (channels, height, width) tensor. - pub img: NdTensorView<'a, f32, 3>, + /// Input image as a (height, width, channels) tensor. + pub img: NdTensorView<'a, u8, 3>, /// Lines of text detected by OCR engine. pub line_rects: &'a [Vec<RotatedRect>], @@ -119,7 +119,8 @@ line_rects, text_lines, } = args; - let mut annotated_img = img.to_tensor(); + // HWC u8 => CHW f32 + let mut annotated_img = img.permuted([2, 0, 1]).map(|pixel| *pixel as f32 / 255.0); let mut painter = Painter::new(annotated_img.view_mut()); // Colors chosen from https://www.w3.org/wiki/CSS/Properties/color/keywords. @@ -247,7 +248,7 @@ #[test] fn test_generate_annotated_png() { - let img = NdTensor::zeros([3, 64, 64]); + let img = NdTensor::zeros([64, 64, 3]); let text_lines = &[ Some(TextLine::new(gen_text_chars("line one", 10))), Some(TextLine::new(gen_text_chars("line one", 10))), @@ -266,6 +267,6 @@ let annotated = generate_annotated_png(args); - assert_eq!(annotated.shape(), img.shape()); + assert_eq!(annotated.shape(), img.permuted([2, 0, 1]).shape()); } } ++++++ vendor.tar.zst ++++++ Binary files /var/tmp/diff_new_pack.nS9bGJ/_old and /var/tmp/diff_new_pack.nS9bGJ/_new differ
