diff --git a/app.properties b/app.properties index d1f4231..15503f4 100644 --- a/app.properties +++ b/app.properties @@ -1,4 +1,4 @@ -#Thu Nov 23 02:43:23 HST 2023 +#Sun Apr 28 04:53:24 HST 2024 recName=ch_PP-OCRv3_rec_infer model=model keysName=ppocr_keys_v1.txt diff --git a/docs/.vuepress/config.js b/docs/.vuepress/config.js deleted file mode 100644 index 86aaa48..0000000 --- a/docs/.vuepress/config.js +++ /dev/null @@ -1,41 +0,0 @@ -// 引入JSON文件 -const sidebarCn = require('./sidebar-cn.json'); -const sidebarEn = require('./sidebar-en.json'); -const navEn = require('./nav-en.json'); -module.exports = { - base: '/tools-ocr/', - title: 'Tools OCR', - description: 'Tools OCR', - head: [ - ["link", { - rel: "icon", - href: '/favicon.ico' - }], - ["meta", { - name: "author", - content: "litongjava@qq.com,jfinal@qq.com" - }], - ["meta", { - name: "keywords", - content: "tools-ocr,ocr tools" - }], - ["script", { - "crossorigin": "anonymous", - async: true, - src: "" - }], - ], - - markdown: { - lineNumbers: true - }, - themeConfig: { - logo: '/jfinallogo.png', - lastUpdated: 'Last Updated', // string | boolean,K - nav: navEn, - sidebar: { - '/cn/': sidebarCn, - '/en/': sidebarEn - } - }, -} \ No newline at end of file diff --git a/docs/.vuepress/nav-en.json b/docs/.vuepress/nav-en.json deleted file mode 100644 index b9dee82..0000000 --- a/docs/.vuepress/nav-en.json +++ /dev/null @@ -1,31 +0,0 @@ -[{ - "text": "Sources", - "ariaLabel": "Sources Menu", - "items": [{ - "text": "Gitee", - "link": "https://gitee.com/jfinal/jfinal" - }, - { - "text": "Github", - "link": "https://github.com/jfinal/jfinal" - } - ] - }, - { - "text": "Languages", - "ariaLabel": "Language Menu", - "items": [{ - "text": "Chinese", - "link": "/zh/1 快速上手/1.0 快速上手.md" - }, - { - "text": "English", - "link": "/en/1 Quick Start/1.0 Quick Start.md" - } - ] - }, - { - "text": "About", - "link": "/about/" - } -] \ No newline at end of file diff --git a/docs/.vuepress/sidebar-cn.json 
b/docs/.vuepress/sidebar-cn.json deleted file mode 100644 index c7085d9..0000000 --- a/docs/.vuepress/sidebar-cn.json +++ /dev/null @@ -1,185 +0,0 @@ -[{ - "title": "1 快速上手", - "collapsable": false, - "children": [ - "1 快速上手/1.0 快速上手.md", - "1 快速上手/1.1 Maven 基础.md", - "1 快速上手/1.2 jfinal-undertow 下开发.md", - "1 快速上手/1.3 jfinal-undertow 下部署.md", - "1 快速上手/1.4 jfinal-undertow 高级用法.md", - "1 快速上手/1.5 jfinal-undertow 常见问题.md", - "1 快速上手/1.6 jetty-server 下开发.md", - "1 快速上手/1.7 tomcat 下部署.md", - "1 快速上手/1.8 非 maven 方式开发.md", - "1 快速上手/1.9 IDEA下开发.md", - "1 快速上手/1.10 JBolt 插件下开发.md", - "1 快速上手/1.11 特别声明.md" - ] - }, - { - "title": "2 JFinalConfig", - "collapsable": false, - "children": [ - "2 JFinalConfig/2.1 概述.md", - "2 JFinalConfig/2.2 configConstant.md", - "2 JFinalConfig/2.3 configRoute.md", - "2 JFinalConfig/2.4 configEngine.md", - "2 JFinalConfig/2.5 configPlugin.md", - "2 JFinalConfig/2.6 configInterceptor.md", - "2 JFinalConfig/2.7 configHandler.md", - "2 JFinalConfig/2.8 onStart and onStop 回调配置.md", - "2 JFinalConfig/2.9 PropKit 读取配置.md" - ] - }, - { - "title": "3 Controller", - "collapsable": false, - "children": [ - "3 Controller/3.1 概述.md", - "3 Controller/3.2 Action.md", - "3 Controller/3.3 Action 参数注入.md", - "3 Controller/3.4 get & getPara 系列方法.md", - "3 Controller/3.5 getBean & getModel 系列.md", - "3 Controller/3.6 set & setAttr 方法.md", - "3 Controller/3.7 render 方法.md", - "3 Controller/3.8 renderFile 文件下载.md", - "3 Controller/3.9 renderQrCode 二维码生成.md", - "3 Controller/3.10 session 操作.md", - "3 Controller/3.11 getFile 文件上传.md", - "3 Controller/3.12 keep 系方法.md" - ] - }, - { - "title": "4 AOP", - "collapsable": false, - "children": [ - "4 AOP/4.1 概述.md", - "4 AOP/4.2 Interceptor.md", - "4 AOP/4.3 Before.md", - "4 AOP/4.4 Clear.md", - "4 AOP/4.5 Inject 依赖注入.md", - "4 AOP/4.6 Aop 工具.md", - "4 AOP/4.7 Routes 级别拦截器.md", - "4 AOP/4.8 Proxy 动态代理.md" - ] - }, - { - "title": "5 ActiveRecord", - "collapsable": false, - "children": [ - "5 ActiveRecord/5.1 概述.md", - "5 
ActiveRecord/5.2 ActiveRecordPlugin.md", - "5 ActiveRecord/5.3 Model.md", - "5 ActiveRecord/5.4 生成器与 JavaBean.md", - "5 ActiveRecord/5.5 独创Db Record模式.md", - "5 ActiveRecord/5.6 paginate 分页.md", - "5 ActiveRecord/5.7 数据库事务处理.md", - "5 ActiveRecord/5.8 Cache 缓存.md", - "5 ActiveRecord/5.9 Dialect多数据库支持.md", - "5 ActiveRecord/5.10 表关联操作.md", - "5 ActiveRecord/5.11 复合主键.md", - "5 ActiveRecord/5.12 Oracle支持.md", - "5 ActiveRecord/5.13 Enjoy SQL 模板.md", - "5 ActiveRecord/5.14 多数据源支持.md", - "5 ActiveRecord/5.15 独立使用 ActiveRecord.md", - "5 ActiveRecord/5.16 调用存储过程.md" - ] - }, - { - "title": "6 Enjoy 模板引擎", - "collapsable": false, - "children": [ - "6 Enjoy 模板引擎/6.1 概述.md", - "6 Enjoy 模板引擎/6.2 引擎配置.md", - "6 Enjoy 模板引擎/6.3 表达式.md", - "6 Enjoy 模板引擎/6.4 指令.md", - "6 Enjoy 模板引擎/6.5 注释.md", - "6 Enjoy 模板引擎/6.6 原样输出.md", - "6 Enjoy 模板引擎/6.7 Shared Method 扩展.md", - "6 Enjoy 模板引擎/6.8 Shared Object扩展.md", - "6 Enjoy 模板引擎/6.9 Extension Method扩展.md", - "6 Enjoy 模板引擎/6.10 Spring boot 整合.md", - "6 Enjoy 模板引擎/6.11 独立使用 Enjoy.md" - ] - }, - { - "title": "7 EhCachePlugin", - "collapsable": false, - "children": [ - "7 EhCachePlugin/7.1 概述.md", - "7 EhCachePlugin/7.2 EhCachePlugin.md", - "7 EhCachePlugin/7.3 CacheInterceptor.md", - "7 EhCachePlugin/7.4 EvictInterceptor.md", - "7 EhCachePlugin/7.5 CacheKit.md", - "7 EhCachePlugin/7.6 ehcache.xml简介.md" - ] - }, - { - "title": "8 RedisPlugin", - "collapsable": false, - "children": [ - "8 RedisPlugin/8.1 概述.md", - "8 RedisPlugin/8.2 RedisPlugin.md", - "8 RedisPlugin/8.3 Redis与Cache.md", - "8 RedisPlugin/8.4 非web环境使用RedisPlugin.md" - ] - }, - { - "title": "9 Cron4jPlugin", - "collapsable": false, - "children": [ - "9 Cron4jPlugin/9.1 概述.md", - "9 Cron4jPlugin/9.2 Cron4jPlugin.md", - "9 Cron4jPlugin/9.3 使用外部配置文件.md", - "9 Cron4jPlugin/9.4 高级用法.md" - ] - }, - { - "title": "10 Validator", - "collapsable": false, - "children": [ - "10 Validator/10.1 概述.md", - "10 Validator/10.2 Validator.md", - "10 Validator/10.3 Validator配置.md" - ] - }, - { - 
"title": "11 国际化", - "collapsable": false, - "children": [ - "11 国际化/11.1 概述.md", - "11 国际化/11.2 I18n与Res.md", - "11 国际化/11.3 I18nInterceptor.md" - ] - }, - { - "title": "12 Json 转换", - "collapsable": false, - "children": [ - "12 Json 转换/12.1 概述.md", - "12 Json 转换/12.2 Json 配置.md", - "12 Json 转换/12.3 Json 的四个实现.md", - "12 Json 转换/12.4 Json 转换用法.md" - ] - }, - { - "title": "13 JFinal架构及扩展", - "collapsable": false, - "children": [ - "13 JFinal架构及扩展/13.1 概述.md", - "13 JFinal架构及扩展/13.2 架构.md" - ] - }, - { - "title": "14 升级JFinal", - "collapsable": false, - "children": [ - "14 升级JFinal/14.1 极速升级.md", - "14 升级JFinal/14.2 Ret.md", - "14 升级JFinal/14.3 configEngine.md", - "14 升级JFinal/14.4 baseViewPath.md", - "14 升级JFinal/14.5 RenderFactory.md", - "14 升级JFinal/14.6 其它.md" - ] - } -] \ No newline at end of file diff --git a/docs/.vuepress/sidebar-en.json b/docs/.vuepress/sidebar-en.json deleted file mode 100644 index 6624bdb..0000000 --- a/docs/.vuepress/sidebar-en.json +++ /dev/null @@ -1,186 +0,0 @@ -[ - { - "title": "1 Quick Start", - "collapsable": false, - "children": [ - "1 Quick Start/1.0 Quick Start.md", - "1 Quick Start/1.1 Basics of Maven.md", - "1 Quick Start/1.2 Development under jfinal-undertow.md", - "1 Quick Start/1.3 Deployment under jfinal-undertow.md", - "1 Quick Start/1.4 Advanced usage of jfinal-undertow.md", - "1 Quick Start/1.5 Common issues with jfinal-undertow.md", - "1 Quick Start/1.6 Development under jetty-server.md", - "1 Quick Start/1.7 Deployment under tomcat.md", - "1 Quick Start/1.8 Development without Maven.md", - "1 Quick Start/1.9 Development under IDEA.md", - "1 Quick Start/1.10 Development under JBolt plugin.md", - "1 Quick Start/1.11 Special Statement.md" - ] - }, - { - "title": "2 JFinalConfig", - "collapsable": false, - "children": [ - "2 JFinalConfig/2.1 Overview.md", - "2 JFinalConfig/2.2 configConstant.md", - "2 JFinalConfig/2.3 configRoute.md", - "2 JFinalConfig/2.4 configEngine.md", - "2 JFinalConfig/2.5 configPlugin.md", - "2 
JFinalConfig/2.6 configInterceptor.md", - "2 JFinalConfig/2.7 configHandler.md", - "2 JFinalConfig/2.8 onStart and onStop callback configuration.md", - "2 JFinalConfig/2.9 PropKit configuration reading.md" - ] - }, - { - "title": "3 Controller", - "collapsable": false, - "children": [ - "3 Controller/3.1 Overview.md", - "3 Controller/3.2 Action.md", - "3 Controller/3.3 Action parameter injection.md", - "3 Controller/3.4 get & getPara series methods.md", - "3 Controller/3.5 getBean & getModel series.md", - "3 Controller/3.6 set & setAttr methods.md", - "3 Controller/3.7 render method.md", - "3 Controller/3.8 renderFile file download.md", - "3 Controller/3.9 renderQrCode QR code generation.md", - "3 Controller/3.10 session operations.md", - "3 Controller/3.11 getFile file upload.md", - "3 Controller/3.12 keep series methods.md" - ] - }, - { - "title": "4 AOP", - "collapsable": false, - "children": [ - "4 AOP/4.1 Overview.md", - "4 AOP/4.2 Interceptor.md", - "4 AOP/4.3 Before.md", - "4 AOP/4.4 Clear.md", - "4 AOP/4.5 Inject dependency injection.md", - "4 AOP/4.6 Aop tool.md", - "4 AOP/4.7 Routes level interceptor.md", - "4 AOP/4.8 Proxy dynamic proxy.md" - ] - }, - { - "title": "5 ActiveRecord", - "collapsable": false, - "children": [ - "5 ActiveRecord/5.1 Overview.md", - "5 ActiveRecord/5.2 ActiveRecordPlugin.md", - "5 ActiveRecord/5.3 Model.md", - "5 ActiveRecord/5.4 Generator & JavaBean.md", - "5 ActiveRecord/5.5 Original Db Record mode.md", - "5 ActiveRecord/5.6 paginate pagination.md", - "5 ActiveRecord/5.7 Database transaction handling.md", - "5 ActiveRecord/5.8 Cache caching.md", - "5 ActiveRecord/5.9 Dialect multiple database support.md", - "5 ActiveRecord/5.10 Table association operations.md", - "5 ActiveRecord/5.11 Composite primary key.md", - "5 ActiveRecord/5.12 Oracle support.md", - "5 ActiveRecord/5.13 Enjoy SQL template.md", - "5 ActiveRecord/5.14 Multi-data source support.md", - "5 ActiveRecord/5.15 Use ActiveRecord independently.md", - "5 
ActiveRecord/5.16 Call stored procedure.md" - ] - }, - { - "title": "6 Enjoy template engine", - "collapsable": false, - "children": [ - "6 Enjoy template engine/6.1 Overview.md", - "6 Enjoy template engine/6.2 Engine configuration.md", - "6 Enjoy template engine/6.3 Expression.md", - "6 Enjoy template engine/6.4 Directive.md", - "6 Enjoy template engine/6.5 Comment.md", - "6 Enjoy template engine/6.6 Raw output.md", - "6 Enjoy template engine/6.7 Shared Method extension.md", - "6 Enjoy template engine/6.8 Shared Object extension.md", - "6 Enjoy template engine/6.9 Extension Method extension.md", - "6 Enjoy template engine/6.10 Spring boot integration.md", - "6 Enjoy template engine/6.11 Use Enjoy independently.md" - ] - }, - { - "title": "7 EhCachePlugin", - "collapsable": false, - "children": [ - "7 EhCachePlugin/7.1 Overview.md", - "7 EhCachePlugin/7.2 EhCachePlugin.md", - "7 EhCachePlugin/7.3 CacheInterceptor.md", - "7 EhCachePlugin/7.4 EvictInterceptor.md", - "7 EhCachePlugin/7.5 CacheKit.md", - "7 EhCachePlugin/7.6 Introduction to ehcache.xml.md" - ] - }, - { - "title": "8 RedisPlugin", - "collapsable": false, - "children": [ - "8 RedisPlugin/8.1 Overview.md", - "8 RedisPlugin/8.2 RedisPlugin.md", - "8 RedisPlugin/8.3 Redis and Cache.md", - "8 RedisPlugin/8.4 Use RedisPlugin in non-web environments.md" - ] - }, - { - "title": "9 Cron4jPlugin", - "collapsable": false, - "children": [ - "9 Cron4jPlugin/9.1 Overview.md", - "9 Cron4jPlugin/9.2 Cron4jPlugin.md", - "9 Cron4jPlugin/9.3 Use external configuration file.md", - "9 Cron4jPlugin/9.4 Advanced usage.md" - ] - }, - { - "title": "10 Validator", - "collapsable": false, - "children": [ - "10 Validator/10.1 Overview.md", - "10 Validator/10.2 Validator.md", - "10 Validator/10.3 Validator configuration.md" - ] - }, - { - "title": "11 Internationalization", - "collapsable": false, - "children": [ - "11 Internationalization/11.1 Overview.md", - "11 Internationalization/11.2 I18n & Res.md", - "11 
Internationalization/11.3 I18nInterceptor.md" - ] - }, - { - "title": "12 Json conversion", - "collapsable": false, - "children": [ - "12 Json conversion/12.1 Overview.md", - "12 Json conversion/12.2 Json configuration.md", - "12 Json conversion/12.3 Four implementations of Json.md", - "12 Json conversion/12.4 Json conversion usage.md" - ] - }, - { - "title": "13 JFinal architecture and extensions", - "collapsable": false, - "children": [ - "13 JFinal architecture and extensions/13.1 Overview.md", - "13 JFinal architecture and extensions/13.2 Architecture.md" - ] - }, - { - "title": "14 Upgrade JFinal", - "collapsable": false, - "children": [ - "14 Upgrade JFinal/14.1 Rapid upgrade.md", - "14 Upgrade JFinal/14.2 Ret.md", - "14 Upgrade JFinal/14.3 configEngine.md", - "14 Upgrade JFinal/14.4 baseViewPath.md", - "14 Upgrade JFinal/14.5 RenderFactory.md", - "14 Upgrade JFinal/14.6 Others.md" - ] - } -] \ No newline at end of file diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 3c01da3..0000000 --- a/docs/README.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -home: true -heroImage: /hero.png -heroText: Hero 标题 -tagline: Hero 副标题 -actionText: 快速上手 → -actionLink: /cn/guide/ -features: -- title: 简洁至上 - details: 以 Markdown 为中心的项目结构,以最少的配置帮助你专注于写作。 -- title: Vue驱动 - details: 享受 Vue + webpack 的开发体验,在 Markdown 中使用 Vue 组件,同时可以使用 Vue 来开发自定义主题。 -- title: 高性能 - details: VuePress 为每个页面预渲染生成静态的 HTML,同时在页面被加载的时候,将作为 SPA 运行。 -footer: MIT Licensed | Copyright © 2018-present Evan You ---- \ No newline at end of file diff --git a/docs/about/README.md b/docs/about/README.md deleted file mode 100644 index d50aab8..0000000 --- a/docs/about/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# about -litongjava(litongjava@qq.com) \ No newline at end of file diff --git "a/docs/cn/1 \345\277\253\351\200\237\344\270\212\346\211\213/1.0 \345\277\253\351\200\237\344\270\212\346\211\213.md" "b/docs/cn/1 \345\277\253\351\200\237\344\270\212\346\211\213/1.0 
\345\277\253\351\200\237\344\270\212\346\211\213.md" deleted file mode 100644 index 8d9a077..0000000 --- "a/docs/cn/1 \345\277\253\351\200\237\344\270\212\346\211\213/1.0 \345\277\253\351\200\237\344\270\212\346\211\213.md" +++ /dev/null @@ -1,113 +0,0 @@ -# 快速上手 -## 代码结构 -这是一个典型的 Maven 项目结构,其中: - -- `pom.xml`: 这是 Maven 的项目对象模型文件,它包含了项目的依赖、插件和其他配置信息。 -- `src`: 这是源代码目录,通常包含主代码 (`src/main`) 和测试代码 (`src/test`)。 - - - -从 `pom.xml` 文件的部分内容中, - -1. 项目的 `groupId` 是 `com.luooqi`,而 `artifactId` 是 `tool-ocr`。这意味着该项目是由 `luooqi` 开发的一个名为 `tool-ocr` 的项目。 -2. 项目的版本信息由一个属性 `soft.version` 定义,其值为 `1.2.6`。 -3. 项目有几个依赖项,包括但不限于: - - `jnativehook`:可能与键盘和鼠标挂钩有关,允许应用程序全局监听键盘和鼠标事件。 - - `hutool-all`:Hutool 是一个 Java 工具包,包含了一些常用的 Java 功能模块。 - - `imgscalr-lib`:这是一个简单的 Java 图片缩放库。 - -项目的 `src` 目录结构如下: - -1. **资源文件**: - - `main/deploy/package/macosx/` 和 `main/deploy/package/windows/`:这些似乎是针对不同操作系统的应用程序图标。 - - `main/resources/`:包含了各种资源文件,如CSS、字体、图片等。 - -2. **Java 文件**: - - `com.benjaminwan.ocrlibrary`:这个包似乎包含与OCR处理相关的类。 - - `com.luooqi.ocr`:这是应用程序的主要包,其中 `MainFm.java` 可能是主类。 - - `com.luooqi.ocr.controller`:包含应用程序的控制器类。 - - `com.luooqi.ocr.local`:可能包含本地OCR功能的类。 - - `com.luooqi.ocr.model`:包含应用程序的数据模型。 - - `com.luooqi.ocr.snap`:似乎与屏幕截图功能有关。 - - `com.luooqi.ocr.utils`:包含各种实用程序类。 - -3. **测试文件**: - - `test/java/com/luooqi/ocr/utils/OcrUtilsTest.java`:一个针对 `OcrUtils` 类的测试。 - -## 使用javafx-maven-plugin 打包应用程序 -### javafx-maven-plugin简介 -javafx-maven-plugin 插件。这个插件为 JavaFX 项目提供了便捷的构建和打包工具。 -### 使用javafx-maven-plugin生成native -``` -set JAVA_HOME=D:\dev_program\java\jdk1.8.0_121 -mvn jfx:native -``` - -## 使用JavaFX 的 jpackage 工具 打包应用程序 -关于如何将其打包为 `.exe`,步骤大致如下: - -1. 使用Maven构建项目并生成JAR文件(java 8)。 -2. 使用JavaFX的jpackage工具打包JAR文件为EXE文件 (java 14)。 - -`jpackage` 是 Java 14 及更高版本中提供的一个实验工具,用于为 Java 应用程序创建本地包。以下是如何使用 `jpackage` 为 JavaFX 应用程序创建一个 Windows `.exe` 文件的基本步骤: - -### 1. 
准备工作 -- 安装Java 8 和 Java 14 -- 确保您的 JDK 版本是 14 或更高版本,并且已经包含 `jpackage`。 -- 安装.NET SDK 6,下载地址https://dotnet.microsoft.com/en-us/download/dotnet/thank-you/sdk-6.0.317-windows-x64-installer -- 从 https://github.com/wixtoolset/wix3/releases 下载 WiX 3.0 或更高版本,然后将其添加到 PATH。 -### 安装wix3 -WiX Toolset 进行了一些变化,并且它们开始提供一个 .NET Core 工具,这可能是您使用的安装方法。但对于 `jpackage`,您需要的是传统的 WiX Toolset,它包含 `light.exe` 和 `candle.exe`。 - -请按照以下步骤操作: - -1. **访问 WiX Toolset 的 Releases 页面**: - - [WiX Toolset Releases](https://github.com/wixtoolset/wix3/releases) - -2. **下载并安装 WiX Toolset**: - - 在 Releases 页面,找到最新的稳定版本。 - - 下载 `.exe` 安装程序或 `.zip` 归档文件。 - - 如果下载了 `.exe` 安装程序,直接运行它以安装。如果下载了 `.zip` 归档文件,解压它到一个适当的目录。 - -3. **将 WiX Toolset 添加到 PATH**: - - 找到 WiX Toolset 的安装目录或您解压 `.zip` 文件的目录。确保这个目录下有 `bin` 子目录,并且其中包含 `light.exe` 和 `candle.exe`。 - - 将这个 `bin` 子目录添加到您的系统 `PATH`。 - -4. **重新运行 jpackage 命令**。 - -完成这些步骤后,您应该能够使用 `jpackage` 正确地打包您的应用程序为 `.exe` 文件。 -### 3. 创建 JavaFX JAR - -首先,您需要使用 Maven 构建项目并生成一个可执行的 JAR 文件。在项目根目录中执行以下命令: - -```bash -set JAVA_HOME=D:\dev_program\java\jdk1.8.0_121 -mvn clean package -DskipTests -``` - -确保 JAR 文件包含所有必要的依赖项并且可以独立运行。 - -### 4. 
使用 jpackage 创建 `.exe` 文件 - -以下是一个基本的 `jpackage` 命令示例,用于将 JavaFX JAR 打包为 `.exe` 文件: - -```bash -jpackage --type exe --input target/ --main-jar tool-ocr-1.2.6.jar --name tree-hole-ocr --main-class com.luooqi.ocr.OcrApp -``` - -其中: - -- `--type exe`:指定输出类型为 `.exe`。 -- `--input target/`:指定包含 JAR 文件的目录。 -- `--main-jar`:指定要打包的主 JAR 文件。 -- `--name`:输出的应用程序名称。 -- `--main-class`:指定应用程序的主类。 -- `--win-shortcut`:为应用程序创建一个 Windows 快捷方式。 - -这只是一个基础示例。`jpackage` 提供了许多其他选项,例如设置应用程序图标、JVM 参数等。您可以查看 `jpackage` 的官方文档或使用 `jpackage --help` 命令查看所有可用选项。 - -完成上述步骤后,您应该会在当前目录中得到一个 `.exe` 文件和一个相应的安装程序。 - -注意:这个过程可能需要 JavaFX jmods 文件,您可能需要从 JavaFX 官方网站下载它们,并使用 `--module-path` 和 `--add-modules` 选项指定它们。 - -最后,如果你想要我为你生成这个 `.exe` 文件,请告诉我,我会为你完成这个步骤。 \ No newline at end of file diff --git a/package.json b/package.json deleted file mode 100644 index 1f96560..0000000 --- a/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": "tools-ocr", - "version": "1.0.0", - "description": "docs", - "main": "index.js", - "repository": "git@github.com:litongjava/tools-ocr.git", - "author": "litongjava ", - "license": "MIT", - "scripts": { - "dev": "vuepress dev docs", - "build": "vuepress build docs" - }, - "devDependencies": { - "vuepress": "^1.8.2" - } -} diff --git a/pom.xml b/pom.xml index ce322a4..f9f8010 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.luooqi tools-ocr - 2.2.9 + 2.3.0 UTF-8 @@ -57,41 +57,17 @@ ${logback.version} - + - ai.djl - api - ${djl.version} - - - ai.djl - basicdataset - ${djl.version} - - - ai.djl - model-zoo - ${djl.version} - - - - - ai.djl.pytorch - pytorch-engine - ${djl.version} - runtime - - - - ai.djl.onnxruntime - onnxruntime-engine - ${djl.version} + io.github.mymonstercat + rapidocr + 0.0.7 - ai.djl.opencv - opencv - ${djl.version} + io.github.mymonstercat + rapidocr-onnx-platform + 0.0.7 diff --git a/readme-cn.md b/readme-cn.md new file mode 100644 index 0000000..4911643 --- /dev/null +++ b/readme-cn.md @@ -0,0 +1,135 @@ +# 树洞 OCR + +[English](./readme.md) | 
[中文](./readme-cn.md) + +## 介绍 + +- 本地 OCR 识别 + : 树洞 OCR 文字识别工具无需联网,通过调用本地 OCR 技术,基于 Paddle OCR 模型和深度学习框架如 PyTorch、DJL,提供快速准确的文字识别。 +- 跨平台兼容 + : 基于 java 1.8 和 JavaFX 开发,支持在不同操作系统上运行,包括 Mac OS X 12.6 及以上版本。 +- 强大的功能支持 + : 除了基础的文字识别,还包括 PDF 识别、图片文字识别、快捷键截图识别等功能. + +## 主要依赖库 + +- jdk 1.8 +- javafx +- djl +- pytorch +- onnx +- paddle ocr +- opencv + +## 开源地址 + +[gitee](https://gitee.com/ppnt/tools-ocr) | [github](https://github.com/litongjava/tools-ocr) + +## document + +https://tree-hole-ocr-docs.vercel.app/ + +## required + +- Mac OS X 12.6 因为依赖 djl 0.25.0 + +## 安装 + +> - **安装路径请勿包含中文字符**; +> - 本程序使用 JavaFX 开发,提供的安装包中已经包含了 Java +> - 从[release](https://github.com/litongjava/tools-ocr/releases/)下载最新版本解压安装即可 + +## 程序使用 + +### 截图 + +- 方法一:在程序主界面点击截图按钮; +- 方法二:点击截图快捷键 F4。 + +### 圈选区域 + +进入截图界面后,按下鼠标左键,然后拖动即可圈选所要截取的区域; +圈选结束后,可以对圈选的区域进行微调: + +- 使用 **方向键**,可以对所选区域的右边界和上边界进行微调; +- 使用 **Shift+方向键**,可以对所选区域的左边界和下边界进行微调; +- 使用 **Ctrl+A**,可以全选整个屏幕。 + +### 确定圈选 + +圈选完成后,点击 `Enter` 或者 `Space` 键,或者鼠标左键双击即可确认圈选;确认圈选后,会自动对所选区域进行 OCR 文字识别。 + +![](readme_files/3.jpg) +![](readme_files/4.jpg) + +## 本地构建 + +### 下载模型并解压 + +``` +wget https://github.com/litongjava/tools-ocr/releases/download/model-ppocr-v4/ch_PP-OCRv4_rec_infer-onnx.zip +wget https://github.com/litongjava/tools-ocr/releases/download/model-ppocr-v4/ch_PP-OCRv4_det_infer-onnx.zip +``` + +解压模型 + +``` +mkdir models/ch_PP-OCRv4_rec_infer +mkdir models/ch_PP-OCRv4_det_infer +unzip /Users/mac/Downloads/ch_PP-OCRv4_rec_infer-onnx.zip -d models/ch_PP-OCRv4_rec_infer +unzip /Users/mac/Downloads/ch_PP-OCRv4_det_infer-onnx.zip -d models/ch_PP-OCRv4_det_infer +``` + +### 构建程序 + +你下载代码在本地进行构建,构建命令如下 +windows + +``` +mkdir target\jfx\app +cp -r models target\jfx\app +mvn jfx:native -DskipTests -f pom.xml +``` + +macos + +```shell script +rm -rf target/jfx/app +mkdir -p target/jfx/app +cp -r models target/jfx/app +mvn jfx:native -DskipTests -f pom.xml +``` + +## 查看系统运行日志 + +cd treehole.app/Contents/java/logs + +## 注意事项 + +### MAC 权限设置 + 
+由于监控了截图快捷键,因此 MAC 需要开启相应的权限,请见下图: +笔者设置如下 + +- Settings-->Security and Privacy-->Accessibility + ![MAC权限设置](readme_files/5.jpg) +- Settings-->Security and Privacy-->Screen Recording + ![2](readme_files/2.jpg) + +## 常用目录 + +- 日志目录/Applications/treehole.app/Contents/Java/logs +- 临时图片保存目录 /Applications/treehole.app/Contents/Java + +## TODO + +- [x] PDF 识别 +- [x] 图片文字识别 + - [x] 识别结果文本对齐(暂未实现多分栏) + - [x] 全屏模式下截图 + - [x] 添加正在识别动画 + - [x] 多屏支持 +- [ ] 文本翻译 +- [ ] 公式识别 +- [ ] 表格识别 +- [ ] 软件设置 diff --git a/readme.md b/readme.md index 2c15e9d..2c6bdf3 100644 --- a/readme.md +++ b/readme.md @@ -1,65 +1,136 @@ -# 树洞 OCR 文字识别 -一款跨平台的 OCR 小工具,调用本地OCR进行识别,无需联网即可使用 -用到的技术和框架 -- jdk 1.8 -- javafx -- djl -- pytorch -- onnx -- paddle ocr -- opencv - -## 开源地址 +# Tree Hole OCR + +[English](./readme.md) | [中文](./readme-cn.md) + +## Introduction + +- Local OCR Recognition: Tree Hole OCR text recognition tool does not require an internet connection. It leverages local OCR technology, based on Paddle OCR model and deep learning frameworks such as PyTorch, DJL, to provide fast and accurate text recognition. +- Cross-platform compatibility: Developed with Java 1.8 and JavaFX, it supports operation on different operating systems, including Mac OS X 12.6 and above. +- Powerful functionality: In addition to basic text recognition, it also includes PDF recognition, image text recognition, shortcut key screenshot recognition, and more. 
+ +## Main Dependencies + +- JDK 1.8 +- JavaFX +- DJL +- PyTorch +- ONNX +- Paddle OCR +- OpenCV + +## Open Source Address + [gitee](https://gitee.com/ppnt/tools-ocr) | [github](https://github.com/litongjava/tools-ocr) -## 安装 -> - **安装路径请勿包含中文字符**; -> - 本程序使用 JavaFX 开发,提供的安装包中已经包含了Java -> - 从[release](https://github.com/litongjava/tools-ocr/releases/)下载最新版本解压安装即可 +## Documentation + +https://tree-hole-ocr-docs.vercel.app/ + +## Requirements + +- Mac OS X 12.6 due to dependency on DJL 0.25.0 + +## Installation + +> - **Please do not include Chinese characters in the installation path**; +> - This program is developed with JavaFX, and the installation package provided already includes Java. +> - Download the latest version from [release](https://github.com/litongjava/tools-ocr/releases/) and unzip it for installation. + +## Using the Program + +### Screenshot + +- Method one: Click the screenshot button on the main interface of the program; +- Method two: Press the screenshot shortcut key F4. + +### Selecting Area + +After entering the screenshot interface, press and hold the left mouse button, then drag to select the area you want to capture; +After completing the selection, you can fine-tune the selected area: + +- Use **arrow keys** to adjust the right and top borders of the selected area; +- Use **Shift + arrow keys** to adjust the left and bottom borders of the selected area; +- Use **Ctrl + A** to select the entire screen. + +### Confirm Selection + +After completing the selection, press the `Enter` or `Space` key, or double-click the left mouse button to confirm the selection. Once confirmed, the program will automatically perform OCR text recognition on the selected area. 
+ +- image -## 程序使用 -### 启动截图 -- 方法一:在程序主界面点击截图按钮; -- 方法二:点击截图快捷键 F4。 + ![](readme_files/3.jpg) -### 圈选区域 -进入截图界面后,按下鼠标左键,然后拖动即可圈选所要截取的区域; -圈选结束后,可以对圈选的区域进行微调: -- 使用 **方向键**,可以对所选区域的右边界和上边界进行微调; -- 使用 **Shift+方向键**,可以对所选区域的左边界和下边界进行微调; -- 使用 **Ctrl+A**,可以全选整个屏幕。 +- result: -### 确定圈选 -圈选完成后,点击 `Enter` 或者 `Space` 键,或者鼠标左键双击即可确认圈选;确认圈选后,会自动对所选区域进行 OCR 文字识别。 + ![](readme_files/4.jpg) -![](readme_files/3.jpg) -![](readme_files/4.jpg) +## Local Build + +### Download and Unzip the Models + +``` +wget https://github.com/litongjava/tools-ocr/releases/download/model-ppocr-v4/ch_PP-OCRv4_rec_infer-onnx.zip +wget https://github.com/litongjava/tools-ocr/releases/download/model-ppocr-v4/ch_PP-OCRv4_det_infer-onnx.zip +``` + +Unzip the models + +``` +mkdir models/ch_PP-OCRv4_rec_infer +mkdir models/ch_PP-OCRv4_det_infer +unzip /Users/mac/Downloads/ch_PP-OCRv4_rec_infer-onnx.zip -d models/ch_PP-OCRv4_rec_infer +unzip /Users/mac/Downloads/ch_PP-OCRv4_det_infer-onnx.zip -d models/ch_PP-OCRv4_det_infer +``` + +### Build the Program + +You can download the code and build it locally. 
The build commands are as follows: +windows -## 本地构建 -你下载代码在本地进行构建,构建命令如下 ``` mkdir target\jfx\app cp -r models target\jfx\app mvn jfx:native -DskipTests -f pom.xml ``` -## 注意事项 -### MAC权限设置 -由于监控了截图快捷键,因此MAC需要开启相应的权限,请见下图: -![MAC权限设置](http://img.ifish.fun/Fo31NZQIhPNF6m7gOorRGDuKvaZ_) -笔者设置如下 -![1](readme_files/1.jpg) -![2](readme_files/2.jpg) +macos + +```shell script +rm -rf target/jfx/app +mkdir -p target/jfx/app +cp -r models target/jfx/app +mvn jfx:native -DskipTests -f pom.xml +``` + +## View System Operating Log + +cd treehole.app/Contents/java/logs + +## Notices + +### MAC Permission Settings + +Since screenshot shortcuts are monitored, MAC needs appropriate permissions settings, as shown below: + +- Settings --> Security and Privacy --> Accessibility + ![MAC Permission Settings](readme_files/5.jpg) +- Settings --> Security and Privacy --> Screen Recording + ![2](readme_files/2.jpg) + +## Common Directories + +- Log directory /Applications/treehole.app/Contents/Java/logs +- Temporary image saving directory /Applications/treehole.app/Contents/Java ## TODO -- [x] PDF识别 -- [x] 图片文字识别 - - [x] 识别结果文本对齐(暂未实现多分栏) - - [x] 全屏模式下截图 - - [x] 添加正在识别动画 - - [x] 多屏支持 -- [ ] 文本翻译 -- [ ] 公式识别 -- [ ] 表格识别 -- [ ] 软件设置 +- [x] PDF Recognition +- [x] Image Text Recognition + - [x] Recognition result text alignment (multi-column yet to be implemented) + - [x] Full screen mode screenshot + - [x] Adding recognition animation + - [x] Multi-screen support +- [ ] Text Translation +- [ ] Formula Recognition +- [ ] Table Recognition +- [ ] Software Settings diff --git a/readme_files/5.jpg b/readme_files/5.jpg new file mode 100644 index 0000000..05dab18 Binary files /dev/null and b/readme_files/5.jpg differ diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrEngine.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrEngine.java deleted file mode 100644 index 7f9d5e5..0000000 --- a/src/main/java/com/benjaminwan/ocrlibrary/OcrEngine.java +++ /dev/null @@ -1,120 +0,0 @@ -package 
com.benjaminwan.ocrlibrary; - -import cn.hutool.core.io.FileUtil; -import cn.hutool.log.StaticLog; - -import java.io.File; -import java.nio.charset.Charset; - -public final class OcrEngine { - /** - * 图像外接白框,用于提升识别率,文字框没有正确框住所有文字时,增加此值。 - */ - private int padding; - /** - * 文字框置信度门限,文字框没有正确框住所有文字时,减小此值 - */ - private float boxScoreThresh; - - private float boxThresh; - /** - * 单个文字框大小倍率,越大时单个文字框越大 - */ - private float unClipRatio; - /** - * 启用(1)/禁用(0) 文字方向检测,只有图片倒置的情况下(旋转90~270度的图片),才需要启用文字方向检测 - */ - private boolean doAngle; - /** - * 启用(1)/禁用(0) 角度投票(整张图片以最大可能文字方向来识别),当禁用文字方向检测时,此项也不起作用 - */ - private boolean mostAngle; - - public native boolean setNumThread(int numThread); - - public native void initLogger(boolean isConsole, boolean isPartImg, boolean isResultImg); - - public native void enableResultText(String imagePath); - - public native boolean initModels(String modelsDir, String detName, String clsName, String recName, String keysName); - - /** - * GPU0一般为默认GPU,参数选项:使用CPU(-1)/使用GPU0(0)/使用GPU1(1)/... 
- */ - public native void setGpuIndex(int gpuIndex); - - public native String getVersion(); - - public native OcrResult detect(String input, int padding, int maxSideLen, float boxScoreThresh, float boxThresh, float unClipRatio, boolean doAngle, boolean mostAngle); - - public OcrEngine() { - try { - StaticLog.info("java.library.path=" + System.getProperty("java.library.path")); - System.loadLibrary("RapidOcrNcnn"); - } catch (Exception e) { - e.printStackTrace(); - } - this.padding = 15; - this.boxScoreThresh = 0.25f; - this.boxThresh = 0.3f; - this.unClipRatio = 1.6f; - this.doAngle = true; - this.mostAngle = true; - } - - public int getPadding() { - return this.padding; - } - - public void setPadding(int i) { - this.padding = i; - } - - public float getBoxScoreThresh() { - return this.boxScoreThresh; - } - - public void setBoxScoreThresh(float f) { - this.boxScoreThresh = f; - } - - public float getBoxThresh() { - return this.boxThresh; - } - - public void setBoxThresh(float f) { - this.boxThresh = f; - } - - public float getUnClipRatio() { - return this.unClipRatio; - } - - public void setUnClipRatio(float f) { - this.unClipRatio = f; - } - - public boolean getDoAngle() { - return this.doAngle; - } - - public void setDoAngle(boolean z) { - this.doAngle = z; - } - - public boolean getMostAngle() { - return this.mostAngle; - } - - public void setMostAngle(boolean z) { - this.mostAngle = z; - } - - public OcrResult detect(String input) { - return detect(input, 0); - } - - public OcrResult detect(String input, int maxSideLen) { - return detect(input, this.padding, maxSideLen, this.boxScoreThresh, this.boxThresh, this.unClipRatio, this.doAngle, this.mostAngle); - } -} \ No newline at end of file diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrFailed.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrFailed.java deleted file mode 100644 index 71e7a4c..0000000 --- a/src/main/java/com/benjaminwan/ocrlibrary/OcrFailed.java +++ /dev/null @@ -1,9 +0,0 @@ -package 
com.benjaminwan.ocrlibrary; - -public final class OcrFailed extends OcrOutput { - public static final OcrFailed INSTANCE = new OcrFailed(); - - private OcrFailed() { - super(); - } -} diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrOutput.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrOutput.java deleted file mode 100644 index 5215bba..0000000 --- a/src/main/java/com/benjaminwan/ocrlibrary/OcrOutput.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.benjaminwan.ocrlibrary; - -public abstract class OcrOutput { - -} diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrResult.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrResult.java deleted file mode 100644 index 566929c..0000000 --- a/src/main/java/com/benjaminwan/ocrlibrary/OcrResult.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.benjaminwan.ocrlibrary; - -import java.util.ArrayList; - -public final class OcrResult extends OcrOutput { - private final double dbNetTime; - - private final ArrayList textBlocks; - private double detectTime; - - private String strRes; - - public OcrResult copy(double dbNetTime, ArrayList textBlocks, double detectTime, String strRes) { - return new OcrResult(dbNetTime, textBlocks, detectTime, strRes); - } - - public String toString() { - return "OcrResult(dbNetTime=" + this.dbNetTime + ", textBlocks=" + this.textBlocks + ", detectTime=" + this.detectTime + ", strRes=" + this.strRes + ')'; - } - - public double getDbNetTime() { - return this.dbNetTime; - } - - - public ArrayList getTextBlocks() { - return this.textBlocks; - } - - public double getDetectTime() { - return this.detectTime; - } - - public void setDetectTime(double d) { - this.detectTime = d; - } - - - public String getStrRes() { - return this.strRes; - } - - public void setStrRes(String str) { - this.strRes = str; - } - - public OcrResult(double dbNetTime, ArrayList textBlocks, double detectTime, String strRes) { - super(); - this.dbNetTime = dbNetTime; - this.textBlocks = textBlocks; - this.detectTime = 
detectTime; - this.strRes = strRes; - } -} diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrStop.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrStop.java deleted file mode 100644 index a7b6645..0000000 --- a/src/main/java/com/benjaminwan/ocrlibrary/OcrStop.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.benjaminwan.ocrlibrary; - -public final class OcrStop extends OcrOutput { - public static final OcrStop INSTANCE = new OcrStop(); - - private OcrStop() { - super(); - } -} diff --git a/src/main/java/com/benjaminwan/ocrlibrary/Point.java b/src/main/java/com/benjaminwan/ocrlibrary/Point.java deleted file mode 100644 index 73e7d5f..0000000 --- a/src/main/java/com/benjaminwan/ocrlibrary/Point.java +++ /dev/null @@ -1,51 +0,0 @@ -package com.benjaminwan.ocrlibrary; - -public final class Point { - private int x; - private int y; - - public Point copy(int x, int y) { - return new Point(x, y); - } - - public String toString() { - return "Point(x=" + this.x + ", y=" + this.y + ')'; - } - - public int hashCode() { - int result = Integer.hashCode(this.x); - return (result * 31) + Integer.hashCode(this.y); - } - - public boolean equals(Object other) { - if (this == other) { - return true; - } - if (!(other instanceof Point)) { - return false; - } - Point point = (Point) other; - return this.x == point.x && this.y == point.y; - } - - public Point(int x, int y) { - this.x = x; - this.y = y; - } - - public int getX() { - return this.x; - } - - public void setX(int i) { - this.x = i; - } - - public int getY() { - return this.y; - } - - public void setY(int i) { - this.y = i; - } -} \ No newline at end of file diff --git a/src/main/java/com/benjaminwan/ocrlibrary/TextBlock.java b/src/main/java/com/benjaminwan/ocrlibrary/TextBlock.java deleted file mode 100644 index 6ac816b..0000000 --- a/src/main/java/com/benjaminwan/ocrlibrary/TextBlock.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.benjaminwan.ocrlibrary; - -import java.util.ArrayList; -import java.util.Arrays; - 
-public final class TextBlock { - - private final ArrayList boxPoint; - private float boxScore; - private final int angleIndex; - private final float angleScore; - private final double angleTime; - - private final String text; - - private final float[] charScores; - private final double crnnTime; - private final double blockTime; - - - public String toString() { - return "TextBlock(boxPoint=" + this.boxPoint + ", boxScore=" + this.boxScore + ", angleIndex=" + this.angleIndex + ", angleScore=" + this.angleScore + ", angleTime=" + this.angleTime + ", text=" + this.text + ", charScores=" + Arrays.toString(this.charScores) + ", crnnTime=" + this.crnnTime + ", blockTime=" + this.blockTime + ')'; - } - - public TextBlock( ArrayList boxPoint, float boxScore, int angleIndex, float angleScore, double angleTime, String text, float[] charScores, double crnnTime, double blockTime) { - this.boxPoint = boxPoint; - this.boxScore = boxScore; - this.angleIndex = angleIndex; - this.angleScore = angleScore; - this.angleTime = angleTime; - this.text = text; - this.charScores = charScores; - this.crnnTime = crnnTime; - this.blockTime = blockTime; - } - - - public ArrayList getBoxPoint() { - return this.boxPoint; - } - - public float getBoxScore() { - return this.boxScore; - } - - public void setBoxScore(float f) { - this.boxScore = f; - } - - public int getAngleIndex() { - return this.angleIndex; - } - - public float getAngleScore() { - return this.angleScore; - } - - public double getAngleTime() { - return this.angleTime; - } - - - public String getText() { - return this.text; - } - - - public float[] getCharScores() { - return this.charScores; - } - - public double getCrnnTime() { - return this.crnnTime; - } - - public double getBlockTime() { - return this.blockTime; - } -} diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4DetExample.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4DetExample.java deleted file mode 100644 index 9dd6986..0000000 --- 
a/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4DetExample.java +++ /dev/null @@ -1,51 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4; - -import ai.djl.ModelException; -import ai.djl.inference.Predictor; -import ai.djl.modality.cv.Image; -import ai.djl.ndarray.NDList; -import ai.djl.ndarray.NDManager; -import ai.djl.opencv.OpenCVImageFactory; -import ai.djl.repository.zoo.ModelZoo; -import ai.djl.repository.zoo.ZooModel; -import ai.djl.translate.TranslateException; - -import com.litongjava.djl.paddle.ocr.v4.common.ImageUtils; -import com.litongjava.djl.paddle.ocr.v4.detection.OcrV4Detection; -import org.opencv.core.Mat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; - -public final class OcrV4DetExample { - - private static final Logger logger = LoggerFactory.getLogger(OcrV4DetExample.class); - - private OcrV4DetExample() { - } - - public static void main(String[] args) throws IOException, ModelException, TranslateException { - Path imageFile = Paths.get("src/test/resources/2.jpg"); - Image image = OpenCVImageFactory.getInstance().fromFile(imageFile); - - OcrV4Detection detection = new OcrV4Detection(); - try (ZooModel detectionModel = ModelZoo.loadModel(detection.chDetCriteria()); - Predictor detector = detectionModel.newPredictor(); - NDManager manager = NDManager.newBaseManager();) { - - NDList dt_boxes = detector.predict(image); - // 交给 NDManager自动管理内存 - // attach to manager for automatic memory management - dt_boxes.attach(manager); - - for (int i = 0; i < dt_boxes.size(); i++) { - ImageUtils.drawRect((Mat) image.getWrappedImage(), dt_boxes.get(i)); - } - ImageUtils.saveImage(image, "detect_rect.png", "build/output"); - ((Mat) image.getWrappedImage()).release(); - } - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4RecExample.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4RecExample.java 
deleted file mode 100644 index 3945727..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4RecExample.java +++ /dev/null @@ -1,131 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4; - -import ai.djl.ModelException; -import ai.djl.inference.Predictor; -import ai.djl.modality.cv.Image; -import ai.djl.ndarray.NDList; -import ai.djl.ndarray.NDManager; -import ai.djl.opencv.OpenCVImageFactory; -import ai.djl.repository.zoo.ModelZoo; -import ai.djl.repository.zoo.ZooModel; -import ai.djl.translate.TranslateException; -import com.litongjava.djl.paddle.ocr.v4.common.ImageUtils; -import com.litongjava.djl.paddle.ocr.v4.common.RotatedBox; -import com.litongjava.djl.paddle.ocr.v4.common.RotatedBoxCompX; -import com.litongjava.djl.paddle.ocr.v4.detection.OcrV4Detection; -import com.litongjava.djl.paddle.ocr.v4.opencv.OpenCVUtils; -import com.litongjava.djl.paddle.ocr.v4.recognition.OcrV4Recognition; -import org.opencv.core.Mat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.awt.image.BufferedImage; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * OCR V4模型 文字识别. 支持文本有旋转角度 - * OCR V4 model for text recognition. Supports text with rotation angles. 
- */ -public final class OcrV4RecExample { - - private static final Logger logger = LoggerFactory.getLogger(OcrV4RecExample.class); - - private OcrV4RecExample() { - } - - public static void main(String[] args) throws IOException, ModelException, TranslateException { - Path imageFile = Paths.get("src/test/resources/2.jpg"); - Image image = OpenCVImageFactory.getInstance().fromFile(imageFile); - - OcrV4Detection detection = new OcrV4Detection(); - OcrV4Recognition recognition = new OcrV4Recognition(); - try (ZooModel detectionModel = ModelZoo.loadModel(detection.chDetCriteria()); - Predictor detector = detectionModel.newPredictor(); - ZooModel recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria()); - Predictor recognizer = recognitionModel.newPredictor(); - NDManager manager = NDManager.newBaseManager()) { - - long timeInferStart = System.currentTimeMillis(); - List detections = recognition.predict(manager, image, detector, recognizer); - -// for (int i = 0; i < 1000; i++) { -// detections = recognition.predict(image, detector, recognizer); -// for (RotatedBox result : detections) { -// System.out.println(result.getText()); -// } -// System.out.println("index : " + i); -// } - - long timeInferEnd = System.currentTimeMillis(); - System.out.println("time: " + (timeInferEnd - timeInferStart)); - - // 对检测结果根据坐标位置,根据从上到下,从做到右,重新排序,下面算法对图片倾斜旋转角度较小的情形适用 - // 如果图片旋转角度较大,则需要自行改进算法,需要根据斜率校正计算位置。 - // Reorder the detection results based on the coordinate positions, from top to bottom, from left to right. The algorithm below is suitable for situations where the image is slightly tilted or rotated. - // If the image rotation angle is large, the algorithm needs to be improved, and the position needs to be calculated based on the slope correction. - List initList = new ArrayList<>(); - for (RotatedBox result : detections) { - // put low Y value at the head of the queue. 
- initList.add(result); - } - Collections.sort(initList); - - List> lines = new ArrayList<>(); - List line = new ArrayList<>(); - RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText()); - line.add(firstBox); - lines.add((ArrayList) line); - for (int i = 1; i < initList.size(); i++) { - RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText()); - float y1 = firstBox.getBox().toFloatArray()[1]; - float y2 = tmpBox.getBox().toFloatArray()[1]; - float dis = Math.abs(y2 - y1); - if (dis < 20) { // 认为是同 1 行 - Considered to be in the same line - line.add(tmpBox); - } else { // 换行 - Line break - firstBox = tmpBox; - Collections.sort(line); - line = new ArrayList<>(); - line.add(firstBox); - lines.add((ArrayList) line); - } - } - - - String fullText = ""; - for (int i = 0; i < lines.size(); i++) { - for (int j = 0; j < lines.get(i).size(); j++) { - String text = lines.get(i).get(j).getText(); - if (text.trim().equals("")) - continue; - fullText += text + " "; - } - fullText += '\n'; - } - - System.out.println(fullText); - - - // 转 BufferedImage 解决 Imgproc.putText 中文乱码问题 - Mat wrappedImage = (Mat) image.getWrappedImage(); - BufferedImage bufferedImage = OpenCVUtils.mat2Image(wrappedImage); - for (RotatedBox result : detections) { - ImageUtils.drawImageRectWithText(bufferedImage, result.getBox(), result.getText()); - } - - Mat image2Mat = OpenCVUtils.image2Mat(bufferedImage); - image = OpenCVImageFactory.getInstance().fromImage(image2Mat); - ImageUtils.saveImage(image, "ocr_result.png", "build/output"); - - wrappedImage.release(); - image2Mat.release(); - - logger.info("{}", detections); - } - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/ImageUtils.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/ImageUtils.java deleted file mode 100644 index 6c67331..0000000 --- 
a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/ImageUtils.java +++ /dev/null @@ -1,241 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4.common; - -import ai.djl.modality.cv.Image; -import ai.djl.modality.cv.ImageFactory; -import ai.djl.modality.cv.output.DetectedObjects; -import ai.djl.ndarray.NDArray; -import org.opencv.core.Mat; -import org.opencv.core.Point; -import org.opencv.core.Scalar; -import org.opencv.imgproc.Imgproc; - -import java.awt.*; -import java.awt.image.BufferedImage; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; - -/** - * 图像工具类 - */ -public class ImageUtils { - - /** - * 保存BufferedImage图片 - * - * @param img - * @param name - * @param path - */ - public static void saveImage(BufferedImage img, String name, String path) { - Image djlImg = ImageFactory.getInstance().fromImage(img); // 支持多种图片格式,自动适配 - Path outputDir = Paths.get(path); - Path imagePath = outputDir.resolve(name); - // OpenJDK 不能保存 jpg 图片的 alpha channel - try { - djlImg.save(Files.newOutputStream(imagePath), "png"); - } catch (IOException e) { - e.printStackTrace(); - } - } - - /** - * 保存DJL图片 - * - * @param img - * @param name - * @param path - */ - public static void saveImage(Image img, String name, String path) { - Path outputDir = Paths.get(path); - Path imagePath = outputDir.resolve(name); - // OpenJDK 不能保存 jpg 图片的 alpha channel - try { - img.save(Files.newOutputStream(imagePath), "png"); - } catch (IOException e) { - e.printStackTrace(); - } - } - - /** - * 保存图片,含检测框 - * - * @param img - * @param detection - * @param name - * @param path - * @throws IOException - */ - public static void saveBoundingBoxImage( - Image img, DetectedObjects detection, String name, String path) throws IOException { - // Make image copy with alpha channel because original image was jpg - img.drawBoundingBoxes(detection); - Path outputDir = Paths.get(path); - 
Files.createDirectories(outputDir); - Path imagePath = outputDir.resolve(name); - // OpenJDK can't save jpg with alpha channel - img.save(Files.newOutputStream(imagePath), "png"); - } - - /** - * 画矩形 - * - * @param mat - * @param box - */ - public static void drawRect(Mat mat, NDArray box) { - - float[] points = box.toFloatArray(); - List list = new ArrayList<>(); - - for (int i = 0; i < 4; i++) { - Point point = new Point((int) points[2 * i], (int) points[2 * i + 1]); - list.add(point); - } - - Imgproc.line(mat, list.get(0), list.get(1), new Scalar(0, 255, 0), 1); - Imgproc.line(mat, list.get(1), list.get(2), new Scalar(0, 255, 0), 1); - Imgproc.line(mat, list.get(2), list.get(3), new Scalar(0, 255, 0), 1); - Imgproc.line(mat, list.get(3), list.get(0), new Scalar(0, 255, 0), 1); - } - - /** - * 画矩形 - * - * @param mat - * @param box - * @param text - */ - public static void drawRectWithText(Mat mat, NDArray box, String text) { - - float[] points = box.toFloatArray(); - List list = new ArrayList<>(); - - for (int i = 0; i < 4; i++) { - Point point = new Point((int) points[2 * i], (int) points[2 * i + 1]); - list.add(point); - } - - Imgproc.line(mat, list.get(0), list.get(1), new Scalar(0, 255, 0), 1); - Imgproc.line(mat, list.get(1), list.get(2), new Scalar(0, 255, 0), 1); - Imgproc.line(mat, list.get(2), list.get(3), new Scalar(0, 255, 0), 1); - Imgproc.line(mat, list.get(3), list.get(0), new Scalar(0, 255, 0), 1); - // 中文乱码 - Imgproc.putText(mat, text, list.get(0), Imgproc.FONT_HERSHEY_SCRIPT_SIMPLEX, 1.0, new Scalar(0, 255, 0), 1); - } - - /** - * 画检测框(有倾斜角) - * - * @param image - * @param box - */ - public static void drawImageRect(BufferedImage image, NDArray box) { - float[] points = box.toFloatArray(); - int[] xPoints = new int[5]; - int[] yPoints = new int[5]; - - for (int i = 0; i < 4; i++) { - xPoints[i] = (int) points[2 * i]; - yPoints[i] = (int) points[2 * i + 1]; - } - xPoints[4] = xPoints[0]; - yPoints[4] = yPoints[0]; - - // 将绘制图像转换为Graphics2D - 
Graphics2D g = (Graphics2D) image.getGraphics(); - try { - g.setColor(new Color(0, 255, 0)); - // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角 - BasicStroke bStroke = new BasicStroke(4, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER); - g.setStroke(bStroke); - g.drawPolyline(xPoints, yPoints, 5); // xPoints, yPoints, nPoints - } finally { - g.dispose(); - } - } - - /** - * 画检测框(有倾斜角)和文本 - * - * @param image - * @param box - * @param text - */ - public static void drawImageRectWithText(BufferedImage image, NDArray box, String text) { - float[] points = box.toFloatArray(); - int[] xPoints = new int[5]; - int[] yPoints = new int[5]; - - for (int i = 0; i < 4; i++) { - xPoints[i] = (int) points[2 * i]; - yPoints[i] = (int) points[2 * i + 1]; - } - xPoints[4] = xPoints[0]; - yPoints[4] = yPoints[0]; - - // 将绘制图像转换为Graphics2D - Graphics2D g = (Graphics2D) image.getGraphics(); - try { - int fontSize = 32; - Font font = new Font("楷体", Font.PLAIN, fontSize); - g.setFont(font); - g.setColor(new Color(0, 0, 255)); - // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角 - BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER); - g.setStroke(bStroke); - g.drawPolyline(xPoints, yPoints, 5); // xPoints, yPoints, nPoints - g.drawString(text, xPoints[0], yPoints[0]); - } finally { - g.dispose(); - } - } - - /** - * 画检测框 - * - * @param image - * @param x - * @param y - * @param width - * @param height - */ - public static void drawImageRect(BufferedImage image, int x, int y, int width, int height) { - // 将绘制图像转换为Graphics2D - Graphics2D g = (Graphics2D) image.getGraphics(); - try { - g.setColor(new Color(0, 255, 0)); - // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角 - BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER); - g.setStroke(bStroke); - g.drawRect(x, y, width, height); - } finally { - g.dispose(); - } - } - - /** - * 显示文字 - * - * @param image - * @param text - * @param x - * @param y - */ - public static void drawImageText(BufferedImage image, String text, int 
x, int y) { - Graphics graphics = image.getGraphics(); - int fontSize = 32; - Font font = new Font("楷体", Font.PLAIN, fontSize); - try { - graphics.setFont(font); - graphics.setColor(new Color(0, 0, 255)); - int strWidth = graphics.getFontMetrics().stringWidth(text); - graphics.drawString(text, x, y); - } finally { - graphics.dispose(); - } - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBox.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBox.java deleted file mode 100644 index 1858258..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBox.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4.common; - -import ai.djl.ndarray.NDArray; - -/** - * 旋转检测框 - */ -public class RotatedBox implements Comparable { - private NDArray box; - private String text; - - public RotatedBox(NDArray box, String text) { - this.box = box; - this.text = text; - } - - /** - * 将左上角 Y 坐标升序排序 - * - * @param o - * @return - */ - @Override - public int compareTo(RotatedBox o) { - NDArray lowBox = this.getBox(); - NDArray highBox = o.getBox(); - float lowY = lowBox.toFloatArray()[1]; - float highY = highBox.toFloatArray()[1]; - return (lowY < highY) ? 
-1 : 1; - } - - public NDArray getBox() { - return box; - } - - public void setBox(NDArray box) { - this.box = box; - } - - public String getText() { - return text; - } - - public void setText(String text) { - this.text = text; - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBoxCompX.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBoxCompX.java deleted file mode 100644 index 3010457..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBoxCompX.java +++ /dev/null @@ -1,46 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4.common; - -import ai.djl.ndarray.NDArray; - -/** - */ -public class RotatedBoxCompX implements Comparable { - private NDArray box; - private String text; - - public RotatedBoxCompX(NDArray box, String text) { - this.box = box; - this.text = text; - } - - /** - * 将左上角 X 坐标升序排序 - * - * @param o - * @return - */ - @Override - public int compareTo(RotatedBoxCompX o) { - NDArray leftBox = this.getBox(); - NDArray rightBox = o.getBox(); - float leftX = leftBox.toFloatArray()[0]; - float rightX = rightBox.toFloatArray()[0]; - return (leftX < rightX) ? 
-1 : 1; - } - - public NDArray getBox() { - return box; - } - - public void setBox(NDArray box) { - this.box = box; - } - - public String getText() { - return text; - } - - public void setText(String text) { - this.text = text; - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OCRDetectionTranslator.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OCRDetectionTranslator.java deleted file mode 100644 index ee59fdb..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OCRDetectionTranslator.java +++ /dev/null @@ -1,525 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4.detection; - -import ai.djl.modality.cv.Image; -import ai.djl.modality.cv.util.NDImageUtils; -import ai.djl.ndarray.NDArray; -import ai.djl.ndarray.NDArrays; -import ai.djl.ndarray.NDList; -import ai.djl.ndarray.NDManager; -import ai.djl.ndarray.index.NDIndex; -import ai.djl.ndarray.types.DataType; -import ai.djl.ndarray.types.Shape; -import ai.djl.translate.Batchifier; -import ai.djl.translate.Translator; -import ai.djl.translate.TranslatorContext; -import com.litongjava.djl.paddle.ocr.v4.opencv.NDArrayUtils; -import org.opencv.core.*; -import org.opencv.imgproc.Imgproc; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * 文字检测前后处理 - */ -public class OCRDetectionTranslator implements Translator { - // det_algorithm == "DB" - private final float thresh = 0.3f; - private final boolean use_dilation = false; - private final String score_mode = "fast"; - private final String box_type = "quad"; - - private final int limit_side_len; - private final int max_candidates; - private final int min_size; - private final float box_thresh; - private final float unclip_ratio; - private float ratio_h; - private float ratio_w; - private int img_height; - private int img_width; - - public OCRDetectionTranslator(Map arguments) { - limit_side_len = - arguments.containsKey("limit_side_len") - ? 
Integer.parseInt(arguments.get("limit_side_len").toString()) - : 960; - max_candidates = - arguments.containsKey("max_candidates") - ? Integer.parseInt(arguments.get("max_candidates").toString()) - : 1000; - min_size = - arguments.containsKey("min_size") - ? Integer.parseInt(arguments.get("min_size").toString()) - : 3; - box_thresh = - arguments.containsKey("box_thresh") - ? Float.parseFloat(arguments.get("box_thresh").toString()) - : 0.6f; // 0.5f - unclip_ratio = - arguments.containsKey("unclip_ratio") - ? Float.parseFloat(arguments.get("unclip_ratio").toString()) - : 1.6f; - } - - @Override - public NDList processOutput(TranslatorContext ctx, NDList list) { - NDManager manager = ctx.getNDManager(); - NDArray pred = list.get(0); - pred = pred.squeeze(); - NDArray segmentation = pred.gt(thresh); // thresh=0.3 .mul(255f) - - segmentation = segmentation.toType(DataType.UINT8, true); - Shape shape = segmentation.getShape(); - int rows = (int) shape.get(0); - int cols = (int) shape.get(1); - - Mat newMask = new Mat(); - if (this.use_dilation) { - Mat mask = new Mat(); - //convert from NDArray to Mat - Mat srcMat = NDArrayUtils.uint8NDArrayToMat(segmentation); - // size 越小,腐蚀的单位越小,图片越接近原图 - // Mat dilation_kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(2, 2)); - Mat dilation_kernel = NDArrayUtils.uint8ArrayToMat(new byte[][]{{1, 1}, {1, 1}}); - /** - * 膨胀说明: 图像的一部分区域与指定的核进行卷积, 求核的最`大`值并赋值给指定区域。 膨胀可以理解为图像中`高亮区域`的'领域扩大'。 - * 意思是高亮部分会侵蚀不是高亮的部分,使高亮部分越来越多。 - */ - Imgproc.dilate(srcMat, mask, dilation_kernel); - //destination Matrix - Scalar scalar = new Scalar(255); - Core.multiply(mask, scalar, newMask); - // release Mat - mask.release(); - srcMat.release(); - dilation_kernel.release(); - } else { - Mat srcMat = NDArrayUtils.uint8NDArrayToMat(segmentation); - //destination Matrix - Scalar scalar = new Scalar(255); - Core.multiply(srcMat, scalar, newMask); - // release Mat - srcMat.release(); - } - - NDList dt_boxes = null; - NDArray boxes = 
boxes_from_bitmap(manager, pred, newMask); - if (boxes != null) { - //boxes[:, :, 0] = boxes[:, :, 0] / ratio_w - NDArray boxes1 = boxes.get(":, :, 0").div(ratio_w); - boxes.set(new NDIndex(":, :, 0"), boxes1); - //boxes[:, :, 1] = boxes[:, :, 1] / ratio_h - NDArray boxes2 = boxes.get(":, :, 1").div(ratio_h); - boxes.set(new NDIndex(":, :, 1"), boxes2); - - dt_boxes = this.filter_tag_det_res(boxes); - - dt_boxes.detach(); - } - - // release Mat - newMask.release(); - - return dt_boxes; - } - - - private NDList filter_tag_det_res(NDArray dt_boxes) { - NDList boxesList = new NDList(); - - int num = (int) dt_boxes.getShape().get(0); - for (int i = 0; i < num; i++) { - NDArray box = dt_boxes.get(i); - box = order_points_clockwise(box); - box = clip_det_res(box); - float[] box0 = box.get(0).toFloatArray(); - float[] box1 = box.get(1).toFloatArray(); - float[] box3 = box.get(3).toFloatArray(); - int rect_width = (int) Math.sqrt(Math.pow(box1[0] - box0[0], 2) + Math.pow(box1[1] - box0[1], 2)); - int rect_height = (int) Math.sqrt(Math.pow(box3[0] - box0[0], 2) + Math.pow(box3[1] - box0[1], 2)); - if (rect_width <= 3 || rect_height <= 3) - continue; - boxesList.add(box); - } - - return boxesList; - } - - private NDArray clip_det_res(NDArray points) { - for (int i = 0; i < points.getShape().get(0); i++) { - int value = Math.max((int) points.get(i, 0).toFloatArray()[0], 0); - value = Math.min(value, img_width - 1); - points.set(new NDIndex(i + ",0"), value); - value = Math.max((int) points.get(i, 1).toFloatArray()[0], 0); - value = Math.min(value, img_height - 1); - points.set(new NDIndex(i + ",1"), value); - } - - return points; - } - - /** - * sort the points based on their x-coordinates - * 顺时针 - * - * @param pts - * @return - */ - - private NDArray order_points_clockwise(NDArray pts) { - NDList list = new NDList(); - long[] indexes = pts.get(":, 0").argSort().toLongArray(); - - // grab the left-most and right-most points from the sorted - // x-roodinate points - Shape s1 
= pts.getShape(); - NDArray leftMost1 = pts.get(indexes[0] + ",:"); - NDArray leftMost2 = pts.get(indexes[1] + ",:"); - NDArray leftMost = leftMost1.concat(leftMost2).reshape(2, 2); - NDArray rightMost1 = pts.get(indexes[2] + ",:"); - NDArray rightMost2 = pts.get(indexes[3] + ",:"); - NDArray rightMost = rightMost1.concat(rightMost2).reshape(2, 2); - - // now, sort the left-most coordinates according to their - // y-coordinates so we can grab the top-left and bottom-left - // points, respectively - indexes = leftMost.get(":, 1").argSort().toLongArray(); - NDArray lt = leftMost.get(indexes[0] + ",:"); - NDArray lb = leftMost.get(indexes[1] + ",:"); - indexes = rightMost.get(":, 1").argSort().toLongArray(); - NDArray rt = rightMost.get(indexes[0] + ",:"); - NDArray rb = rightMost.get(indexes[1] + ",:"); - - list.add(lt); - list.add(rt); - list.add(rb); - list.add(lb); - - NDArray rect = NDArrays.concat(list).reshape(4, 2); - return rect; - } - - /** - * Get boxes from the binarized image predicted by DB - * - * @param manager - * @param pred the binarized image predicted by DB. - * @param bitmap new 'pred' after threshold filtering. 
- */ - private NDArray boxes_from_bitmap(NDManager manager, NDArray pred, Mat bitmap) { - int dest_height = (int) pred.getShape().get(0); - int dest_width = (int) pred.getShape().get(1); - int height = bitmap.rows(); - int width = bitmap.cols(); - - List contours = new ArrayList<>(); - Mat hierarchy = new Mat(); - // 寻找轮廓 - Imgproc.findContours( - bitmap, - contours, - hierarchy, - Imgproc.RETR_LIST, - Imgproc.CHAIN_APPROX_SIMPLE); - - int num_contours = Math.min(contours.size(), max_candidates); - NDList boxList = new NDList(); - float[] scores = new float[num_contours]; - - for (int index = 0; index < num_contours; index++) { - MatOfPoint contour = contours.get(index); - MatOfPoint2f newContour = new MatOfPoint2f(contour.toArray()); - float[][] pointsArr = new float[4][2]; - int sside = get_mini_boxes(newContour, pointsArr); - if (sside < this.min_size) - continue; - NDArray points = manager.create(pointsArr); - float score = box_score_fast(manager, pred, points); - if (score < this.box_thresh) - continue; - - NDArray box = unclip(manager, points); // TODO get_mini_boxes(box) - - // box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width) - NDArray boxes1 = box.get(":,0").div(width).mul(dest_width).round().clip(0, dest_width); - box.set(new NDIndex(":, 0"), boxes1); - // box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height) - NDArray boxes2 = box.get(":,1").div(height).mul(dest_height).round().clip(0, dest_height); - box.set(new NDIndex(":, 1"), boxes2); - - boxList.add(box); - scores[index] = score; - - // release memory - contour.release(); - newContour.release(); - } - - // release - hierarchy.release(); - - NDArray boxes = null; - if (boxList.size() > 0) { - boxes = NDArrays.stack(boxList); - return boxes; - } - - return boxes; - - - } - - /** - * Shrink or expand the boxaccording to 'unclip_ratio' - * - * @param points The predicted box. 
- * @return uncliped box - */ - private NDArray unclip(NDManager manager, NDArray points) { - points = order_points_clockwise(points); - float[] pointsArr = points.toFloatArray(); - float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2); - float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8); - - float[] rt = java.util.Arrays.copyOfRange(pointsArr, 2, 4); - float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6); - - float width = distance(lt, rt); - float height = distance(lt, lb); - - if (width > height) { - float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b - - float delta_dis = height; - float delta_x = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1)); - float delta_y = Math.abs(k * delta_x); - - if (k > 0) { - pointsArr[0] = lt[0] - delta_x + delta_y; - pointsArr[1] = lt[1] - delta_y - delta_x; - pointsArr[2] = rt[0] + delta_x + delta_y; - pointsArr[3] = rt[1] + delta_y - delta_x; - - pointsArr[4] = rb[0] + delta_x - delta_y; - pointsArr[5] = rb[1] + delta_y + delta_x; - pointsArr[6] = lb[0] - delta_x - delta_y; - pointsArr[7] = lb[1] - delta_y + delta_x; - } else { - pointsArr[0] = lt[0] - delta_x - delta_y; - pointsArr[1] = lt[1] + delta_y - delta_x; - pointsArr[2] = rt[0] + delta_x - delta_y; - pointsArr[3] = rt[1] - delta_y - delta_x; - - pointsArr[4] = rb[0] + delta_x + delta_y; - pointsArr[5] = rb[1] - delta_y + delta_x; - pointsArr[6] = lb[0] - delta_x + delta_y; - pointsArr[7] = lb[1] + delta_y + delta_x; - } - } else { - float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b - - float delta_dis = width; - float delta_y = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1)); - float delta_x = Math.abs(k * delta_y); - - if (k > 0) { - pointsArr[0] = lt[0] + delta_x - delta_y; - pointsArr[1] = lt[1] - delta_y - delta_x; - pointsArr[2] = rt[0] + delta_x + delta_y; - pointsArr[3] = rt[1] - delta_y + delta_x; - - pointsArr[4] = rb[0] - delta_x + delta_y; - pointsArr[5] = rb[1] + delta_y + delta_x; - pointsArr[6] = 
lb[0] - delta_x - delta_y; - pointsArr[7] = lb[1] + delta_y - delta_x; - } else { - pointsArr[0] = lt[0] - delta_x - delta_y; - pointsArr[1] = lt[1] - delta_y + delta_x; - pointsArr[2] = rt[0] - delta_x + delta_y; - pointsArr[3] = rt[1] - delta_y - delta_x; - - pointsArr[4] = rb[0] + delta_x + delta_y; - pointsArr[5] = rb[1] + delta_y - delta_x; - pointsArr[6] = lb[0] + delta_x - delta_y; - pointsArr[7] = lb[1] + delta_y + delta_x; - } - } - points = manager.create(pointsArr).reshape(4, 2); - - return points; - } - - private float distance(float[] point1, float[] point2) { - float disX = point1[0] - point2[0]; - float disY = point1[1] - point2[1]; - float dis = (float) Math.sqrt(disX * disX + disY * disY); - return dis; - } - - /** - * Get boxes from the contour or box. - * - * @param contour The predicted contour. - * @param pointsArr The predicted box. - * @return smaller side of box - */ - private int get_mini_boxes(MatOfPoint2f contour, float[][] pointsArr) { - // https://blog.csdn.net/qq_37385726/article/details/82313558 - // bounding_box[1] - rect 返回矩形的长和宽 - RotatedRect rect = Imgproc.minAreaRect(contour); - Mat points = new Mat(); - Imgproc.boxPoints(rect, points); - - float[][] fourPoints = new float[4][2]; - for (int row = 0; row < 4; row++) { - fourPoints[row][0] = (float) points.get(row, 0)[0]; - fourPoints[row][1] = (float) points.get(row, 1)[0]; - } - - float[] tmpPoint = new float[2]; - for (int i = 0; i < 4; i++) { - for (int j = i + 1; j < 4; j++) { - if (fourPoints[j][0] < fourPoints[i][0]) { - tmpPoint[0] = fourPoints[i][0]; - tmpPoint[1] = fourPoints[i][1]; - fourPoints[i][0] = fourPoints[j][0]; - fourPoints[i][1] = fourPoints[j][1]; - fourPoints[j][0] = tmpPoint[0]; - fourPoints[j][1] = tmpPoint[1]; - } - } - } - - int index_1 = 0; - int index_2 = 1; - int index_3 = 2; - int index_4 = 3; - - if (fourPoints[1][1] > fourPoints[0][1]) { - index_1 = 0; - index_4 = 1; - } else { - index_1 = 1; - index_4 = 0; - } - - if (fourPoints[3][1] > 
fourPoints[2][1]) { - index_2 = 2; - index_3 = 3; - } else { - index_2 = 3; - index_3 = 2; - } - - pointsArr[0] = fourPoints[index_1]; - pointsArr[1] = fourPoints[index_2]; - pointsArr[2] = fourPoints[index_3]; - pointsArr[3] = fourPoints[index_4]; - - int height = rect.boundingRect().height; - int width = rect.boundingRect().width; - int sside = Math.min(height, width); - - // release - points.release(); - - return sside; - } - - /** - * Calculate the score of box. - * - * @param bitmap The binarized image predicted by DB. - * @param points The predicted box - * @return - */ - private float box_score_fast(NDManager manager, NDArray bitmap, NDArray points) { - NDArray box = points.get(":"); - long h = bitmap.getShape().get(0); - long w = bitmap.getShape().get(1); - // xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) - int xmin = box.get(":, 0").min().floor().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0]; - int xmax = box.get(":, 0").max().ceil().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0]; - int ymin = box.get(":, 1").min().floor().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0]; - int ymax = box.get(":, 1").max().ceil().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0]; - - NDArray mask = manager.zeros(new Shape(ymax - ymin + 1, xmax - xmin + 1), DataType.UINT8); - - box.set(new NDIndex(":, 0"), box.get(":, 0").sub(xmin)); - box.set(new NDIndex(":, 1"), box.get(":, 1").sub(ymin)); - - //mask - convert from NDArray to Mat - Mat maskMat = NDArrayUtils.uint8NDArrayToMat(mask); - - //mask - convert from NDArray to Mat - 4 rows, 2 cols - Mat boxMat = NDArrayUtils.floatNDArrayToMat(box, CvType.CV_32S); - -// boxMat.reshape(1, new int[]{1, 4, 2}); - List pts = new ArrayList<>(); - MatOfPoint matOfPoint = NDArrayUtils.matToMatOfPoint(boxMat); // new MatOfPoint(boxMat); - pts.add(matOfPoint); - Imgproc.fillPoly(maskMat, pts, new Scalar(1)); - - - NDArray subBitMap = bitmap.get(ymin + ":" + (ymax + 1) + 
"," + xmin + ":" + (xmax + 1)); - Mat bitMapMat = NDArrayUtils.floatNDArrayToMat(subBitMap); - - Scalar score = Core.mean(bitMapMat, maskMat); - float scoreValue = (float) score.val[0]; - // release - maskMat.release(); - boxMat.release(); - bitMapMat.release(); - - return scoreValue; - } - - @Override - public NDList processInput(TranslatorContext ctx, Image input) { - NDArray img = input.toNDArray(ctx.getNDManager()); - int h = input.getHeight(); - int w = input.getWidth(); - img_height = h; - img_width = w; - - // limit the max side - float ratio = 1.0f; - if (Math.max(h, w) > limit_side_len) { - if (h > w) { - ratio = (float) limit_side_len / (float) h; - } else { - ratio = (float) limit_side_len / (float) w; - } - } - - int resize_h = (int) (h * ratio); - int resize_w = (int) (w * ratio); - - resize_h = Math.round((float) resize_h / 32f) * 32; - resize_w = Math.round((float) resize_w / 32f) * 32; - - ratio_h = resize_h / (float) h; - ratio_w = resize_w / (float) w; - - img = NDImageUtils.resize(img, resize_w, resize_h); - - img = NDImageUtils.toTensor(img); - - img = - NDImageUtils.normalize( - img, - new float[]{0.485f, 0.456f, 0.406f}, - new float[]{0.229f, 0.224f, 0.225f}); - - img = img.expandDims(0); - - return new NDList(img); - } - - @Override - public Batchifier getBatchifier() { - return null; - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OcrV4Detection.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OcrV4Detection.java deleted file mode 100644 index f2e2bc8..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OcrV4Detection.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4.detection; - -import ai.djl.modality.cv.Image; -import ai.djl.ndarray.NDList; -import ai.djl.repository.zoo.Criteria; -import ai.djl.training.util.ProgressBar; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import 
java.util.concurrent.ConcurrentHashMap; -import java.nio.file.Paths; - -/** - * 文字检测 - */ -public final class OcrV4Detection { - /** - * 中文文本检测 - * - * @return - */ - public Criteria chDetCriteria() { - Criteria criteria = - Criteria.builder() - .optEngine("OnnxRuntime") - // .optModelName("inference") - .setTypes(Image.class, NDList.class) - .optModelPath(Paths.get("models/ch_PP-OCRv4_det_infer/inference.onnx")) - .optTranslator(new OCRDetectionTranslator(new ConcurrentHashMap())) - .optProgress(new ProgressBar()) - .build(); - - return criteria; - } - -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/NDArrayUtils.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/NDArrayUtils.java deleted file mode 100644 index 1e3bc4c..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/NDArrayUtils.java +++ /dev/null @@ -1,236 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4.opencv; - -import ai.djl.ndarray.NDArray; -import org.opencv.core.CvType; -import org.opencv.core.Mat; -import org.opencv.core.MatOfPoint; -import org.opencv.core.Point; - -import java.util.ArrayList; -import java.util.List; - -public class NDArrayUtils { - /** - * Mat To MatOfPoint - * - * @param mat - * @return - */ - public static MatOfPoint matToMatOfPoint(Mat mat) { - int rows = mat.rows(); - MatOfPoint matOfPoint = new MatOfPoint(); - - List list = new ArrayList<>(); - for (int i = 0; i < rows; i++) { - Point point = new Point((float) mat.get(i, 0)[0], (float) mat.get(i, 1)[0]); - list.add(point); - } - matOfPoint.fromList(list); - - return matOfPoint; - } - - /** - * float NDArray To float[][] Array - * - * @param ndArray - * @return - */ - public static float[][] floatNDArrayToArray(NDArray ndArray) { - int rows = (int) (ndArray.getShape().get(0)); - int cols = (int) (ndArray.getShape().get(1)); - float[][] arr = new float[rows][cols]; - - float[] arrs = ndArray.toFloatArray(); - for (int i = 0; i < rows; i++) { - for (int j 
= 0; j < cols; j++) { - arr[i][j] = arrs[i * cols + j]; - } - } - return arr; - } - - /** - * Mat To double[][] Array - * - * @param mat - * @return - */ - public static double[][] matToDoubleArray(Mat mat) { - int rows = mat.rows(); - int cols = mat.cols(); - - double[][] doubles = new double[rows][cols]; - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - doubles[i][j] = mat.get(i, j)[0]; - } - } - - return doubles; - } - - /** - * Mat To float[][] Array - * - * @param mat - * @return - */ - public static float[][] matToFloatArray(Mat mat) { - int rows = mat.rows(); - int cols = mat.cols(); - - float[][] floats = new float[rows][cols]; - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - floats[i][j] = (float) mat.get(i, j)[0]; - } - } - - return floats; - } - - /** - * Mat To byte[][] Array - * - * @param mat - * @return - */ - public static byte[][] matToUint8Array(Mat mat) { - int rows = mat.rows(); - int cols = mat.cols(); - - byte[][] bytes = new byte[rows][cols]; - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - bytes[i][j] = (byte) mat.get(i, j)[0]; - } - } - - return bytes; - } - - /** - * float NDArray To float[][] Array - * - * @param ndArray - * @param cvType - * @return - */ - public static Mat floatNDArrayToMat(NDArray ndArray, int cvType) { - int rows = (int) (ndArray.getShape().get(0)); - int cols = (int) (ndArray.getShape().get(1)); - Mat mat = new Mat(rows, cols, cvType); - - float[] arrs = ndArray.toFloatArray(); - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - mat.put(i, j, arrs[i * cols + j]); - } - } - return mat; - } - - /** - * float NDArray To Mat - * - * @param ndArray - * @return - */ - public static Mat floatNDArrayToMat(NDArray ndArray) { - int rows = (int) (ndArray.getShape().get(0)); - int cols = (int) (ndArray.getShape().get(1)); - Mat mat = new Mat(rows, cols, CvType.CV_32F); - - float[] arrs = ndArray.toFloatArray(); - for (int i = 0; i < 
rows; i++) { - for (int j = 0; j < cols; j++) { - mat.put(i, j, arrs[i * cols + j]); - } - } - - return mat; - - } - - /** - * uint8 NDArray To Mat - * - * @param ndArray - * @return - */ - public static Mat uint8NDArrayToMat(NDArray ndArray) { - int rows = (int) (ndArray.getShape().get(0)); - int cols = (int) (ndArray.getShape().get(1)); - Mat mat = new Mat(rows, cols, CvType.CV_8U); - - byte[] arrs = ndArray.toByteArray(); - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - mat.put(i, j, arrs[i * cols + j]); - } - } - return mat; - } - - /** - * float[][] Array To Mat - * - * @param arr - * @return - */ - public static Mat floatArrayToMat(float[][] arr) { - int rows = arr.length; - int cols = arr[0].length; - Mat mat = new Mat(rows, cols, CvType.CV_32F); - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - mat.put(i, j, arr[i][j]); - } - } - - return mat; - } - - /** - * byte[][] Array To Mat - * - * @param arr - * @return - */ - public static Mat uint8ArrayToMat(byte[][] arr) { - int rows = arr.length; - int cols = arr[0].length; - Mat mat = new Mat(rows, cols, CvType.CV_8U); - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - mat.put(i, j, arr[i][j]); - } - } - - return mat; - } - - /** - * List To Mat - * - * @param points - * @return - */ - public static Mat toMat(List points) { - Mat mat = new Mat(points.size(), 2, CvType.CV_32F); - for (int i = 0; i < points.size(); i++) { - ai.djl.modality.cv.output.Point point = points.get(i); - mat.put(i, 0, (float) point.getX()); - mat.put(i, 1, (float) point.getY()); - } - - return mat; - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/OpenCVUtils.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/OpenCVUtils.java deleted file mode 100644 index 81c0f44..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/OpenCVUtils.java +++ /dev/null @@ -1,60 +0,0 @@ -package 
com.litongjava.djl.paddle.ocr.v4.opencv; - -import org.opencv.core.CvType; -import org.opencv.core.Mat; -import org.opencv.imgproc.Imgproc; - -import java.awt.image.BufferedImage; -import java.awt.image.DataBufferByte; - -public class OpenCVUtils { - - /** - * 透视变换 - * - * @param src - * @param srcPoints - * @param dstPoints - * @return - */ - public static Mat perspectiveTransform(Mat src, Mat srcPoints, Mat dstPoints) { - Mat dst = src.clone(); - Mat warp_mat = Imgproc.getPerspectiveTransform(srcPoints, dstPoints); - Imgproc.warpPerspective(src, dst, warp_mat, dst.size()); - warp_mat.release(); - - return dst; - } - - /** - * Mat to BufferedImage - * - * @param mat - * @return - */ - public static BufferedImage mat2Image(Mat mat) { - int width = mat.width(); - int height = mat.height(); - byte[] data = new byte[width * height * (int) mat.elemSize()]; - Imgproc.cvtColor(mat, mat, 4); - mat.get(0, 0, data); - BufferedImage ret = new BufferedImage(width, height, 5); - ret.getRaster().setDataElements(0, 0, width, height, data); - return ret; - } - - /** - * BufferedImage to Mat - * - * @param img - * @return - */ - public static Mat image2Mat(BufferedImage img) { - int width = img.getWidth(); - int height = img.getHeight(); - byte[] data = ((DataBufferByte) img.getRaster().getDataBuffer()).getData(); - Mat mat = new Mat(height, width, CvType.CV_8UC3); - mat.put(0, 0, data); - return mat; - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/OcrV4Recognition.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/OcrV4Recognition.java deleted file mode 100644 index 9ce8df4..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/OcrV4Recognition.java +++ /dev/null @@ -1,154 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4.recognition; - -import ai.djl.inference.Predictor; -import ai.djl.modality.cv.Image; -import ai.djl.modality.cv.ImageFactory; -import ai.djl.modality.cv.output.Point; 
-import ai.djl.modality.cv.util.NDImageUtils; -import ai.djl.ndarray.NDArray; -import ai.djl.ndarray.NDList; -import ai.djl.ndarray.NDManager; -import ai.djl.opencv.OpenCVImageFactory; -import ai.djl.repository.zoo.Criteria; -import ai.djl.training.util.ProgressBar; -import ai.djl.translate.TranslateException; -import com.litongjava.djl.paddle.ocr.v4.common.RotatedBox; -import com.litongjava.djl.paddle.ocr.v4.opencv.NDArrayUtils; -import com.litongjava.djl.paddle.ocr.v4.opencv.OpenCVUtils; -import org.opencv.core.Mat; - -import java.awt.image.BufferedImage; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; - -/** - * 文字识别 - */ -public final class OcrV4Recognition { - - - /** - * 中文简体 - * - * @return - */ - public Criteria chRecCriteria() { - Path modelPath = Paths.get("models/ch_PP-OCRv4_rec_infer/inference.onnx"); - Criteria criteria = - Criteria.builder() - .optEngine("OnnxRuntime") - //.optModelName("inference") - .setTypes(Image.class, String.class) - .optModelPath(modelPath) - .optProgress(new ProgressBar()) - .optTranslator(new PpWordRecTranslator(new ConcurrentHashMap())) - .build(); - return criteria; - } - - - /** - * 图像推理 - * - * @param manager - * @param image - * @param detector - * @param recognizer - * @return - * @throws TranslateException - */ - public List predict(NDManager manager, - Image image, Predictor detector, Predictor recognizer) - throws TranslateException { - NDList boxes = detector.predict(image); - if (boxes == null) { - return null; - } - // 交给 NDManager自动管理内存 - // attach to manager for automatic memory management - boxes.attach(manager); - - List result = new ArrayList<>(); - - Mat mat = (Mat) image.getWrappedImage(); - - for (int i = 0; i < boxes.size(); i++) { - NDArray box = boxes.get(i); - - float[] pointsArr = box.toFloatArray(); - float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2); - float[] rt = 
java.util.Arrays.copyOfRange(pointsArr, 2, 4); - float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6); - float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8); - int img_crop_width = (int) Math.max(distance(lt, rt), distance(rb, lb)); - int img_crop_height = (int) Math.max(distance(lt, lb), distance(rt, rb)); - List srcPoints = new ArrayList<>(); - srcPoints.add(new Point(lt[0], lt[1])); - srcPoints.add(new Point(rt[0], rt[1])); - srcPoints.add(new Point(rb[0], rb[1])); - srcPoints.add(new Point(lb[0], lb[1])); - List dstPoints = new ArrayList<>(); - dstPoints.add(new Point(0, 0)); - dstPoints.add(new Point(img_crop_width, 0)); - dstPoints.add(new Point(img_crop_width, img_crop_height)); - dstPoints.add(new Point(0, img_crop_height)); - - Mat srcPoint2f = NDArrayUtils.toMat(srcPoints); - Mat dstPoint2f = NDArrayUtils.toMat(dstPoints); - - Mat cvMat = OpenCVUtils.perspectiveTransform(mat, srcPoint2f, dstPoint2f); - - Image subImg = OpenCVImageFactory.getInstance().fromImage(cvMat); -// ImageUtils.saveImage(subImg, i + ".png", "build/output"); - - subImg = subImg.getSubImage(0, 0, img_crop_width, img_crop_height); - if (subImg.getHeight() * 1.0 / subImg.getWidth() > 1.5) { - subImg = rotateImg(manager, subImg); - } - - String name = recognizer.predict(subImg); - RotatedBox rotatedBox = new RotatedBox(box, name); - result.add(rotatedBox); - - cvMat.release(); - srcPoint2f.release(); - dstPoint2f.release(); - - } - - return result; - } - - private BufferedImage get_rotate_crop_image(Image image, NDArray box) { - return null; - } - - /** - * 欧式距离计算 - * - * @param point1 - * @param point2 - * @return - */ - private float distance(float[] point1, float[] point2) { - float disX = point1[0] - point2[0]; - float disY = point1[1] - point2[1]; - float dis = (float) Math.sqrt(disX * disX + disY * disY); - return dis; - } - - /** - * 图片旋转 - * - * @param manager - * @param image - * @return - */ - private Image rotateImg(NDManager manager, Image image) { - NDArray 
rotated = NDImageUtils.rotate90(image.toNDArray(manager), 1); - return ImageFactory.getInstance().fromNDArray(rotated); - } -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/PpWordRecTranslator.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/PpWordRecTranslator.java deleted file mode 100644 index 473559c..0000000 --- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/PpWordRecTranslator.java +++ /dev/null @@ -1,121 +0,0 @@ -package com.litongjava.djl.paddle.ocr.v4.recognition; - -import ai.djl.Model; -import ai.djl.modality.cv.Image; -import ai.djl.modality.cv.util.NDImageUtils; -import ai.djl.ndarray.NDArray; -import ai.djl.ndarray.NDList; -import ai.djl.ndarray.index.NDIndex; -import ai.djl.ndarray.types.DataType; -import ai.djl.ndarray.types.Shape; -import ai.djl.translate.Batchifier; -import ai.djl.translate.Translator; -import ai.djl.translate.TranslatorContext; -import ai.djl.util.Utils; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -/** - * 文字识别前后处理 - */ -public class PpWordRecTranslator implements Translator { - private List table; - private final boolean use_space_char; - - public PpWordRecTranslator(Map arguments) { - use_space_char = - arguments.containsKey("use_space_char") - ? 
Boolean.parseBoolean(arguments.get("use_space_char").toString()) - : true; - } - - @Override - public void prepare(TranslatorContext ctx) throws IOException { - Model model = ctx.getModel(); - try (InputStream is = model.getArtifact("dict.txt").openStream()) { - table = Utils.readLines(is, true); - table.add(0, "blank"); - if (use_space_char) { - table.add(" "); - table.add(" "); - } else { - table.add(""); - table.add(""); - } - - } - } - - @Override - public String processOutput(TranslatorContext ctx, NDList list) throws IOException { - StringBuilder sb = new StringBuilder(); - NDArray tokens = list.singletonOrThrow(); - - long[] indices = tokens.get(0).argMax(1).toLongArray(); - boolean[] selection = new boolean[indices.length]; - Arrays.fill(selection, true); - for (int i = 1; i < indices.length; i++) { - if (indices[i] == indices[i - 1]) { - selection[i] = false; - } - } - - // 字符置信度 -// float[] probs = new float[indices.length]; -// for (int row = 0; row < indices.length; row++) { -// NDArray value = tokens.get(0).get(new NDIndex(""+ row +":" + (row + 1) +"," + indices[row] +":" + ( indices[row] + 1))); -// probs[row] = value.toFloatArray()[0]; -// } - - int lastIdx = 0; - for (int i = 0; i < indices.length; i++) { - if (selection[i] == true && indices[i] > 0 && !(i > 0 && indices[i] == lastIdx)) { - sb.append(table.get((int) indices[i])); - } - } - return sb.toString(); - } - - @Override - public NDList processInput(TranslatorContext ctx, Image input) { - NDArray img = input.toNDArray(ctx.getNDManager(), Image.Flag.COLOR); - int imgC = 3; - int imgH = 48; - int imgW = 320; - - float max_wh_ratio = (float) imgW / (float) imgH; - - int h = input.getHeight(); - int w = input.getWidth(); - float wh_ratio = (float) w / (float) h; - - max_wh_ratio = Math.max(max_wh_ratio, wh_ratio); - imgW = (int) (imgH * max_wh_ratio); - - int resized_w; - if (Math.ceil(imgH * wh_ratio) > imgW) { - resized_w = imgW; - } else { - resized_w = (int) (Math.ceil(imgH * wh_ratio)); - } 
- NDArray resized_image = NDImageUtils.resize(img, resized_w, imgH); - resized_image = resized_image.transpose(2, 0, 1).toType(DataType.FLOAT32, false); - resized_image.divi(255f).subi(0.5f).divi(0.5f); - NDArray padding_im = ctx.getNDManager().zeros(new Shape(imgC, imgH, imgW), DataType.FLOAT32); - padding_im.set(new NDIndex(":,:,0:" + resized_w), resized_image); - - padding_im = padding_im.flip(0); - padding_im = padding_im.expandDims(0); - return new NDList(padding_im); - } - - @Override - public Batchifier getBatchifier() { - return null; - } - -} \ No newline at end of file diff --git a/src/main/java/com/litongjava/project/config/ProjectConfig.java b/src/main/java/com/litongjava/project/config/ProjectConfig.java index ca58b63..900559b 100644 --- a/src/main/java/com/litongjava/project/config/ProjectConfig.java +++ b/src/main/java/com/litongjava/project/config/ProjectConfig.java @@ -1,6 +1,9 @@ package com.litongjava.project.config; -import java.io.*; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Properties; diff --git a/src/main/java/com/luooqi/ocr/OcrApp.java b/src/main/java/com/luooqi/ocr/OcrApp.java index 7a82e10..e1b830a 100644 --- a/src/main/java/com/luooqi/ocr/OcrApp.java +++ b/src/main/java/com/luooqi/ocr/OcrApp.java @@ -19,7 +19,7 @@ public static void main(String[] args) { @Override public void init() throws Exception { super.init(); - InitConfig.init(); + //InitConfig.init(); } diff --git a/src/main/java/com/luooqi/ocr/config/InitConfig.java b/src/main/java/com/luooqi/ocr/config/InitConfig.java index 577dcfb..481200e 100644 --- a/src/main/java/com/luooqi/ocr/config/InitConfig.java +++ b/src/main/java/com/luooqi/ocr/config/InitConfig.java @@ -1,6 +1,5 @@ package com.luooqi.ocr.config; -import com.luooqi.ocr.local.PaddlePaddleOCRV4; import com.luooqi.ocr.utils.GlobalKeyListener; import 
com.luooqi.ocr.utils.VoidDispatchService; import org.jnativehook.GlobalScreen; @@ -23,7 +22,7 @@ public static void init() { // map.put(ConfigKeys.recName, "ch_PP-OCRv3_rec_infer"); // map.put(ConfigKeys.keysName, "ppocr_keys_v1.txt"); // projectConfig.batchPut(map); - PaddlePaddleOCRV4.INSTANCE.init(); + } diff --git a/src/main/java/com/luooqi/ocr/controller/ProcessController.java b/src/main/java/com/luooqi/ocr/controller/ProcessController.java index f09f020..1080320 100644 --- a/src/main/java/com/luooqi/ocr/controller/ProcessController.java +++ b/src/main/java/com/luooqi/ocr/controller/ProcessController.java @@ -1,6 +1,7 @@ package com.luooqi.ocr.controller; import com.luooqi.ocr.utils.CommUtils; + import javafx.geometry.Insets; import javafx.geometry.Pos; import javafx.scene.Scene; diff --git a/src/main/java/com/luooqi/ocr/local/PaddlePaddleOCRV4.java b/src/main/java/com/luooqi/ocr/local/PaddlePaddleOCRV4.java index e0a860f..878ddc5 100644 --- a/src/main/java/com/luooqi/ocr/local/PaddlePaddleOCRV4.java +++ b/src/main/java/com/luooqi/ocr/local/PaddlePaddleOCRV4.java @@ -1,121 +1,36 @@ package com.luooqi.ocr.local; -import ai.djl.MalformedModelException; -import ai.djl.inference.Predictor; -import ai.djl.modality.cv.Image; -import ai.djl.ndarray.NDList; -import ai.djl.ndarray.NDManager; -import ai.djl.opencv.OpenCVImageFactory; -import ai.djl.repository.zoo.ModelNotFoundException; -import ai.djl.repository.zoo.ModelZoo; -import ai.djl.repository.zoo.ZooModel; -import ai.djl.translate.TranslateException; -import com.litongjava.djl.paddle.ocr.v4.common.RotatedBox; -import com.litongjava.djl.paddle.ocr.v4.common.RotatedBoxCompX; -import com.litongjava.djl.paddle.ocr.v4.detection.OcrV4Detection; -import com.litongjava.djl.paddle.ocr.v4.recognition.OcrV4Recognition; - import java.io.File; -import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; + +import 
com.benjaminwan.ocrlibrary.OcrResult; + +import io.github.mymonstercat.Model; +import io.github.mymonstercat.ocr.InferenceEngine; +import io.github.mymonstercat.ocr.config.HardwareConfig; /** * Created by litonglinux@qq.com on 11/23/2023_2:09 AM */ public enum PaddlePaddleOCRV4 { INSTANCE; - private OcrV4Detection detection; - private OcrV4Recognition recognition; - private Predictor detector; - private Predictor recognizer; - private NDManager manager; - - PaddlePaddleOCRV4() { - detection = new OcrV4Detection(); - recognition = new OcrV4Recognition(); - ZooModel detectionModel = null; - ZooModel recognitionModel = null; - try { - detectionModel = ModelZoo.loadModel(detection.chDetCriteria()); - recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria()); - } catch (IOException e) { - e.printStackTrace(); - } catch (ModelNotFoundException e) { - e.printStackTrace(); - } catch (MalformedModelException e) { - e.printStackTrace(); - } - detector = detectionModel.newPredictor(); - - recognizer = recognitionModel.newPredictor(); - manager = NDManager.newBaseManager(); - } + static InferenceEngine engine = null; - //noting not to do.but init - public void init() { + PaddlePaddleOCRV4() { } - public String ocr(File imageFile) throws Exception { - Path path = imageFile.toPath(); - Image image = OpenCVImageFactory.getInstance().fromFile(path); - return ocr(image); + // noting not to do.but init + public static void init() { + HardwareConfig onnxConfig = HardwareConfig.getOnnxConfig(); + onnxConfig.setNumThread(2); + engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4_SERVER, onnxConfig); } - public String ocr(Image image) throws Exception { - List detections = recognition.predict(manager, image, detector, recognizer); - if (detections == null) { - return null; - } - - List initList = new ArrayList<>(); - for (RotatedBox result : detections) { - // put low Y value at the head of the queue. 
- initList.add(result); - } - Collections.sort(initList); - - List> lines = new ArrayList<>(); - List line = new ArrayList<>(); - RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText()); - line.add(firstBox); - lines.add((ArrayList) line); - for (int i = 1; i < initList.size(); i++) { - RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText()); - float y1 = firstBox.getBox().toFloatArray()[1]; - float y2 = tmpBox.getBox().toFloatArray()[1]; - float dis = Math.abs(y2 - y1); - if (dis < 20) { // 认为是同 1 行 - Considered to be in the same line - line.add(tmpBox); - } else { // 换行 - Line break - firstBox = tmpBox; - Collections.sort(line); - line = new ArrayList<>(); - line.add(firstBox); - lines.add((ArrayList) line); - } - } - - - StringBuffer fullText = new StringBuffer(); - for (int i = 0; i < lines.size(); i++) { - for (int j = 0; j < lines.get(i).size(); j++) { - String text = lines.get(i).get(j).getText(); - if (text.trim().equals("")) - continue; - fullText.append(text + " "); - } - fullText.append('\n'); - } - return fullText.toString(); + public OcrResult ocr(File imageFile) { + return engine.runOcr(imageFile.getAbsolutePath()); } public void close() { - detector.close(); - recognizer.close(); } } diff --git a/src/main/java/com/luooqi/ocr/snap/ScreenCapture.java b/src/main/java/com/luooqi/ocr/snap/ScreenCapture.java index 0afbe19..b2febe9 100644 --- a/src/main/java/com/luooqi/ocr/snap/ScreenCapture.java +++ b/src/main/java/com/luooqi/ocr/snap/ScreenCapture.java @@ -1,11 +1,17 @@ package com.luooqi.ocr.snap; -import cn.hutool.core.swing.ScreenUtil; -import cn.hutool.log.StaticLog; +import java.awt.AWTException; +import java.awt.Rectangle; +import java.awt.Robot; +import java.awt.image.BufferedImage; + import com.luooqi.ocr.model.CaptureInfo; import com.luooqi.ocr.utils.CommUtils; import com.luooqi.ocr.windows.MainForm; + +import cn.hutool.core.swing.ScreenUtil; +import 
cn.hutool.log.StaticLog; import javafx.animation.AnimationTimer; import javafx.application.Platform; import javafx.embed.swing.SwingFXUtils; @@ -17,15 +23,18 @@ import javafx.scene.input.KeyCode; import javafx.scene.input.KeyEvent; import javafx.scene.input.MouseButton; -import javafx.scene.layout.*; +import javafx.scene.layout.Background; +import javafx.scene.layout.BackgroundImage; +import javafx.scene.layout.BackgroundPosition; +import javafx.scene.layout.BackgroundRepeat; +import javafx.scene.layout.BackgroundSize; +import javafx.scene.layout.BorderPane; +import javafx.scene.layout.Pane; import javafx.scene.paint.Color; import javafx.scene.text.Font; import javafx.scene.text.FontWeight; import javafx.stage.Stage; -import java.awt.*; -import java.awt.image.BufferedImage; - /** * This is the Window which is used from the user to draw the rectangle representing an area on the screen to be captured. * @@ -353,23 +362,23 @@ private void repaintCanvas() { // smart calculation of where the mouse has been dragged data.rectWidth = (data.mouseXNow > data.mouseXPressed) ? data.mouseXNow - data.mouseXPressed // RIGHT - : data.mouseXPressed - data.mouseXNow // LEFT + : data.mouseXPressed - data.mouseXNow // LEFT ; data.rectHeight = (data.mouseYNow > data.mouseYPressed) ? data.mouseYNow - data.mouseYPressed // DOWN - : data.mouseYPressed - data.mouseYNow // UP + : data.mouseYPressed - data.mouseYNow // UP ; data.rectUpperLeftX = // -------->UPPER_LEFT_X - (data.mouseXNow > data.mouseXPressed) ? data.mouseXPressed // RIGHT - : data.mouseXNow// LEFT + (data.mouseXNow > data.mouseXPressed) ? data.mouseXPressed // RIGHT + : data.mouseXNow// LEFT ; data.rectUpperLeftY = // -------->UPPER_LEFT_Y - (data.mouseYNow > data.mouseYPressed) ? data.mouseYPressed // DOWN - : data.mouseYNow // UP + (data.mouseYNow > data.mouseYPressed) ? 
data.mouseYPressed // DOWN + : data.mouseYNow // UP ; gc.strokeRect(data.rectUpperLeftX - 1.00, data.rectUpperLeftY - 1.00, data.rectWidth + 2.00, - data.rectHeight + 2.00); + data.rectHeight + 2.00); gc.clearRect(data.rectUpperLeftX, data.rectUpperLeftY, data.rectWidth, data.rectHeight); // draw the text @@ -378,10 +387,10 @@ private void repaintCanvas() { gc.setLineWidth(1); gc.setFill(Color.FIREBRICK); gc.fillRect(middle - 77, data.rectUpperLeftY < 50 ? data.rectUpperLeftY + 2 : data.rectUpperLeftY - 18.00, 100, - 18); + 18); gc.setFill(Color.WHITE); gc.fillText(data.rectWidth + " * " + data.rectHeight, middle - 77 + 9, - data.rectUpperLeftY < 50 ? data.rectUpperLeftY + 17.00 : data.rectUpperLeftY - 4.00); + data.rectUpperLeftY < 50 ? data.rectUpperLeftY + 17.00 : data.rectUpperLeftY - 4.00); } } @@ -418,8 +427,8 @@ public void prepareForCapture() { mainCanvas.setCursor(Cursor.CROSSHAIR); initGraphContent(); rootPane.setBackground(new Background(new BackgroundImage(fxImage, BackgroundRepeat.NO_REPEAT, - BackgroundRepeat.NO_REPEAT, BackgroundPosition.CENTER, - new BackgroundSize(CaptureInfo.ScreenWidth, CaptureInfo.ScreenHeight, false, false, true, true)))); + BackgroundRepeat.NO_REPEAT, BackgroundPosition.CENTER, + new BackgroundSize(CaptureInfo.ScreenWidth, CaptureInfo.ScreenHeight, false, false, true, true)))); repaintCanvas(); stage.setScene(scene); stage.setFullScreenExitHint(""); @@ -439,8 +448,8 @@ private void prepareImage() { try { mainCanvas.setDisable(true); image = new Robot().createScreenCapture(new Rectangle(data.rectUpperLeftX + CaptureInfo.ScreenMinX, - data.rectUpperLeftY + (int) CommUtils.getCrtScreen(stage).getVisualBounds().getMinY(), data.rectWidth, - data.rectHeight)); + data.rectUpperLeftY + (int) CommUtils.getCrtScreen(stage).getVisualBounds().getMinY(), data.rectWidth, + data.rectHeight)); } catch (AWTException ex) { StaticLog.error(ex); return; diff --git a/src/main/java/com/luooqi/ocr/utils/OcrUtils.java 
b/src/main/java/com/luooqi/ocr/utils/OcrUtils.java index 85264de..c0152fe 100644 --- a/src/main/java/com/luooqi/ocr/utils/OcrUtils.java +++ b/src/main/java/com/luooqi/ocr/utils/OcrUtils.java @@ -1,7 +1,24 @@ package com.luooqi.ocr.utils; -import ai.djl.modality.cv.Image; -import ai.djl.opencv.OpenCVImageFactory; +import java.awt.Point; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.FileOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.imageio.ImageIO; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.rendering.PDFRenderer; + +import com.benjaminwan.ocrlibrary.OcrResult; +import com.luooqi.ocr.local.PaddlePaddleOCRV4; +import com.luooqi.ocr.model.TextBlock; + import cn.hutool.core.codec.Base64; import cn.hutool.core.io.FileUtil; import cn.hutool.core.lang.UUID; @@ -17,21 +34,6 @@ import cn.hutool.json.JSONObject; import cn.hutool.json.JSONUtil; import cn.hutool.log.StaticLog; -import com.benjaminwan.ocrlibrary.OcrResult; -import com.luooqi.ocr.local.PaddlePaddleOCRV4; -import com.luooqi.ocr.model.TextBlock; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.rendering.PDFRenderer; - -import javax.imageio.ImageIO; -import java.awt.*; -import java.awt.image.BufferedImage; -import java.awt.image.DataBufferInt; -import java.io.File; -import java.io.FileOutputStream; -import java.io.InputStream; -import java.util.List; -import java.util.*; /** * tools-ocr @@ -39,7 +41,6 @@ */ public class OcrUtils { - public static String recImgLocal(byte[] imgData) { String path = "tmp_" + Math.abs(Arrays.hashCode(imgData)) + ".png"; File file = FileUtil.writeBytes(imgData, path); @@ -54,7 +55,7 @@ public static String recImgLocal(BufferedImage image) { public static String recImgLocal(File file) { if (file.exists()) { try { - return PaddlePaddleOCRV4.INSTANCE.ocr(file); + return 
extractLocalResult(PaddlePaddleOCRV4.INSTANCE.ocr(file)); } catch (Exception e) { e.printStackTrace(); return e.getMessage(); @@ -63,7 +64,6 @@ public static String recImgLocal(File file) { return "文件不存在"; } - public static String recPdfLocal(File pdfFile) { if (pdfFile.exists()) { try (PDDocument document = PDDocument.load(pdfFile)) { @@ -90,19 +90,18 @@ public static String recPdfLocal(File pdfFile) { return null; } - public static String ocrImg(byte[] imgData) { int i = Math.abs(UUID.randomUUID().hashCode()) % 4; StaticLog.info("OCR Engine: " + i); switch (i) { - case 0: - return bdGeneralOcr(imgData); - case 1: - return bdAccurateOcr(imgData); - case 2: - return sogouMobileOcr(imgData); - default: - return sogouWebOcr(imgData); + case 0: + return bdGeneralOcr(imgData); + case 1: + return bdAccurateOcr(imgData); + case 2: + return sogouMobileOcr(imgData); + default: + return sogouWebOcr(imgData); } } @@ -115,7 +114,8 @@ private static String bdAccurateOcr(byte[] imgData) { } private static String bdBaseOcr(byte[] imgData, String type) { - String[] urlArr = new String[]{"http://ai.baidu.com/tech/ocr/general", "http://ai.baidu.com/index/seccode?action=show"}; + String[] urlArr = new String[] { "http://ai.baidu.com/tech/ocr/general", + "http://ai.baidu.com/index/seccode?action=show" }; StringBuilder cookie = new StringBuilder(); for (String url : urlArr) { HttpResponse cookieResp = WebUtils.get(url); @@ -129,7 +129,8 @@ private static String bdBaseOcr(byte[] imgData, String type) { HashMap header = new HashMap<>(); header.put("Referer", "http://ai.baidu.com/tech/ocr/general"); header.put("Cookie", cookie.toString()); - String data = "type=" + URLUtil.encodeQuery(type) + "&detect_direction=false&image_url&image=" + URLUtil.encodeQuery("data:image/jpeg;base64," + Base64.encode(imgData)) + "&language_type=CHN_ENG"; + String data = "type=" + URLUtil.encodeQuery(type) + "&detect_direction=false&image_url&image=" + + URLUtil.encodeQuery("data:image/jpeg;base64," + 
Base64.encode(imgData)) + "&language_type=CHN_ENG"; HttpResponse response = WebUtils.postRaw("http://ai.baidu.com/aidemo", data, 0, header); return extractBdResult(WebUtils.getSafeHtml(response)); } @@ -137,9 +138,11 @@ private static String bdBaseOcr(byte[] imgData, String type) { public static String sogouMobileOcr(byte[] imgData) { String boundary = "------WebKitFormBoundary8orYTmcj8BHvQpVU"; String url = "http://ocr.shouji.sogou.com/v2/ocr/json"; - String header = boundary + "\r\nContent-Disposition: form-data; name=\"pic\"; filename=\"pic.jpg\"\r\nContent-Type: image/jpeg\r\n\r\n"; + String header = boundary + + "\r\nContent-Disposition: form-data; name=\"pic\"; filename=\"pic.jpg\"\r\nContent-Type: image/jpeg\r\n\r\n"; String footer = "\r\n" + boundary + "--\r\n"; - byte[] postData = CommUtils.mergeByte(header.getBytes(CharsetUtil.CHARSET_ISO_8859_1), imgData, footer.getBytes(CharsetUtil.CHARSET_ISO_8859_1)); + byte[] postData = CommUtils.mergeByte(header.getBytes(CharsetUtil.CHARSET_ISO_8859_1), imgData, + footer.getBytes(CharsetUtil.CHARSET_ISO_8859_1)); return extractSogouResult(CommUtils.postMultiData(url, postData, boundary.substring(2))); } @@ -148,7 +151,8 @@ public static String sogouWebOcr(byte[] imgData) { String referer = "https://deepi.sogou.com/?from=picsearch&tdsourcetag=s_pctim_aiomsg"; String imageData = Base64.encode(imgData); long t = System.currentTimeMillis(); - String sign = SecureUtil.md5("sogou_ocr_just_for_deepibasicOpenOcr" + t + imageData.substring(0, Math.min(1024, imageData.length())) + "4b66a37108dab018ace616c4ae07e644"); + String sign = SecureUtil.md5("sogou_ocr_just_for_deepibasicOpenOcr" + t + + imageData.substring(0, Math.min(1024, imageData.length())) + "4b66a37108dab018ace616c4ae07e644"); Map data = new HashMap<>(); data.put("image", imageData); data.put("lang", "zh-Chs"); @@ -178,7 +182,7 @@ private static String extractSogouResult(String html) { JSONObject jObj = jsonArray.getJSONObject(i); TextBlock textBlock = new 
TextBlock(); textBlock.setText(jObj.getStr("content").trim()); - //noinspection SuspiciousToArrayCall + // noinspection SuspiciousToArrayCall String[] frames = jObj.getJSONArray("frame").toArray(new String[0]); textBlock.setTopLeft(CommUtils.frameToPoint(frames[0])); textBlock.setTopRight(CommUtils.frameToPoint(frames[1])); @@ -205,7 +209,7 @@ private static String extractBdResult(String html) { JSONObject jObj = jsonArray.getJSONObject(i); TextBlock textBlock = new TextBlock(); textBlock.setText(jObj.getStr("words").trim()); - //noinspection SuspiciousToArrayCall + // noinspection SuspiciousToArrayCall JSONObject location = jObj.getJSONObject("location"); int top = location.getInt("top"); int left = location.getInt("left"); @@ -220,7 +224,6 @@ private static String extractBdResult(String html) { return CommUtils.combineTextBlocks(textBlocks, isEng); } - private static String extractLocalResult(OcrResult ocrResult) { if (ocrResult == null) { return ""; @@ -240,5 +243,4 @@ private static String extractLocalResult(OcrResult ocrResult) { return CommUtils.combineTextBlocks(textBlocks, isEng); } - } diff --git a/src/main/java/com/luooqi/ocr/windows/MainForm.java b/src/main/java/com/luooqi/ocr/windows/MainForm.java index 23f9519..8053cd5 100644 --- a/src/main/java/com/luooqi/ocr/windows/MainForm.java +++ b/src/main/java/com/luooqi/ocr/windows/MainForm.java @@ -1,16 +1,23 @@ package com.luooqi.ocr.windows; -import cn.hutool.core.io.FileTypeUtil; -import cn.hutool.core.thread.ThreadUtil; -import cn.hutool.core.util.StrUtil; -import cn.hutool.log.StaticLog; +import java.awt.image.BufferedImage; +import java.io.File; +import java.util.HashMap; +import java.util.Map; + import com.luooqi.ocr.config.InitConfig; import com.luooqi.ocr.controller.ProcessController; +import com.luooqi.ocr.local.PaddlePaddleOCRV4; import com.luooqi.ocr.model.CaptureInfo; import com.luooqi.ocr.model.StageInfo; import com.luooqi.ocr.snap.ScreenCapture; import com.luooqi.ocr.utils.CommUtils; import 
com.luooqi.ocr.utils.OcrUtils; + +import cn.hutool.core.io.FileTypeUtil; +import cn.hutool.core.thread.ThreadUtil; +import cn.hutool.core.util.StrUtil; +import cn.hutool.log.StaticLog; import javafx.application.Platform; import javafx.beans.property.SimpleStringProperty; import javafx.geometry.Insets; @@ -20,7 +27,13 @@ import javafx.scene.control.ToolBar; import javafx.scene.input.Clipboard; import javafx.scene.input.DataFormat; -import javafx.scene.layout.*; +import javafx.scene.layout.Border; +import javafx.scene.layout.BorderPane; +import javafx.scene.layout.BorderStroke; +import javafx.scene.layout.BorderStrokeStyle; +import javafx.scene.layout.BorderWidths; +import javafx.scene.layout.CornerRadii; +import javafx.scene.layout.HBox; import javafx.scene.paint.Color; import javafx.scene.text.Font; import javafx.scene.text.FontPosture; @@ -28,11 +41,6 @@ import javafx.stage.Stage; import lombok.extern.slf4j.Slf4j; -import java.awt.image.BufferedImage; -import java.io.File; -import java.util.HashMap; -import java.util.Map; - /** * Created by litonglinux@qq.com on 12/9/2023_4:40 PM */ @@ -50,8 +58,8 @@ public int hashCode() { private static ScreenCapture screenCapture; private static ProcessController processController; private static TextArea textArea; - //private static boolean isSegment = true; - //private static String ocrText = ""; + // private static boolean isSegment = true; + // private static String ocrText = ""; public void init(Stage primaryStage) { @@ -80,19 +88,24 @@ public void init(Stage primaryStage) { root.setTop(topBar); root.setCenter(textArea); root.setBottom(footerBar); - root.getStylesheets().addAll( - getClass().getResource("/css/main.css").toExternalForm() - ); + root.getStylesheets().addAll(getClass().getResource("/css/main.css").toExternalForm()); CommUtils.initStage(primaryStage); mainScene = new Scene(root, 670, 470); stage.setScene(mainScene); + // 启动引擎,加载模型,如果模型加载错误下屏幕显示错误 + try { + PaddlePaddleOCRV4.init(); + } catch (Exception e) { + 
e.printStackTrace(); + } } private TextArea getCenter() { TextArea textArea = new TextArea(); textArea.setId("ocrTextArea"); textArea.setWrapText(true); - textArea.setBorder(new Border(new BorderStroke(Color.DARKGRAY, BorderStrokeStyle.SOLID, CornerRadii.EMPTY, BorderWidths.DEFAULT))); + textArea.setBorder( + new Border(new BorderStroke(Color.DARKGRAY, BorderStrokeStyle.SOLID, CornerRadii.EMPTY, BorderWidths.DEFAULT))); textArea.setFont(Font.font("Arial", FontPosture.REGULAR, 14)); return textArea; } @@ -102,21 +115,21 @@ private ToolBar getFooterBar() { footerBar.setId("statsToolbar"); Label statsLabel = new Label(); SimpleStringProperty statsProperty = new SimpleStringProperty("总字数:0"); - textArea.textProperty().addListener((observable, oldValue, newValue) -> statsProperty.set("总字数:" + newValue.replaceAll(CommUtils.SPECIAL_CHARS, "").length())); + textArea.textProperty().addListener((observable, oldValue, newValue) -> statsProperty + .set("总字数:" + newValue.replaceAll(CommUtils.SPECIAL_CHARS, "").length())); statsLabel.textProperty().bind(statsProperty); footerBar.getItems().add(statsLabel); return footerBar; } private HBox getTopBar() { - HBox topBar = new HBox( - CommUtils.createButton("snapBtn", MainForm::screenShotOcr, "截图"), - CommUtils.createButton("openImageBtn", this::openImageOcr, "打开"), - CommUtils.createButton("copyBtn", this::copyText, "复制"), - CommUtils.createButton("pasteBtn", this::pasteText, "粘贴"), - CommUtils.createButton("clearBtn", this::clearText, "清空"), - CommUtils.createButton("wrapBtn", this::wrapText, "换行") - //CommUtils.SEPARATOR, resetBtn, segmentBtn + HBox topBar = new HBox(CommUtils.createButton("snapBtn", MainForm::screenShotOcr, "截图"), + CommUtils.createButton("openImageBtn", this::openImageOcr, "打开"), + CommUtils.createButton("copyBtn", this::copyText, "复制"), + CommUtils.createButton("pasteBtn", this::pasteText, "粘贴"), + CommUtils.createButton("clearBtn", this::clearText, "清空"), + CommUtils.createButton("wrapBtn", this::wrapText, 
"换行") + // CommUtils.SEPARATOR, resetBtn, segmentBtn ); topBar.setId("topBar"); topBar.setMinHeight(40); @@ -143,7 +156,6 @@ private void wrapText() { textArea.setWrapText(!textArea.isWrapText()); } - private void clearText() { textArea.setText(""); } @@ -153,9 +165,8 @@ private void pasteText() { if (StrUtil.isBlank(text)) { return; } - textArea.setText(textArea.getText() - + (StrUtil.isBlank(textArea.getText()) ? "" : "\n") - + Clipboard.getSystemClipboard().getString()); + textArea.setText(textArea.getText() + (StrUtil.isBlank(textArea.getText()) ? "" : "\n") + + Clipboard.getSystemClipboard().getString()); } private void copyText() { @@ -184,24 +195,22 @@ public static void screenShotOcr() { private void openImageOcr() { FileChooser fileChooser = new FileChooser(); fileChooser.setTitle("Please Select Image File"); - String[] extensions = {"*.png", "*.jpg", "*.pdf", "*.PDF"}; + String[] extensions = { "*.png", "*.jpg", "*.pdf", "*.PDF" }; fileChooser.getExtensionFilters().addAll(new FileChooser.ExtensionFilter("Image Files", extensions)); File selectedFile = fileChooser.showOpenDialog(stage); if (selectedFile == null || !selectedFile.isFile()) { return; } - stageInfo = new StageInfo(stage.getX(), stage.getY(), - stage.getWidth(), stage.getHeight(), stage.isFullScreen()); + stageInfo = new StageInfo(stage.getX(), stage.getY(), stage.getWidth(), stage.getHeight(), stage.isFullScreen()); try { - //BufferedImage image = ImageIO.read(selectedFile); + // BufferedImage image = ImageIO.read(selectedFile); doOcr(selectedFile); } catch (Exception e) { StaticLog.error(e); } } - public static void cancelSnap() { Platform.runLater(screenCapture::cancelSnap); } diff --git a/src/main/resources/images/01.png b/src/main/resources/images/01.png new file mode 100644 index 0000000..51df578 Binary files /dev/null and b/src/main/resources/images/01.png differ diff --git a/src/test/java/com/benjaminwan/ocrlibrary/OcrEngineTest.java 
b/src/test/java/com/benjaminwan/ocrlibrary/OcrEngineTest.java deleted file mode 100644 index 2a3a681..0000000 --- a/src/test/java/com/benjaminwan/ocrlibrary/OcrEngineTest.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.benjaminwan.ocrlibrary; - -import cn.hutool.log.StaticLog; -import com.luooqi.ocr.utils.LibraryUtils; -import org.junit.Test; - -import static org.junit.Assert.*; - -/** - * Created by litonglinux@qq.com on 10/11/2023_3:01 AM - */ -public class OcrEngineTest { - - @Test - public void test1() { - // https://github.com/RapidAI/RapidOcrNcnnLibTest/tree/main/resource/models - - String libPath = "D:\\lib\\ocr-lib\\win64\\bin"; - LibraryUtils.addLibary(libPath); - - OcrEngine ocrEngine = new OcrEngine(); - StaticLog.info("version=" + ocrEngine.getVersion()); - ocrEngine.setNumThread(8); - //------- init Logger ------- - ocrEngine.initLogger(true, false, false); - //ocrEngine.enableResultText(""); - ocrEngine.setGpuIndex(-1); - String modelsDir = "D:\\model\\ppocr-v3-NCNN-models"; - String detName = "ch_PP-OCRv3_det_infer"; - String clsName = "ch_ppocr_mobile_v2.0_cls_infer"; - String recName = "ch_PP-OCRv3_rec_infer"; - String keysName = "ppocr_keys_v1.txt"; - - boolean initModelsRet = ocrEngine.initModels(modelsDir, detName, clsName, recName, keysName); - if (!initModelsRet) { - StaticLog.error("Error in models initialization, please check the models/keys path!"); - return; - } - StaticLog.info("padding(%d) boxScoreThresh(%f) boxThresh(%f) unClipRatio(%f) doAngle(%b) mostAngle(%b)", ocrEngine.getPadding(), ocrEngine.getBoxScoreThresh(), ocrEngine.getBoxThresh(), ocrEngine.getUnClipRatio(), ocrEngine.getDoAngle(), ocrEngine.getMostAngle()); - - String imagePath = "D:\\images\\Snipaste_2023-10-11_02-08-03.png"; - OcrResult ocrResult = ocrEngine.detect(imagePath); - System.out.println(ocrResult.getStrRes()); - - } - -} \ No newline at end of file diff --git a/src/test/java/com/litongjava/RapidOcrTest.java b/src/test/java/com/litongjava/RapidOcrTest.java new 
file mode 100644 index 0000000..0bfba20 --- /dev/null +++ b/src/test/java/com/litongjava/RapidOcrTest.java @@ -0,0 +1,22 @@ +package com.litongjava; + +import com.benjaminwan.ocrlibrary.OcrResult; + +import io.github.mymonstercat.Model; +import io.github.mymonstercat.ocr.InferenceEngine; +import io.github.mymonstercat.ocr.config.HardwareConfig; + +public class RapidOcrTest { + public static void main(String[] args) { + String imagePath = "C:\\Users\\Administrator\\Desktop\\01.jpg"; + + // init + HardwareConfig onnxConfig = HardwareConfig.getOnnxConfig(); + onnxConfig.setNumThread(2); + InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4_SERVER, onnxConfig); + + // run + OcrResult ocrResult = engine.runOcr(imagePath); + System.out.println(ocrResult.getStrRes().trim()); + } +} diff --git a/src/test/java/com/luooqi/ocr/utils/OcrUtilsTest.java b/src/test/java/com/luooqi/ocr/utils/OcrUtilsTest.java index 478b01a..d6ea182 100644 --- a/src/test/java/com/luooqi/ocr/utils/OcrUtilsTest.java +++ b/src/test/java/com/luooqi/ocr/utils/OcrUtilsTest.java @@ -1,20 +1,12 @@ package com.luooqi.ocr.utils; -import cn.hutool.core.swing.ScreenUtil; -import cn.hutool.json.JSONArray; -import cn.hutool.json.JSONObject; -import cn.hutool.json.JSONUtil; -import com.luooqi.ocr.model.TextBlock; -import javafx.scene.SnapshotParameters; -import org.junit.Test; - -import java.awt.*; +import java.awt.GraphicsConfiguration; +import java.awt.GraphicsEnvironment; +import java.awt.Point; import java.awt.geom.AffineTransform; import java.io.File; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; + +import org.junit.Test; public class OcrUtilsTest { @@ -32,7 +24,8 @@ private Point frameToPoint(String text) { @Test public void sogouWebOcr() { - GraphicsConfiguration asdf = GraphicsEnvironment.getLocalGraphicsEnvironment().getDefaultScreenDevice().getDefaultConfiguration(); + GraphicsConfiguration asdf = 
GraphicsEnvironment.getLocalGraphicsEnvironment().getDefaultScreenDevice() + .getDefaultConfiguration(); AffineTransform asfd2 = asdf.getDefaultTransform(); double scaleX = asfd2.getScaleX(); double scaleY = asfd2.getScaleY(); @@ -46,7 +39,7 @@ public void recPdfLocal() { } @Test - public void recImageLocal(){ + public void recImageLocal() { OcrUtils.recImgLocal(new File("temp_1010298_4.png")); } }