http://www.open-open.com/lib/view/open1363156299203.html

国内最专业的OCR软件只有2家:清华TH-OCR和汉王OCR。看了很多OCR技术,发现大多对英文与数字的支持都很好,可惜很多都不支持中文字符。Asprise-OCR以及Tesseract 3.0以前的版本都不支持中文。其实我试用了一下Asprise-OCR,速度算是比较快的,可惜它“鄙视”中文,这个没有办法。正好这段时间知名的开源OCR引擎Tesseract发布了3.0版本,它给我们带来的好消息就是支持中文,相关的下载项目网站是:http://code.google.com/p/tesseract-ocr

虽然速度不是很可观,可是毕竟人家开始支持中文了,也算是不错的。一个英文的语言包大概是1.8M,中文简体的语言包是39.5M,中文繁体的语言包是53M,这样就知道识别中文为什么慢了。

package com.ocr;
import java.awt.Graphics2D;
import java.awt.color.ColorSpace;
import java.awt.geom.AffineTransform;
import java.awt.image.AffineTransformOp;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.awt.image.ColorModel;
import java.awt.image.MemoryImageSource;
import java.awt.image.PixelGrabber;
/*** * 图像过滤,增强OCR识别成功率* */
public class ImageFilter {private BufferedImage image;private int iw, ih;private int[] pixels;public ImageFilter(BufferedImage image) {this.image = image;iw = image.getWidth();ih = image.getHeight();pixels = new int[iw * ih];}/** 图像二值化 */public BufferedImage changeGrey() {PixelGrabber pg = new PixelGrabber(image.getSource(), 0, 0, iw, ih, pixels, 0, iw);try {pg.grabPixels();} catch (InterruptedException e) {e.printStackTrace();}// 设定二值化的域值,默认值为100int grey = 100;// 对图像进行二值化处理,Alpha值保持不变ColorModel cm = ColorModel.getRGBdefault();for (int i = 0; i < iw * ih; i++) {int red, green, blue;int alpha = cm.getAlpha(pixels[i]);if (cm.getRed(pixels[i]) > grey) {red = 255;} else {red = 0;}if (cm.getGreen(pixels[i]) > grey) {green = 255;} else {green = 0;}if (cm.getBlue(pixels[i]) > grey) {blue = 255;} else {blue = 0;}pixels[i] = alpha << 24 | red << 16 | green << 8 | blue;}// 将数组中的象素产生一个图像return ImageIOHelper.imageProducerToBufferedImage(new MemoryImageSource(iw, ih, pixels, 0, iw));}/** 提升清晰度,进行锐化 */public BufferedImage sharp() {PixelGrabber pg = new PixelGrabber(image.getSource(), 0, 0, iw, ih, pixels, 0, iw);try {pg.grabPixels();} catch (InterruptedException e) {e.printStackTrace();}// 象素的中间变量int tempPixels[] = new int[iw * ih];for (int i = 0; i < iw * ih; i++) {tempPixels[i] = pixels[i];}// 对图像进行尖锐化处理,Alpha值保持不变ColorModel cm = ColorModel.getRGBdefault();for (int i = 1; i < ih - 1; i++) {for (int j = 1; j < iw - 1; j++) {int alpha = cm.getAlpha(pixels[i * iw + j]);// 对图像进行尖锐化int red6 = cm.getRed(pixels[i * iw + j + 1]);int red5 = cm.getRed(pixels[i * iw + j]);int red8 = cm.getRed(pixels[(i + 1) * iw + j]);int sharpRed = Math.abs(red6 - red5) + Math.abs(red8 - red5);int green5 = cm.getGreen(pixels[i * iw + j]);int green6 = cm.getGreen(pixels[i * iw + j + 1]);int green8 = cm.getGreen(pixels[(i + 1) * iw + j]);int sharpGreen = Math.abs(green6 - green5) + Math.abs(green8 - green5);int blue5 = cm.getBlue(pixels[i * iw + j]);int blue6 = cm.getBlue(pixels[i * iw + j + 1]);int blue8 
= cm.getBlue(pixels[(i + 1) * iw + j]);int sharpBlue = Math.abs(blue6 - blue5) + Math.abs(blue8 - blue5);if (sharpRed > 255) {sharpRed = 255;}if (sharpGreen > 255) {sharpGreen = 255;}if (sharpBlue > 255) {sharpBlue = 255;}tempPixels[i * iw + j] = alpha << 24 | sharpRed << 16 | sharpGreen << 8 | sharpBlue;}}// 将数组中的象素产生一个图像return ImageIOHelper.imageProducerToBufferedImage(new MemoryImageSource(iw, ih, tempPixels, 0, iw));}/** 中值滤波 */public BufferedImage median() {PixelGrabber pg = new PixelGrabber(image.getSource(), 0, 0, iw, ih, pixels, 0, iw);try {pg.grabPixels();} catch (InterruptedException e) {e.printStackTrace();}// 对图像进行中值滤波,Alpha值保持不变ColorModel cm = ColorModel.getRGBdefault();for (int i = 1; i < ih - 1; i++) {for (int j = 1; j < iw - 1; j++) {int red, green, blue;int alpha = cm.getAlpha(pixels[i * iw + j]);// int red2 = cm.getRed(pixels[(i - 1) * iw + j]);int red4 = cm.getRed(pixels[i * iw + j - 1]);int red5 = cm.getRed(pixels[i * iw + j]);int red6 = cm.getRed(pixels[i * iw + j + 1]);// int red8 = cm.getRed(pixels[(i + 1) * iw + j]);// 水平方向进行中值滤波if (red4 >= red5) {if (red5 >= red6) {red = red5;} else {if (red4 >= red6) {red = red6;} else {red = red4;}}} else {if (red4 > red6) {red = red4;} else {if (red5 > red6) {red = red6;} else {red = red5;}}}// int green2 = cm.getGreen(pixels[(i - 1) * iw + j]);int green4 = cm.getGreen(pixels[i * iw + j - 1]);int green5 = cm.getGreen(pixels[i * iw + j]);int green6 = cm.getGreen(pixels[i * iw + j + 1]);// int green8 = cm.getGreen(pixels[(i + 1) * iw + j]);// 水平方向进行中值滤波if (green4 >= green5) {if (green5 >= green6) {green = green5;} else {if (green4 >= green6) {green = green6;} else {green = green4;}}} else {if (green4 > green6) {green = green4;} else {if (green5 > green6) {green = green6;} else {green = green5;}}}// int blue2 = cm.getBlue(pixels[(i - 1) * iw + j]);int blue4 = cm.getBlue(pixels[i * iw + j - 1]);int blue5 = cm.getBlue(pixels[i * iw + j]);int blue6 = cm.getBlue(pixels[i * iw + j + 1]);// int blue8 = 
cm.getBlue(pixels[(i + 1) * iw + j]);// 水平方向进行中值滤波if (blue4 >= blue5) {if (blue5 >= blue6) {blue = blue5;} else {if (blue4 >= blue6) {blue = blue6;} else {blue = blue4;}}} else {if (blue4 > blue6) {blue = blue4;} else {if (blue5 > blue6) {blue = blue6;} else {blue = blue5;}}}pixels[i * iw + j] = alpha << 24 | red << 16 | green << 8 | blue;}}// 将数组中的象素产生一个图像return ImageIOHelper.imageProducerToBufferedImage(new MemoryImageSource(iw, ih, pixels, 0, iw));}/** 线性灰度变换 */public BufferedImage lineGrey() {PixelGrabber pg = new PixelGrabber(image.getSource(), 0, 0, iw, ih, pixels, 0, iw);try {pg.grabPixels();} catch (InterruptedException e) {e.printStackTrace();}// 对图像进行进行线性拉伸,Alpha值保持不变ColorModel cm = ColorModel.getRGBdefault();for (int i = 0; i < iw * ih; i++) {int alpha = cm.getAlpha(pixels[i]);int red = cm.getRed(pixels[i]);int green = cm.getGreen(pixels[i]);int blue = cm.getBlue(pixels[i]);// 增加了图像的亮度red = (int) (1.1 * red + 30);green = (int) (1.1 * green + 30);blue = (int) (1.1 * blue + 30);if (red >= 255) {red = 255;}if (green >= 255) {green = 255;}if (blue >= 255) {blue = 255;}pixels[i] = alpha << 24 | red << 16 | green << 8 | blue;}// 将数组中的象素产生一个图像return ImageIOHelper.imageProducerToBufferedImage(new MemoryImageSource(iw, ih, pixels, 0, iw));}/** 转换为黑白灰度图 */public BufferedImage grayFilter() {ColorSpace cs = ColorSpace.getInstance(ColorSpace.CS_GRAY);ColorConvertOp op = new ColorConvertOp(cs, null);return op.filter(image, null);}/** 平滑缩放 */public BufferedImage scaling(double s) {AffineTransform tx = new AffineTransform();tx.scale(s, s);AffineTransformOp op = new AffineTransformOp(tx, AffineTransformOp.TYPE_BILINEAR);return op.filter(image, null);}public BufferedImage scale(Float s) {int srcW = image.getWidth();int srcH = image.getHeight();int newW = Math.round(srcW * s);int newH = Math.round(srcH * s);// 先做水平方向上的伸缩变换BufferedImage tmp=new BufferedImage(newW, newH, image.getType()); Graphics2D g= tmp.createGraphics(); for (int x = 0; x < newW; x++) {g.setClip(x, 0, 1, 
srcH);// 按比例放缩g.drawImage(image, x - x * srcW / newW, 0, null);}// 再做垂直方向上的伸缩变换BufferedImage dst = new BufferedImage(newW, newH, image.getType()); g = dst.createGraphics();for (int y = 0; y < newH; y++) {g.setClip(0, y, newW, 1);// 按比例放缩g.drawImage(tmp, 0, y - y * srcH / newH, null);}return dst;}}package com.ocr;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;import java.awt.image.ImageProducer;import java.awt.image.WritableRaster;import java.io.File;import java.io.IOException;import java.util.Iterator;import java.util.Locale;import javax.imageio.IIOImage;import javax.imageio.ImageIO;import javax.imageio.ImageReader;import javax.imageio.ImageWriteParam;import javax.imageio.ImageWriter;import javax.imageio.metadata.IIOMetadata;import javax.imageio.stream.ImageInputStream;import javax.imageio.stream.ImageOutputStream;import javax.swing.JOptionPane;import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
public class ImageIOHelper {public ImageIOHelper() {}public static File createImage(File imageFile, String imageFormat) {File tempFile = null;try {Iterator readers = ImageIO.getImageReadersByFormatName(imageFormat);ImageReader reader = readers.next();ImageInputStream iis = ImageIO.createImageInputStream(imageFile);reader.setInput(iis);// Read the stream metadataIIOMetadata streamMetadata = reader.getStreamMetadata();// Set up the writeParamTIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);// Get tif writer and set output to fileIterator writers = ImageIO.getImageWritersByFormatName("tiff");ImageWriter writer = writers.next();BufferedImage bi = reader.read(0);IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));tempFile = tempImageFile(imageFile);ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile);writer.setOutput(ios);writer.write(streamMetadata, image, tiffWriteParam);ios.close();writer.dispose();reader.dispose();} catch (Exception exc) {exc.printStackTrace();}return tempFile;}public static File createImage(BufferedImage bi) {File tempFile = null;try {tempFile = File.createTempFile("tempImageFile", ".tif");tempFile.deleteOnExit();TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);// Get tif writer and set output to fileIterator writers = ImageIO.getImageWritersByFormatName("tiff");ImageWriter writer = writers.next();IIOImage image = new IIOImage(bi, null, null);tempFile = tempImageFile(tempFile);ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile);writer.setOutput(ios);writer.write(null, image, tiffWriteParam);ios.close();writer.dispose();} catch (Exception exc) {exc.printStackTrace();}return tempFile;}public static File tempImageFile(File imageFile) {String path = imageFile.getPath();StringBuffer strB = new 
StringBuffer(path);strB.insert(path.lastIndexOf('.'), 0);return new File(strB.toString().replaceFirst("(?<=//.)(//w+)$", "tif"));}public static BufferedImage getImage(File imageFile) {BufferedImage al = null;try {String imageFileName = imageFile.getName();String imageFormat = imageFileName.substring(imageFileName.lastIndexOf('.') + 1);Iterator readers = ImageIO.getImageReadersByFormatName(imageFormat);ImageReader reader = readers.next();if (reader == null) {JOptionPane.showConfirmDialog(null,"Need to install JAI Image I/O package./nhttps://jai-imageio.dev.java.net");return null;}ImageInputStream iis = ImageIO.createImageInputStream(imageFile);reader.setInput(iis);al = reader.read(0);reader.dispose();} catch (IOException ioe) {System.err.println(ioe.getMessage());} catch (Exception e) {System.err.println(e.getMessage());}return al;}public static BufferedImage imageToBufferedImage(Image image) {BufferedImage bufferedImage = new BufferedImage(image.getWidth(null), image.getHeight(null),BufferedImage.TYPE_INT_RGB);Graphics2D g = bufferedImage.createGraphics();g.drawImage(image, 0, 0, null);return bufferedImage;}public static BufferedImage imageProducerToBufferedImage(ImageProducer imageProducer) {return imageToBufferedImage(Toolkit.getDefaultToolkit().createImage(imageProducer));}public static byte[] image_byte_data(BufferedImage image) {WritableRaster raster = image.getRaster();DataBufferByte buffer = (DataBufferByte) raster.getDataBuffer();return buffer.getData();}
}package com.ocr;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.jdesktop.swingx.util.OS;
public class OCR {private final String LANG_OPTION = "-l";private final String EOL = System.getProperty("line.separator");private String tessPath = new File("tesseract").getAbsolutePath();//private String tessPath="C://Program Files (x86)//Tesseract-OCR//";public String recognizeText(File imageFile, String imageFormat) throws Exception {File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);File outputFile = new File(imageFile.getParentFile(), "output");StringBuffer strB = new StringBuffer();List cmd = new ArrayList();if (OS.isWindowsXP()) {cmd.add(tessPath + "//tesseract");//cmd.add(tessPath + "//Tesseract-OCR");} else if (OS.isLinux()) {cmd.add("tesseract");} else {//cmd.add(tessPath + "//Tesseract-OCR")cmd.add(tessPath + "//tesseract");}cmd.add("");  cmd.add(outputFile.getName());  cmd.add(LANG_OPTION);  cmd.add("chi_sim");cmd.add("eng");  ProcessBuilder pb = new ProcessBuilder();pb.directory(imageFile.getParentFile());cmd.set(1, tempImage.getName());pb.command(cmd);pb.redirectErrorStream(true);Process process = pb.start();//tesseract.exe 1.jpg 1 -l chi_simint w = process.waitFor();// delete temp working filestempImage.delete();if (w == 0) {BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+ ".txt"), "UTF-8"));String str;while ((str = in.readLine()) != null) {strB.append(str).append(EOL);}in.close();} else {String msg;switch (w) {case 1:msg = "Errors accessing files. There may be spaces in your image's filename.";break;case 29:msg = "Cannot recognize the image or its selected region.";break;case 31:msg = "Unsupported image format.";break;default:msg = "Errors occurred.";}tempImage.delete();throw new RuntimeException(msg);}new File(outputFile.getAbsolutePath() + ".txt").delete();return strB.toString();}}
package com.ocr;
import java.io.File;
public class Test {/*** @param args*/public static void main(String[] args) {// TODO Auto-generated method stubOCR ocr=new OCR();try {String maybe = new OCR().recognizeText(new  File("E://temp//222.jpg"), "jpg");System.out.println(maybe);} catch (Exception e) {// TODO Auto-generated catch blocke.printStackTrace();} }
}

java 目录结构如上图

效果图:

解析出来的效果

Java OCR 图像智能字符识别技术[可识别中文]相关推荐

  1. java零碎要点---Tesseract 3.0,Java OCR 图像智能字符识别技术,可识别中文

    2.Java OCR 图像智能字符识别技术,可识别中文  几天一直在研究OCR技术,据我了解的情况,国内最专业的OCR软件只有2家,清华TH-OCR和汉王OCR,看了很多的OCR 技术发现好多对英文与 ...

  2. java ocr识别中文_java零碎要点—Tesseract 3.0,Java OCR 图像智能字符识别技术,可识别中文 | 学步园...

    2.Java OCR 图像智能字符识别技术,可识别中文 几天一直在研究OCR技术,据我了解的情况,国内最专业的OCR软件只有2家,清华TH-OCR和汉王OCR,看了很多的OCR 技术发现好多对英文与数 ...

  3. java整理软件--- Java OCR 图像智能字符识别技术,可识别中文,但是验证码不可以识别...已测识别中文效果很好...

    国内最专业的OCR软件只有2家,清华TH-OCR和汉王OCR,看了很多的OCR技术 发现好多对英文与数字的支持都很好,可惜很多都不支持中文字符.Asprise-OCR,Tesseract 3.0以前的 ...

  4. java整理软件--- Java OCR 图像智能字符识别技术,可识别中文,但是验证码不可以识别...已测识别中文效果很好

    国内最专业的OCR软件只有2家,清华TH-OCR和汉王OCR,看了很多的OCR技术发现好多对英文与数字的支持都很好,可惜很多都不支持中文字符.Asprise-OCR,Tesseract 3.0以前的版 ...

  5. Java OCR 图像智能字符识别技术,可识别中文

    几天一直在研究OCR技术,据我了解的情况,国内最专业的OCR软件只有2家,清华TH-OCR和汉王OCR,看了很多的OCR技术发现好多对英文与数字的支持都很好,可惜很多都不支持中文字符.Asprise- ...

  6. java ocr识别中文_[转]Java OCR 图像智能字符识别技术,可识别中文

    几天一直在研究OCR技术,据我了解的情况,国内最专业的OCR软件只有2家,清华TH-OCR和汉王OCR,看了很多的OCR技术发现好多对英文与数字的支持都很好,可惜很多都不支持中文字符.Asprise- ...

  7. java ocr技术原理_Java OCR 图像智能字符识别技术,可识别中文

    几天一直在研究OCR技术,据我了解的情况,国内最专业的OCR软件只有2家,清华TH-OCR和汉王OCR,看了很多的OCR技术发现好多对英文与数字的支持都很好,可惜很多都不支持中文字符.Asprise- ...

  8. OCR 图像智能字符识别技术

    几天一直在研究OCR技术,据我了解的情况,国内最专业的OCR软件只有2家,清华TH-OCR和汉王OCR,看了很多的OCR技术发现好多对英文与数字的支持都很好,可惜很多都不支持中文字符.Asprise- ...

  9. Java OCR tesseract 图像智能字符识别技术 Java实现

    Java OCR tesseract 图像智能字符识别技术 Java代码实现 接着上一篇OCR所说的,上一篇给大家介绍了tesseract 在命令行的简单用法,当然了要继承到我们的程序中,还是需要代码 ...

最新文章

  1. 华为安卓11是鸿蒙系统吗,恭喜华为手机,EMUI11曝光,是披着Android 11的“鸿蒙系统”?...
  2. Windows 10安装CUDA10.1+cudnn7.6.0+Pytorch1.3.0
  3. A Tutorial on Clustering Algorithms-聚类小知识
  4. 类和对象:一些相关的BIF - 零基础入门学习Python040
  5. javascript对下拉列表框(select)的操作
  6. .NET 6新特性试用 | 可写JSON DOM API
  7. 看国外女神级程序员,直播写代码一年的感悟
  8. 查找标题已知的窗口句柄,遍历窗口控件句柄
  9. android 获取键盘回车键事件,设置软键盘回车键显示内容,点击空白处隐藏软键盘
  10. scrapy 工作流程
  11. sencha touch 入门学习资料大全
  12. fcn网络训练代码_FCN网络训练 菜鸟版
  13. 关于备考软考过程中历年真题的说明
  14. CentOS7:Ruby安装
  15. kinova_j2s6s300【Cartesian position control Finger position control】
  16. iOS 开发者账号初试
  17. PageRank算法与特征向量和特征值(eigenvector和eigenvalue)
  18. html5鼠标移过切换图片,鼠标移动到图片上切换到另一张图片,移出时又切默认图片...
  19. Linux 显示行数 number
  20. 常见的网络故障以及解决方法

热门文章

  1. Unity Hub简介
  2. 微型计算机配置表,典型微型计算机配置的特征
  3. py-21-Android
  4. 7寸 android,7英寸Android:华为Smakit S7_华为 S7_手机导购-中关村在线
  5. pcm5102a解码芯片音质评测_漫步者LolliPods无线蓝牙耳机音质超越价格
  6. 杀毒软件如何被XP SP2的安全中心识别(转)
  7. IDE 10 月指数榜:Eclipse 反超 Visual Studio
  8. 可视化神器Plotly绘制3D图
  9. cmake 安装(高版本切换到低版本)
  10. 联想如何解除硬盘BitLocker