package cn.tworice.ocr.tess4j;

import cn.tworice.ocr.OCRApplication;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.util.Objects;

/**
 * 基于Tess4J的OCR识别
 * @author 二饭 [2023/2/12]
 **/
public class OCRClient {
    // 创建实例
    private final ITesseract instance;

    public OCRClient(){
        instance = new Tesseract();
        // 设置识别语言
        // https://github.com/tesseract-ocr/langdata
        instance.setLanguage("chi_sim");
//        instance.setLanguage("zwp");
        // 设置识别引擎
        instance.setOcrEngineMode(1);
    }

    public void setOcrEngineMode(int i){
        instance.setOcrEngineMode(i);
    }

    public void setLanguage(String language){
        instance.setLanguage(language);
    }

    /**
     * 识别
     * @param image 图片
     * @return java.lang.String 结果
     * @author 二饭 [2023/2/12]
     **/
    public String ocr(BufferedImage image) throws TesseractException {
        // 识别
        return instance.doOCR(image);
    }

    /**
     * 识别
     * @param imgPath 图片在项目资源目录下的路径
     * @return java.lang.String 结果
     * @author 二饭 [2023/2/12]
     **/
    public String ocr(String imgPath) throws IOException, TesseractException {
        // 读取文件
        BufferedImage image = ImageIO.read(Objects.requireNonNull(OCRApplication.class.getResourceAsStream(imgPath)));
        return this.ocr(image);
    }
}
