前言

最近利用apache的httpclient模拟抓了一些东西，可惜验证码这一块让我很头疼，不知是Google的tesseract能力有限，还是我验证码处理的不到位，稍微一模糊的就效果很差；这不上网看了下，除了人工打码识别的，还有一个在线OCR的API——有道API，url：http://ai.youdao.com。

正文

接口调用参数

调用API需要向接口发送以下字段来访问服务。

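| 字段名 | 类型 | 含义 | 必填 | 备注 |
| --- | --- | --- | --- | --- |
| img | text | 要识别的图片，需要Base64编码 | True | 必须是Base64编码 |
| langType | text | 要识别的语言类型 | True | 目前支持英文：en，和中英混合：zh-en |
| detectType | text | 识别类型，目前只支持片段识别 | True | 片段识别：10011 |
| imageType | text | 图片类型，目前只支持Base64 | True | 目前只支持Base64：1，imageType的值为1 |
| appKey | text | 应用申请的key | True | 可在管理控制台查看 |
| salt | text | 随机数 | True | |
| sign | text | 签名，通过md5(appKey+img+salt+密钥)生成 | True | appKey+img+salt+密钥的MD5值 |
| docType | text | 服务器响应类型，目前只支持json | True | json |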

签名生成方法如下：

1、将请求参数中的appKey（应用ID）,img(注意为图片的Base64编码), 随机数salt和密钥按照appKey+img+salt+密钥的顺序拼接得到字符串str。

2、对字符串str做 md5，得到32位大写的sign(参考Java生成MD5示例)。

注意:

请先将需要识别的图片转换为 Base64 编码。
在发送HTTP请求之前需要对各字段做 URL encode。
在生成签名拼接appKey+img+salt+密钥字符串时，img不需要做 URL encode；在生成签名之后、发送 HTTP 请求之前，才需要对要发送的待识别图片字段img做URL encode。

输出结果

返回的结果是json格式，包含字段与FROM和TO的值有关，具体说明如下：

| 字段名 | 类型 | 含义 | 备注 |
| --- | --- | --- | --- |
| errorCode | text | 错误返回码 | 一定存在 |
| Result | text | 识别结果 | 查询正确时一定存在 |

json格式

 
{ "errorCode": "0", 
 "Result": { 
 "orientation": "Up", 
 "textAngle": 0, 
 "language": "en", 
 "lines": [{"boundingBox": "30,33,25,10", 
 "words": "hello"}]}
}

其中，orientation代表方向，textAngle代表与垂直向上的偏差角度，language代表识别的语言，lines代表每行的返回结果；boundingBox的四个值代表识别的文字左上角的坐标（x,y），宽度和高度；words代表识别的字符；

errorCode列表

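| 错误码 | 含义 |
| --- | --- |
| 101 | 缺少必填的参数，出现这个情况还可能是et的值和实际加密方式不对应 |
| 102 | 不支持的语言类型 |
| 103 | 翻译文本过长 |
| 104 | 不支持的API类型 |
| 105 | 不支持的签名类型 |
| 106 | 不支持的响应类型 |
| 107 | 不支持的传输加密类型 |
| 108 | appKey无效，注册账号，登录后台创建应用和实例并完成绑定，可获得应用ID和密钥等信息，其中应用ID就是appKey（注意不是应用密钥） |
| 109 | batchLog格式不正确 |
| 110 | 无相关服务的有效实例 |
| 111 | 开发者账号异常 |
| 201 | 解密失败，可能为DES,BASE64,URLDecode的错误 |
| 202 | 签名检验失败 |
| 203 | 访问IP地址不在可访问IP列表 |
| 301 | 辞典查询失败 |
| 302 | 小语种查询失败 |
| 303 | 服务端的其它异常 |
| 401 | 账户已经欠费停 |
| 1001 | 无效的OCR类型 |
| 1002 | 不支持的OCR image类型 |
| 1003 | 不支持的OCR Language类型 |
| 1004 | 识别图片过大 |
| 1201 | 图片base64解密失败 |
| 1301 | OCR段落识别失败 |
| 1411 | 访问频率受限 |
| 1412 | 超过最大识别字节数 |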

示例demo

api官网上有多种语言的demo，限于环境，下面只介绍java的主要代码,本机实测通过；

 
/**
 * Demo client for the Youdao OCR HTTP API.
 *
 * <p>The flow in {@link #main(String[])} is: download a sample image, Base64-encode it,
 * sign the request with an upper-case MD5 of {@code appKey + img + salt + secret},
 * POST the form to the OCR endpoint and print the recognised text.
 */
public class OCRDemoForHttp {
    /** Form fields posted to the OCR endpoint, in the order they are sent. */
    private static final String[] REQUEST_FIELDS = {
        "appKey", "img", "detectType", "imageType", "langType", "salt", "sign", "docType"
    };

    /** Upper-case hexadecimal alphabet used to render the MD5 digest. */
    private static final char[] HEX_DIGITS = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    protected transient final Log log = LogFactory.getLog(getClass());

    /**
     * Runs the demo end to end and prints the recognised text to standard output.
     *
     * @param args unused
     * @throws Exception if the download, the HTTP call or the JSON parsing fails
     */
    public static void main(String[] args) throws Exception {
        Map<String, String> map = new HashMap<String, String>();
        String url = "http://openapi.youdao.com/ocrapi";
        String appKey = "你的appid";
        String detectType = "10011";
        String imageType = "1";
        String langType = "en";
        String docType = "json";
        String path = "D:\\1 (36).jpg";
        String salt = String.valueOf(System.currentTimeMillis());
        saveImage(path);
        String img = getImageStr(path);
        map.put("appKey", appKey);
        map.put("img", img);
        map.put("detectType", detectType);
        map.put("imageType", imageType);
        map.put("langType", langType);
        map.put("salt", salt);
        map.put("docType", docType);
        // The signature is computed over the raw (not URL-encoded) Base64 image.
        String sign = md5(appKey + img + salt + "你的app secret");
        map.put("sign", sign);
        String result = requestOCRForHttp(url, map);
        if (result == null) {
            System.err.println("OCR request returned no response body");
            return;
        }
        JSONObject jsonObject = new JSONObject(result);
        // "Result" is only present when the call succeeded, so check errorCode first.
        String errorCode = jsonObject.optString("errorCode");
        if (!"0".equals(errorCode)) {
            System.err.println("OCR request failed, errorCode=" + errorCode);
            return;
        }
        org.json.JSONArray regions = jsonObject.getJSONObject("Result").getJSONArray("regions");
        StringBuilder allText = new StringBuilder();
        // Walk regions -> lines -> words and collect every word's text.
        for (int k = 0; k < regions.length(); k++) {
            org.json.JSONArray lines = regions.getJSONObject(k).getJSONArray("lines");
            StringBuilder regionText = new StringBuilder();
            // Only multi-line regions get a line break after each line; a single-line
            // region is emitted on one row (and now keeps all of its words).
            boolean multiLine = lines.length() > 1;
            for (int i = 0; i < lines.length(); i++) {
                org.json.JSONArray words = lines.getJSONObject(i).getJSONArray("words");
                for (int j = 0; j < words.length(); j++) {
                    regionText.append(words.getJSONObject(j).get("text")).append(" ");
                }
                if (multiLine) {
                    regionText.append("\r\n");
                }
            }
            allText.append(regionText).append("\n\r");
        }
        System.out.println("stringbuffer为-----" + "\n\r" + allText);
    }

    /**
     * Posts the OCR request as a URL-encoded form and returns the response body.
     *
     * @param url           OCR endpoint URL
     * @param requestParams request fields keyed by name; the fields listed in
     *                      {@code REQUEST_FIELDS} are read from it
     * @return the response body decoded as UTF-8, or {@code null} if the response has no entity
     * @throws Exception if the request cannot be executed or the body cannot be read
     */
    public static String requestOCRForHttp(String url, Map<String, String> requestParams) throws Exception {
        List<BasicNameValuePair> params = new ArrayList<BasicNameValuePair>();
        for (String field : REQUEST_FIELDS) {
            params.add(new BasicNameValuePair(field, requestParams.get(field)));
        }
        HttpPost httpPost = new HttpPost(url);
        // UrlEncodedFormEntity performs the URL encoding of every field.
        httpPost.setEntity(new UrlEncodedFormEntity(params, "UTF-8"));
        // Both the client and the response are closed here; failures propagate to the
        // caller instead of being discarded by a return inside finally.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse httpResponse = httpClient.execute(httpPost)) {
            HttpEntity httpEntity = httpResponse.getEntity();
            return httpEntity == null ? null : EntityUtils.toString(httpEntity, "utf-8");
        }
    }

    /**
     * Reads an image file and returns its content as a Base64 string.
     *
     * @param imgFile path of the image file
     * @return the Base64 encoding of the file's bytes
     * @throws Exception if the file cannot be read
     */
    public static String getImageStr(String imgFile) throws Exception {
        // readAllBytes reads the complete file; available() + a single read() may not.
        byte[] data = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(imgFile));
        return java.util.Base64.getEncoder().encodeToString(data);
    }

    /**
     * Computes the MD5 digest of a string as 32 upper-case hexadecimal characters.
     *
     * @param string text to hash; its UTF-8 bytes are digested
     * @return the hex digest, or {@code null} if {@code string} is {@code null} or MD5 is unavailable
     */
    public static String md5(String string) {
        if (string == null) {
            return null;
        }
        try {
            MessageDigest mdInst = MessageDigest.getInstance("MD5");
            // Fixed charset so the signature does not depend on the platform default.
            byte[] md = mdInst.digest(string.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            char[] hex = new char[md.length * 2];
            int k = 0;
            for (byte b : md) {
                hex[k++] = HEX_DIGITS[(b >>> 4) & 0xf];
                hex[k++] = HEX_DIGITS[b & 0xf];
            }
            return new String(hex);
        } catch (NoSuchAlgorithmException e) {
            return null;
        }
    }

    /**
     * Downloads the sample image over HTTP and writes it to a local file.
     *
     * @param path destination file path
     * @throws Exception if the download or the file write fails
     */
    public static void saveImage(String path) throws Exception {
        URL url = new URL("http://img.jishux.com/jishux/2017/10/26/4859b33bed842978df19f9906ba013fb649aeb9d_.jpg");
        URLConnection urlConnection = url.openConnection();
        urlConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
        urlConnection.addRequestProperty("Referer", "http://www.jishux.com");
        // try-with-resources closes both streams even when the copy fails midway.
        try (InputStream inputStream = urlConnection.getInputStream();
             OutputStream outputStream = new FileOutputStream(path)) {
            byte[] buffer = new byte[1024];
            int len;
            while ((len = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, len);
            }
        }
    }
}

大体流程
main方法主要是解析接口返回来的json数据，剩下的就是利用http构造请求体，将参数传给api，其中加入了md5加密的方法和base64转码的方法，以及最后利用http保存io流的操作；
识别效果

利用有道ocr api的一点心得体会

后记

经过多番测试，这个api总体效果不错，英文识别率能达到95%以上，中文也能达到90%左右，确实还算理想，虽然就是识别验证码能力很差（笑哭），最后欢迎大家来技术栈官方网站：www.jishux.com学习参观。

	{ "errorCode": "0",
	"Result": {
	"orientation": "Up",
	"textAngle": 0,
	"language": "en",
	"lines": [{"boundingBox": "30,33,25,10",
	"words": "hello"}]}
	}

	public class OCRDemoForHttp {
	public static void main(String[] args) throws Exception{
	Map <String, String>map = new HashMap<String, String>();
	String url = "http://openapi.youdao.com/ocrapi";
	String appKey = "你的appid";
	String detectType = "10011";
	String imageType = "1";
	String langType = "en";
	String docType = "json";
	String path = "D:\\1 (36).jpg";
	String salt = String.valueOf(System.currentTimeMillis());
	saveImage(path);
	String img = getImageStr(path);
	map.put("appKey", appKey);
	map.put("img", img);
	map.put("detectType", detectType);
	map.put("imageType", imageType);
	map.put("langType", langType);
	map.put("salt", salt);
	map.put("docType", docType);
	String sign = md5(appKey + img + salt + "你的app secret");
	map.put("sign", sign);
	String result= requestOCRForHttp(url,map);
	JSONObject jsonObject = new JSONObject(result);
	JSONObject obj1 = (JSONObject) jsonObject.get("Result");
	org.json.JSONArray arr1 = obj1.getJSONArray("regions");
	StringBuffer stringBuffer2 = new StringBuffer();
	/*遍历jsonarry取出返回的多行text*/
	for (int k = 0; k < arr1.length(); k++) {
	JSONObject obj2 = (JSONObject) arr1.get(k);
	org.json.JSONArray arr2 = obj2.getJSONArray("lines");
	StringBuffer stringBuffer = new StringBuffer();
	if (arr2.length() > 1) {
	for (int i = 0; i < arr2.length(); i++) {
	JSONObject obj3 = (JSONObject) arr2.get(i);
	org.json.JSONArray arr3 = obj3.getJSONArray("words");
	for (int j = 0; j < arr3.length(); j++) {
	JSONObject obj4 = (JSONObject) arr3.get(j);
	String str = obj4.get("text")+" ";
	stringBuffer.append(str);
	//System.out.println("您识别的图片为"+obj4.get("text"));
	}
	stringBuffer.append("\r\n");
	}
	}else {
	JSONObject obj3 = (JSONObject) arr2.get(0);
	org.json.JSONArray arr3 = obj3.getJSONArray("words");
	JSONObject obj4 = (JSONObject) arr3.get(0);
	String str = obj4.get("text")+" ";
	stringBuffer.append(str);
	}
	stringBuffer2.append(stringBuffer+"\n\r");
	}
	System.out.println("stringbuffer为-----"+"\n\r"+stringBuffer2);
	}
	protected transient final Log log = LogFactory.getLog(getClass());
	@SuppressWarnings("finally")/*构造参数请求接口***/
	public static String requestOCRForHttp(String url,Map <String, String> requestParams) throws Exception{
	String result = null;
	CloseableHttpClient httpClient = HttpClients.createDefault();
	/*HttpPost/
	HttpPost httpPost = new HttpPost(url);
	List<BasicNameValuePair> params = new ArrayList<BasicNameValuePair>();
	params.add(new BasicNameValuePair("appKey", requestParams.get("appKey")));
	params.add(new BasicNameValuePair("img", requestParams.get("img")));
	params.add(new BasicNameValuePair("detectType", requestParams.get("detectType")));
	params.add(new BasicNameValuePair("imageType", requestParams.get("imageType")));
	params.add(new BasicNameValuePair("langType", requestParams.get("langType")));
	params.add(new BasicNameValuePair("salt", requestParams.get("salt")));
	params.add(new BasicNameValuePair("sign", requestParams.get("sign")));
	params.add(new BasicNameValuePair("docType", requestParams.get("docType")));
	httpPost.setEntity(new UrlEncodedFormEntity(params,"UTF-8"));
	/*HttpResponse/
	CloseableHttpResponse httpResponse = httpClient.execute(httpPost);
	try{
	HttpEntity httpEntity = httpResponse.getEntity();
	result = EntityUtils.toString(httpEntity, "utf-8");
	EntityUtils.consume(httpEntity);
	}finally{
	try{
	if(httpResponse!=null){
	httpResponse.close();
	}
	}catch(IOException e){
	}
	return result;
	}
	}
	/**
	* 获得图片的Base64编码
	* @param imgFile
	* @return
	*/
	public static String getImageStr(String imgFile) throws Exception
	{
	//将图片文件转化为字节数组字符串，并对其进行Base64编码处理
	InputStream in = null;
	byte[] data = null;
	String imgstr = "";
	//读取图片字节数组
	try
	{
	in = new FileInputStream(imgFile);
	data = new byte[in.available()];
	in.read(data);
	in.close();
	}
	catch (IOException e)
	{
	e.printStackTrace();
	}
	//对字节数组Base64编码byte [] by= Base64.encode(data);
	imgstr = new String(by,"UTF-8");
	return imgstr;//返回Base64编码过的字节数组字符串
	}
	/**
	* 生成32位MD5摘要
	* @param string
	* @return
	*/
	public static String md5(String string) {
	if(string == null){
	return null;
	}
	char hexDigits[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	'A', 'B', 'C', 'D', 'E', 'F'};
	byte[] btInput = string.getBytes();
	try{
	/** 获得MD5摘要算法的 MessageDigest 对象 */
	MessageDigest mdInst = MessageDigest.getInstance("MD5");
	/** 使用指定的字节更新摘要 */
	mdInst.update(btInput);
	/** 获得密文 */
	byte[] md = mdInst.digest();
	/** 把密文转换成十六进制的字符串形式 */
	int j = md.length;
	char str[] = new char[j * 2];
	int k = 0;
	for (byte byte0 : md) {
	str[k++] = hexDigits[byte0 >>> 4 & 0xf];
	str[k++] = hexDigits[byte0 & 0xf];
	}
	return new String(str);
	}catch(NoSuchAlgorithmException e){
	return null;
	}
	}
	/***利用IO流保存url图像为文件***/
	public static void saveImage(String path) throws Exception {
	URL url;
	url = new URL("http://img.jishux.com/jishux/2017/10/26/4859b33bed842978df19f9906ba013fb649aeb9d_.jpg");
	URLConnection urlConnection = url.openConnection();
	urlConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
	urlConnection.addRequestProperty("Referer", "http://www.jishux.com");

	InputStream inputStream = urlConnection.getInputStream();
	byte[] by = new byte[1024];
	int len ;
	OutputStream outputStream = new FileOutputStream(path);
	while ((len = inputStream.read(by)) != -1) {
	outputStream.write(by, 0, len);
	}
	inputStream.close();
	outputStream.close();
	}
	}