# @Author : lightXu
# @File : brain_api.py
# @Time : 2018/11/21 0021 下午 16:20
"""Thin wrappers around the Baidu OCR REST API, plus a local pytesseract
fallback, used by the sheet-resolve pipeline.

All Baidu box-OCR endpoints share the same request shape, so the HTTP
plumbing lives in :func:`_request_baidu_ocr`; the public functions differ
only in which optional form fields they send and how they post-process
the response. Conversion of Baidu's ``words_result`` into the flat
google-style dict is centralised in :func:`change_format_baidu_to_google`.
"""
import shutil
import requests
import base64
from urllib import parse, request
import cv2
import time
import numpy as np
import pytesseract
from segment.server import ocr_login
from segment.sheet_resolve.tools import utils
import xml.etree.cElementTree as ET

# access_token = '24.82b09618f94abe2a35113177f4eec593.2592000.1546765941.282335-14614857'
# NOTE(review): logging in at import time means importing this module performs
# a network call; consider a lazy/refreshable token instead.
access_token = ocr_login()

OCR_BOX_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/'
OCR_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/'
OCR_HAND_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting'

# OCR_ACCURACY = 'general'
OCR_ACCURACY = 'accurate'

OCR_CLIENT_ID = 'AVH7VGKG8QxoSotp6wG9LyZq'
OCR_CLIENT_SECRET = 'gG7VYvBWLU8Rusnin8cS8Ta4dOckGFl6'
OCR_TOKEN_UPDATE_DATE = 10


def preprocess(img):
    """Prepare an image for tesseract: gray, dilate/erode, blur, Otsu binarize.

    :param img: BGR image (numpy array) as loaded by cv2.
    :return: single-channel black-and-white image.
    """
    scale = 0   # 0 disables rescaling
    dilate = 1  # kernel size; 0 disables the dilate/erode pass
    blur = 3    # Gaussian kernel size; 0 disables blurring

    # rescale the image
    if scale != 0:
        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply dilation and erosion to remove some noise
    if dilate != 0:
        kernel = np.ones((dilate, dilate), np.uint8)
        img = cv2.dilate(img, kernel, iterations=1)
        img = cv2.erode(img, kernel, iterations=1)

    # Apply blur to smooth out the edges
    if blur != 0:
        img = cv2.GaussianBlur(img, (blur, blur), 0)

    # Apply threshold to get image with only b&w (binarization)
    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    return img


def opecv2base64(img):
    """Encode an OpenCV image as a base64 JPEG string (no data-URI prefix)."""
    image = cv2.imencode('.jpg', img)[1]
    base64_data = str(base64.b64encode(image))[2:-1]
    return base64_data


def _request_baidu_ocr(img, ocr_accuracy, language_type, extra_data=None, timeout=None):
    """POST *img* to the Baidu box-OCR endpoint and return the parsed JSON.

    If the chosen endpoint reports an 'internal error', the request is retried
    once against the plain 'general' endpoint; any other API error raises.

    :param img: BGR image (numpy array).
    :param ocr_accuracy: endpoint name, e.g. 'accurate' or 'general'.
    :param language_type: Baidu ``language_type`` form field, e.g. 'CHN_ENG'.
    :param extra_data: optional dict merged into the form payload
        (e.g. ``{'detect_direction': 'true'}``).
    :param timeout: per-request timeout in seconds; None means no timeout
        (``requests`` treats ``timeout=None`` as "wait forever").
    :raises Exception: when the API reports an error message.
    """
    textmod = parse.urlencode({'access_token': access_token})
    url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
    url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    data = {
        'image_type': 'base64',
        'group_id': 'group001',
        'user_id': 'usr001',
        'image': opecv2base64(img),
        'recognize_granularity': 'small',
        'language_type': language_type,
        # 'vertexes_location': 'true',
        # 'probability': 'true'
    }
    if extra_data:
        data.update(extra_data)

    resp = requests.post(url, data=data, headers=headers, timeout=timeout).json()
    if resp.get('error_msg'):
        if 'internal error' in resp.get('error_msg'):
            # The accurate endpoint occasionally fails internally; fall back
            # to the general endpoint once before giving up.
            resp = requests.post(url_general, data=data, headers=headers).json()
            if resp.get('error_msg'):
                raise Exception("ocr {}!".format(resp.get('error_msg')))
        else:
            raise Exception("ocr {}!".format(resp.get('error_msg')))
    return resp


def get_ocr_raw_result(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
    """Run box OCR with direction detection and return the full API response.

    :return: the raw JSON dict from Baidu (including 'words_result',
        'direction', etc.).
    """
    return _request_baidu_ocr(img, ocr_accuracy, language_type,
                              extra_data={'detect_direction': 'true'},
                              timeout=15)


def get_ocr_text_and_coordinate(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
    """Run box OCR (no direction detection) and return only 'words_result'."""
    resp = _request_baidu_ocr(img, ocr_accuracy, language_type)
    return resp.get('words_result')


def get_ocr_text_and_coordinate0(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
    """Run box OCR with direction detection explicitly disabled.

    Same as :func:`get_ocr_text_and_coordinate` except it sends
    ``detect_direction='false'`` instead of omitting the field.
    """
    resp = _request_baidu_ocr(img, ocr_accuracy, language_type,
                              extra_data={'detect_direction': 'false'})
    return resp.get('words_result')


def get_ocr_text_and_coordinate_direction(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
    """Run box OCR with direction detection.

    :return: ``(words_result, angle)`` where *angle* is a rotation derived
        from Baidu's 'direction' code.
    :raises KeyError: if the API returns a direction code outside {0,-1,-2,-3}.
    """
    resp = _request_baidu_ocr(img, ocr_accuracy, language_type,
                              extra_data={'detect_direction': 'true'},
                              timeout=15)
    words_result = resp.get('words_result')
    direction = resp.get('direction')
    # NOTE(review): the commented-out mapping in the original source suggested
    # {0: 180, -1: 90, -2: -180, -3: -270}; the mapping below is what actually
    # shipped and is preserved here — confirm which one is intended.
    d_map = {0: 180,
             -1: 90,
             -2: 180,
             -3: 90}
    return words_result, d_map[direction]


def change_format_baidu_to_google(words_result):
    """Flatten Baidu's 'words_result' into a google-style dict.

    :param words_result: list of Baidu word entries, each with a 'words'
        string and a 'chars' list of per-char {'char', 'location'} dicts.
    :return: ``{'chars': [...], 'coordinates': [(xmin, ymin, xmax, ymax), ...],
        'words': [...]}`` with one chars/coordinates entry per character.
    """
    dict_list = [char.get('location') for word in words_result for char in word['chars']]
    char_list = [char.get('char') for word in words_result for char in word['chars']]
    words = [word.get('words') for word in words_result]
    # Baidu gives (left, top, width, height); convert to corner coordinates.
    matrix = [(loc['left'],
               loc['top'],
               loc['left'] + loc['width'],
               loc['top'] + loc['height'])
              for loc in dict_list]
    return {'chars': char_list, 'coordinates': matrix, 'words': words}


def get_ocr_text_and_coordinate_in_google_format(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
    """Run box OCR with direction detection and return the google-style dict
    produced by :func:`change_format_baidu_to_google`."""
    resp = _request_baidu_ocr(img, ocr_accuracy, language_type,
                              extra_data={'detect_direction': 'true'})
    return change_format_baidu_to_google(resp.get('words_result'))


def get_handwriting_ocr_text_and_coordinate_in_google_format(img, words_type='words'):
    """Run the handwriting OCR endpoint and return the google-style dict.

    :param words_type: Baidu 'words_type' form field (default 'words').
    :raises Exception: when the API reports an error message (no general-
        endpoint fallback for handwriting).
    """
    textmod = parse.urlencode({'access_token': access_token})
    url = '{}{}{}'.format(OCR_HAND_URL, '?', textmod)
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    data = {
        'image': opecv2base64(img),
        'recognize_granularity': 'small',
        'words_type': words_type,
    }
    resp = requests.post(url, data=data, headers=headers).json()
    if resp.get('error_msg'):
        raise Exception("ocr {}!".format(resp.get('error_msg')))
    return change_format_baidu_to_google(resp.get('words_result'))


def tesseract_boxes_by_py(image, ocr_lang='chi_sim+eng'):
    """Local per-character OCR via tesseract.

    :param image: BGR image (numpy array); it is binarized by
        :func:`preprocess` before OCR.
    :param ocr_lang: tesseract language spec, e.g. 'chi_sim+eng'.
    :return: ``{'chars': [...], 'coordinates': [(xmin, ymin, xmax, ymax), ...]}``
        in top-left-origin image coordinates.
    """
    img = preprocess(image)
    boxes = pytesseract.image_to_boxes(img, lang=ocr_lang, output_type='dict')
    h, _ = img.shape
    # tesseract box coordinates use a bottom-left origin, so flip the y axis:
    # image ymin = h - box 'top', image ymax = h - box 'bottom'.
    matrix = [(xmin, h - box_top, xmax, h - box_bottom)
              for xmin, box_top, xmax, box_bottom
              in zip(boxes['left'], boxes['top'], boxes['right'], boxes['bottom'])]
    return {'chars': boxes['char'], 'coordinates': matrix}


def gen_xml_of_per_char(img_path):
    """Write per-character box annotations for *img_path* as Pascal-VOC-style
    XML, once from Baidu OCR (``<name>.xml``) and once from tesseract
    (``<name>_g.xml`` plus a ``_g.jpg`` copy of the image).

    Requires ``./000000-template.xml`` to exist as the XML skeleton.
    """
    img = utils.read_single_img(img_path)

    # Baidu OCR boxes.
    res_dict = get_ocr_text_and_coordinate_in_google_format(img, 'accurate', 'CHN_ENG')
    box_list = res_dict['coordinates']
    tree = ET.parse(r'./000000-template.xml')  # xml tree
    for index_num, exam_bbox in enumerate(box_list):
        tree = utils.create_xml('{}'.format(res_dict['chars'][index_num]), tree,
                                exam_bbox[0], exam_bbox[1], exam_bbox[2], exam_bbox[3])
    tree.write(img_path.replace('.jpg', '.xml'))

    # Tesseract boxes, written next to the Baidu ones with a '_g' suffix.
    res_dict_google = tesseract_boxes_by_py(img, ocr_lang='chi_sim+equ+eng')
    box_list_g = res_dict_google['coordinates']
    tree_g = ET.parse(r'./000000-template.xml')  # xml tree
    for index_num, exam_bbox in enumerate(box_list_g):
        tree_g = utils.create_xml('{}'.format(res_dict_google['chars'][index_num]), tree_g,
                                  exam_bbox[0], exam_bbox[1], exam_bbox[2], exam_bbox[3])
    tree_g.write(img_path.replace('.jpg', '_g.xml'))
    shutil.copy(img_path, img_path.replace('.jpg', '_g.jpg'))


if __name__ == '__main__':
    img_path0 = r'C:\Users\Administrator\Desktop\sheet\mark-test\002_mark.jpg'
    image0 = cv2.imread(img_path0)
    t1 = time.time()
    res = get_ocr_text_and_coordinate(image0)
    t2 = time.time()
    print(t2 - t1)
    print(res)