# -*- coding: utf-8 -*-
"""Wrappers around the Tencent Cloud general-handwriting OCR API.

Public entry points:

* ``tencent_handwriting(img)`` - OCR an image given as URL, local path,
  raw bytes or binary file object; returns the joined text and raw JSON.
* ``tencent_ocr(imgbase64)`` - OCR an already base64-encoded image and
  return the raw JSON response string.
* ``cimg(image)`` - convert any supported image source to base64 text.
* ``np2base64(img_arr)`` - placeholder, currently not implemented.
"""
import ast
import base64
import json
import os
import re
import sys
from concurrent.futures.thread import ThreadPoolExecutor

import pandas as pd
import numpy as np
# import cv2
import requests
import my_config
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.ocr.v20181119 import ocr_client, models

# SECURITY: these API secrets are committed in plain text. They are kept only
# so existing callers that read ``TENCENT_SK`` keep working. Rotate the keys
# and supply credentials through the TENCENTCLOUD_SECRET_ID /
# TENCENTCLOUD_SECRET_KEY environment variables (see ``_resolve_secret``).
TENCENT_SK = {
    "LQY": {"ID": "AKIDBPOs761NcaK3YxrxveEXaOIkyxhCqdT2", "KEY": "mIabM1QhhiU1IIJ6dtEWdoKaJM8C1f5D"},
    "ZWJ": {"ID": "AKIDc2C9qIBpseBRSrrv06xceT2KMBl96Mcc", "KEY": "rTpyrUtnRoBHAnESRAhx2tTnumOlrgKh"},
    "WHQ": {"ID": "AKIDd2VieJvsnTqgLkCGcDajEUVF8mJZr0CC", "KEY": "w3vkJbKmytue9NKuDrGPK0kHXWEfopZW"},
    "ZY": {"ID": "AKIDNUMzLZcumOOAftdb5JIIFwMYVBbmooTr", "KEY": "FF4exDDfrmRpwmTxqSC2cvxioiNozuY8"},
    "GYW": {"ID": "AKIDHRJtI87z2lMhmPq8j2h4AyTgU3KEIK4W", "KEY": "2nHTO04uSecJHyNVNxFEbgUfogroUt6F"},
    "LWW": {"ID": "AKIDPYZy5GfImzEIY7q4QehGaeT5Q2PVv6Fk", "KEY": "MRIsBJA3g15PRDivra0DNWOzJN9ex60d"},
    "HJ": {"ID": "AKIDnKdhUVyn27ewzouOGsEuLXS7qVt2bTKV", "KEY": "4P3SxCm7E4eyCBw85mFThgQuBwb0A1cy"},
}

_DEFAULT_ACCOUNT = "HJ"  # account both OCR helpers have always used
_ENDPOINT = "ocr.tencentcloudapi.com"
_REGION = "ap-shanghai"
_HTTP_TIMEOUT = 30  # seconds; keeps an unresponsive image host from hanging us
_URL_PATTERN = re.compile(r"^https?:")


def _resolve_secret(account=_DEFAULT_ACCOUNT):
    """Return ``(secret_id, secret_key)``, preferring environment variables.

    Falls back to the ``TENCENT_SK`` entry for *account* when the
    environment variables are not both set.
    """
    env_id = os.environ.get("TENCENTCLOUD_SECRET_ID")
    env_key = os.environ.get("TENCENTCLOUD_SECRET_KEY")
    if env_id and env_key:
        return env_id, env_key
    entry = TENCENT_SK[account]
    return entry["ID"], entry["KEY"]


def _build_client(account=_DEFAULT_ACCOUNT):
    """Create an OCR client for the Shanghai region with TC3 signing."""
    secret_id, secret_key = _resolve_secret(account)
    cred = credential.Credential(secret_id, secret_key)
    http_profile = HttpProfile()
    http_profile.endpoint = _ENDPOINT
    client_profile = ClientProfile(signMethod="TC3-HMAC-SHA256")
    client_profile.httpProfile = http_profile
    return ocr_client.OcrClient(cred, _REGION, client_profile)


def _parse_response(res_str):
    """Decode the response text returned by ``to_json_string()`` into a dict."""
    try:
        return json.loads(res_str)
    except ValueError:
        # SECURITY: the original fallback was eval(res_str), which would
        # execute arbitrary code contained in a remote response.
        # ast.literal_eval still accepts Python-literal dict text but can
        # never run code.
        return ast.literal_eval(res_str)


def tencent_handwriting(img):
    """Run general handwriting OCR on an image and join the detected text.

    The original author's note says the service accepts at most 10 images
    per second; callers are responsible for throttling.

    :param img: image source accepted by ``cimg`` (URL, local path, raw
        bytes, or binary file object).
    :return: ``(text, raw_json)`` on success, where ``text`` is every
        ``DetectedText`` concatenated in response order. On an SDK error,
        ``("", "ocr异常:<error>")``.
    """
    client = _build_client()
    imgbase64 = cimg(img)
    try:
        req = models.GeneralHandwritingOCRRequest()
        req.ImageBase64 = imgbase64
        print("图片占内存大小:【{}】".format(sys.getsizeof(req.ImageBase64) / (1024 ** 2)))
        resp = client.GeneralHandwritingOCR(req)
        res_str = resp.to_json_string()
    except TencentCloudSDKException as err:
        print('ocr异常:', err)
        return "", 'ocr异常:' + str(err)

    res_dict = _parse_response(res_str)
    # Plain iteration instead of a DataFrame: an empty detection list used to
    # raise KeyError on the "DetectedText" column lookup.
    detections = res_dict.get("TextDetections") or []
    textres = "".join(item.get("DetectedText") or "" for item in detections)
    print('结果:', textres)
    return textres, res_str


def cimg(image):
    """Convert an image from any supported source to a base64 string.

    :param image: one of
        * an ``http://`` / ``https://`` URL (downloaded),
        * a local filesystem path (``str`` or ``os.PathLike``),
        * raw ``bytes`` / ``bytearray``,
        * a binary file-like object exposing ``read()``.
    :return: base64-encoded content as ``str``.
    :raises requests.RequestException: if the download fails or returns
        an HTTP error status.
    :raises OSError: if a local file cannot be read.
    """
    if isinstance(image, (bytes, bytearray)):
        filebyte = bytes(image)
    elif isinstance(image, str) and _URL_PATTERN.search(image):
        # Remote image: fail loudly on HTTP errors rather than sending an
        # error page to the OCR service.
        response = requests.get(image, timeout=_HTTP_TIMEOUT)
        response.raise_for_status()
        filebyte = response.content
    elif isinstance(image, (str, os.PathLike)):
        # Local image: any path, not only Windows drives A-H as before.
        with open(image, 'rb') as handle:
            filebyte = handle.read()
    else:
        # Binary stream.
        filebyte = image.read()
    return base64.b64encode(filebyte).decode('utf-8')


def tencent_ocr(imgbase64):
    """Run general handwriting OCR on a base64 image and return raw JSON.

    :param imgbase64: base64-encoded image text, or a ``numpy.ndarray``
        (converted through ``np2base64``, which is currently a stub).
    :return: the response JSON string, or ``None`` when the request fails
        or the input could not be converted to base64.
    """
    if isinstance(imgbase64, np.ndarray):
        imgbase64 = np2base64(imgbase64)
    if imgbase64 is None:
        # np2base64 is not implemented; skip a request that can only fail.
        print('再次ocr异常:', 'no image data to send')
        return None
    try:
        client = _build_client()
        req = models.GeneralHandwritingOCRRequest()
        req.ImageBase64 = imgbase64
        resp = client.GeneralHandwritingOCR(req)
        res_str = resp.to_json_string()
        print('结果', json.loads(res_str))
        return res_str
    except TencentCloudSDKException as err:
        print('再次ocr异常:', err)
        return None


def np2base64(img_arr):
    """Placeholder for converting an image array to base64.

    NOTE(review): the implementation is disabled because the ``cv2`` import
    is commented out in this file, so this always returns ``None``. The
    disabled code encoded the array as JPEG and base64-encoded the buffer:

        retval, buffer = cv2.imencode('.jpg', img_arr)
        base64_str = base64.b64encode(buffer).decode()

    :param img_arr: image array (unused until the implementation returns).
    :return: ``None``.
    """
    return None


if __name__ == '__main__':
    # Manual smoke test against a local sample image.
    aimg = r"G:\zwj\WL\en2cn\files\imgs\8-17.png"
    res = tencent_handwriting(aimg)
    print("最后的结果:", res)