123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
- # -*- coding: utf-8 -*-
- import json
- import sys
- import base64
- import pandas as pd
- import numpy as np
- # import cv2
- import re
- import requests
- import my_config
- from tencentcloud.common import credential
- from concurrent.futures.thread import ThreadPoolExecutor
- from tencentcloud.common.profile.client_profile import ClientProfile
- from tencentcloud.common.profile.http_profile import HttpProfile
- from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
- from tencentcloud.ocr.v20181119 import ocr_client, models
# Aliases of the Tencent Cloud accounts this module can sign requests with.
TENCENT_ACCOUNTS = ("LQY", "ZWJ", "WHQ", "ZY", "GYW", "LWW", "HJ")


def load_tencent_credentials(accounts=TENCENT_ACCOUNTS, environ=None):
    """Build the per-account credential table from environment variables.

    For every alias ``X`` the variables ``TENCENT_X_SECRET_ID`` and
    ``TENCENT_X_SECRET_KEY`` are read. A missing variable yields an empty
    string so the table always has the same shape.

    :param accounts: iterable of account aliases to look up
    :param environ: mapping to read from; defaults to ``os.environ``
    :return: ``{alias: {"ID": secret_id, "KEY": secret_key}}``
    """
    import os

    env = os.environ if environ is None else environ
    return {
        alias: {
            "ID": env.get("TENCENT_{}_SECRET_ID".format(alias), ""),
            "KEY": env.get("TENCENT_{}_SECRET_KEY".format(alias), ""),
        }
        for alias in accounts
    }


# SECURITY: API secrets must never be committed to source control. Any key
# that was previously stored in this file should be treated as leaked and
# rotated in the Tencent Cloud console.
TENCENT_SK = load_tencent_credentials()
def tencent_handwriting(img, account="HJ"):
    """Run Tencent Cloud general handwriting OCR on a single image.

    At most 10 images may be sent per second (limit noted by the original
    author); this function does no throttling, so callers must pace requests.

    :param img: image source, passed unchanged to ``cimg`` for base64 encoding
    :param account: key of the ``TENCENT_SK`` entry used to sign the request
    :return: ``(text, raw)``. On success ``text`` is every ``DetectedText``
        concatenated in response order and ``raw`` is the JSON response string.
        On an SDK error ``text`` is ``""`` and ``raw`` is the error message
        prefixed with ``'ocr异常:'``.
    """
    import ast

    imgbase64 = cimg(img)
    try:
        # Client construction is inside the try so SDK errors raised while
        # building it are reported the same way as request errors.
        secret = TENCENT_SK[account]
        cred = credential.Credential(secret["ID"], secret["KEY"])
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"
        clientProfile = ClientProfile(signMethod="TC3-HMAC-SHA256")
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

        req = models.GeneralHandwritingOCRRequest()
        req.ImageBase64 = imgbase64
        # sys.getsizeof reports the size of the Python str object (in MiB),
        # not the size of the original image file.
        print("图片占内存大小:【{}】".format(sys.getsizeof(req.ImageBase64) / (1024 ** 2)))
        resp = client.GeneralHandwritingOCR(req)
        res_str = resp.to_json_string()
    except TencentCloudSDKException as err:
        print('ocr异常:', err)
        return "", 'ocr异常:' + str(err)

    try:
        res_dict = json.loads(res_str)
    except ValueError:
        # NOTE(security): this fallback used to be eval() on text received
        # from a remote service. literal_eval still accepts a Python-literal
        # dict but cannot execute code.
        res_dict = ast.literal_eval(res_str)

    # An image with no recognised text yields an empty/missing list; joining
    # directly avoids the KeyError an empty DataFrame column lookup raised.
    detections = res_dict.get("TextDetections") or []
    textres = "".join(item.get("DetectedText") or "" for item in detections)
    print('结果:', textres)
    return textres, res_str
def cimg(image):
    """Return the base64 text encoding of an image taken from several sources.

    :param image: one of

        * ``bytes`` / ``bytearray`` / ``memoryview`` - used as the raw data;
        * ``str`` starting with ``http:`` or ``https:`` - downloaded with
          ``requests.get``;
        * any other ``str`` or ``os.PathLike`` - opened as a local file
          (on any drive or operating system);
        * anything else - treated as a binary stream whose ``read()`` is called.
    :return: the base64 encoding of the image bytes, as ``str``
    """
    import os

    if isinstance(image, (bytes, bytearray, memoryview)):
        filebyte = bytes(image)
    elif isinstance(image, str) and re.match(r"https?:", image):
        # A timeout keeps a stalled server from blocking the caller forever.
        filebyte = requests.get(image, timeout=30).content
    elif isinstance(image, (str, os.PathLike)):
        # Context manager closes the handle even if read() fails.
        with open(image, 'rb') as fh:
            filebyte = fh.read()
    else:
        filebyte = image.read()
    return base64.b64encode(filebyte).decode('utf-8')
def tencent_ocr(imgbase64):
    """Send an already-encoded image to Tencent general handwriting OCR.

    :param imgbase64: base64 string of the image; a ``numpy.ndarray`` is also
        accepted and is converted with ``np2base64`` first
    :return: the JSON response string, or ``None`` when the SDK raises
        ``TencentCloudSDKException``
    """
    if isinstance(imgbase64, np.ndarray):
        imgbase64 = np2base64(imgbase64)
    try:
        # Use the shared credential table instead of a second inline copy of
        # the same secret pair.
        secret = TENCENT_SK["HJ"]
        cred = credential.Credential(secret["ID"], secret["KEY"])
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"
        clientProfile = ClientProfile(signMethod="TC3-HMAC-SHA256")
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

        req = models.GeneralHandwritingOCRRequest()
        req.ImageBase64 = imgbase64
        resp = client.GeneralHandwritingOCR(req)
        # Serialise once and reuse for both the log line and the return value.
        res_str = resp.to_json_string()
        print('结果', json.loads(res_str))
        return res_str
    except TencentCloudSDKException as err:
        print('再次ocr异常:', err)
        return None
def np2base64(img_arr):
    """Encode an image array as a base64 PNG string.

    The previous body was entirely commented out (it relied on ``cv2``, which
    this module does not import), so the function returned ``None``. This
    version writes an 8-bit PNG using only the standard library and numpy.

    NOTE(review): 3- and 4-channel input is assumed to be in OpenCV channel
    order (BGR / BGRA), matching the disabled ``cv2.imencode`` call, and is
    converted to RGB / RGBA - confirm against callers. PNG output is larger
    than the JPEG the disabled code produced; check the service size limit.

    :param img_arr: array of shape ``(H, W)``, ``(H, W, 1)``, ``(H, W, 3)`` or
        ``(H, W, 4)``; non-``uint8`` data is clipped to ``0..255`` and cast
    :return: base64 text of the PNG file
    :raises ValueError: if the array is empty or has an unsupported shape
    """
    import struct
    import zlib

    pixels = np.asarray(img_arr)
    if pixels.ndim == 2:
        pixels = pixels[:, :, np.newaxis]
    if pixels.ndim != 3 or pixels.shape[2] not in (1, 3, 4) or pixels.size == 0:
        raise ValueError(
            "expected a non-empty (H, W), (H, W, 1), (H, W, 3) or (H, W, 4) "
            "array, got shape {}".format(np.shape(img_arr))
        )
    if pixels.dtype != np.uint8:
        pixels = np.clip(pixels, 0, 255).astype(np.uint8)

    height, width, channels = pixels.shape
    if channels == 3:
        pixels = pixels[:, :, ::-1]  # BGR -> RGB
    elif channels == 4:
        pixels = pixels[:, :, [2, 1, 0, 3]]  # BGRA -> RGBA
    color_type = {1: 0, 3: 2, 4: 6}[channels]  # PNG: grey / RGB / RGBA

    # Every PNG scanline starts with a filter-type byte; 0 means "unfiltered".
    rows = np.ascontiguousarray(pixels).reshape(height, width * channels)
    filter_col = np.zeros((height, 1), dtype=np.uint8)
    raw = np.concatenate([filter_col, rows], axis=1).tobytes()

    def chunk(tag, data):
        # PNG chunk layout: length, type, data, CRC32 over type + data.
        crc = zlib.crc32(tag + data) & 0xFFFFFFFF
        return struct.pack(">I", len(data)) + tag + data + struct.pack(">I", crc)

    header = struct.pack(">IIBBBBB", width, height, 8, color_type, 0, 0, 0)
    png = b"".join([
        b"\x89PNG\r\n\x1a\n",
        chunk(b"IHDR", header),
        chunk(b"IDAT", zlib.compress(raw)),
        chunk(b"IEND", b""),
    ])
    return base64.b64encode(png).decode("utf-8")
if __name__ == '__main__':
    # Manual smoke test: OCR one local sample image and show what came back.
    sample_image = r"G:\zwj\WL\en2cn\files\imgs\8-17.png"
    print("最后的结果:", tencent_handwriting(sample_image))
    import requests
    # Other experiments that were tried here and are currently disabled:
    #   * encode a remote image URL with cimg() and pass it to tencent_ocr();
    #   * POST a local image as the "answer_hw" file to an internal E2CC
    #     HTTP service;
    #   * pass the raw bytes of an opened file to cimg() and print the
    #     result type.
|