tencent_hw.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. # -*- coding: utf-8 -*-
  2. import json
  3. import sys
  4. import base64
  5. import pandas as pd
  6. import numpy as np
  7. # import cv2
  8. import re
  9. import requests
  10. import my_config
  11. from tencentcloud.common import credential
  12. from concurrent.futures.thread import ThreadPoolExecutor
  13. from tencentcloud.common.profile.client_profile import ClientProfile
  14. from tencentcloud.common.profile.http_profile import HttpProfile
  15. from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
  16. from tencentcloud.ocr.v20181119 import ocr_client, models
  17. TENCENT_SK = {"LQY": {"ID": "AKIDBPOs761NcaK3YxrxveEXaOIkyxhCqdT2", "KEY": "mIabM1QhhiU1IIJ6dtEWdoKaJM8C1f5D"},
  18. "ZWJ": {"ID": "AKIDc2C9qIBpseBRSrrv06xceT2KMBl96Mcc", "KEY": "rTpyrUtnRoBHAnESRAhx2tTnumOlrgKh"},
  19. "WHQ": {"ID": "AKIDd2VieJvsnTqgLkCGcDajEUVF8mJZr0CC", "KEY": "w3vkJbKmytue9NKuDrGPK0kHXWEfopZW"},
  20. "ZY": {"ID": "AKIDNUMzLZcumOOAftdb5JIIFwMYVBbmooTr", "KEY": "FF4exDDfrmRpwmTxqSC2cvxioiNozuY8"},
  21. "GYW": {"ID": "AKIDHRJtI87z2lMhmPq8j2h4AyTgU3KEIK4W", "KEY": "2nHTO04uSecJHyNVNxFEbgUfogroUt6F"},
  22. "LWW": {"ID": "AKIDPYZy5GfImzEIY7q4QehGaeT5Q2PVv6Fk", "KEY": "MRIsBJA3g15PRDivra0DNWOzJN9ex60d"},
  23. "HJ": {"ID": "AKIDnKdhUVyn27ewzouOGsEuLXS7qVt2bTKV", "KEY": "4P3SxCm7E4eyCBw85mFThgQuBwb0A1cy"}}
  24. def tencent_handwriting(img):
  25. """
  26. 每秒只能发<=10张
  27. :param img:
  28. :return:
  29. """
  30. cred = credential.Credential(TENCENT_SK["HJ"]["ID"], # 改成自己的SecretId
  31. TENCENT_SK["HJ"]["KEY"]) # SecretKey
  32. httpProfile = HttpProfile()
  33. httpProfile.endpoint = "ocr.tencentcloudapi.com"
  34. clientProfile = ClientProfile(signMethod="TC3-HMAC-SHA256") # 默认签名方法:HmacSHA256
  35. clientProfile.httpProfile = httpProfile
  36. client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)
  37. imgbase64 = cimg(img)
  38. # print("imgbase64:", imgbase64)
  39. try:
  40. req = models.GeneralHandwritingOCRRequest()
  41. req.ImageBase64 = imgbase64
  42. print("图片占内存大小:【{}】".format(sys.getsizeof(req.ImageBase64) / (1024 ** 2)))
  43. resp = client.GeneralHandwritingOCR(req)
  44. res_str = resp.to_json_string()
  45. # print('结果:', res_str)
  46. # mongo_id = my_config.mycol.insert_one({'b64': imgbase64, 'result': res_str})
  47. try:
  48. res_dict = json.loads(res_str)
  49. except:
  50. res_dict = eval(res_str)
  51. textres = "".join(pd.DataFrame(res_dict["TextDetections"])["DetectedText"])
  52. print('结果:', textres)
  53. return textres, res_str
  54. except TencentCloudSDKException as err:
  55. print('ocr异常:', err)
  56. # mongo_id = my_config.mycol.insert_one({'b64': imgbase64, 'result': 'ocr异常:' + str(err)})
  57. return "", 'ocr异常:' + str(err)
  58. def cimg(image):
  59. """
  60. 将各来源的图片转为base64编码,先默认为本地图片路径
  61. :param image:
  62. :return:
  63. """
  64. if re.search("^https?:", str(image)): # 线上图片
  65. filebyte = requests.get(image).content
  66. elif re.search("^[A-H]:", str(image)): # 本地图片
  67. local_img = image
  68. filebyte = open(local_img, 'rb').read()
  69. else: # 二进制文件流
  70. filebyte = image.read()
  71. # encoded = base64.b64encode(open(local_img, 'rb').read())
  72. base64_data = base64.b64encode(filebyte).rstrip().decode('utf-8')
  73. return base64_data
  74. def tencent_ocr(imgbase64):
  75. # 通用手写体
  76. if type(imgbase64) is np.ndarray: # 传进来的是np
  77. imgbase64 = np2base64(imgbase64)
  78. try:
  79. # print(bas64)
  80. cred = credential.Credential("AKIDnKdhUVyn27ewzouOGsEuLXS7qVt2bTKV", "4P3SxCm7E4eyCBw85mFThgQuBwb0A1cy")
  81. httpProfile = HttpProfile()
  82. httpProfile.endpoint = "ocr.tencentcloudapi.com"
  83. clientProfile = ClientProfile(signMethod="TC3-HMAC-SHA256")
  84. clientProfile.httpProfile = httpProfile
  85. client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)
  86. # req = models.TableOCRRequest()
  87. req = models.GeneralHandwritingOCRRequest()
  88. req.ImageBase64 = imgbase64
  89. # resp = client.TableOCR(req)
  90. resp = client.GeneralHandwritingOCR(req)
  91. print('结果', json.loads(resp.to_json_string()))
  92. # mycol.insert_one({'b64': bas64, 'result': resp.to_json_string()})
  93. return resp.to_json_string()
  94. except TencentCloudSDKException as err:
  95. print('再次ocr异常:', err)
  96. return None
  97. def np2base64(img_arr):
  98. """
  99. :param img_arr:
  100. :return:
  101. """
  102. # retval, buffer = cv2.imencode('.jpg', img_arr)
  103. # base64_str = base64.b64encode(buffer)
  104. # base64_str = base64_str.decode()
  105. # return base64_str
  106. if __name__ == '__main__':
  107. aimg = r"G:\zwj\WL\en2cn\files\imgs\8-17.png"
  108. # aimg2 = "http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/aizj/d6d1a0b4ccd67643a1b8c51993b1b344_answer.png"
  109. res = tencent_handwriting(aimg)
  110. # imgbase64 = cimg(aimg2)
  111. # res = tencent_ocr(imgbase64)
  112. print("最后的结果:", res)
  113. import requests
  114. # res = requests.post("http://192.168.1.65:10116/E2CC",
  115. # files={"answer_hw": open(r"G:\zwj\WL\en2cn\files\7-18\3.jpg", "rb")})
  116. # f2 = open(r"G:\zwj\WL\en2cn\files\7-18\3.jpg", "rb")
  117. # print(type(cimg(f2.read())))