# base64crnn.py
  1. import base64
  2. import os
  3. import re
  4. from crnn.crnn import crnnOcr
  5. from ocrapi import ocr
  6. import numpy as np
  7. import cv2
  8. def get_image():
  9. image_list = []
  10. for i in os.listdir('./result/image'):
  11. if re.match('(\d+)-(\d+)-(\d+)-(\d+)',i):
  12. with open(os.path.join('./result/image',i), 'rb') as f:
  13. base64_data = base64.b64encode(f.read())
  14. s = base64_data.decode()
  15. y_min, y_max, x_min, x_max = re.match('(\d+)-(\d+)-(\d+)-(\d+)', i).groups()
  16. image_list.append({'left':x_min,'top':y_min,'b64':s})
  17. return image_list
  18. def get_text(online=False):
  19. image_list = []
  20. for i in os.listdir('./result/text_img'):
  21. if re.match('(\d+)-(\d+)-(\d+)-(\d+)',i):
  22. with open(os.path.join('./result/text_img',i), 'rb') as f:
  23. if online:
  24. s = ocr(os.path.join('./result/text_img',i))
  25. else:
  26. s = crnnOcr(os.path.join('./result/text_img', i))
  27. y_min, y_max, x_min, x_max = re.match('(\d+)-(\d+)-(\d+)-(\d+)', i).groups()
  28. image_list.append({'left':x_min,'top':y_min,
  29. 'w':int(x_max)-int(x_min),
  30. 'h':int(y_max)-int(y_min),
  31. 'size':int((int(y_max)-int(y_min))*0.6),
  32. 's':s})
  33. # print(i)
  34. # print({'left':x_min,'top':y_min,
  35. # 'w':int(x_max)-int(x_min),
  36. # 'h':int(y_max)-int(y_min),
  37. # 'size':int((int(y_max)-int(y_min))*0.6),
  38. # 's':s})
  39. return image_list
  40. def get_image_html():
  41. ...
  42. def get_text_html():
  43. image_list = []
  44. for i in os.listdir('./result/text_img'):
  45. if re.match('(\d+)-(\d+)-(\d+)-(\d+)',i):
  46. with open(os.path.join('./result/text_img',i), 'rb') as f:
  47. s = ocr(os.path.join('./result/text_img',i))
  48. y_min, y_max, x_min, x_max = re.match('(\d+)-(\d+)-(\d+)-(\d+)', i).groups()
  49. image_list.append({'left':x_min,'top':y_min,
  50. 'w':int(x_max)-int(x_min),
  51. 'h':int(y_max)-int(y_min),
  52. 'size':int((int(y_max)-int(y_min))*0.6),
  53. 's':s})
  54. import json
  55. def base64ocr(base64_str):
  56. imgString = base64.b64decode(base64_str)
  57. array = np.fromstring(imgString, np.uint8)
  58. image = cv2.imdecode(array, cv2.IMREAD_COLOR)
  59. cv2.imshow('1',image)
  60. cv2.waitKey()
  61. string = crnnOcr(image)
  62. return string
  63. if __name__ == '__main__':
  64. import pandas as pd
  65. data = pd.read_csv(r'F:\exam_segment_django_0330\x11.csv',encoding='utf8')
  66. for i in data['base64']:
  67. try:
  68. print(base64ocr(i))
  69. except Exception as e:
  70. print(e)
  71. # with open(r'F:\exam_segment_django_0330\2.txt') as f:
  72. # for i in f:
  73. # print(base64ocr(i))