info_section.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
# @Author : lightXu
# @File : info_section.py
# @Time : 2019/4/2 0002 PM 15:38
import logging

import cv2

from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format
from segment.sheet_resolve.tools.utils import crop_region, read_xml_to_json, read_single_img
  7. info_section_class = ['alarm_info',
  8. 'info_title',
  9. 'attention',
  10. 'page',
  11. 'full_filling',
  12. 'print_info',
  13. 'ban_area',
  14. 'type_score',
  15. 'time',
  16. 'total_score',
  17. 'executor',
  18. 'verify']
  19. def get_text(sheet, raw_image):
  20. for ele in sheet['regions']:
  21. if ele['class_name'] in info_section_class:
  22. bbox = ele['bounding_box']
  23. img_region = crop_region(raw_image, bbox)
  24. try:
  25. text_dict = get_ocr_text_and_coordinate_in_google_format(img_region)
  26. text_list = text_dict['chars']
  27. text = ''.join(text_list)
  28. ele['text'] = text
  29. except Exception:
  30. ele['text'] = ''
  31. return sheet
# if __name__ == '__main__':
#     xml_path = r'C:\Users\Administrator\Desktop\test\third_raw\010515.xml'
#     jpg_path = r'C:\Users\Administrator\Desktop\test\third_raw\010515.jpg'
#     sheet_dict = read_xml_to_json(xml_path)
#     image = read_single_img(jpg_path)
#     get_text(sheet_dict, image)