12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- # @Author : lightXu
- # @File : info_section.py
- # @Time : 2019/4/2 0002 下午 15:38
- import cv2
- from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format
- from segment.sheet_resolve.tools.utils import crop_region, read_xml_to_json, read_single_img
# Region class names that get_text() treats as "info section" regions:
# only regions whose 'class_name' appears here are cropped and sent to OCR.
info_section_class = [
    'alarm_info',
    'info_title',
    'attention',
    'page',
    'full_filling',
    'print_info',
    'ban_area',
    'type_score',
    'time',
    'total_score',
    'executor',
    'verify',
]
def get_text(sheet, raw_image):
    """Attach OCR text to every info-section region of ``sheet``.

    Each entry of ``sheet['regions']`` whose ``'class_name'`` is listed in
    ``info_section_class`` is cropped out of ``raw_image`` using its
    ``'bounding_box'`` and passed to the OCR helper. The ``'chars'`` entry of
    the OCR result is joined into one string and stored under the region's
    ``'text'`` key. Regions of any other class are left untouched.

    OCR is best-effort: if the OCR call or the handling of its result raises,
    the failure is logged and the region's ``'text'`` is set to ``''`` so the
    remaining regions are still processed.

    Args:
        sheet: Mapping with a ``'regions'`` list; every region must provide
            ``'class_name'`` and, for info-section classes, ``'bounding_box'``.
        raw_image: Image the bounding boxes refer to; handed unchanged to
            ``crop_region``.

    Returns:
        The same ``sheet`` object, modified in place.
    """
    # Function-scope import keeps this block self-contained; the module's
    # top-level imports do not provide logging.
    import logging

    logger = logging.getLogger(__name__)

    for ele in sheet['regions']:
        if ele['class_name'] not in info_section_class:
            continue

        # Cropping stays outside the try block on purpose: an invalid
        # bounding box is a data error and still propagates to the caller.
        img_region = crop_region(raw_image, ele['bounding_box'])
        try:
            text_dict = get_ocr_text_and_coordinate_in_google_format(img_region)
            # NOTE(review): 'chars' is assumed to be an iterable of strings;
            # confirm against the OCR helper's return format.
            ele['text'] = ''.join(text_dict['chars'])
        except Exception:
            # Keep the empty-text fallback, but leave a trace so OCR outages
            # can be told apart from regions that are genuinely blank.
            logger.warning(
                'OCR failed for region %r; storing empty text',
                ele['class_name'],
                exc_info=True,
            )
            ele['text'] = ''

    return sheet
- # if __name__ == '__main__':
- # xml_path = r'C:\Users\Administrator\Desktop\test\third_raw\010515.xml'
- # jpg_path = r'C:\Users\Administrator\Desktop\test\third_raw\010515.jpg'
- # sheet_dict = read_xml_to_json(xml_path)
- # image = read_single_img(jpg_path)
- # get_text(sheet_dict, image)
|