12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818 |
- # @Author : fantuan
- # @Time : 2019.9.4
- import requests
- import base64
- from urllib import parse, request
- import cv2
- import re
- from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format
- from segment.sheet_resolve.analysis.sheet.ocr_key_words import key_words
- OCR_ACCURACY = 'accurate'
- # def ocr_login():
- # grant_type = 'client_credentials'
- # client_id = OCR_CLIENT_ID
- # client_secret = OCR_CLIENT_SECRET
- #
- # textmod = {'grant_type': grant_type, 'client_id': client_id, 'client_secret': client_secret}
- # textmod = parse.urlencode(textmod)
- #
- # # 输出内容:user=admin&password=admin
- # header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}
- # url = 'https://aip.baidubce.com/oauth/2.0/token'
- # req = request.Request(url='{}{}{}'.format(url, '?', textmod), headers=header_dict)
- # res = request.urlopen(req).read()
- # token = eval(res.decode(encoding='utf-8'))['access_token']
- # return token
- # def opecv2base64(img):
- # image = cv2.imencode('.jpg', img)[1]
- # base64_data = str(base64.b64encode(image))[2:-1]
- # return base64_data
- # def get_ocr_raw_result(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
- # textmod = {'access_token': access_token}
- # textmod = parse.urlencode(textmod)
- # url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
- # url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
- #
- # headers = {'Content-Type': 'application/x-www-form-urlencoded'}
- #
- # image_type = 'base64'
- # group_id = 'group001'
- # user_id = 'usr001'
- #
- # image = opecv2base64(img)
- #
- # data = {
- # 'image_type': image_type,
- # 'group_id': group_id,
- # 'user_id': user_id,
- # 'image': image,
- # 'detect_direction': 'true',
- # 'recognize_granularity': 'small',
- # 'language_type': language_type,
- # # 'vertexes_location': 'true',
- # # 'probability': 'true'
- # }
- #
- # resp = requests.post(url, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # if 'internal error' in resp.get('error_msg'):
- # resp = requests.post(url_general, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- # else:
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- #
- # return resp
- # def get_ocr_text_and_coordinate(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
- # textmod = {'access_token': access_token}
- # textmod = parse.urlencode(textmod)
- # url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
- # url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
- #
- # headers = {'Content-Type': 'application/x-www-form-urlencoded'}
- #
- # image_type = 'base64'
- # group_id = 'group001'
- # user_id = 'usr001'
- #
- # image = opecv2base64(img)
- #
- # data = {
- # 'image_type': image_type,
- # 'group_id': group_id,
- # 'user_id': user_id,
- # 'image': image,
- # 'detect_direction': 'true',
- # 'recognize_granularity': 'small',
- # 'language_type': language_type,
- # # 'vertexes_location': 'true',
- # # 'probability': 'true'
- # }
- #
- # resp = requests.post(url, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # if 'internal error' in resp.get('error_msg'):
- # resp = requests.post(url_general, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- # else:
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- #
- # words_result = resp.get('words_result')
- # return words_result
- # def get_ocr_text_and_coordinate_in_google_format(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
- # textmod = {'access_token': access_token}
- # textmod = parse.urlencode(textmod)
- # url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
- # url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
- #
- # headers = {'Content-Type': 'application/x-www-form-urlencoded'}
- #
- # image_type = 'base64'
- # group_id = 'group001'
- # user_id = 'usr001'
- #
- # image = opecv2base64(img)
- #
- # data = {
- # 'image_type': image_type,
- # 'group_id': group_id,
- # 'user_id': user_id,
- # 'image': image,
- # 'detect_direction': 'true',
- # 'recognize_granularity': 'small',
- # 'language_type': language_type,
- # # 'vertexes_location': 'true',
- # # 'probability': 'true'
- # }
- #
- # resp = requests.post(url, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # if 'internal error' in resp.get('error_msg'):
- # resp = requests.post(url_general, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- # else:
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- #
- # words_result = resp.get('words_result')
- # dict_list = [item2.get('location') for item in words_result for item2 in item['chars']]
- # char_list = [item2.get('char') for item in words_result for item2 in item['chars']]
- # words = [item.get('words') for item in words_result]
- # matrix = []
- # for adict in dict_list:
- # xmin = adict['left']
- # ymin = adict['top']
- # xmax = adict['width'] + adict['left']
- # ymax = adict['top'] + adict['height']
- # item0 = (xmin, ymin, xmax, ymax)
- # matrix.append(item0)
- #
- # res_dict = {'chars': char_list, 'coordinates': matrix, 'words': words}
- # return res_dict
def model_type_score(all_type_score_one, choice_box, cloze_box, solve_box, composition_box):
    """Associate one ``type_score`` box with the nearest question-region box.

    For each of the four region categories the Manhattan distance between the
    top-left corner of ``all_type_score_one`` and the top-left corner of every
    candidate box is computed, and the closest box per category is kept (the
    first one wins when several boxes of a category are equally close).  The
    category whose closest box is strictly nearer than the closest box of
    every other category is selected.

    :param all_type_score_one: coordinates of a single type_score box found by
        the model; only its first two elements (x, y) are used for distances.
    :param choice_box: list of choice-question boxes found by the model.
    :param cloze_box: list of cloze (fill-in-the-blank) boxes found by the model.
    :param solve_box: list of solve (free-response) boxes found by the model.
    :param composition_box: list of composition boxes found by the model.
    :return:
        * ``0`` when no category has a strictly smallest distance (all lists
          empty, or two categories tie for the minimum);
        * ``-1`` when the winning distance is not below that category's
          threshold (400 for choice/cloze, 300 for solve/composition);
        * otherwise a dict with the distance, the matched ``'bounding_box'``,
          its ``'label'`` and the original ``'type_box'``.
    """
    # Sentinel meaning "no box seen yet"; distances at or above it are ignored.
    no_match = 100000000

    # (label, key under which the distance is stored, candidate boxes, threshold)
    # NOTE(review): the composition entry stores its distance under
    # 'min_cloze'.  That looks like a copy-paste slip, but the key is kept
    # unchanged because consumers of the returned dict are not visible here.
    categories = (
        ('choice', 'min_choice', choice_box, 400),
        ('cloze', 'min_cloze', cloze_box, 400),
        ('solve', 'min_solve', solve_box, 300),
        ('composition', 'min_cloze', composition_box, 300),
    )

    anchor = list(all_type_score_one)
    nearest = []  # one (distance, matched_box) pair per category, same order
    for _label, _key, boxes, _limit in categories:
        best_distance, best_box = no_match, None
        for box in boxes:
            distance = abs(anchor[0] - box[0]) + abs(anchor[1] - box[1])
            if distance < best_distance:
                best_distance, best_box = distance, box
        nearest.append((best_distance, best_box))

    distances = [distance for distance, _box in nearest]
    winning_distance = min(distances)
    # A category only wins when it is strictly closer than all the others.
    if distances.count(winning_distance) != 1:
        return 0

    winner = distances.index(winning_distance)
    label, distance_key, _boxes, limit = categories[winner]

    # The threshold applies to the winning distance.  The previous code
    # compared the (always zero) result placeholder instead, so the -1
    # branch could never be reached.
    if winning_distance >= limit:
        return -1

    return {distance_key: winning_distance,
            'bounding_box': nearest[winner][1],
            'label': label,
            'type_box': all_type_score_one}
def ocr_key_words(rect, type_score_dict):
    """Attach the OCR characters lying inside a type_score box to its dict.

    A character is kept when its box does not stick out of
    ``type_score_dict['type_box']`` by 30 or more on any side.

    :param rect: OCR result with parallel sequences ``'chars'`` and
        ``'coordinates'`` (each coordinate indexed as xmin, ymin, xmax, ymax).
    :param type_score_dict: dict holding ``'type_box'`` for one type_score
        region; it is modified in place.
    :return: the same ``type_score_dict`` object, with the selected characters
        stored as a list under ``'words'``.
    """
    margin = 30
    chars = rect['chars']
    total = len(chars)

    region = type_score_dict['type_box']
    left, top, right, bottom = region[0], region[1], region[2], region[3]

    def within_region(box):
        # Each side may overshoot the region by strictly less than `margin`.
        return (box[0] - left > -margin
                and box[1] - top > -margin
                and box[2] - right < margin
                and box[3] - bottom < margin)

    type_score_dict['words'] = [
        chars[idx]
        for idx in range(total)
        if within_region(rect['coordinates'][idx])
    ]
    return type_score_dict
- def get_sheet_number_total(answer_sheet, res, img0):
- # img_path0 = r'C:\Users\Administrator\Desktop\score_test\image\dili\000003.jpg'
- # img0 = cv2.imread(img_path0)
- # access_token = ocr_login()
- # res = get_ocr_text_and_coordinate_in_google_format(img0, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG') # 整张OCR
- # answer_sheet = {"img_name": "000003.jpg", "analysis_type": "geography", "regions": [{"class_name": "cloze_s", "bounding_box": {"xmin": 57, "ymin": 412, "xmax": 1554, "ymax": 514}, "score": "1.0000"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 82, "ymin": 988, "xmax": 1496, "ymax": 1073}, "score": "1.0000"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 101, "ymin": 508, "xmax": 1540, "ymax": 602}, "score": "0.9999"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 170, "ymin": 1485, "xmax": 1532, "ymax": 1556}, "score": "0.9999"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 49, "ymin": 1298, "xmax": 1516, "ymax": 1383}, "score": "0.9994"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 179, "ymin": 1067, "xmax": 1502, "ymax": 1164}, "score": "0.9994"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 118, "ymin": 1529, "xmax": 1532, "ymax": 1635}, "score": "0.9983"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 74, "ymin": 1395, "xmax": 923, "ymax": 1456}, "score": "0.9976"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 52, "ymin": 830, "xmax": 1466, "ymax": 903}, "score": "0.9960"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 79, "ymin": 1594, "xmax": 1540, "ymax": 1737}, "score": "0.9365"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 85, "ymin": 266, "xmax": 940, "ymax": 342}, "score": "0.7881"}, {"class_name": "cloze_s", "bounding_box": {"xmin": 99, "ymin": 900, "xmax": 956, "ymax": 974}, "score": "0.7662"}, {"class_name": "page", "bounding_box": {"xmin": 716, "ymin": 2220, "xmax": 995, "ymax": 2284}, "score": "0.8190"}, {"class_name": "type_score", "bounding_box": {"xmin": 63, "ymin": 757, "xmax": 242, "ymax": 821}, "score": "0.9989"}, {"class_name": "type_score", "bounding_box": {"xmin": 66, "ymin": 105, "xmax": 250, "ymax": 163}, "score": "0.9613"}, {"class_name": "type_score", "bounding_box": {"xmin": 55, "ymin": 1228, "xmax": 239, "ymax": 1287}, "score": "0.9506"}, {"class_name": "cloze", 
"bounding_box": {"xmin": 49, "ymin": 93, "xmax": 1518, "ymax": 754}, "score": "0.9954"}, {"class_name": "cloze", "bounding_box": {"xmin": 88, "ymin": 780, "xmax": 1524, "ymax": 1579}, "score": "0.9208"}], "series_number": "20190912000015", "is_success": 1}
- choice_boxs = []
- choice_m_boxs = []
- cloze_boxs = []
- solve_boxs = []
- type_score_boxs = []
- select_boxs = []
- all_test = []
- num_choice = 0
- num_choice_m = 0
- num_cloze = 0
- num_slove = 0
- num_select = 0
- num_type_score = 0
- volume_last = []
- Score_last = []
- score_last_one = 0
- volume_last_one = 0
- model_box2 = []
- composition_boxs = []
- score2 = []
- num_redundance = 0
- num_composition = 0
- j_temp = []
- jj_temp =[]
- for ele in answer_sheet["regions"]: # 从模型输出获取对应标签的边框信息
- if ele["class_name"] == 'choice':
- choice_box = ele['bounding_box']
- choice_one = (
- int(choice_box['xmin']), int(choice_box['ymin']), int(choice_box['xmax']), int(choice_box['ymax']))
- choice_boxs.append(choice_one)
- num_choice = num_choice + 1
- elif ele["class_name"] == 'choice_m':
- choice_m_boxs.append(ele)
- num_choice_m = num_choice_m + 1
- elif ele["class_name"] == 'cloze':
- cloze_box = ele['bounding_box']
- cloze_one = (int(cloze_box['xmin']), int(cloze_box['ymin']), int(cloze_box['xmax']), int(cloze_box['ymax']))
- cloze_boxs.append(cloze_one)
- num_cloze = num_cloze + 1
- elif ele["class_name"] == 'composition0' or ele["class_name"] == 'composition' or ele[
- "class_name"] == 'correction':
- composition_box = ele['bounding_box']
- composition_one = (int(composition_box['xmin']), int(composition_box['ymin']), int(composition_box['xmax']),
- int(composition_box['ymax']))
- composition_boxs.append(composition_one)
- num_composition = num_composition + 1
- elif ele["class_name"] == 'solve' or ele["class_name"] == 'solve0':
- solve_box = ele['bounding_box']
- solve_one = (int(solve_box['xmin']), int(solve_box['ymin']), int(solve_box['xmax']), int(solve_box['ymax']))
- solve_boxs.append(solve_one)
- num_slove = num_slove + 1
- elif ele["class_name"] == 'select_s' or ele["class_name"] == 'select_b':
- select_box = ele['bounding_box']
- select_one = (
- int(select_box['xmin']), int(select_box['ymin']), int(select_box['xmax']), int(select_box['ymax']))
- select_boxs.append(select_one)
- num_select = num_select + 1
- elif ele["class_name"] == 'type_score':
- type_score_box = ele['bounding_box']
- type_score_one = (int(type_score_box['xmin']), int(type_score_box['ymin']), int(type_score_box['xmax']),
- int(type_score_box['ymax']))
- type_score_boxs.append(type_score_one)
- num_type_score = num_type_score + 1
- for i in range(len(type_score_boxs)):
- test_result1 = model_type_score(type_score_boxs[i], choice_boxs, cloze_boxs, solve_boxs, composition_boxs)
- if test_result1 != -1 and test_result1 != 0:
- type_score_dict_ocr = ocr_key_words(res, test_result1)
- if type_score_dict_ocr['words'] == []: # type_score没有ocr结果,暂定可能ocr漏识别到分数,再过一遍ocr
- type_score_dict_ocr['words'] = get_ocr_text_and_coordinate_in_google_format(img0[type_score_boxs[i][1]:type_score_boxs[i][3],type_score_boxs[i][0]:type_score_boxs[i][2]],ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')['words']
- test = key_words(type_score_dict_ocr)
- if test == {} or test['Score_structure'] == -1: # 如果存在type_score未识别到分数,暂定可能ocr漏识别到分数,再过一遍ocr
- image = img0[test_result1['bounding_box'][1]: test_result1['bounding_box'][3],
- test_result1['bounding_box'][0]: test_result1['bounding_box'][2]]
- res1 = get_ocr_text_and_coordinate_in_google_format(image,ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
- aa = []
- for ii in range(len(res1['coordinates'])):
- xmin11 = res1['coordinates'][ii][0] + test_result1['bounding_box'][0]
- ymin11 = res1['coordinates'][ii][1] + test_result1['bounding_box'][1]
- xmax11 = res1['coordinates'][ii][2] + test_result1['bounding_box'][0]
- ymax11 = res1['coordinates'][ii][3] + test_result1['bounding_box'][1]
- aaa = (xmin11, ymin11, xmax11, ymax11)
- aa.append(aaa)
- res1['coordinates'] = aa
- if len(res1['words']) > 0:
- type_score_dict_ocr['words'] = res1['words'][0]
- new_test = key_words(type_score_dict_ocr)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 1:
- type_score_dict_ocr['words'] = res1['words'][1]
- new_test = key_words(type_score_dict_ocr)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 2:
- type_score_dict_ocr['words'] = res1['words'][2]
- new_test = key_words(type_score_dict_ocr)
- if new_test != {}:
- if new_test['volume_structure'] != -1 and int(new_test['volume_structure'][0][
- 'volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['volume_structure'][0]['volume_total_score'] = int(
- new_test['volume_structure'][0]['volume_total_score']) % 100
- elif new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and int(
- new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['Score_structure'][0]['item_total_score'] = int(
- new_test['Score_structure'][0]['item_total_score']) % 100
- all_test.append(new_test)
- else:
- if test['volume_structure'] != -1 and int(
- test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- test['volume_structure'][0]['volume_total_score'] = int(
- test['volume_structure'][0]['volume_total_score']) % 100
- elif test['volume_structure'] == -1 and test['Score_structure'] != -1 and int(
- test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- test['Score_structure'][0]['item_total_score'] = int(
- test['Score_structure'][0]['item_total_score']) % 100
- all_test.append(test)
- for jjjj in range(len(all_test)):
- if all_test[jjjj]['Score_structure'] != -1:
- label_1 = all_test[jjjj]['Score_structure'][0]['label']
- if label_1 == 'choice':
- if choice_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
- choice_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
- elif label_1 == 'cloze':
- if cloze_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
- cloze_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
- elif label_1 == 'solve':
- if solve_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
- solve_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
- elif label_1 == 'composition':
- if solve_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
- solve_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
- if choice_boxs != []: # 9月16号修改
- for ij in range(len(choice_boxs)):
- if choice_boxs[ij][1] - 150 > 0:
- yminss = choice_boxs[ij][1] - 150
- else:
- yminss = choice_boxs[ij][1]
- if choice_boxs[ij][0] - 100 > 0:
- xminss = choice_boxs[ij][0] - 100
- else:
- xminss = choice_boxs[ij][0]
- res1 = get_ocr_text_and_coordinate_in_google_format(
- img0[yminss:choice_boxs[ij][3], xminss:choice_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,
- language_type='CHN_ENG')
- aa = []
- type_score_dict_ocrs = {}
- for ii in range(len(res1['coordinates'])):
- xmin11 = res1['coordinates'][ii][0] + choice_boxs[ij][0]
- ymin11 = res1['coordinates'][ii][1] + choice_boxs[ij][1]
- xmax11 = res1['coordinates'][ii][2] + choice_boxs[ij][0]
- ymax11 = res1['coordinates'][ii][3] + choice_boxs[ij][1]
- aaa = (xmin11, ymin11, xmax11, ymax11)
- aa.append(aaa)
- res1['coordinates'] = aa
- # print(res1)
- if len(res1['words']) > 0:
- type_score_dict_ocrs['words'] = res1['words'][0]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 1:
- type_score_dict_ocrs['words'] = res1['words'][1]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 2:
- type_score_dict_ocrs['words'] = res1['words'][2]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 3:
- type_score_dict_ocrs['words'] = res1['words'][3]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 4:
- type_score_dict_ocrs['words'] = res1['words'][4]
- new_test = key_words(type_score_dict_ocrs)
- if new_test != {} and new_test['volume_structure'] != -1 and (int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(new_test['volume_structure'][0]['volume_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
- if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['volume_structure'][0]['volume_total_score'] = int(
- new_test['volume_structure'][0]['volume_total_score']) % 100
- new_test['volume_structure'][0]['bounding_box'] = choice_boxs[ij]
- new_test['volume_structure'][0]['label'] = 'choice'
- all_test.append(new_test)
- elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (int(new_test['Score_structure'][0]['item_total_score']) >4 or int(new_test['Score_structure'][0]['item_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
- if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['Score_structure'][0]['item_total_score'] = int(
- new_test['Score_structure'][0]['item_total_score']) % 100
- new_test['Score_structure'][0]['bounding_box'] = choice_boxs[ij]
- new_test['Score_structure'][0]['label'] = 'choice'
- all_test.append(new_test)
- if cloze_boxs != []:
- for ij in range(len(cloze_boxs)):
- if cloze_boxs[ij][1] - 100 > 0:
- yminss = cloze_boxs[ij][1] - 100
- else:
- yminss = cloze_boxs[ij][1]
- if cloze_boxs[ij][0] - 100 > 0:
- xminss = cloze_boxs[ij][0] - 100
- else:
- xminss = cloze_boxs[ij][0]
- res1 = get_ocr_text_and_coordinate_in_google_format(
- img0[yminss:cloze_boxs[ij][3], xminss:cloze_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,
- language_type='CHN_ENG')
- aa = []
- type_score_dict_ocrs = {}
- for ii in range(len(res1['coordinates'])):
- xmin11 = res1['coordinates'][ii][0] + cloze_boxs[ij][0]
- ymin11 = res1['coordinates'][ii][1] + cloze_boxs[ij][1]
- xmax11 = res1['coordinates'][ii][2] + cloze_boxs[ij][0]
- ymax11 = res1['coordinates'][ii][3] + cloze_boxs[ij][1]
- aaa = (xmin11, ymin11, xmax11, ymax11)
- aa.append(aaa)
- res1['coordinates'] = aa
- if len(res1['words']) > 0:
- type_score_dict_ocrs['words'] = res1['words'][0]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 1:
- type_score_dict_ocrs['words'] = res1['words'][1]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 2:
- type_score_dict_ocrs['words'] = res1['words'][2]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 3:
- type_score_dict_ocrs['words'] = res1['words'][3]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 4:
- type_score_dict_ocrs['words'] = res1['words'][4]
- new_test = key_words(type_score_dict_ocrs)
- if new_test != {} and new_test['volume_structure'] != -1 and (int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(new_test['volume_structure'][0]['volume_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
- if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['volume_structure'][0]['volume_total_score'] = int(
- new_test['volume_structure'][0]['volume_total_score']) % 100
- new_test['volume_structure'][0]['bounding_box'] = cloze_boxs[ij]
- new_test['volume_structure'][0]['label'] = 'cloze'
- all_test.append(new_test)
- elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(new_test['Score_structure'][0]['item_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
- if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['Score_structure'][0]['item_total_score'] = int(
- new_test['Score_structure'][0]['item_total_score']) % 100
- new_test['Score_structure'][0]['bounding_box'] = cloze_boxs[ij]
- new_test['Score_structure'][0]['label'] = 'cloze'
- all_test.append(new_test)
- if solve_boxs != []:
- for ij in range(len(solve_boxs)):
- yminss = solve_boxs[ij][1]
- xminss = solve_boxs[ij][0]
- res1 = get_ocr_text_and_coordinate_in_google_format(
- img0[yminss:solve_boxs[ij][3], xminss:solve_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,
- language_type='CHN_ENG')
- aa = []
- type_score_dict_ocrs = {}
- for ii in range(len(res1['coordinates'])):
- xmin11 = res1['coordinates'][ii][0] + solve_boxs[ij][0]
- ymin11 = res1['coordinates'][ii][1] + solve_boxs[ij][1]
- xmax11 = res1['coordinates'][ii][2] + solve_boxs[ij][0]
- ymax11 = res1['coordinates'][ii][3] + solve_boxs[ij][1]
- aaa = (xmin11, ymin11, xmax11, ymax11)
- aa.append(aaa)
- res1['coordinates'] = aa
- if len(res1['words']) > 0:
- type_score_dict_ocrs['words'] = res1['words'][0]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 1:
- type_score_dict_ocrs['words'] = res1['words'][1]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 2:
- type_score_dict_ocrs['words'] = res1['words'][2]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 3:
- type_score_dict_ocrs['words'] = res1['words'][3]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 4:
- type_score_dict_ocrs['words'] = res1['words'][4]
- new_test = key_words(type_score_dict_ocrs)
- if new_test != {} and new_test['volume_structure'] != -1 and int(new_test['volume_structure'][0]['volume_total_score']) > 5: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
- if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['volume_structure'][0]['volume_total_score'] = int(
- new_test['volume_structure'][0]['volume_total_score']) % 100
- new_test['volume_structure'][0]['bounding_box'] = solve_boxs[ij]
- new_test['volume_structure'][0]['label'] = 'solve'
- all_test.append(new_test)
- elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (int(new_test['Score_structure'][0]['item_total_score']) > 5 or int(new_test['Score_structure'][0]['item_total_score']) == -1): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
- if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['Score_structure'][0]['item_total_score'] = int(
- new_test['Score_structure'][0]['item_total_score']) % 100
- new_test['Score_structure'][0]['bounding_box'] = solve_boxs[ij]
- new_test['Score_structure'][0]['label'] = 'solve'
- all_test.append(new_test)
- if composition_boxs != []:
- for ij in range(len(composition_boxs)):
- if composition_boxs[ij][1] - 250 > 0:
- yminss = composition_boxs[ij][1] - 250
- else:
- yminss = composition_boxs[ij][1]
- if composition_boxs[ij][0] - 100 > 0:
- xminss = composition_boxs[ij][0] - 100
- else:
- xminss = composition_boxs[ij][0]
- res1 = get_ocr_text_and_coordinate_in_google_format(
- img0[yminss:composition_boxs[ij][3], xminss:composition_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,
- language_type='CHN_ENG')
- aa = []
- type_score_dict_ocrs = {}
- for ii in range(len(res1['coordinates'])):
- xmin11 = res1['coordinates'][ii][0] + composition_boxs[ij][0]
- ymin11 = res1['coordinates'][ii][1] + composition_boxs[ij][1]
- xmax11 = res1['coordinates'][ii][2] + composition_boxs[ij][0]
- ymax11 = res1['coordinates'][ii][3] + composition_boxs[ij][1]
- aaa = (xmin11, ymin11, xmax11, ymax11)
- aa.append(aaa)
- res1['coordinates'] = aa
- if len(res1['words']) > 0:
- type_score_dict_ocrs['words'] = res1['words'][0]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 1:
- type_score_dict_ocrs['words'] = res1['words'][1]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 2:
- type_score_dict_ocrs['words'] = res1['words'][2]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 3:
- type_score_dict_ocrs['words'] = res1['words'][3]
- new_test = key_words(type_score_dict_ocrs)
- if new_test == {} or new_test['Score_structure'] == -1:
- if len(res1['words']) > 4:
- type_score_dict_ocrs['words'] = res1['words'][4]
- new_test = key_words(type_score_dict_ocrs)
- if new_test != {} and new_test['volume_structure'] != -1 and int(
- new_test['volume_structure'][0]['volume_total_score']) > 4: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
- if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['volume_structure'][0]['volume_total_score'] = int(
- new_test['volume_structure'][0]['volume_total_score']) % 100
- new_test['volume_structure'][0]['bounding_box'] = composition_boxs[ij]
- new_test['volume_structure'][0]['label'] = 'composition'
- all_test.append(new_test)
- elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
- 'Score_structure'] != -1 and int(
- new_test['Score_structure'][0]['item_total_score']) > 4: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
- if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- new_test['Score_structure'][0]['item_total_score'] = int(
- new_test['Score_structure'][0]['item_total_score']) % 100
- new_test['Score_structure'][0]['bounding_box'] = composition_boxs[ij]
- new_test['Score_structure'][0]['label'] = 'composition'
- all_test.append(new_test)
- for aaa in range(len(all_test)):
- if all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] == -1:
- score_last_one = {'model_box': dict(all_test[aaa])['Score_structure'][0]['bounding_box'],
- 'label': dict(all_test[aaa])['Score_structure'][0]['label'],
- 'number': dict(all_test[aaa])['Score_structure'][0]['item_N'],
- 'score': dict(all_test[aaa])['Score_structure'][0]['item_total_score'],
- 'number_score': dict(all_test[aaa])['Score_structure'][0]['item_score'],
- 'counts': dict(all_test[aaa])['Score_structure'][0]['item_count']}
- Score_last.append(score_last_one)
- continue
- elif all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] != -1:
- score_last_one = {'model_box': dict(all_test[aaa])['Score_structure'][0]['bounding_box'],
- 'label': dict(all_test[aaa])['Score_structure'][0]['label'],
- 'number': -1,
- 'score': dict(all_test[aaa])['Score_structure'][0]['volume_total_score'],
- 'number_score': dict(all_test[aaa])['Score_structure'][0]['volume_score'],
- 'counts': dict(all_test[aaa])['Score_structure'][0]['volume_count']}
- Score_last.append(score_last_one)
- volume_last_one = {'volume_N': dict(all_test[aaa])['volume_structure'][0]['volume_N'],
- 'volume_total_score': dict(all_test[aaa])['volume_structure'][0]['volume_total_score'],
- 'volume_count': dict(all_test[aaa])['volume_structure'][0]['volume_count'],
- 'volume_score': dict(all_test[aaa])['volume_structure'][0]['volume_score'],
- 'keyword_type': dict(all_test[aaa])['volume_structure'][0]['keyword_type']}
- volume_last.append(volume_last_one)
- continue
- elif all_test[aaa]['volume_structure'] != -1:
- volume_last_one = {'volume_N': dict(all_test[aaa])['volume_structure'][0]['volume_N'],
- 'volume_total_score': dict(all_test[aaa])['volume_structure'][0]['volume_total_score'],
- 'volume_count': dict(all_test[aaa])['volume_structure'][0]['volume_count'],
- 'volume_score': dict(all_test[aaa])['volume_structure'][0]['volume_score'],
- 'keyword_type': dict(all_test[aaa])['volume_structure'][0]['keyword_type']}
- volume_last.append(volume_last_one)
- continue
- # Score_last = sorted(Score_last, key=lambda x: (
- # x['model_box'][0], x['model_box'][0] + x['model_box'][1], -x['score'])) # 按答题卡顺序输出
- len_Score_last = len(Score_last)
- for i in range(len_Score_last): # 去重一个边框可能对应多个type_score的情况
- if Score_last[i]['model_box'] in model_box2:
- index2 = model_box2.index(Score_last[i]['model_box'])
- score = Score_last[i]['score']
- if score < score2[index2] and score2[index2] < 30: # 去重,type_score多余的包含小题分数
- Score_last[i] = -1
- elif score < score2[index2] and score2[index2] > 30: # 去重,type_score多余的包含分卷分数
- Score_last[index2] = -1
- elif score > score2[index2] and score < 30: # 去重,type_score在不大于30分的情况下,暂定保留更大的分数
- Score_last[index2] = -1
- elif score > score2[index2] and score > 30: # 去重,type_score去除大于30分的重复分数
- Score_last[i] = -1
- else:
- Score_last[i] = -1
- else:
- model_box2.append(Score_last[i]['model_box'])
- score2.append(Score_last[i]['score'])
- while num_redundance < len_Score_last: # 去重一个边框可能对应多个type_score的情况
- if Score_last[num_redundance] == -1:
- del Score_last[num_redundance]
- len_Score_last = len_Score_last - 1
- else:
- num_redundance = num_redundance + 1
- # print(Score_last)
- # print(volume_last) # 分卷信息,暂不输出
- # print(answer_sheet['regions'])
- if Score_last != []:
- for i in range(len(Score_last)):
- if type(Score_last[i]['number']) is list:
- C_q_s = len(Score_last[i]['number']) * [Score_last[i]['score']]
- Score_last[i]['score'] = C_q_s
- if Score_last != []:
- if num_choice == 1 or num_cloze == 1: # 对应choice_m分数
- for i in range(len(Score_last)):
- if Score_last[i]['label'] == 'choice':
- count_choice_m = 0
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'choice_m':
- if Score_last[i]['number_score'] != -1 and ('number' in answer_sheet['regions'][j].keys()):
- answer_sheet['regions'][j]['default_points'] = len(answer_sheet['regions'][j]['number']) * [float(Score_last[i]['number_score'])]
- elif Score_last[i]['number_score'] == -1 and Score_last[i]['score'] != -1 and 'number' in answer_sheet['regions'][j].keys():
- count_choice_m = count_choice_m + len(answer_sheet['regions'][j]['number'])
- j_temp.append(j)
- if j == len(answer_sheet['regions']) - 1 and j_temp !=[]:
- for index, jj in enumerate(j_temp):
- num_score_m = round(float(Score_last[i]['score'] / count_choice_m),1)
- answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
- break
- elif Score_last[i]['label'] == 'cloze':
- count_cloze_s = 0
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'cloze_s':
- if Score_last[i]['number_score'] != -1:
- answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
- else:
- count_cloze_s = count_cloze_s + 1
- jj_temp.append(j)
- if j == len(answer_sheet['regions']) - 1 and jj_temp !=[] and Score_last[i]['score']!=-1:
- for index, jj in enumerate(jj_temp):
- num_score_m = round(float(Score_last[i]['score'] / count_cloze_s),1)
- answer_sheet['regions'][jj]['default_points'] = num_score_m
- break
- elif num_choice > 1 or num_cloze >1:
- for i in range(len(Score_last)):
- if Score_last[i]['label'] == 'choice':
- count_choice_m = 0
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'choice_m':
- xmin_dis = answer_sheet['regions'][j]['bounding_box']['xmin'] - \
- Score_last[i]['model_box'][0]
- ymin_dis = answer_sheet['regions'][j]['bounding_box']['ymin'] - \
- Score_last[i]['model_box'][1]
- xmax_dis = answer_sheet['regions'][j]['bounding_box']['xmax'] - \
- Score_last[i]['model_box'][2]
- ymax_dis = answer_sheet['regions'][j]['bounding_box']['ymax'] - \
- Score_last[i]['model_box'][3]
- if xmin_dis > -30 and ymin_dis > -30 and xmax_dis < 30 and ymax_dis < 30:
- if Score_last[i]['number_score'] != -1 and 'number' in answer_sheet['regions'][j].keys():
- answer_sheet['regions'][j]['default_points'] = len(answer_sheet['regions'][j]['number']) * [float(Score_last[i]['number_score'])]
- elif Score_last[i]['number_score'] == -1 and Score_last[i]['score'] != -1 and 'number' in answer_sheet['regions'][j].keys():
- count_choice_m = count_choice_m + len(answer_sheet['regions'][j]['number'])
- j_temp.append(j)
- if j == len(answer_sheet['regions']) - 1 and j_temp !=[]:
- for index ,jj in enumerate(j_temp):
- num_score_m = round(float(Score_last[i]['score'] / count_choice_m),1)
- answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
- break
- elif Score_last[i]['label'] == 'cloze':
- count_cloze_s = 0
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'cloze_s':
- xmin_dis = answer_sheet['regions'][j]['bounding_box']['xmin'] - \
- Score_last[i]['model_box'][0]
- ymin_dis = answer_sheet['regions'][j]['bounding_box']['ymin'] - \
- Score_last[i]['model_box'][1]
- xmax_dis = answer_sheet['regions'][j]['bounding_box']['xmax'] - \
- Score_last[i]['model_box'][2]
- ymax_dis = answer_sheet['regions'][j]['bounding_box']['ymax'] - \
- Score_last[i]['model_box'][3]
- if xmin_dis > -30 and ymin_dis > -30 and xmax_dis < 30 and ymax_dis < 30:
- if Score_last[i]['number_score'] != -1 :
- answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
- elif Score_last[i]['number_score'] == -1 and Score_last[i]['score'] != -1 :
- count_cloze_s = count_cloze_s + 1
- jj_temp.append(j)
- if j == len(answer_sheet['regions']) - 1 and jj_temp !=[]:
- for index ,jj in enumerate(jj_temp):
- num_score_m = round(float(Score_last[i]['score'] / count_cloze_s),1)
- answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
- break
- if Score_last != []:
- for i in range(len(answer_sheet['regions'])):
- for j in range(len(Score_last)):
- if (Score_last[j]['model_box'][0] == answer_sheet['regions'][i]['bounding_box']['xmin']
- and Score_last[j]['model_box'][1] == answer_sheet['regions'][i]['bounding_box']['ymin']
- and Score_last[j]['model_box'][2] == answer_sheet['regions'][i]['bounding_box']['xmax']
- and Score_last[j]['model_box'][3] == answer_sheet['regions'][i]['bounding_box']['ymax']):
- if Score_last[j]['number'] != -1:
- answer_sheet['regions'][i]['number'] = Score_last[j]['number'] # 题号
- if Score_last[j]['score'] != -1:
- # score = Score_last[j]['score']
- # try:
- # length = len(answer_sheet['regions'][i]['number'])
- # answer_sheet['regions'][i]['default_points'] = length * [score]
- # except Exception:
- # answer_sheet['regions'][i]['default_points'] = score
- answer_sheet['regions'][i]['default_points'] = Score_last[j]['score']
- if type(answer_sheet['regions'][i]['default_points']) is list and (
- answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i][
- 'class_name'] == 'solve0'):
- answer_sheet['regions'][i]['class_name'] = 'optional_solve'
- # answer_sheet['regions'][i]['number_score'] = Score_last[j]['number_score'] # 小题分数
- # answer_sheet['regions'][i]['counts'] = Score_last[j]['counts'] # 小题个数
- return answer_sheet
|