12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081 |
- # -*- coding: utf-8 -*-
- # @Time : 2020/6/15 0015 10:10
- # @Author : LF
- # @FileName: sheet_point_total.py
- # @Software: PyCharm
- import requests
- import base64
- from urllib import parse, request
- import cv2
- import re
- from threading import Thread
- import copy
- from collections import OrderedDict
- from PIL import Image
- from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format
- from segment.sheet_resolve.analysis.sheet.ocr_key_words import key_words
- try:
- import tr.tr as tr
- except Exception:
- pass
- OCR_ACCURACY = 'accurate'  # value passed as ``ocr_accuracy`` to get_ocr_text_and_coordinate_in_google_format
- # def ocr_login():
- # grant_type = 'client_credentials'
- # client_id = OCR_CLIENT_ID
- # client_secret = OCR_CLIENT_SECRET
- #
- # textmod = {'grant_type': grant_type, 'client_id': client_id, 'client_secret': client_secret}
- # textmod = parse.urlencode(textmod)
- #
- # # 输出内容:user=admin&password=admin
- # header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}
- # url = 'https://aip.baidubce.com/oauth/2.0/token'
- # req = request.Request(url='{}{}{}'.format(url, '?', textmod), headers=header_dict)
- # res = request.urlopen(req).read()
- # token = eval(res.decode(encoding='utf-8'))['access_token']
- # return token
- # def opecv2base64(img):
- # image = cv2.imencode('.jpg', img)[1]
- # base64_data = str(base64.b64encode(image))[2:-1]
- # return base64_data
- # def get_ocr_raw_result(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
- # textmod = {'access_token': access_token}
- # textmod = parse.urlencode(textmod)
- # url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
- # url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
- #
- # headers = {'Content-Type': 'application/x-www-form-urlencoded'}
- #
- # image_type = 'base64'
- # group_id = 'group001'
- # user_id = 'usr001'
- #
- # image = opecv2base64(img)
- #
- # data = {
- # 'image_type': image_type,
- # 'group_id': group_id,
- # 'user_id': user_id,
- # 'image': image,
- # 'detect_direction': 'true',
- # 'recognize_granularity': 'small',
- # 'language_type': language_type,
- # # 'vertexes_location': 'true',
- # # 'probability': 'true'
- # }
- #
- # resp = requests.post(url, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # if 'internal error' in resp.get('error_msg'):
- # resp = requests.post(url_general, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- # else:
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- #
- # return resp
- # def get_ocr_text_and_coordinate(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
- # textmod = {'access_token': access_token}
- # textmod = parse.urlencode(textmod)
- # url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
- # url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
- #
- # headers = {'Content-Type': 'application/x-www-form-urlencoded'}
- #
- # image_type = 'base64'
- # group_id = 'group001'
- # user_id = 'usr001'
- #
- # image = opecv2base64(img)
- #
- # data = {
- # 'image_type': image_type,
- # 'group_id': group_id,
- # 'user_id': user_id,
- # 'image': image,
- # 'detect_direction': 'true',
- # 'recognize_granularity': 'small',
- # 'language_type': language_type,
- # # 'vertexes_location': 'true',
- # # 'probability': 'true'
- # }
- #
- # resp = requests.post(url, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # if 'internal error' in resp.get('error_msg'):
- # resp = requests.post(url_general, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- # else:
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- #
- # words_result = resp.get('words_result')
- # return words_result
- # def get_ocr_text_and_coordinate_in_google_format(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
- # textmod = {'access_token': access_token}
- # textmod = parse.urlencode(textmod)
- # url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
- # url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
- #
- # headers = {'Content-Type': 'application/x-www-form-urlencoded'}
- #
- # image_type = 'base64'
- # group_id = 'group001'
- # user_id = 'usr001'
- #
- # image = opecv2base64(img)
- #
- # data = {
- # 'image_type': image_type,
- # 'group_id': group_id,
- # 'user_id': user_id,
- # 'image': image,
- # 'detect_direction': 'true',
- # 'recognize_granularity': 'small',
- # 'language_type': language_type,
- # # 'vertexes_location': 'true',
- # # 'probability': 'true'
- # }
- #
- # resp = requests.post(url, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # if 'internal error' in resp.get('error_msg'):
- # resp = requests.post(url_general, data=data, headers=headers).json()
- # if resp.get('error_msg'):
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- # else:
- # raise Exception("ocr {}!".format(resp.get('error_msg')))
- #
- # words_result = resp.get('words_result')
- # dict_list = [item2.get('location') for item in words_result for item2 in item['chars']]
- # char_list = [item2.get('char') for item in words_result for item2 in item['chars']]
- # words = [item.get('words') for item in words_result]
- # matrix = []
- # for adict in dict_list:
- # xmin = adict['left']
- # ymin = adict['top']
- # xmax = adict['width'] + adict['left']
- # ymax = adict['top'] + adict['height']
- # item0 = (xmin, ymin, xmax, ymax)
- # matrix.append(item0)
- #
- # res_dict = {'chars': char_list, 'coordinates': matrix, 'words': words}
- # return res_dict
# Starting distance for every category; a box must be strictly closer than
# this to be considered at all.
_UNMATCHED_DISTANCE = 100000000


def _nearest_box(type_score_box, boxes):
    """Return ``(distance, box)`` for the box nearest to ``type_score_box``.

    Distance is the Manhattan distance between the top-left corners
    (indices 0 and 1) of the two boxes.  ``(_UNMATCHED_DISTANCE, None)`` is
    returned when ``boxes`` is empty or no box is closer than the sentinel.
    """
    best_distance, best_box = _UNMATCHED_DISTANCE, None
    if len(boxes) == 0:
        return best_distance, best_box
    corner = list(type_score_box)
    left, top = corner[0], corner[1]
    for box in boxes:
        distance = abs(left - box[0]) + abs(top - box[1])
        # Strict comparison: on equal distances the earlier box is kept.
        if distance < best_distance:
            best_distance, best_box = distance, box
    return best_distance, best_box


def model_type_score(all_type_score_one, choice_box, cloze_box, solve_box, composition_box):
    """Associate one type_score box with the nearest question-block box.

    For each category the block whose top-left corner is closest (Manhattan
    distance) to the top-left corner of the type_score box is located; the
    category with the strictly smallest distance wins.

    :param all_type_score_one: coordinates of a single type_score box;
        indices 0 and 1 are read as its top-left corner
    :param choice_box: list of choice-question boxes from the model
    :param cloze_box: list of cloze (fill-in) boxes from the model
    :param solve_box: list of solve (free-response) boxes from the model
    :param composition_box: list of composition boxes from the model
    :return: a dict with the winning distance, ``'bounding_box'``,
        ``'label'`` and ``'type_box'``; ``-1`` when the winner is farther
        than its category limit (600 for choice/cloze, 400 for
        solve/composition); ``0`` when no category is strictly closest
        (all lists empty, or a tie for the smallest distance)
    """
    # (label, key holding the distance in the result, boxes, distance limit)
    categories = (
        ('choice', 'min_choice', choice_box, 600),
        ('cloze', 'min_cloze', cloze_box, 600),
        ('solve', 'min_solve', solve_box, 400),
        # NOTE(review): the composition record stores its distance under
        # 'min_cloze' (looks like a copy-paste slip); the key is kept
        # unchanged so existing consumers of the result are not affected.
        ('composition', 'min_cloze', composition_box, 400),
    )

    nearest = [_nearest_box(all_type_score_one, boxes)
               for _, _, boxes, _ in categories]
    distances = [distance for distance, _ in nearest]

    shortest = min(distances)
    # A category only wins when it is strictly closer than every other one.
    if distances.count(shortest) != 1:
        return 0

    winner = distances.index(shortest)
    label, distance_key, _, limit = categories[winner]
    if shortest >= limit:
        return -1

    return {distance_key: shortest,
            'bounding_box': nearest[winner][1],
            'label': label,
            'type_box': all_type_score_one}
# Search window around each block category, in priority order:
# (label, left margin, right margin, top margin, bottom inset).
# A type_score box matches a block when its x edges lie in
# [xmin - left, xmax + right) and its y edges lie in
# [ymin - top, ymax - bottom_inset) of that block.
_MODULE_WINDOWS = (
    ('choice', 100, 50, 150, 50),
    ('cloze', 100, 50, 100, 50),
    ('solve', 50, 50, 50, 0),
    ('composition', 100, 50, 200, 50),
)


def _type_score_in_window(type_score_box, box, left, right, top, bottom_inset):
    """Return True when all four edges of ``type_score_box`` lie inside the
    search window built around ``box`` (lower bounds inclusive, upper bounds
    exclusive)."""
    corners = list(type_score_box)
    x_low, x_high = box[0] - left, box[2] + right
    y_low, y_high = box[1] - top, box[3] - bottom_inset
    return (x_low <= corners[0] < x_high
            and x_low <= corners[2] < x_high
            and y_low <= corners[1] < y_high
            and y_low <= corners[3] < y_high)


def module_type_score(all_type_score_one, choice_box, cloze_box, solve_box, composition_box):
    """Find the question block that a type_score box belongs to.

    Categories are tried in the order choice, cloze, solve, composition and,
    inside a category, boxes are tried in list order; the first block whose
    search window contains the type_score box is returned.

    The original test was written as ``(xmin and xmax) in range(...)``, which
    Python evaluates as ``xmax in range(...)`` (or ``0 in range(...)`` when
    ``xmin`` is 0), so the left/top edges were never checked.  Both edges of
    each axis are now required to be inside the window.

    :param all_type_score_one: ``(xmin, ymin, xmax, ymax)`` of one type_score box
    :param choice_box: list of choice-question boxes from the model
    :param cloze_box: list of cloze (fill-in) boxes from the model
    :param solve_box: list of solve (free-response) boxes from the model
    :param composition_box: list of composition boxes from the model
    :return: ``{'bounding_box': ..., 'label': ..., 'type_box': ...}`` for the
        matching block, or ``-1`` when no block matches
    """
    boxes_by_label = {
        'choice': choice_box,
        'cloze': cloze_box,
        'solve': solve_box,
        'composition': composition_box,
    }
    for label, left, right, top, bottom_inset in _MODULE_WINDOWS:
        for box in boxes_by_label[label]:
            if _type_score_in_window(all_type_score_one, box,
                                     left, right, top, bottom_inset):
                return {'bounding_box': box,
                        'label': label,
                        'type_box': all_type_score_one}
    return -1
def ocr_key_words(rect, type_score_dict):
    """Attach the OCR characters that fall inside a type_score box to its record.

    :param rect: OCR result of the form
        ``{'chars': [...], 'coordinates': [(xmin, ymin, xmax, ymax), ...], 'words': [...]}``;
        ``coordinates[i]`` is read as the box of ``chars[i]``
    :param type_score_dict: type_score record holding a ``'type_box'``
        ``(xmin, ymin, xmax, ymax)``; it is modified in place
    :return: the same ``type_score_dict`` object with a ``'words'`` list of
        the characters whose box lies within the type_box, allowing less
        than 30 units of overshoot on every side
    """
    characters = rect['chars']
    frame = type_score_dict['type_box']
    left, top, right, bottom = frame[0], frame[1], frame[2], frame[3]

    def _within_frame(char_box):
        # Each edge may exceed the type_box by strictly less than 30.
        return (char_box[0] - left > -30
                and char_box[1] - top > -30
                and char_box[2] - right < 30
                and char_box[3] - bottom < 30)

    type_score_dict['words'] = [
        character
        for position, character in enumerate(characters)
        if _within_frame(rect['coordinates'][position])
    ]
    return type_score_dict
def big_block_score(img0, xmins_b, ymins_b, xmaxs_b, ymaxs_b, max_lines=5):
    """OCR a region of the sheet and parse a score description from its first lines.

    The region ``img0[ymins_b:ymaxs_b, xmins_b:xmaxs_b]`` is sent to the OCR
    helper, the returned coordinates are shifted back into full-image
    coordinates, and the first ``max_lines`` entries of ``'words'`` are fed
    to ``key_words`` one at a time until one yields a result whose
    ``'Score_structure'`` is not ``-1``.

    :param img0: image array; sliced as ``img0[rows, cols]``
    :param xmins_b: left edge of the region
    :param ymins_b: top edge of the region
    :param xmaxs_b: right edge of the region
    :param ymaxs_b: bottom edge of the region
    :param max_lines: maximum number of OCR lines to try (default 5, the
        number the original implementation hard-coded)
    :return: the ``key_words`` result (with over-large scores reduced, and
        ``'type_box'`` set on the first ``'Score_structure'`` entry when the
        item-score branch is taken), or ``-1`` when no score was recognised
    """
    ocr = get_ocr_text_and_coordinate_in_google_format(
        img0[ymins_b:ymaxs_b, xmins_b:xmaxs_b],
        ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG')

    # OCR coordinates are relative to the crop; move them back into the
    # coordinate system of the full image.
    ocr['coordinates'] = [
        (box[0] + xmins_b, box[1] + ymins_b, box[2] + xmins_b, box[3] + ymins_b)
        for box in ocr['coordinates']
    ]

    lines = ocr['words']
    query = {}        # the same dict is reused for every key_words call
    parsed = {}
    coordinates = 0
    for index in range(min(len(lines), max_lines)):
        query['words'] = lines[index]
        # NOTE(review): coordinates[index] is paired with words[index] here;
        # confirm the OCR helper returns one coordinate per 'words' entry
        # (the commented-out legacy implementation produced one per character).
        coordinates = ocr['coordinates'][index]
        parsed = key_words(query)
        if parsed != {} and parsed['Score_structure'] != -1:
            break

    if parsed == {}:
        return -1

    volume = parsed['volume_structure']
    if volume != -1 and volume != 1:
        # A volume-level score was recognised.  Paper scores are provisionally
        # assumed to stay within 200; anything above is treated as an OCR
        # misread and reduced modulo 100.
        if int(volume[0]['volume_total_score']) > 200:
            volume[0]['volume_total_score'] = int(volume[0]['volume_total_score']) % 100
        return parsed

    items = parsed['Score_structure']
    if items != -1:
        # An item-level score was recognised; same 200-point sanity rule.
        if int(items[0]['item_total_score']) > 200:
            items[0]['item_total_score'] = int(items[0]['item_total_score']) % 100
        items[0]['type_box'] = coordinates
        return parsed

    # Nothing recognised: callers treat -1 as "no score".
    return -1
- def get_sheet_number_total(answer_sheet, res, img0):
- # img0 = cv2.imread(img_path0)
- image_src = Image.fromarray(cv2.cvtColor(img0, cv2.COLOR_BGR2RGB))
- img_w, img_h = image_src.size
- if (image_src.mode == 'RGB'):
- image_src = image_src.convert("L")
- choice_boxs = []
- choice_m_boxs = []
- cloze_boxs = []
- solve_boxs = []
- type_score_boxs = []
- select_boxs = []
- all_test = []
- num_choice = 0
- num_choice_m = 0
- num_cloze = 0
- num_slove = 0
- num_select = 0
- num_type_score = 0
- volume_last = []
- Score_last = []
- score_last_one = 0
- volume_last_one = 0
- composition_boxs = []
- num_composition = 0
- j_temp = []
- jj_temp =[]
- eles = []
- yy_max = []
- score_del = []
- key_modules_classes = ['choice', 'cloze', 'solve', 'solve0', 'composition0', 'composition', 'correction', 'type_score']
- for ele in answer_sheet["regions"]: # 从模型输出获取对应标签的边框信息
- if ele["class_name"] == 'choice':
- choice_box = ele['bounding_box']
- choice_one = (
- int(choice_box['xmin']), int(choice_box['ymin']), int(choice_box['xmax']), int(choice_box['ymax']))
- choice_boxs.append(choice_one)
- num_choice = num_choice + 1
- elif ele["class_name"] == 'choice_m':
- choice_m_boxs.append(ele['bounding_box'])
- num_choice_m = num_choice_m + 1
- elif ele["class_name"] == 'cloze':
- cloze_box = ele['bounding_box']
- cloze_one = (int(cloze_box['xmin']), int(cloze_box['ymin']), int(cloze_box['xmax']), int(cloze_box['ymax']))
- cloze_boxs.append(cloze_one)
- num_cloze = num_cloze + 1
- elif ele["class_name"] == 'composition0' or ele["class_name"] == 'composition' or ele[
- "class_name"] == 'correction':
- composition_box = ele['bounding_box']
- composition_one = (int(composition_box['xmin']), int(composition_box['ymin']), int(composition_box['xmax']),
- int(composition_box['ymax']))
- composition_boxs.append(composition_one)
- num_composition = num_composition + 1
- elif ele["class_name"] == 'solve' or ele["class_name"] == 'solve0':
- solve_box = ele['bounding_box']
- solve_one = (int(solve_box['xmin']), int(solve_box['ymin']), int(solve_box['xmax']), int(solve_box['ymax']))
- solve_boxs.append(solve_one)
- num_slove = num_slove + 1
- elif ele["class_name"] == 'select_s' or ele["class_name"] == 'select_b':
- select_box = ele['bounding_box']
- select_one = (
- int(select_box['xmin']), int(select_box['ymin']), int(select_box['xmax']), int(select_box['ymax']))
- select_boxs.append(select_one)
- num_select = num_select + 1
- elif ele["class_name"] == 'type_score':
- type_score_box = ele['bounding_box']
- type_score_one = (int(type_score_box['xmin']), int(type_score_box['ymin']), int(type_score_box['xmax']),
- int(type_score_box['ymax']))
- type_score_boxs.append(type_score_one)
- num_type_score = num_type_score + 1
- '''解析type_score与对应分割模块的分数'''
- for i in range(len(type_score_boxs)):
- test_result1 = module_type_score(type_score_boxs[i], choice_boxs, cloze_boxs, solve_boxs, composition_boxs)
- if test_result1 != -1 and test_result1 != 0:
- xminss = (type_score_boxs[i][0] - 5) if type_score_boxs[i][0] - 5 > 0 else type_score_boxs[i][0]
- yminss = (type_score_boxs[i][1] - 5) if type_score_boxs[i][1] - 5 > 0 else type_score_boxs[i][1]
- if type_score_boxs[i][2] + 5 < img_w:
- xmaxss = type_score_boxs[i][2] + 5
- else:
- xmaxss = type_score_boxs[i][2]
- if type_score_boxs[i][3] + 5 < img_h:
- ymaxss = type_score_boxs[i][3] + 5
- else:
- ymaxss = type_score_boxs[i][3]
- test_result1['words'] = str()
- try: # tr_OCR
- image_src_type_score = image_src.crop((xminss, yminss, xmaxss, ymaxss))
- w_small = xmaxss - xminss
- h_small = ymaxss - yminss
- if h_small < 100 and w_small > 100:
- image_src_type_score = Image.new(image_src.mode, (w_small, 100), (255))
- image_src_type_score.paste(image_src, [0, 0, w_small, h_small])
- elif w_small < 100 and h_small > 100:
- image_src_type_score = Image.new(image_src.mode, (100, h_small), (255))
- image_src_type_score.paste(image_src, [0, 0, w_small, h_small])
- elif w_small < 100 and h_small < 100:
- image_src_type_score = Image.new(image_src.mode, (100, 100), (255))
- image_src_type_score.paste(image_src, [0, 0, w_small, h_small])
- type_score_dict_ocr = tr.run(image_src_type_score)
- print('tr_OCR')
- for t in range(len(type_score_dict_ocr)):
- test_result1['words'] = test_result1['words'] + type_score_dict_ocr[t][1]
- except Exception as e: # baidu_OCR
- print('baidu_OCR')
- type_score_dict_ocr = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
- for t in range(len(type_score_dict_ocr['words'])):
- test_result1['words'] = test_result1['words'] + type_score_dict_ocr['words'][t]
- test = key_words(test_result1)
- if test == {}:
- ### 添加返回值OCR结果
- add_ocr = {}
- add_ocr['model_box'] = test_result1['bounding_box']
- add_ocr['label'] = test_result1['label']
- add_ocr['number'] = -1
- add_ocr['score'] = -1
- add_ocr['number_score'] = -1
- add_ocr['counts'] = -1
- add_ocr['type_score_box'] = type_score_boxs[i]
- add_ocr['ocr'] = test_result1['words']
- Score_last.append(add_ocr)
- elif test['volume_structure'] == -1 and test['Score_structure'] == -1:
- ### 添加返回值OCR结果
- add_ocr = {}
- add_ocr['model_box'] = test_result1['bounding_box']
- add_ocr['label'] = test_result1['label']
- add_ocr['number'] = -1
- add_ocr['score'] = -1
- add_ocr['number_score'] = -1
- add_ocr['counts'] = -1
- add_ocr['type_score_box'] = type_score_boxs[i]
- add_ocr['ocr'] = test_result1['words']
- Score_last.append(add_ocr)
- elif test != {}:
- if test['volume_structure'] != -1 and test['volume_structure'] != 1 and int(
- test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- test['volume_structure'][0]['volume_total_score'] = int(
- test['volume_structure'][0]['volume_total_score']) % 100
- elif (test['volume_structure'] == -1 or test['volume_structure'] == 1) and test['Score_structure'] != -1 and int(
- test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
- test['Score_structure'][0]['item_total_score'] = int(
- test['Score_structure'][0]['item_total_score']) % 100
- all_test.append(test)
- else:
- ### 添加返回值OCR结果
- add_ocr = {}
- add_ocr['model_box'] = test_result1['bounding_box']
- add_ocr['label'] = test_result1['label']
- add_ocr['number'] = -1
- add_ocr['score'] = -1
- add_ocr['number_score'] = -1
- add_ocr['counts'] = -1
- add_ocr['type_score_box'] = type_score_boxs[i]
- add_ocr['ocr'] = test_result1['words']
- Score_last.append(add_ocr)
- ''' 解析模型分割模块没有对应的type_score时的分数'''
- for jjjj in range(len(all_test)):
- if all_test[jjjj]['Score_structure'] != -1:
- label_1 = all_test[jjjj]['Score_structure'][0]['label']
- num_1 = all_test[jjjj]['Score_structure'][0]['item_N']
- if label_1 == 'choice' :
- if choice_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
- choice_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
- elif label_1 == 'cloze':
- if cloze_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
- cloze_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
- elif label_1 == 'solve' and num_1 != 10000 and num_1 != -1:
- if solve_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
- solve_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
- elif label_1 == 'composition':
- if solve_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
- solve_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
- if choice_boxs != []: # 9月16号修改
- for ij in range(len(choice_boxs)):
- yminss = choice_boxs[ij][1] - 150 if choice_boxs[ij][1] - 150 > 0 else choice_boxs[ij][1]
- xminss = choice_boxs[ij][0] - 100 if choice_boxs[ij][0] - 100 > 0 else choice_boxs[ij][0]
- try:
- new_test = big_block_score(img0, xminss, yminss, choice_boxs[ij][2], choice_boxs[ij][3])
- if new_test != -1:
- new_test['Score_structure'][0]['bounding_box'] = choice_boxs[ij]
- new_test['Score_structure'][0]['label'] = 'choice'
- all_test.append(new_test)
- except Exception:
- print('choice_boxs_score_NULL_or_error')
- if cloze_boxs != []:
- for ij in range(len(cloze_boxs)):
- yminss = cloze_boxs[ij][1] - 100 if cloze_boxs[ij][1] - 100 > 0 else cloze_boxs[ij][1]
- xminss = cloze_boxs[ij][0] - 100 if cloze_boxs[ij][0] - 100 > 0 else cloze_boxs[ij][0]
- try:
- new_test = big_block_score(img0, xminss, yminss, cloze_boxs[ij][2], cloze_boxs[ij][3])
- if new_test != -1:
- new_test['Score_structure'][0]['bounding_box'] = cloze_boxs[ij]
- new_test['Score_structure'][0]['label'] = 'cloze'
- all_test.append(new_test)
- except Exception:
- print('cloze_boxs_score_NULL_or_error')
- if solve_boxs != []:
- for ij in range(len(solve_boxs)):
- yminss = solve_boxs[ij][1] - 50 if solve_boxs[ij][1] - 50 > 0 else solve_boxs[ij][1]
- xminss = solve_boxs[ij][0] - 50 if solve_boxs[ij][0] - 50 > 0 else solve_boxs[ij][0]
- try:
- new_test = big_block_score(img0, xminss, yminss, solve_boxs[ij][2], solve_boxs[ij][3])
- if new_test != -1:
- new_test['Score_structure'][0]['bounding_box'] = solve_boxs[ij]
- new_test['Score_structure'][0]['label'] = 'solve'
- all_test.append(new_test)
- except Exception:
- print('solve_boxs_score_NULL_or_error')
- if composition_boxs != []:
- for ij in range(len(composition_boxs)):
- yminss = composition_boxs[ij][1] - 240 if composition_boxs[ij][1] - 240 > 0 else composition_boxs[ij][1]
- xminss = composition_boxs[ij][0] - 100 if composition_boxs[ij][0] - 100 > 0 else composition_boxs[ij][0]
- try:
- new_test = big_block_score(img0, xminss, yminss, composition_boxs[ij][2], composition_boxs[ij][3])
- if new_test != -1:
- new_test['Score_structure'][0]['bounding_box'] = composition_boxs[ij]
- new_test['Score_structure'][0]['label'] = 'composition'
- all_test.append(new_test)
- except Exception:
- print('composition_boxs_score_NULL_or_error')
- for aaa in range(len(all_test)):
- if all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] == -1:
- score_last_one = {'model_box': dict(all_test[aaa])['Score_structure'][0]['bounding_box'],
- 'label': dict(all_test[aaa])['Score_structure'][0]['label'],
- 'number': dict(all_test[aaa])['Score_structure'][0]['item_N'],
- 'score': dict(all_test[aaa])['Score_structure'][0]['item_total_score'],
- 'number_score': dict(all_test[aaa])['Score_structure'][0]['item_score'],
- 'counts': dict(all_test[aaa])['Score_structure'][0]['item_count'],
- 'type_score_box': dict(all_test[aaa])['Score_structure'][0]['type_box']}
- Score_last.append(score_last_one)
- continue
- elif all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] == 1:
- score_last_one = {'model_box': dict(all_test[aaa])['Score_structure'][0]['bounding_box'],
- 'label': dict(all_test[aaa])['Score_structure'][0]['label'],
- 'number': 10000,
- 'score': dict(all_test[aaa])['Score_structure'][0]['item_total_score'],
- 'number_score': dict(all_test[aaa])['Score_structure'][0]['item_score'],
- 'counts': dict(all_test[aaa])['Score_structure'][0]['item_count'],
- 'type_score_box': dict(all_test[aaa])['Score_structure'][0]['type_box']}
- Score_last.append(score_last_one)
- continue
- elif all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] != -1:
- score_last_one = {'model_box': dict(all_test[aaa])['Score_structure'][0]['bounding_box'],
- 'label': dict(all_test[aaa])['Score_structure'][0]['label'],
- 'number': -1,
- 'score': dict(all_test[aaa])['Score_structure'][0]['volume_total_score'],
- 'number_score': dict(all_test[aaa])['Score_structure'][0]['volume_score'],
- 'counts': dict(all_test[aaa])['Score_structure'][0]['volume_count'],
- 'type_score_box': dict(all_test[aaa])['Score_structure'][0]['type_box']}
- Score_last.append(score_last_one)
- volume_last_one = {'volume_N': dict(all_test[aaa])['volume_structure'][0]['volume_N'],
- 'volume_total_score': dict(all_test[aaa])['volume_structure'][0]['volume_total_score'],
- 'volume_count': dict(all_test[aaa])['volume_structure'][0]['volume_count'],
- 'volume_score': dict(all_test[aaa])['volume_structure'][0]['volume_score'],
- 'keyword_type': dict(all_test[aaa])['volume_structure'][0]['keyword_type']}
- volume_last.append(volume_last_one)
- continue
- elif all_test[aaa]['volume_structure'] != -1:
- volume_last_one = {'volume_N': dict(all_test[aaa])['volume_structure'][0]['volume_N'],
- 'volume_total_score': dict(all_test[aaa])['volume_structure'][0]['volume_total_score'],
- 'volume_count': dict(all_test[aaa])['volume_structure'][0]['volume_count'],
- 'volume_score': dict(all_test[aaa])['volume_structure'][0]['volume_score'],
- 'keyword_type': dict(all_test[aaa])['volume_structure'][0]['keyword_type']}
- volume_last.append(volume_last_one)
- continue
- '''去重一个边框可能对应多个type_score的情况,英语单独解析'''
- Score_last_Remove_Duplicates = OrderedDict()
- for item in Score_last:
- Score_last_Remove_Duplicates.setdefault(item['model_box'], {**item, })
- Score_last_Remove_Duplicates = list(Score_last_Remove_Duplicates.values())
- if len(Score_last_Remove_Duplicates) != len(Score_last):
- len_Score_last = len(Score_last)
- Score_last = sorted(Score_last, key=lambda x: (x['model_box'][0] + x['model_box'][1] + x['type_score_box'][0] + x['type_score_box'][1]),reverse=True)
- if answer_sheet['subject'] == 'english': # 暂定英语只去除重复分数,不修改主观题边框
- for i in range(len_Score_last-1, -1, -1):
- if Score_last[i]['label'] == 'cloze':
- model_box2 = []; score2 = []; num2 = []; type_score2 = []; temp22 = []
- if Score_last[i]['model_box'] in model_box2:
- index21 = model_box2.index(Score_last[i]['model_box'])
- index2 = temp22[index21]
- score = Score_last[i]['score']
- if score < score2[index21] and score2[index21] < 20: # 去重,type_score多余的包含小题分数
- Score_last[i] = -1
- elif score < score2[index21] and score2[index21] > 20: # 去重,type_score多余的包含分卷分数
- Score_last[index2] = -1
- temp22[index21] = i
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- elif score > score2[index21] and score < 20: # 去重,type_score在不大于20分的情况下,暂定保留更大的分数
- Score_last[index2] = -1
- temp22[index21] = i
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- elif score > score2[index21] and score > 20: # 去重,type_score去除大于20分的重复分数
- Score_last[i] = -1
- else:
- Score_last[i] = -1
- else:
- model_box2.append(Score_last[i]['model_box'])
- score2.append(Score_last[i]['score'])
- num2.append(Score_last[i]['number'])
- type_score2.append(Score_last[i]['type_score_box'])
- temp22.append(i)
- else:
- model_box2 = []; score2 = []; num2 = []; type_score2 = []; temp22 = []
- if Score_last[i]['model_box'] in model_box2:
- index21 = model_box2.index(Score_last[i]['model_box'])
- index2 = temp22[index21]
- score = Score_last[i]['score']
- if score < score2[index21]: # 去重,暂定保留更大的分数
- Score_last[i] = -1
- elif score > score2[index21]: # 去重,暂定保留更大的分数
- Score_last[index2] = -1
- temp22[index21] = i
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- else:
- Score_last[i] = -1
- else:
- model_box2.append(Score_last[i]['model_box'])
- score2.append(Score_last[i]['score'])
- num2.append(Score_last[i]['number'])
- type_score2.append(Score_last[i]['type_score_box'])
- temp22.append(i)
- else: # 除去英语外的主观题边框修正
- model_box2 = []; score2 = []; num2 = []; type_score2 = []; temp22 = []
- for i in range(len_Score_last - 1, -1, -1): # 根据type_score切分
- if Score_last[i]['label'] == 'solve' or Score_last[i]['label'] == 'solve0':
- if Score_last[i]['model_box'] in model_box2 and type(Score_last[i]['number']) is not list:
- index21 = model_box2.index(Score_last[i]['model_box'])
- index2 = temp22[index21]
- score = Score_last[i]['score']
- num = Score_last[i]['number']
- type_score = Score_last[i]['type_score_box']
- del_box = 0
- if num == -1: # 去除同一主观题内对应的多个边框内的小项分数
- if 'ocr' in Score_last[index2]:
- Score_last[index2] = -1
- temp22[index21] = i
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- elif 'ocr' in Score_last[i]:
- Score_last[i] = -1
- else:
- Score_last[i] = -1
- elif num == 10000:
- Score_last[i] = -1
- elif num2[index21] == 10000 or num2[index21] == -1 and type_score[1] - type_score2[index21][1] > 100: # 同一主观题包含大题分数和小项分数,且大题分数位于边框中间,切分为两个主观题
- yy_max.append(Score_last[i]['model_box'][3])
- del_box = copy.deepcopy(Score_last[index2]['model_box'])
- score_del.append(del_box)
- Score_last[index2]['model_box'] = (
- Score_last[index2]['model_box'][0], Score_last[index2]['model_box'][1],
- Score_last[index2]['model_box'][2], type_score[1])
- Score_last[i]['model_box'] = (
- Score_last[i]['model_box'][0], type_score[1], Score_last[i]['model_box'][2],
- Score_last[i]['model_box'][3])
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- temp22[index21] = i
- elif score < score2[index21] and score2[index21] > 30: # 默认有效的分数值小于30分
- Score_last[index2] = -1
- temp22[index21] = i
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- elif score < 30 and score2[index21] < 30 and type_score[1] - type_score2[index21][1] > 100: # 同一主观题包含两个大题分数,且距离 >100 的情况下切分为两个主观题
- yy_max.append(Score_last[i]['model_box'][3])
- del_box = copy.deepcopy(Score_last[index2]['model_box'])
- score_del.append(del_box)
- Score_last[index2]['model_box'] = (
- Score_last[index2]['model_box'][0], Score_last[index2]['model_box'][1],
- Score_last[index2]['model_box'][2], type_score[1])
- Score_last[i]['model_box'] = (
- Score_last[i]['model_box'][0], type_score[1], Score_last[i]['model_box'][2],
- Score_last[i]['model_box'][3])
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- temp22[index21] = i
- elif score > 30 and score2[index21] > 30: # 默认有效的分数值小于30分
- temp_del = i if score > score2[index21] else index2
- Score_last[temp_del] = -1
- if temp_del == index2:
- temp22[index21] = i
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- elif score < 30 and score2[index21] < 30: # 多个分数小于30,且距离 <100 的情况下,删除
- temp_del = i if score < score2[index21] else index2
- Score_last[temp_del] = -1
- if temp_del == index2:
- temp22[index21] = i
- score2[index21] = Score_last[i]['score']
- num2[index21] = Score_last[i]['number']
- type_score2[index21] = Score_last[i]['type_score_box']
- else:
- Score_last[i] = -1
- else:
- model_box2.append(Score_last[i]['model_box'])
- score2.append(Score_last[i]['score'])
- num2.append(Score_last[i]['number'])
- type_score2.append(Score_last[i]['type_score_box'])
- temp22.append(i)
- for del_i in range(len_Score_last - 1, -1, -1):
- if Score_last[del_i] == -1:
- del Score_last[del_i]
- if answer_sheet['subject'] != 'english':
- Score_last = sorted(Score_last, key=lambda x: (x['model_box'][0], x['model_box'][0] + x['model_box'][1]),reverse=True)
- temp33 = 0
- len3 = len(Score_last)
- for i in range(len3 - 1, -1, -1): # 根据type_score合并
- if Score_last[i]['label'] == 'solve' or Score_last[i]['label'] == 'solve0':
- num = Score_last[i]['number']
- type_score = Score_last[i]['type_score_box']
- if type(Score_last[i]['number']) is not list:
- if num == 10000 or num == -1 or num < 4 and (Score_last[i]['label'] == 'solve' or Score_last[i]['label'] == 'solve0'): # 小题题号
- temp3 = 100000
- for indexi, model_box31 in enumerate(Score_last):
- if indexi != i and ((type_score[0] and type_score[2]) in range(model_box31['model_box'][0] - 30,model_box31['model_box'][2])) and ((type_score[1] and type_score[3]) in range(model_box31['model_box'][1],model_box31['model_box'][3]+30)): # 根据小题边框的xmin与主观题xmin的差值判断为一栏,以及纵坐标差值判断条件
- del_box = copy.deepcopy(Score_last[i]['model_box'])
- score_del.append(del_box)
- score_del.append(Score_last[indexi]['model_box'])
- Score_last[indexi]['model_box'] = (
- Score_last[indexi]['model_box'][0], Score_last[indexi]['model_box'][1],
- Score_last[indexi]['model_box'][2], Score_last[i]['model_box'][3])
- del Score_last[i]
- break
- temp31 = int(type_score[1] - model_box31['model_box'][3]) # 计算小题边框的ymin与主观题边框ymax的距离
- if (type_score[0] in range(model_box31['model_box'][0] - 30,model_box31['model_box'][2])) and temp31 > -20 and temp31 < temp3: # 根据小题边框的xmin与主观题xmin的差值判断为一栏,以及纵坐标差值判断条件
- temp3 = temp31
- temp33 = indexi
- yy_max.append(Score_last[i]['model_box'][3])
- if indexi == len(Score_last) - 1:
- del_box = copy.deepcopy(Score_last[temp33]['model_box'])
- score_del.append(del_box)
- score_del.append(Score_last[i]['model_box'])
- Score_last[temp33]['model_box'] = (
- Score_last[temp33]['model_box'][0], Score_last[temp33]['model_box'][1],
- Score_last[temp33]['model_box'][2], Score_last[i]['model_box'][3])
- del Score_last[i]
- break
- elif indexi == len(Score_last) - 1 and temp3 != 100000:
- del_box = copy.deepcopy(Score_last[temp33]['model_box'])
- score_del.append(del_box)
- score_del.append(Score_last[i]['model_box'])
- Score_last[temp33]['model_box'] = (
- Score_last[temp33]['model_box'][0], Score_last[temp33]['model_box'][1],
- Score_last[temp33]['model_box'][2], Score_last[i]['model_box'][3])
- del Score_last[i]
- break
- if Score_last != []:
- for i in range(len(Score_last)): # 多选题题号和分数逐个显示
- if type(Score_last[i]['number']) is list:
- C_q_s = len(Score_last[i]['number']) * [Score_last[i]['score']]
- Score_last[i]['score'] = C_q_s
- '''choice_m/cloze_s分数解析'''
- if num_choice == 1 or num_cloze == 1: # 对应choice_m分数
- for i in range(len(Score_last)):
- if Score_last[i]['label'] == 'choice':
- count_choice_m = 0
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'choice_m':
- if Score_last[i]['number_score'] != -1 and ('number' in answer_sheet['regions'][j].keys()):
- answer_sheet['regions'][j]['default_points'] = len(answer_sheet['regions'][j]['number']) * [float(Score_last[i]['number_score'])]
- elif Score_last[i]['number_score'] == -1 and Score_last[i]['score'] != -1 and 'number' in answer_sheet['regions'][j].keys():
- count_choice_m = count_choice_m + len(answer_sheet['regions'][j]['number'])
- j_temp.append(j)
- if j == len(answer_sheet['regions']) - 1 and j_temp !=[]:
- try:
- for index, jj in enumerate(j_temp):
- num_score_m = round(float(Score_last[i]['score'] / count_choice_m), 2)
- answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
- break
- except Exception:
- print('choice_m_error')
- elif Score_last[i]['label'] == 'cloze':
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'cloze_s':
- if Score_last[i]['number_score'] != -1:
- answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
- elif num_choice > 1 or num_cloze > 1:
- for i in range(len(Score_last)):
- if Score_last[i]['label'] == 'choice':
- count_choice_m = 0
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'choice_m':
- xmin_dis = answer_sheet['regions'][j]['bounding_box']['xmin'] - \
- Score_last[i]['model_box'][0]
- ymin_dis = answer_sheet['regions'][j]['bounding_box']['ymin'] - \
- Score_last[i]['model_box'][1]
- xmax_dis = answer_sheet['regions'][j]['bounding_box']['xmax'] - \
- Score_last[i]['model_box'][2]
- ymax_dis = answer_sheet['regions'][j]['bounding_box']['ymax'] - \
- Score_last[i]['model_box'][3]
- if xmin_dis > -30 and ymin_dis > -30 and xmax_dis < 30 and ymax_dis < 30:
- if Score_last[i]['number_score'] != -1 and 'number' in answer_sheet['regions'][j].keys():
- answer_sheet['regions'][j]['default_points'] = len(answer_sheet['regions'][j]['number']) * [float(Score_last[i]['number_score'])]
- elif Score_last[i]['number_score'] == -1 and Score_last[i]['score'] != -1 and 'number' in answer_sheet['regions'][j].keys():
- count_choice_m = count_choice_m + len(answer_sheet['regions'][j]['number'])
- j_temp.append(j)
- if j == len(answer_sheet['regions']) - 1 and j_temp !=[]:
- try:
- for index, jj in enumerate(j_temp):
- num_score_m = round(float(Score_last[i]['score'] / count_choice_m), 2)
- answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
- break
- except Exception:
- print('choice_m_error')
- elif Score_last[i]['label'] == 'cloze':
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'cloze_s':
- xmin_dis = answer_sheet['regions'][j]['bounding_box']['xmin'] - \
- Score_last[i]['model_box'][0]
- ymin_dis = answer_sheet['regions'][j]['bounding_box']['ymin'] - \
- Score_last[i]['model_box'][1]
- xmax_dis = answer_sheet['regions'][j]['bounding_box']['xmax'] - \
- Score_last[i]['model_box'][2]
- ymax_dis = answer_sheet['regions'][j]['bounding_box']['ymax'] - \
- Score_last[i]['model_box'][3]
- if xmin_dis > -30 and ymin_dis > -30 and xmax_dis < 30 and ymax_dis < 30:
- if Score_last[i]['number_score'] != -1 :
- answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
- elif choice_m_boxs !=[]:
- x_choice_m_min = 10000
- y_choice_m_min = 10000
- x_choice_m_max = 0
- y_choice_m_max = 0
- test = {}
- for index_m, choice_m_s in enumerate(choice_m_boxs):
- if int(choice_m_s['xmin']) < x_choice_m_min:
- x_choice_m_min = int(choice_m_s['xmin'])
- if int(choice_m_s['ymin']) < y_choice_m_min:
- y_choice_m_min = int(choice_m_s['ymin'])
- if int(choice_m_s['xmax']) > x_choice_m_max:
- x_choice_m_max = int(choice_m_s['xmax'])
- if int(choice_m_s['ymax']) > y_choice_m_max:
- y_choice_m_max = int(choice_m_s['ymax'])
- # 模型输出没有choice的情况下暂时假定只有一个choice
- type_score_choice_m = -1
- for index_t1, type_score_s1 in enumerate(type_score_boxs):
- if type_score_s1[0] < x_choice_m_max and type_score_s1[3] < y_choice_m_min:
- type_score_choice_m = type_score_s1
- break
- if type_score_choice_m != -1:
- test_result1 = {'min_choice_m': -1,
- 'bounding_box': choice_m_boxs,
- 'label': 'choice_m',
- 'type_box': type_score_choice_m}
- test_result1['words'] = str()
- try: # tr_OCR
- image_choice = image_src.crop((type_score_choice_m[0], type_score_choice_m[1], type_score_choice_m[2], type_score_choice_m[3]))
- res1 = tr.run(image_choice)
- print('tr_OCR')
- for t in range(len(res1)):
- test_result1['words'] = test_result1['words'] + res1[t][1]
- except Exception as e: # baidu_OCR
- print('baidu_OCR')
- res1 = get_ocr_text_and_coordinate_in_google_format(
- img0[type_score_choice_m[1]:type_score_choice_m[3], type_score_choice_m[0]:type_score_choice_m[2]], ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG')
- for t in range(len(res1['words'])):
- test_result1['words'] = test_result1['words'] + res1['words'][t]
- if test_result1['words'] != {}:
- test = key_words(test_result1)
- choice_m_score = -1
- if test == {}:
- choice_m_type_score_ocr = test_result1['words']
- elif test['volume_structure'] == -1 and test['Score_structure'] == -1:
- choice_m_type_score_ocr = test_result1['words']
- else:
- if test['volume_structure'] != -1 and test['volume_structure'][0]['volume_score'] != -1:
- choice_m_score = test['volume_structure'][0]['volume_score']
- elif test['volume_structure'] == -1 and test['Score_structure'] != -1 and test['Score_structure'][0]['item_score'] != -1:
- choice_m_score = test['Score_structure'][0]['item_score']
- if choice_m_score != -1:
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'choice_m':
- answer_sheet['regions'][j]['default_points'] = len(
- answer_sheet['regions'][j]['number']) * [float(choice_m_score)]
- elif test_result1['words'] != {}:
- for j in range(len(answer_sheet['regions'])):
- if answer_sheet['regions'][j]['class_name'] == 'choice_m':
- answer_sheet['regions'][j]['type_score_ocr'] = choice_m_type_score_ocr
- '''分数与模型对应'''
- ocr_flag = 0
- for i in range(len(answer_sheet['regions'])-1, -1, -1):
- for j in range(len(Score_last)-1, -1, -1):
- if (int(Score_last[j]['model_box'][0]) == int(answer_sheet['regions'][i]['bounding_box']['xmin']) and
- int(Score_last[j]['model_box'][1]) == int(answer_sheet['regions'][i]['bounding_box']['ymin']) and
- int(Score_last[j]['model_box'][2]) == int(answer_sheet['regions'][i]['bounding_box']['xmax']) and
- int(Score_last[j]['model_box'][3]) != int(answer_sheet['regions'][i]['bounding_box']['ymax'])) and (answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i]['class_name'] == 'solve0'):
- answer_sheet['regions'][i]['bounding_box']['ymax'] = int(Score_last[j]['model_box'][3])
- if Score_last[j]['number'] == 10000:
- answer_sheet['regions'][i]['number'] = -1 # 题号
- elif Score_last[j]['number'] != -1 and Score_last[j]['number'] != 10000:
- answer_sheet['regions'][i]['number'] = Score_last[j]['number'] # 题号
- else:
- answer_sheet['regions'][i]['number'] = -1 # 题号
- if Score_last[j]['score'] != -1:
- answer_sheet['regions'][i]['default_points'] = Score_last[j]['score']
- if type(answer_sheet['regions'][i]['default_points']) is list and (
- answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i][
- 'class_name'] == 'solve0'):
- answer_sheet['regions'][i]['class_name'] = 'optional_solve'
- elif 'default_points' not in answer_sheet['regions'][i].keys():
- answer_sheet['regions'][i]['default_points'] = -1
- del Score_last[j]
- elif (int(Score_last[j]['model_box'][0]) == int(answer_sheet['regions'][i]['bounding_box']['xmin']) and
- int(Score_last[j]['model_box'][1]) == int(answer_sheet['regions'][i]['bounding_box']['ymin']) and
- int(Score_last[j]['model_box'][2]) == int(answer_sheet['regions'][i]['bounding_box']['xmax']) and
- int(Score_last[j]['model_box'][3]) == int(answer_sheet['regions'][i]['bounding_box']['ymax'])):
- if Score_last[j]['number'] == 10000:
- answer_sheet['regions'][i]['number'] = -1 # 题号
- elif Score_last[j]['number'] != -1 and Score_last[j]['number'] != 10000:
- answer_sheet['regions'][i]['number'] = Score_last[j]['number'] # 题号
- else:
- answer_sheet['regions'][i]['number'] = -1 # 题号
- if Score_last[j]['score'] != -1:
- answer_sheet['regions'][i]['default_points'] = Score_last[j]['score']
- if type(answer_sheet['regions'][i]['default_points']) is list and (
- answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i][
- 'class_name'] == 'solve0'):
- answer_sheet['regions'][i]['class_name'] = 'optional_solve'
- ocr_flag = 1
- if 'type_score_ocr' in answer_sheet['regions'][i].keys():
- del answer_sheet['regions'][i]['type_score_ocr']
- elif 'default_points' not in answer_sheet['regions'][i].keys():
- answer_sheet['regions'][i]['default_points'] = -1
- if ocr_flag == 0 and 'ocr' in Score_last[j]: # 没有识别到分数的模块添加type_score_ocr结果
- answer_sheet['regions'][i]['type_score_ocr'] = Score_last[j]['ocr']
- del Score_last[j]
- if score_del != []: # del_model_boxs
- for j in range(len(score_del)):
- if score_del != [] and (score_del[j][0] == answer_sheet['regions'][i]['bounding_box']['xmin'] and score_del[j][1] ==
- answer_sheet['regions'][i]['bounding_box']['ymin'] and score_del[j][2] ==
- answer_sheet['regions'][i]['bounding_box']['xmax'] and score_del[j][3] ==
- answer_sheet['regions'][i]['bounding_box']['ymax']):
- del answer_sheet['regions'][i]
- for jj in range(len(Score_last)): # add_model_boxs
- answer_sheet_one = {}
- answer_sheet_one['class_name'] = Score_last[jj]['label']
- box_one = {}
- box_one['xmin'] = int(Score_last[jj]['model_box'][0])
- box_one['ymin'] = int(Score_last[jj]['model_box'][1])
- box_one['xmax'] = int(Score_last[jj]['model_box'][2])
- box_one['ymax'] = int(Score_last[jj]['model_box'][3])
- answer_sheet_one['bounding_box'] = box_one
- if Score_last[jj]['number'] == 10000:
- answer_sheet_one['number'] = -1 # 题号
- elif Score_last[jj]['number'] != -1 and Score_last[jj]['number'] != 10000:
- answer_sheet_one['number'] = Score_last[jj]['number'] # 题号
- else:
- answer_sheet_one['number'] = -1 # 题号
- if Score_last[jj]['score'] != -1:
- answer_sheet_one['default_points'] = Score_last[jj]['score']
- else:
- answer_sheet_one['default_points'] = -1
- answer_sheet['regions'].append(answer_sheet_one)
- return answer_sheet
|