|
@@ -1,8 +1,9 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
-# @Time : 2020/5/22 0022 17:02
|
|
|
+# @Time : 2020/5/28 0022 17:02
|
|
|
# @Author : LF
|
|
|
# @FileName: sheet_points_total.py
|
|
|
# @Software: PyCharm
|
|
|
+# local_baidu_OCR
|
|
|
|
|
|
import requests
|
|
|
import base64
|
|
@@ -14,10 +15,10 @@ from PIL import Image
|
|
|
from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format
|
|
|
from segment.sheet_resolve.analysis.sheet.ocr_key_words import key_words
|
|
|
|
|
|
-try:
|
|
|
- import tr
|
|
|
-except Exception:
|
|
|
- pass
|
|
|
+# try:
|
|
|
+# import tr
|
|
|
+# except Exception:
|
|
|
+# pass
|
|
|
|
|
|
# OCR accuracy setting forwarded as the `ocr_accuracy` keyword argument on every
# get_ocr_text_and_coordinate_in_google_format(...) call in this module.
# NOTE(review): presumably selects the higher-accuracy OCR mode; confirm the
# accepted values in segment.sheet_resolve.tools.brain_api.
OCR_ACCURACY = 'accurate'
|
|
|
|
|
@@ -360,7 +361,6 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
|
|
|
'''解析type_score与对应分割模块的分数'''
|
|
|
for i in range(len(type_score_boxs)):
|
|
|
- type_score_flag = 1
|
|
|
test_result1 = model_type_score(type_score_boxs[i], choice_boxs, cloze_boxs, solve_boxs, composition_boxs)
|
|
|
if test_result1 != -1 and test_result1 != 0:
|
|
|
if type_score_boxs[i][0] - 5 > 0:
|
|
@@ -380,19 +380,19 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
else:
|
|
|
ymaxss = type_score_boxs[i][3]
|
|
|
test_result1['words'] = str()
|
|
|
- try: # tr_OCR
|
|
|
- print('tr_OCR')
|
|
|
- image_src_type_score = image_src.crop((xminss, yminss, xmaxss, ymaxss))
|
|
|
- type_score_dict_ocr = tr.run(image_src_type_score)
|
|
|
- for t in range(len(type_score_dict_ocr)):
|
|
|
- test_result1['words'] = test_result1['words'] + type_score_dict_ocr[t][1]
|
|
|
- except Exception as e: # baidu_OCR
|
|
|
- print('baidu_OCR')
|
|
|
- type_score_dict_ocr = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
|
|
|
- for t in range(len(type_score_dict_ocr['words'])):
|
|
|
- test_result1['words'] = test_result1['words'] + type_score_dict_ocr['words'][t]
|
|
|
-
|
|
|
- test = key_words(test_result1, type_score_flag)
|
|
|
+ # try: # tr_OCR
|
|
|
+ # image_src_type_score = image_src.crop((xminss, yminss, xmaxss, ymaxss))
|
|
|
+ # type_score_dict_ocr = tr.run(image_src_type_score)
|
|
|
+ # print('tr_OCR')
|
|
|
+ # for t in range(len(type_score_dict_ocr)):
|
|
|
+ # test_result1['words'] = test_result1['words'] + type_score_dict_ocr[t][1]
|
|
|
+ # except Exception as e: # baidu_OCR
|
|
|
+ # print('baidu_OCR')
|
|
|
+ type_score_dict_ocr = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
|
|
|
+ for t in range(len(type_score_dict_ocr['words'])):
|
|
|
+ test_result1['words'] = test_result1['words'] + type_score_dict_ocr['words'][t]
|
|
|
+
|
|
|
+ test = key_words(test_result1)
|
|
|
if test == {}:
|
|
|
### 添加返回值OCR结果
|
|
|
add_ocr = {}
|
|
@@ -443,7 +443,6 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
if solve_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
|
|
|
solve_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
|
|
|
if choice_boxs != []: # 9月16号修改
|
|
|
- type_score_flag = 0
|
|
|
for ij in range(len(choice_boxs)):
|
|
|
if choice_boxs[ij][1] - 150 > 0:
|
|
|
yminss = choice_boxs[ij][1] - 150
|
|
@@ -453,52 +452,59 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
xminss = choice_boxs[ij][0] - 100
|
|
|
else:
|
|
|
xminss = choice_boxs[ij][0]
|
|
|
- if yminss + 200 < img_h:
|
|
|
- ymaxss = yminss + 200
|
|
|
- else:
|
|
|
- ymaxss = choice_boxs[ij][3]
|
|
|
- type_score_dict_ocrs = {}
|
|
|
- new_test = {}
|
|
|
-
|
|
|
- try: # tr_OCR
|
|
|
- print('tr_OCR')
|
|
|
- image_choice = image_src.crop((xminss, yminss, choice_boxs[ij][2], ymaxss))
|
|
|
- res1 = tr.run(image_choice)
|
|
|
- for i in range(len(res1)):
|
|
|
- if res1[i][1].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1[i][1]
|
|
|
- else:
|
|
|
- continue
|
|
|
- except Exception as e: # baidu_OCR
|
|
|
- print('baidu_OCR')
|
|
|
- res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:choice_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
|
|
|
- for i in range(len(res1['words'])):
|
|
|
- if res1['words'][i].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1['words'][i]
|
|
|
- else:
|
|
|
- continue
|
|
|
- if type_score_dict_ocrs != {}:
|
|
|
- new_test = key_words(type_score_dict_ocrs, type_score_flag)
|
|
|
- if new_test != {} and new_test['volume_structure'] != -1 and (
|
|
|
- int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(
|
|
|
- new_test['volume_structure'][0]['volume_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
- if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
- new_test['volume_structure'][0]['volume_total_score'] = int(
|
|
|
- new_test['volume_structure'][0]['volume_total_score']) % 100
|
|
|
- new_test['volume_structure'][0]['bounding_box'] = choice_boxs[ij]
|
|
|
- new_test['volume_structure'][0]['label'] = 'choice'
|
|
|
- all_test.append(new_test)
|
|
|
- elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (
|
|
|
- int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(
|
|
|
- new_test['Score_structure'][0]['item_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
- if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
- new_test['Score_structure'][0]['item_total_score'] = int(
|
|
|
- new_test['Score_structure'][0]['item_total_score']) % 100
|
|
|
- new_test['Score_structure'][0]['bounding_box'] = choice_boxs[ij]
|
|
|
- new_test['Score_structure'][0]['label'] = 'choice'
|
|
|
- all_test.append(new_test)
|
|
|
+ try:
|
|
|
+ res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:choice_boxs[ij][3], xminss:choice_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
|
|
|
+ aa = []
|
|
|
+ type_score_dict_ocrs = {}
|
|
|
+ for ii in range(len(res1['coordinates'])):
|
|
|
+ xmin11 = res1['coordinates'][ii][0] + choice_boxs[ij][0]
|
|
|
+ ymin11 = res1['coordinates'][ii][1] + choice_boxs[ij][1]
|
|
|
+ xmax11 = res1['coordinates'][ii][2] + choice_boxs[ij][0]
|
|
|
+ ymax11 = res1['coordinates'][ii][3] + choice_boxs[ij][1]
|
|
|
+ aaa = (xmin11, ymin11, xmax11, ymax11)
|
|
|
+ aa.append(aaa)
|
|
|
+ res1['coordinates'] = aa
|
|
|
+ new_test = {}
|
|
|
+ if len(res1['words']) > 0:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][0]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 1:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][1]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 2:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][2]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 3:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][3]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 4:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][4]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test != {} and new_test['volume_structure'] != -1 and (
|
|
|
+ int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(
|
|
|
+ new_test['volume_structure'][0]['volume_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
+ if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
+ new_test['volume_structure'][0]['volume_total_score'] = int(
|
|
|
+ new_test['volume_structure'][0]['volume_total_score']) % 100
|
|
|
+ new_test['volume_structure'][0]['bounding_box'] = choice_boxs[ij]
|
|
|
+ new_test['volume_structure'][0]['label'] = 'choice'
|
|
|
+ all_test.append(new_test)
|
|
|
+ elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (
|
|
|
+ int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(
|
|
|
+ new_test['Score_structure'][0]['item_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
+ if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
+ new_test['Score_structure'][0]['item_total_score'] = int(
|
|
|
+ new_test['Score_structure'][0]['item_total_score']) % 100
|
|
|
+ new_test['Score_structure'][0]['bounding_box'] = choice_boxs[ij]
|
|
|
+ new_test['Score_structure'][0]['label'] = 'choice'
|
|
|
+ all_test.append(new_test)
|
|
|
+ except Exception:
|
|
|
+ print('choice_boxs_score_NULL_or_error')
|
|
|
if cloze_boxs != []:
|
|
|
- type_score_flag = 0
|
|
|
for ij in range(len(cloze_boxs)):
|
|
|
if cloze_boxs[ij][1] - 100 > 0:
|
|
|
yminss = cloze_boxs[ij][1] - 100
|
|
@@ -508,115 +514,107 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
xminss = cloze_boxs[ij][0] - 100
|
|
|
else:
|
|
|
xminss = cloze_boxs[ij][0]
|
|
|
- type_score_dict_ocrs = {}
|
|
|
- new_test = {}
|
|
|
-
|
|
|
- try: # tr_OCR
|
|
|
- print('tr_OCR')
|
|
|
- image_choice = image_src.crop((xminss, yminss, cloze_boxs[ij][2], cloze_boxs[ij][3]))
|
|
|
- res1 = tr.run(image_choice)
|
|
|
- for i in range(len(res1)):
|
|
|
- if res1[i][1].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1[i][1]
|
|
|
- else:
|
|
|
- continue
|
|
|
- except Exception as e: # baidu_OCR
|
|
|
- print('baidu_OCR')
|
|
|
- res1 = get_ocr_text_and_coordinate_in_google_format(
|
|
|
- img0[yminss:cloze_boxs[ij][3], xminss:cloze_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,
|
|
|
- language_type='CHN_ENG')
|
|
|
- for i in range(len(res1['words'])):
|
|
|
- if res1['words'][i].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1['words'][i]
|
|
|
- else:
|
|
|
- continue
|
|
|
- if type_score_dict_ocrs != {}:
|
|
|
- new_test = key_words(type_score_dict_ocrs, type_score_flag)
|
|
|
- if new_test != {} and new_test['volume_structure'] != -1 and (int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(new_test['volume_structure'][0]['volume_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
- if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
- new_test['volume_structure'][0]['volume_total_score'] = int(
|
|
|
- new_test['volume_structure'][0]['volume_total_score']) % 100
|
|
|
- new_test['volume_structure'][0]['bounding_box'] = cloze_boxs[ij]
|
|
|
- new_test['volume_structure'][0]['label'] = 'cloze'
|
|
|
- all_test.append(new_test)
|
|
|
- elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(new_test['Score_structure'][0]['item_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
- if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
- new_test['Score_structure'][0]['item_total_score'] = int(
|
|
|
- new_test['Score_structure'][0]['item_total_score']) % 100
|
|
|
- new_test['Score_structure'][0]['bounding_box'] = cloze_boxs[ij]
|
|
|
- new_test['Score_structure'][0]['label'] = 'cloze'
|
|
|
- all_test.append(new_test)
|
|
|
+ try:
|
|
|
+ res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:cloze_boxs[ij][3], xminss:cloze_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
|
|
|
+ aa = []
|
|
|
+ type_score_dict_ocrs = {}
|
|
|
+ for ii in range(len(res1['coordinates'])):
|
|
|
+ xmin11 = res1['coordinates'][ii][0] + cloze_boxs[ij][0]
|
|
|
+ ymin11 = res1['coordinates'][ii][1] + cloze_boxs[ij][1]
|
|
|
+ xmax11 = res1['coordinates'][ii][2] + cloze_boxs[ij][0]
|
|
|
+ ymax11 = res1['coordinates'][ii][3] + cloze_boxs[ij][1]
|
|
|
+ aaa = (xmin11, ymin11, xmax11, ymax11)
|
|
|
+ aa.append(aaa)
|
|
|
+ res1['coordinates'] = aa
|
|
|
+ new_test = {}
|
|
|
+ if len(res1['words']) > 0:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][0]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 1:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][1]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 2:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][2]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 3:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][3]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 4:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][4]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test != {} and new_test['volume_structure'] != -1 and (int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(new_test['volume_structure'][0]['volume_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
+ if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
+ new_test['volume_structure'][0]['volume_total_score'] = int(
|
|
|
+ new_test['volume_structure'][0]['volume_total_score']) % 100
|
|
|
+ new_test['volume_structure'][0]['bounding_box'] = cloze_boxs[ij]
|
|
|
+ new_test['volume_structure'][0]['label'] = 'cloze'
|
|
|
+ all_test.append(new_test)
|
|
|
+ elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(new_test['Score_structure'][0]['item_score']) > 4): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
+ if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
+ new_test['Score_structure'][0]['item_total_score'] = int(
|
|
|
+ new_test['Score_structure'][0]['item_total_score']) % 100
|
|
|
+ new_test['Score_structure'][0]['bounding_box'] = cloze_boxs[ij]
|
|
|
+ new_test['Score_structure'][0]['label'] = 'cloze'
|
|
|
+ all_test.append(new_test)
|
|
|
+ except Exception:
|
|
|
+ print('cloze_boxs_score_NULL_or_error')
|
|
|
if solve_boxs != []:
|
|
|
- type_score_flag = 0
|
|
|
for ij in range(len(solve_boxs)):
|
|
|
- xminss = solve_boxs[ij][0]
|
|
|
yminss = solve_boxs[ij][1]
|
|
|
- if solve_boxs[ij][2] - xminss > 1000:
|
|
|
- xmaxss = xminss + 1000
|
|
|
- else:
|
|
|
- xmaxss = solve_boxs[ij][2]
|
|
|
- if yminss + 500 > img_h:
|
|
|
- ymaxss = yminss + 500
|
|
|
- else:
|
|
|
- ymaxss = solve_boxs[ij][3]
|
|
|
- type_score_dict_ocrs = {}
|
|
|
- new_test = {}
|
|
|
-
|
|
|
- try: # tr_OCR
|
|
|
- print('tr_OCR')
|
|
|
- image_choice = image_src.crop((xminss, yminss, xmaxss, ymaxss))
|
|
|
- res1 = tr.run(image_choice)
|
|
|
- for i in range(len(res1)):
|
|
|
- if res1[i][1].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1[i][1]
|
|
|
- elif i == len(res1)-1:
|
|
|
- for ii in range(len(res1)):
|
|
|
- if res1[ii][1].find('题') != -1 or res1[ii][1].find('.') != -1 or res1[ii][1].find('、') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1[ii][1]
|
|
|
- else:
|
|
|
- continue
|
|
|
- else:
|
|
|
- continue
|
|
|
- except Exception as e: # baidu_OCR
|
|
|
- print('baidu_OCR')
|
|
|
- res1 = get_ocr_text_and_coordinate_in_google_format(
|
|
|
- img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,
|
|
|
- language_type='CHN_ENG')
|
|
|
- for i in range(len(res1['words'])):
|
|
|
- if res1['words'][i].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1['words'][i]
|
|
|
- elif i == len(res1['words'])-1:
|
|
|
- for ii in range(len(res1['words'])):
|
|
|
- if res1['words'][ii].find('题') != -1 or res1['words'][ii][1].find('.') != -1 or res1['words'][ii].find('、') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1['words'][ii]
|
|
|
- else:
|
|
|
- continue
|
|
|
- else:
|
|
|
- continue
|
|
|
- if type_score_dict_ocrs != {}:
|
|
|
- new_test = key_words(type_score_dict_ocrs, type_score_flag)
|
|
|
- if new_test != {} and new_test['volume_structure'] != -1 and int(new_test['volume_structure'][0][
|
|
|
- 'volume_total_score']) > 5: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
- if int(new_test['volume_structure'][0][
|
|
|
- 'volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
- new_test['volume_structure'][0]['volume_total_score'] = int(
|
|
|
- new_test['volume_structure'][0]['volume_total_score']) % 100
|
|
|
- new_test['volume_structure'][0]['bounding_box'] = solve_boxs[ij]
|
|
|
- new_test['volume_structure'][0]['label'] = 'solve'
|
|
|
- all_test.append(new_test)
|
|
|
- elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
|
|
|
- 'Score_structure'] != -1 and (
|
|
|
- int(new_test['Score_structure'][0]['item_total_score']) > 5 or int(
|
|
|
- new_test['Score_structure'][0][
|
|
|
- 'item_total_score']) == -1): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
- if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
- new_test['Score_structure'][0]['item_total_score'] = int(
|
|
|
- new_test['Score_structure'][0]['item_total_score']) % 100
|
|
|
- new_test['Score_structure'][0]['bounding_box'] = solve_boxs[ij]
|
|
|
- new_test['Score_structure'][0]['label'] = 'solve'
|
|
|
- all_test.append(new_test)
|
|
|
+ xminss = solve_boxs[ij][0]
|
|
|
+ try:
|
|
|
+ res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:solve_boxs[ij][3], xminss:solve_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
|
|
|
+ aa = []
|
|
|
+ type_score_dict_ocrs = {}
|
|
|
+ for ii in range(len(res1['coordinates'])):
|
|
|
+ xmin11 = res1['coordinates'][ii][0] + solve_boxs[ij][0]
|
|
|
+ ymin11 = res1['coordinates'][ii][1] + solve_boxs[ij][1]
|
|
|
+ xmax11 = res1['coordinates'][ii][2] + solve_boxs[ij][0]
|
|
|
+ ymax11 = res1['coordinates'][ii][3] + solve_boxs[ij][1]
|
|
|
+ aaa = (xmin11, ymin11, xmax11, ymax11)
|
|
|
+ aa.append(aaa)
|
|
|
+ res1['coordinates'] = aa
|
|
|
+ new_test = {}
|
|
|
+ if len(res1['words']) > 0:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][0]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 1:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][1]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 2:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][2]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 3:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][3]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 4:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][4]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test != {} and new_test['volume_structure'] != -1 and int(new_test['volume_structure'][0]['volume_total_score']) > 5: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
+ if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
+ new_test['volume_structure'][0]['volume_total_score'] = int(new_test['volume_structure'][0]['volume_total_score']) % 100
|
|
|
+ new_test['volume_structure'][0]['bounding_box'] = solve_boxs[ij]
|
|
|
+ new_test['volume_structure'][0]['label'] = 'solve'
|
|
|
+ all_test.append(new_test)
|
|
|
+ elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
|
|
|
+ 'Score_structure'] != -1 and (
|
|
|
+ int(new_test['Score_structure'][0]['item_total_score']) > 5 or int(new_test['Score_structure'][0]['item_total_score']) == -1): # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
+ if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
+ new_test['Score_structure'][0]['item_total_score'] = int(new_test['Score_structure'][0]['item_total_score']) % 100
|
|
|
+ new_test['Score_structure'][0]['bounding_box'] = solve_boxs[ij]
|
|
|
+ new_test['Score_structure'][0]['label'] = 'solve'
|
|
|
+ all_test.append(new_test)
|
|
|
+ except Exception:
|
|
|
+ print('solve_boxs_score_NULL_or_error')
|
|
|
if composition_boxs != []:
|
|
|
- type_score_flag = 0
|
|
|
for ij in range(len(composition_boxs)):
|
|
|
if composition_boxs[ij][1] - 250 > 0:
|
|
|
yminss = composition_boxs[ij][1] - 250
|
|
@@ -626,55 +624,57 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
xminss = composition_boxs[ij][0] - 100
|
|
|
else:
|
|
|
xminss = composition_boxs[ij][0]
|
|
|
- type_score_dict_ocrs = {}
|
|
|
-
|
|
|
- try: # tr_OCR
|
|
|
- print('tr_OCR')
|
|
|
- image_choice = image_src.crop((xminss, yminss, composition_boxs[ij][2], composition_boxs[ij][3]))
|
|
|
- res1 = tr.run(image_choice)
|
|
|
- for i in range(len(res1)):
|
|
|
- if res1[i][1].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1[i][1]
|
|
|
- elif i == len(res1):
|
|
|
- for ii in range(len(res1)):
|
|
|
- if res1[i][1].find('题') != -1 or res1[i][1].find('.') != -1 or res1[i][1].find('、') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1[i][1]
|
|
|
- else:
|
|
|
- continue
|
|
|
- except Exception as e: # baidu_OCR
|
|
|
- print('baidu_OCR')
|
|
|
- res1 = get_ocr_text_and_coordinate_in_google_format(
|
|
|
- img0[yminss:composition_boxs[ij][3], xminss:composition_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,
|
|
|
- language_type='CHN_ENG')
|
|
|
- for i in range(len(res1['words'])):
|
|
|
- if res1['words'][i].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1['words'][i]
|
|
|
- elif i == len(res1):
|
|
|
- for ii in range(len(res1['words'])):
|
|
|
- if res1['words'][i].find('题') != -1 or res1['words'][i][1].find('.') != -1 or res1['words'][
|
|
|
- i].find('、') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1['words'][i]
|
|
|
- else:
|
|
|
- continue
|
|
|
- if type_score_dict_ocrs != {}:
|
|
|
- new_test = key_words(type_score_dict_ocrs, type_score_flag)
|
|
|
- if new_test != {} and new_test['volume_structure'] != -1 and int(
|
|
|
- new_test['volume_structure'][0]['volume_total_score']) > 4: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
- if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
- new_test['volume_structure'][0]['volume_total_score'] = int(
|
|
|
- new_test['volume_structure'][0]['volume_total_score']) % 100
|
|
|
- new_test['volume_structure'][0]['bounding_box'] = composition_boxs[ij]
|
|
|
- new_test['volume_structure'][0]['label'] = 'composition'
|
|
|
- all_test.append(new_test)
|
|
|
- elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
|
|
|
- 'Score_structure'] != -1 and int(
|
|
|
- new_test['Score_structure'][0]['item_total_score']) > 4: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
- if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
- new_test['Score_structure'][0]['item_total_score'] = int(
|
|
|
- new_test['Score_structure'][0]['item_total_score']) % 100
|
|
|
- new_test['Score_structure'][0]['bounding_box'] = composition_boxs[ij]
|
|
|
- new_test['Score_structure'][0]['label'] = 'composition'
|
|
|
- all_test.append(new_test)
|
|
|
+ try:
|
|
|
+ res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:composition_boxs[ij][3], xminss:composition_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
|
|
|
+ aa = []
|
|
|
+ type_score_dict_ocrs = {}
|
|
|
+ for ii in range(len(res1['coordinates'])):
|
|
|
+ xmin11 = res1['coordinates'][ii][0] + composition_boxs[ij][0]
|
|
|
+ ymin11 = res1['coordinates'][ii][1] + composition_boxs[ij][1]
|
|
|
+ xmax11 = res1['coordinates'][ii][2] + composition_boxs[ij][0]
|
|
|
+ ymax11 = res1['coordinates'][ii][3] + composition_boxs[ij][1]
|
|
|
+ aaa = (xmin11, ymin11, xmax11, ymax11)
|
|
|
+ aa.append(aaa)
|
|
|
+ res1['coordinates'] = aa
|
|
|
+ new_test = {}
|
|
|
+ if len(res1['words']) > 0:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][0]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 1:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][1]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 2:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][2]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 3:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][3]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test == {} or new_test['Score_structure'] == -1:
|
|
|
+ if len(res1['words']) > 4:
|
|
|
+ type_score_dict_ocrs['words'] = res1['words'][4]
|
|
|
+ new_test = key_words(type_score_dict_ocrs)
|
|
|
+ if new_test != {} and new_test['volume_structure'] != -1 and int(
|
|
|
+ new_test['volume_structure'][0]['volume_total_score']) > 4: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
+ if int(new_test['volume_structure'][0]['volume_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
+ new_test['volume_structure'][0]['volume_total_score'] = int(
|
|
|
+ new_test['volume_structure'][0]['volume_total_score']) % 100
|
|
|
+ new_test['volume_structure'][0]['bounding_box'] = composition_boxs[ij]
|
|
|
+ new_test['volume_structure'][0]['label'] = 'composition'
|
|
|
+ all_test.append(new_test)
|
|
|
+ elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
|
|
|
+ 'Score_structure'] != -1 and int(
|
|
|
+ new_test['Score_structure'][0]['item_total_score']) > 4: # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
|
|
|
+ if int(new_test['Score_structure'][0]['item_total_score']) > 200: # 暂定试卷分数都在200以内,超过200的表示识别错误
|
|
|
+ new_test['Score_structure'][0]['item_total_score'] = int(
|
|
|
+ new_test['Score_structure'][0]['item_total_score']) % 100
|
|
|
+ new_test['Score_structure'][0]['bounding_box'] = composition_boxs[ij]
|
|
|
+ new_test['Score_structure'][0]['label'] = 'composition'
|
|
|
+ all_test.append(new_test)
|
|
|
+ except Exception:
|
|
|
+ print('composition_boxs_score_NULL_or_error')
|
|
|
for aaa in range(len(all_test)):
|
|
|
if all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] == -1:
|
|
|
score_last_one = {'model_box': dict(all_test[aaa])['Score_structure'][0]['bounding_box'],
|
|
@@ -793,17 +793,20 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
count_choice_m = count_choice_m + len(answer_sheet['regions'][j]['number'])
|
|
|
j_temp.append(j)
|
|
|
if j == len(answer_sheet['regions']) - 1 and j_temp !=[]:
|
|
|
- for index, jj in enumerate(j_temp):
|
|
|
- num_score_m = round(float(Score_last[i]['score'] / count_choice_m),1)
|
|
|
- answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
|
|
|
- break
|
|
|
-
|
|
|
+ try:
|
|
|
+ for index, jj in enumerate(j_temp):
|
|
|
+ num_score_m_infer = round(float(Score_last[i]['score'] / count_choice_m), 2)
|
|
|
+ num_score_m = [str(num_score_m_infer), int(num_score_m_infer)][int(num_score_m_infer) == num_score_m_infer]
|
|
|
+ answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
|
|
|
+ break
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
elif Score_last[i]['label'] == 'cloze':
|
|
|
- count_cloze_s = 0
|
|
|
for j in range(len(answer_sheet['regions'])):
|
|
|
if answer_sheet['regions'][j]['class_name'] == 'cloze_s':
|
|
|
if Score_last[i]['number_score'] != -1:
|
|
|
answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
|
|
|
+
|
|
|
elif num_choice > 1 or num_cloze >1:
|
|
|
for i in range(len(Score_last)):
|
|
|
if Score_last[i]['label'] == 'choice':
|
|
@@ -825,12 +828,16 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
count_choice_m = count_choice_m + len(answer_sheet['regions'][j]['number'])
|
|
|
j_temp.append(j)
|
|
|
if j == len(answer_sheet['regions']) - 1 and j_temp !=[]:
|
|
|
- for index ,jj in enumerate(j_temp):
|
|
|
- num_score_m = round(float(Score_last[i]['score'] / count_choice_m),1)
|
|
|
- answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
|
|
|
- break
|
|
|
+ try:
|
|
|
+ for index, jj in enumerate(j_temp):
|
|
|
+ num_score_m_infer = round(float(Score_last[i]['score'] / count_choice_m), 2)
|
|
|
+ num_score_m = [str(num_score_m_infer), int(num_score_m_infer)][int(num_score_m_infer) == num_score_m_infer]
|
|
|
+ answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
|
|
|
+ break
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+
|
|
|
elif Score_last[i]['label'] == 'cloze':
|
|
|
- count_cloze_s = 0
|
|
|
for j in range(len(answer_sheet['regions'])):
|
|
|
if answer_sheet['regions'][j]['class_name'] == 'cloze_s':
|
|
|
xmin_dis = answer_sheet['regions'][j]['bounding_box']['xmin'] - \
|
|
@@ -844,6 +851,7 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
if xmin_dis > -30 and ymin_dis > -30 and xmax_dis < 30 and ymax_dis < 30:
|
|
|
if Score_last[i]['number_score'] != -1 :
|
|
|
answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
|
|
|
+
|
|
|
elif choice_m_boxs !=[]:
|
|
|
x_choice_m_min = 10000
|
|
|
y_choice_m_min = 10000
|
|
@@ -870,63 +878,26 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
'bounding_box': choice_m_boxs,
|
|
|
'label': 'choice_m',
|
|
|
'type_box': type_score_choice_m}
|
|
|
- type_score_flag = 0
|
|
|
- type_score_dict_ocrs = {}
|
|
|
- try: # tr_OCR
|
|
|
- print('tr_OCR')
|
|
|
- image_choice = image_src.crop((type_score_boxs[0][0], type_score_boxs[0][1], type_score_boxs[0][2], type_score_boxs[0][3]))
|
|
|
- res1 = tr.run(image_choice)
|
|
|
- for i in range(len(res1)):
|
|
|
- if res1[i][1].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1[i][1]
|
|
|
- elif i == len(res1):
|
|
|
- for ii in range(len(res1)):
|
|
|
- if res1[i][1].find('题') != -1 or res1[i][1].find('.') != -1 or res1[i][1].find(
|
|
|
- '、') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1[i][1]
|
|
|
- else:
|
|
|
- continue
|
|
|
- except Exception as e: # baidu_OCR
|
|
|
- print('baidu_OCR')
|
|
|
- res1 = get_ocr_text_and_coordinate_in_google_format(
|
|
|
- img0[type_score_boxs[0][1]:type_score_boxs[0][3], type_score_boxs[0][0]:type_score_boxs[0][2]], ocr_accuracy=OCR_ACCURACY,
|
|
|
- language_type='CHN_ENG')
|
|
|
- for i in range(len(res1['words'])):
|
|
|
- if res1['words'][i].find('分') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1['words'][i]
|
|
|
- elif i == len(res1):
|
|
|
- for ii in range(len(res1['words'])):
|
|
|
- if res1['words'][i].find('题') != -1 or res1['words'][i][1].find('.') != -1 or \
|
|
|
- res1['words'][
|
|
|
- i].find('、') != -1:
|
|
|
- type_score_dict_ocrs['words'] = res1['words'][i]
|
|
|
- else:
|
|
|
- continue
|
|
|
- if type_score_dict_ocrs != {}:
|
|
|
- test = key_words(type_score_dict_ocrs, type_score_flag)
|
|
|
+ test_result1['words'] = str()
|
|
|
+ # try: # tr_OCR
|
|
|
+ # image_choice = image_src.crop((type_score_choice_m[0], type_score_choice_m[1], type_score_choice_m[2], type_score_choice_m[3]))
|
|
|
+ # res1 = tr.run(image_choice)
|
|
|
+ # print('tr_OCR')
|
|
|
+ # for t in range(len(res1)):
|
|
|
+ # test_result1['words'] = test_result1['words'] + res1[t][1]
|
|
|
+ # except Exception as e: # baidu_OCR
|
|
|
+ # print('baidu_OCR')
|
|
|
+ res1 = get_ocr_text_and_coordinate_in_google_format(
|
|
|
+ img0[type_score_choice_m[1]:type_score_choice_m[3], type_score_choice_m[0]:type_score_choice_m[2]], ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG')
|
|
|
+ for t in range(len(res1['words'])):
|
|
|
+ test_result1['words'] = test_result1['words'] + res1['words'][t]
|
|
|
+ if test_result1['words'] != {}:
|
|
|
+ test = key_words(test_result1)
|
|
|
choice_m_score = -1
|
|
|
if test == {}:
|
|
|
- ### 添加返回值OCR结果
|
|
|
- add_ocr = {}
|
|
|
- add_ocr['model_box'] = test_result1['bounding_box']
|
|
|
- add_ocr['label'] = test_result1['label']
|
|
|
- add_ocr['number'] = -1
|
|
|
- add_ocr['score'] = -1
|
|
|
- add_ocr['number_score'] = -1
|
|
|
- add_ocr['counts'] = -1
|
|
|
- add_ocr['ocr'] = test_result1['words']
|
|
|
- Score_last.append(add_ocr)
|
|
|
+ choice_m_type_score_ocr = test_result1['words']
|
|
|
elif test['volume_structure'] == -1 and test['Score_structure'] == -1:
|
|
|
- ### 添加返回值OCR结果
|
|
|
- add_ocr = {}
|
|
|
- add_ocr['model_box'] = test_result1['bounding_box']
|
|
|
- add_ocr['label'] = test_result1['label']
|
|
|
- add_ocr['number'] = -1
|
|
|
- add_ocr['score'] = -1
|
|
|
- add_ocr['number_score'] = -1
|
|
|
- add_ocr['counts'] = -1
|
|
|
- add_ocr['ocr'] = test_result1['words']
|
|
|
- Score_last.append(add_ocr)
|
|
|
+ choice_m_type_score_ocr = test_result1['words']
|
|
|
else:
|
|
|
if test['volume_structure'] != -1 and test['volume_structure'][0]['volume_score'] != -1:
|
|
|
choice_m_score = test['volume_structure'][0]['volume_score']
|
|
@@ -937,8 +908,14 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
if answer_sheet['regions'][j]['class_name'] == 'choice_m':
|
|
|
answer_sheet['regions'][j]['default_points'] = len(
|
|
|
answer_sheet['regions'][j]['number']) * [float(choice_m_score)]
|
|
|
+ elif test_result1['words'] != {}:
|
|
|
+ for j in range(len(answer_sheet['regions'])):
|
|
|
+ if answer_sheet['regions'][j]['class_name'] == 'choice_m':
|
|
|
+ answer_sheet['regions'][j]['type_score_ocr'] = choice_m_type_score_ocr
|
|
|
+
|
|
|
|
|
|
'''分数与模型对应'''
|
|
|
+ ocr_flag = 0
|
|
|
for i in range(len(answer_sheet['regions'])):
|
|
|
for j in range(len(Score_last)):
|
|
|
if (Score_last[j]['model_box'][0] == answer_sheet['regions'][i]['bounding_box']['xmin']
|
|
@@ -959,8 +936,11 @@ def get_sheet_number_total(answer_sheet, res, img0):
|
|
|
answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i][
|
|
|
'class_name'] == 'solve0'):
|
|
|
answer_sheet['regions'][i]['class_name'] = 'optional_solve'
|
|
|
+ ocr_flag = 1
|
|
|
+ if 'type_score_ocr' in answer_sheet['regions'][i].keys():
|
|
|
+ del answer_sheet['regions'][i]['type_score_ocr']
|
|
|
# answer_sheet['regions'][i]['number_score'] = Score_last[j]['number_score'] # 小题分数
|
|
|
# answer_sheet['regions'][i]['counts'] = Score_last[j]['counts'] # 小题个数
|
|
|
- if 'ocr' in Score_last[j]: # 没有识别到分数的模块添加type_score_ocr结果
|
|
|
+ if ocr_flag == 0 and 'ocr' in Score_last[j]: # 没有识别到分数的模块添加type_score_ocr结果
|
|
|
answer_sheet['regions'][i]['type_score_ocr'] = Score_last[j]['ocr']
|
|
|
return answer_sheet
|