4 years ago · aa5a5cc974
--- a/db.sqlite3
+++ b/db.sqlite3
--- a/segment/sheet_resolve/analysis/choice/choice_line_box.py
+++ b/segment/sheet_resolve/analysis/choice/choice_line_box.py
@@ -3,7 +3,9 @@
 
															 # @Time    : 2018/11/22 0022 下午 16:01
														
 
															 import time
														
 
															 import re
														
 
															-import cv2, os
														
 
															+import cv2
														
 
															+import os
														
 
															+import random
														
 
															 import traceback
														
 
															 import numpy as np
														
 
															 import xml.etree.cElementTree as ET
														
@@ -376,7 +378,7 @@ def choice_bbox_vague(choice_m_list0, x_y_interval_ave, singe_box_width_height_a
 
															             y_diff = x_y_interval_ave[1]
														
 
															             s_height = singe_box_width_height_ave[1]
														
 
															             choice_bbox = (np.hstack((np.array([min(xmin0), min(ymin0) - y_diff - 3 * s_height]), np.array([max(xmax0), max(ymax0)])))).tolist()
														
 
															-            choice_bbox_with_index_list = (choice_bbox, choice_m_list[1])
														
 
															+            choice_bbox_with_index_list = (choice_bbox, choice_m_list1[1])
														
 
															             choice_bbox_all.append(choice_bbox_with_index_list)
														
 
															     return choice_bbox_all
														
@@ -432,7 +434,9 @@ def choice_m_row_col(image, choice_m_bbox_list, xml_path):
 
															     a_z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
														
 
															     choice_m_dict_list = []
														
 
															-    choice_m_for_dircetion = utils.crop_region(image, choice_m_bbox_list[0]['bounding_box'])
														
 
															+    # 或在长宽比接近的choice_m中选取
														
 
															+    random_one = random.randint(0, len(choice_m_bbox_list)-1)
														
 
															+    choice_m_for_dircetion = utils.crop_region(image, choice_m_bbox_list[random_one]['bounding_box'])
														
 
															     res_dict = get_ocr_text_and_coordinate(choice_m_for_dircetion, ocr_accuracy='accurate', language_type='ENG')
														
 
															     direction = get_direction(res_dict)
														
 
															     for index0, box in enumerate(choice_m_bbox_list):  # rcnn识别的框匹配题号
														
@@ -441,7 +445,7 @@ def choice_m_row_col(image, choice_m_bbox_list, xml_path):
 
															         # box_coordiante = (m_left, m_top, box['xmax'], box['ymax'])
														
 
															         single_choice_m = utils.crop_region(image, box)
														
 
															         try:
														
 
															-            row_col_dict = get_choice_m_row_and_col(m_left, m_top, single_choice_m)     # 所有的小框， 行列等
														
 
															+            row_col_dict = get_choice_m_row_and_col(m_left, m_top, single_choice_m)     # 所有的小框，行列等
														
 
															             if len(row_col_dict) > 0:
														
 
															                 if direction == 90:
														
@@ -503,8 +507,11 @@ def choice_m_row_col(image, choice_m_bbox_list, xml_path):
 
															                     s_box_w_h.append(s_box_wid_hei)
														
 
															         x_y_interval_arr = np.array(x_y_interval_all)
														
 
															         if len(x_y_interval_arr) == 1:
														
 
															+            x_y_interval_all_arr = np.array(x_y_interval_all)
														
 
															+            x_ = int(np.mean(x_y_interval_all_arr[:, 0]))
														
 
															+            y_ = int(np.mean(x_y_interval_all_arr[:, 1]))
														
 
															+            x_y_interval_ave = (x_, y_)
														
 
															-            x_y_interval_ave = x_y_interval_all[0][0]
														
 
															             singe_box_width_height_ave = s_box_w_h[0]
														
 
															             image_height, image_width, _ = image.shape
														
@@ -521,8 +528,8 @@ def choice_m_row_col(image, choice_m_bbox_list, xml_path):
 
															         choice_m_dict_list_all_tmp = []
														
 
															         for index, choice_box_ele in enumerate(choice_bbox):
														
 
															             choice_region = utils.crop_region_direct(image, choice_box_ele[0])
														
 
															-            choice_path = xml_path[: xml_path.rfind('\\')]
														
 
															-            cv2.imwrite(os.path.join(choice_path, 'choice_region_' + str(index) + '.jpg'), choice_region)
														
 
															+            # choice_path = xml_path[: xml_path.rfind('\\')]
														
 
															+            # cv2.imwrite(os.path.join(choice_path, 'choice_region_' + str(index) + '.jpg'), choice_region)
														
 
															             choice_m_box_dict_new = [choice_m_box_dict[i] for i in choice_box_ele[1]]
														
 
															             choice_m_dict_list_part = get_title_number_by_choice_m.get_title_number(choice_box_ele[0], choice_region,
														
 
															                                                                                      choice_m_box_dict_new, direction)
														
--- a/segment/sheet_resolve/analysis/resolve.py
+++ b/segment/sheet_resolve/analysis/resolve.py
@@ -298,16 +298,17 @@ def exam_number_row_col(image, regions, xml_path):
 
															         tree = utils.create_xml(name, tree,
														
 
															                                 exam_number_box['xmin'], exam_number_box['ymin'],
														
 
															                                 exam_number_box['xmax'], exam_number_box['ymax'])
														
 
															+        tree.write(xml_path)
														
 
															+        return [exam_number_row_col_dict]
														
 
															     else:
														
 
															         tree = utils.create_xml('exam_number', tree,
														
 
															                                 exam_number_box['xmin'], exam_number_box['ymin'],
														
 
															                                 exam_number_box['xmax'], exam_number_box['ymax'])
														
 
															-        exam_number_row_col_dict = {}
														
 
															-    tree.write(xml_path)
														
 
															+        tree.write(xml_path)
														
 
															-    return [exam_number_row_col_dict]
														
 
															+        return []
														
 
															 def cloze(image, regions, xml_path, conf_thresh, mns_thresh, cloze_sess):
														
--- a/segment/sheet_resolve/analysis/sheet/choice_infer.py
+++ b/segment/sheet_resolve/analysis/sheet/choice_infer.py
@@ -263,16 +263,16 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
															                               mean_height, mean_width, choice_s_height, choice_s_width, limit_loc):
														
 
															     limit_left, limit_top, limit_right, limit_bottom = limit_loc
														
 
															     limit_width, limit_height = limit_right - limit_left, limit_bottom - limit_top
														
 
															-    arr = np.ones((len(digital_list), 2))
														
 
															+    digital_loc_arr = np.ones((len(digital_list), 2))
														
 
															     for i, ele in enumerate(digital_list):
														
 
															-        arr[i] = np.array([ele["loc"][-2], ele["loc"][-1]])
														
 
															+        digital_loc_arr[i] = np.array([ele["loc"][-2], ele["loc"][-1]])
														
 
															     if choice_s_height != 0:
														
 
															         eps = int(choice_s_height * 2.5)
														
 
															     else:
														
 
															         eps = int(mean_height * 3)
														
 
															     print("eps: ", eps)
														
 
															-    db = DBSCAN(eps=eps, min_samples=2, metric='chebyshev').fit(arr)
														
 
															+    db = DBSCAN(eps=eps, min_samples=2, metric='chebyshev').fit(digital_loc_arr)
														
 
															     labels = db.labels_
														
 
															     # print(labels)
														
@@ -357,7 +357,7 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
															                 current_height = current_loc[3] - current_loc[1]
														
 
															                 infer_height = max((choice_m_mean_height - current_height), int(dif * current_height / current_len))
														
 
															-                infer_bottom = min(current_loc[3] + infer_height, limit_height-1)
														
 
															+                infer_bottom = min(current_loc[3] + infer_height, limit_height - 1)
														
 
															                 if infer_bottom <= limit_height:
														
 
															                     choice_m_numbers_list[e_index]["loc"][3] = infer_bottom
														
 
															                     choice_m_numbers_list[e_index]["loc"][5] = (choice_m_numbers_list[e_index]["loc"][1] +
														
@@ -403,7 +403,7 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
															         current_row_choice_m_d = sorted(current_row_choice_m_d, key=lambda x: x["loc"][0])
														
 
															         # current_row_choice_m_d.append(choice_m_numbers_list[random_index])
														
 
															         split_pix = sorted([ele["loc"][0] for ele in current_row_choice_m_d])  # xmin排序
														
 
															-        split_index = get_split_index(split_pix, dif=choice_s_width*0.8)
														
 
															+        split_index = get_split_index(split_pix, dif=choice_s_width * 0.8)
														
 
															         split_pix = [split_pix[ele] for ele in split_index[:-1]]
														
 
															         block_list = []
														
@@ -474,10 +474,10 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
															                     choice_option = 'A,B,C,D'
														
 
															                 else:
														
 
															                     tmp = max(set(letter_index))
														
 
															-                # while letter_index_times[tmp] < 2 and tmp > 3:
														
 
															-                #     t_list = list(set(letter_index))
														
 
															-                #     t_list.remove(tmp)
														
 
															-                #     tmp = max(t_list)
														
 
															+                    # while letter_index_times[tmp] < 2 and tmp > 3:
														
 
															+                    #     t_list = list(set(letter_index))
														
 
															+                    #     t_list.remove(tmp)
														
 
															+                    #     tmp = max(t_list)
														
 
															                     choice_option = ",".join(a_z[min(letter_index):tmp + 1])
														
 
															                 cols = tmp
														
@@ -545,11 +545,89 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
															             if ele in current_row_chars:
														
 
															                 choice_m_numbers_list.remove(ele)
														
 
															-    # 单独一行不聚类
														
 
															+        # 解决单行问题
														
 
															+        crt_right_max = max([int(ele['bounding_box']['xmax']) for ele in choice_m_list])
														
 
															+        if limit_right - crt_right_max > choice_s_width:
														
 
															+            # 存在区域
														
 
															+            region_loc = {'xmin': crt_right_max + 10, 'ymin': choice_m_list[0]['bounding_box']['ymin'],
														
 
															+                          'xmax': limit_right, 'ymax': choice_m_list[0]['bounding_box']['ymax']}
														
 
															+
														
 
															+            contain_dig = []
														
 
															+            for i, ele in enumerate(digital_loc_arr):
														
 
															+                if (region_loc['xmin'] < ele[0] + limit_left < region_loc['xmax']
														
 
															+                        and region_loc['ymin'] < ele[1] + limit_top < region_loc['ymax']):
														
 
															+                    contain_dig.append(digital_list[i])
														
 
															+
														
 
															+            contain_chars = [ele for ele in chars_list
														
 
															+                             if region_loc['xmin'] < (
														
 
															+                                     ele["location"]["left"] + ele["location"]["width"] // 2) + limit_left <
														
 
															+                             region_loc['xmax']
														
 
															+                             and
														
 
															+                             region_loc['xmin'] < (
														
 
															+                                     ele["location"]["top"] + ele["location"]["height"] // 2) + limit_top <
														
 
															+                             region_loc['ymax']]
														
 
															+            if contain_dig or contain_chars:
														
 
															+                d_ymin, d_ymax, d_xmin, d_xmax = 9999, 0, 9999, 0
														
 
															+                if contain_dig:
														
 
															+                    d_ymin = min([ele['loc'][1] for ele in contain_dig])
														
 
															+                    d_ymax = max([ele['loc'][3] for ele in contain_dig])
														
 
															+                    d_xmin = min([ele['loc'][0] for ele in contain_dig])
														
 
															+                    d_xmax = max([ele['loc'][2] for ele in contain_dig])
														
 
															+
														
 
															+                c_ymin, c_ymax, c_xmin, c_xmax = 9999, 0, 9999, 0
														
 
															+                if contain_chars:
														
 
															+                    c_ymin = min([ele["location"]["top"] for ele in contain_chars])
														
 
															+                    c_ymax = max([ele["location"]["top"] + ele["location"]["height"] for ele in contain_chars])
														
 
															+                    c_xmin = min([ele["location"]["left"] for ele in contain_chars])
														
 
															+                    c_xmax = max([ele["location"]["left"] + ele["location"]["width"] for ele in contain_chars])
														
 
															+
														
 
															+                r_ymin, r_ymax = min(d_ymin, c_ymin), max(d_ymax, c_ymax)
														
 
															+                r_xmin, r_xmax = min(d_xmin, c_xmin), max(d_xmax, c_xmax)
														
 
															+
														
 
															+                region_loc['ymin'] = r_ymin - 10 + limit_top
														
 
															+                region_loc['ymax'] = r_ymax + 10 + limit_top
														
 
															+                if d_xmin == r_xmin:
														
 
															+                    region_loc['xmin'] = d_xmax + 5 + limit_left
														
 
															+                    region_loc['xmax'] = d_xmax + 5 + limit_left + int(1.2 * choice_s_width)
														
 
															+                else:
														
 
															+                    if 1.2 * (r_xmax - r_xmin) > choice_s_width:
														
 
															+                        region_loc['xmin'] = r_xmin - 10 + limit_left
														
 
															+                        region_loc['xmax'] = r_xmax + 10 + limit_left
														
 
															+                    else:
														
 
															+                        region_loc['xmin'] = max((r_xmax - r_xmin) // 2 + r_xmin - choice_s_width + limit_left,
														
 
															+                                                 crt_right_max + 10)
														
 
															+                        region_loc['xmax'] = min((r_xmax - r_xmin) // 2 + r_xmin + choice_s_width + limit_left,
														
 
															+                                                 limit_right)
														
 
															+
														
 
															+                try:
														
 
															+                    choice_m_img = utils.crop_region(image, region_loc)
														
 
															+                    right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
														
 
															+                    if right_loc > 0:
														
 
															+                        region_loc.update(dict(xmax=right_loc + region_loc['xmin']))
														
 
															+                    if bottom_loc > 0:
														
 
															+                        region_loc.update(dict(ymax=bottom_loc + region_loc['ymin']))
														
 
															+                except Exception as e:
														
 
															+                    print(e)
														
 
															+                    traceback.print_exc()
														
 
															+
														
 
															+                choice_m = dict(class_name='choice_m',
														
 
															+                                number=[-1],
														
 
															+                                bounding_box=region_loc,
														
 
															+                                choice_option='A,B,C,D',
														
 
															+                                default_points=[5],
														
 
															+                                direction=180,
														
 
															+                                cols=4,
														
 
															+                                rows=1,
														
 
															+                                single_width=(region_loc['xmax'] - region_loc['xmin']) // 4,
														
 
															+                                single_height=r_ymax - r_ymin
														
 
															+                                )
														
 
															+                choice_m_list.append(choice_m)
														
 
															+
														
 
															+    # 单独一行不聚类(理论上不会再到这一步了, 上个block解决)
														
 
															     for i, revised_choice_m in enumerate(need_revised_choice_m_list):
														
 
															         loc = revised_choice_m['bounding_box']
														
 
															         left_part_loc = loc.copy()
														
 
															-        left_part_loc.update({'xmax': loc['xmin']+choice_s_width})
														
 
															+        left_part_loc.update({'xmax': loc['xmin'] + choice_s_width})
														
 
															         choice_m_img = utils.crop_region(image, left_part_loc)
														
 
															         right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
														
 
															         if right_loc > 0:
														
@@ -561,7 +639,7 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
															         right_part_loc = loc.copy()
														
 
															         # right_part_loc.update({'xmin': loc['xmax']-choice_s_width})
														
 
															-        right_part_loc.update({'xmin': left_part_loc['xmax']+5})
														
 
															+        right_part_loc.update({'xmin': left_part_loc['xmax'] + 5})
														
 
															         choice_m_img = utils.crop_region(image, right_part_loc)
														
 
															         right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
														
 
															         if right_loc > 0:
														
@@ -572,7 +650,7 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
															         right_tmp_height = right_part_loc['ymax'] - right_part_loc['ymin']
														
 
															         number_len = max(1, int(revised_choice_m['rows'] // (left_tmp_height // right_tmp_height)))
														
 
															-        number = [ele+revised_choice_m['number'][-1]+1 for ele in range(number_len)]
														
 
															+        number = [ele + revised_choice_m['number'][-1] + 1 for ele in range(number_len)]
														
 
															         rows = len(number)
														
 
															         revised_choice_m.update({'bounding_box': left_part_loc})
														
@@ -582,11 +660,11 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
															         tmp.update({'bounding_box': right_part_loc, 'number': number, 'rows': rows})
														
 
															         choice_m_list.append(tmp)
														
 
															-    tmp = choice_m_list.copy()
														
 
															-    for ele in tmp:
														
 
															+    choice_m_list_copy = choice_m_list.copy()
														
 
															+    for ele in choice_m_list_copy:
														
 
															         loc = ele["bounding_box"]
														
 
															         w, h = loc['xmax'] - loc['xmin'], loc['ymax'] - loc['ymin']
														
 
															-        if 2*w*h < choice_s_width*choice_s_height:
														
 
															+        if 2 * w * h < choice_s_width * choice_s_height:
														
 
															             choice_m_list.remove(ele)
														
 
															     return choice_m_list
														
--- a/segment/sheet_resolve/analysis/sheet/ocr_key_words.py
+++ b/segment/sheet_resolve/analysis/sheet/ocr_key_words.py
--- a/segment/sheet_resolve/analysis/sheet/sheet_points.py
+++ b/segment/sheet_resolve/analysis/sheet/sheet_points.py
@@ -198,7 +198,8 @@ def get_total_title_quantity_and_value(box_with_content):
 
															                     if digital_value == None:
														
 
															                         value = -1
														
 
															                     else:
														
 
															-                        value = digital_value[0]
														
 
															+                        digital_value_ = digital_value.group()
														
 
															+                        value = digital_value_[0]
														
 
															                     title_two_value.append(int(value))
														
 
															                 else:
														
 
															                     title_two_number0 = result2[0]
														
--- a/segment/sheet_resolve/analysis/sheet/sheet_points_by_nlp.py
+++ b/segment/sheet_resolve/analysis/sheet/sheet_points_by_nlp.py
@@ -0,0 +1,260 @@
 
															+# File:get_sheet_points_by_nlp.py
														
 
															+# Author:lynn
														
 
															+# Date:2020/5/19 18:23
														
 
															+
														
 
															+
														
 
															+import ast, cv2, re
														
 
															+from segment.sheet_resolve.analysis.sheet.tag_parse import TagParse
														
 
															+import CRFPP
														
 
															+from segment.sheet_resolve.tools.utils import crop_region
														
 
															+
														
 
															+try:
														
 
															+    import xml.etree.cElementTree as ET
														
 
															+except ImportError:
														
 
															+    import xml.etree.ElementTree as ET
														
 
															+
														
 
															+
														
 
															+def decide_coordinate_full_contains(coordinate1, coordinate2):
														
 
															+    xmin1 = coordinate1[0]
														
 
															+    ymin1 = coordinate1[1]
														
 
															+    xmax1 = coordinate1[2]
														
 
															+    ymax1 = coordinate1[3]
														
 
															+    mid_x = int(xmin1 + (xmax1 - xmin1)//2)
														
 
															+    mid_y = int(ymin1 + (ymax1 - ymin1)//2)
														
 
															+
														
 
															+    xmin2 = coordinate2[0]
														
 
															+    ymin2 = coordinate2[1]
														
 
															+    xmax2 = coordinate2[2]
														
 
															+    ymax2 = coordinate2[3]
														
 
															+
														
 
															+    if xmin1 <= xmin2 and ymin1 <= ymin2 and xmax1 >= xmax2 and ymax1 >= ymax2:
														
 
															+        return True
														
 
															+    else:
														
 
															+        return False
														
 
															+
														
 
															+
														
 
															+def analyse_solve_solve0_result(result, ele):
														
 
															+    key_words = ['T', 'C', 'S', 'N', 'O', 'E']
														
 
															+    key_words_list = [ele[1] for ele in result]
														
 
															+    move_m_list = [ele for ele in key_words_list if ele != 'M']
														
 
															+    new_list = []
														
 
															+    if 'O' not in key_words_list:
														
 
															+        if 'N' in move_m_list and 'T' in move_m_list:
														
 
															+            index_n = key_words_list.index('N')
														
 
															+            index_t = key_words_list.index('T')
														
 
															+            numeber_str = result[index_n][0]
														
 
															+            number_pattern = re.findall('\d+', numeber_str)
														
 
															+            title_number = int(number_pattern[0])
														
 
															+            ele['number'] = title_number
														
 
															+
														
 
															+            total_score_str = result[index_t][0]
														
 
															+            total_score_pattern = re.findall('\d+', total_score_str)
														
 
															+            total_score = int(total_score_pattern[0])
														
 
															+            ele['default_points'] = total_score
														
 
															+            new_list.append(ele)
														
 
															+        elif 'N' not in move_m_list and 'T' in move_m_list:
														
 
															+            index_t = key_words_list.index('T')
														
 
															+            total_score_str = result[index_t][0]
														
 
															+            total_score_pattern = re.findall('\d+', total_score_str)
														
 
															+            total_score = int(total_score_pattern[0])
														
 
															+            ele['default_points'] = total_score
														
 
															+            new_list.append(ele)
														
 
															+        elif 'N' in move_m_list and 'T' not in move_m_list:
														
 
															+            index_n = key_words_list.index('N')
														
 
															+            numeber_str = result[index_n][0]
														
 
															+            number_pattern = re.findall('\d+', numeber_str)
														
 
															+            title_number = int(number_pattern[0])
														
 
															+            ele['number'] = title_number
														
 
															+            new_list.append(ele)
														
 
															+    return new_list
														
 
															+
														
 
															+
														
 
															+def analyse_choice_result(result, choice_m_list):
														
 
															+    key_words_list = [ele[1] for ele in result]
														
 
															+    move_m_list = [ele for ele in key_words_list if ele != 'M']
														
 
															+    o_len = [ele for ele in key_words_list if ele == 'O']
														
 
															+
														
 
															+    if 'O' in key_words_list:
														
 
															+        index_o = [index for index, ele in enumerate(key_words_list) if ele == 'O']
														
 
															+        split_0_index = index_o
														
 
															+        split_0_index.insert(-1, len(key_words_list))
														
 
															+        split_0_index = sorted(list(set(split_0_index)))
														
 
															+        split_by_o_list = []
														
 
															+        for index, ele in enumerate(split_0_index):
														
 
															+            if index == 0:
														
 
															+                one_part = result[0: (split_0_index[index + 1]) - 1]
														
 
															+                split_by_o_list.append(one_part)
														
 
															+            elif ele == len(key_words_list):
														
 
															+                break
														
 
															+            else:
														
 
															+                one_part = result[split_0_index[index]: (split_0_index[index + 1]) - 1]
														
 
															+                split_by_o_list.append(one_part)
														
 
															+        print(split_by_o_list)
														
 
															+        number_with_value = []
														
 
															+        number_with_value1 = []
														
 
															+        for index0, ele0 in enumerate(split_by_o_list):
														
 
															+            part_key_words = [ele[1] for ele in ele0]
														
 
															+            index_oo = part_key_words.index('O')
														
 
															+            index_ss = part_key_words.index('S')
														
 
															+            contiue_number0 = ele0[index_oo]
														
 
															+            number_list = []
														
 
															+            if '-' in contiue_number0[0]:
														
 
															+                number_list = contiue_number0[0].split('-')
														
 
															+            elif '~' in contiue_number0[0]:
														
 
															+                number_list = contiue_number0[0].split('~')
														
 
															+
														
 
															+            number_list = [int(ele) for ele in number_list]
														
 
															+            number_list_all = [i for i in range(number_list[0], number_list[1] + 1)]
														
 
															+            value = ele0[index_ss][0]
														
 
															+            nlp_number_value_dict = {}
														
 
															+            nlp_number_value_dict['number_list'] = number_list_all
														
 
															+            nlp_number_value_dict['value'] = value
														
 
															+            number_with_value1.append(nlp_number_value_dict)
														
 
															+            for ele in number_list_all:
														
 
															+                number_with_value.append({ele: value})
														
 
															+        print(number_with_value)
														
 
															+
														
 
															+        for nlp_number in number_with_value1:
														
 
															+            number_list_nlp0 = nlp_number['number_list']
														
 
															+            value_nlp = nlp_number['value']
														
 
															+            for choice_m_box in choice_m_list:
														
 
															+                number_list_raw0 = choice_m_box['number']
														
 
															+                decide_whether_inclue = [False for c in number_list_raw0 if c not in number_list_nlp0]
														
 
															+                count_of_False = decide_whether_inclue.count(False)
														
 
															+                rows = choice_m_box['rows']
														
 
															+                if count_of_False / rows > 0.8:
														
 
															+                    continue
														
 
															+                elif count_of_False / rows <= 0.4:
														
 
															+                    points_list = [float(value_nlp) for i in range(0, rows)]
														
 
															+                    choice_m_box['default_points'] = points_list
														
 
															+        return choice_m_list
														
 
															+
														
 
															+
														
 
															+def analyse_cloze_result(result, cloze_and_cloze_s_list):
														
 
															+    new_list = []
														
 
															+    for cloze_and_cloze_s_ele in cloze_and_cloze_s_list:
														
 
															+        cloze_s_info = cloze_and_cloze_s_ele['cloze_s_info']
														
 
															+
														
 
															+        key_words_list = [ele[1] for ele in result]
														
 
															+        move_m_list = [ele for ele in key_words_list if ele != 'M']
														
 
															+
														
 
															+        for cloze_s_ele in cloze_s_info:
														
 
															+            if 'S' in move_m_list and 'T' in move_m_list:
														
 
															+                index_n = key_words_list.index('S')
														
 
															+                value_str = result[index_n][0]
														
 
															+                value_pattern = re.findall('\d+', value_str)
														
 
															+                value = int(value_pattern[0])
														
 
															+                cloze_s_ele['number'] = value
														
 
															+
														
 
															+            elif 'C' not in move_m_list and 'T' in move_m_list:
														
 
															+                index_t = key_words_list.index('T')
														
 
															+                index_c = key_words_list.index('C')
														
 
															+
														
 
															+                total_score_str = result[index_t][0]
														
 
															+                total_score_pattern = re.findall('\d+', total_score_str)
														
 
															+                total_score = int(total_score_pattern[0])
														
 
															+                value_per = float(total_score / int(index_c))
														
 
															+                cloze_s_ele['default_points'] = value_per
														
 
															+                new_list.append(cloze_s_ele)
														
 
															+    return new_list
														
 
															+
														
 
															+
														
 
															+def analyse_cloze_result1(result, cloze_and_cloze_s_list):
														
 
															+    new_list = []
														
 
															+    for cloze_and_cloze_s_ele in cloze_and_cloze_s_list:
														
 
															+        cloze_s_info = cloze_and_cloze_s_ele['cloze_s_info']
														
 
															+
														
 
															+        key_words_list = [ele[1] for ele in result]
														
 
															+        move_m_list = [ele for ele in key_words_list if ele != 'M']
														
 
															+
														
 
															+        for cloze_s_ele in cloze_s_info:
														
 
															+            if 'S' in move_m_list and 'T' in move_m_list:
														
 
															+                index_n = key_words_list.index('S')
														
 
															+                value_str = result[index_n][0]
														
 
															+                value_pattern = re.findall('\d+', value_str)
														
 
															+                value = int(value_pattern[0])
														
 
															+                cloze_s_ele['number'] = value
														
 
															+
														
 
															+            elif 'C' not in move_m_list and 'T' in move_m_list:
														
 
															+                index_t = key_words_list.index('T')
														
 
															+                index_c = key_words_list.index('C')
														
 
															+
														
 
															+                total_score_str = result[index_t][0]
														
 
															+                total_score_pattern = re.findall('\d+', total_score_str)
														
 
															+                total_score = int(total_score_pattern[0])
														
 
															+                value_per = float(total_score / int(index_c))
														
 
															+                cloze_s_ele['default_points'] = value_per
														
 
															+                new_list.append(cloze_s_ele)
														
 
															+    return new_list
														
 
															+
														
 
															+
														
 
															+def get_sheet_points_by_nlp(sheet_dict):
														
 
															+    # json_path = r'C:\Users\Administrator\Desktop\type_score_nlp\type_score_info\example\english\33.json'
														
 
															+    # file = open(json_path, 'r', encoding='gbk').read()
														
 
															+    # json_file = ast.literal_eval(file)
														
 
															+    regions = sheet_dict['regions']
														
 
															+    ocr_list = []
														
 
															+    new_list = []
														
 
															+    choice_m_list = [ele for ele in regions if ele['class_name'] == 'choice_m']
														
 
															+    cloze_list = [ele for ele in regions if ele['class_name'] == 'cloze']
														
 
															+    cloze_and_cloze_s_list = []
														
 
															+    for element_cloze in cloze_list:
														
 
															+        cloze_box = element_cloze['bounding_box']
														
 
															+        cloze_bbox = [cloze_box['xmin'], cloze_box['ymin'], cloze_box['xmax'], cloze_box['ymax']]
														
 
															+        cloze_s_dict = {}
														
 
															+        cloze_s_list = []
														
 
															+        for element in regions:
														
 
															+            if element['class_name'] == 'cloze_s':
														
 
															+                cloze_s_box = element['bounding_box']
														
 
															+                cloze_s_bbox = [cloze_s_box['xmin'], cloze_s_box['ymin'], cloze_s_box['xmax'], cloze_s_box['ymax']]
														
 
															+
														
 
															+                if decide_coordinate_full_contains(cloze_bbox, cloze_s_bbox) == True:
														
 
															+                    cloze_s_list.append(element)
														
 
															+        cloze_s_dict['cloze_info'] = element_cloze
														
 
															+        cloze_s_dict['cloze_s_info'] = cloze_s_list
														
 
															+        cloze_and_cloze_s_list.append(cloze_s_dict)
														
 
															+    print(cloze_and_cloze_s_list)
														
 
															+
														
 
															+    for ele in regions:
														
 
															+        if 'type_score_ocr' in ele:
														
 
															+            ocr_list.append(ele)
														
 
															+    for index, ele in enumerate(ocr_list):
														
 
															+        ocr_content = ele['type_score_ocr']
														
 
															+        taggers = CRFPP.Tagger("-m " + './segment/sheet_resolve/model/nlp_model/crf2.model')
														
 
															+        tb = TagParse(taggers)
														
 
															+        result = tb.get_tag_val(ocr_content)
														
 
															+        if ele['class_name'] == 'cloze':
														
 
															+            print(ele)
														
 
															+
														
 
															+            cloze_and_cloze_s_list0 = []
														
 
															+            for ele1 in cloze_and_cloze_s_list:
														
 
															+                print(ele1)
														
 
															+                ele1_cloze = [ele1['cloze_info']['bounding_box']['xmin'], ele1['cloze_info']['bounding_box']['ymin'],
														
 
															+                              ele1['cloze_info']['bounding_box']['xmax'], ele1['cloze_info']['bounding_box']['ymax']]
														
 
															+                ele_cloze = [ele['bounding_box']['xmin'], ele['bounding_box']['ymin'],
														
 
															+                             ele['bounding_box']['xmax'], ele['bounding_box']['ymax']]
														
 
															+                if ele1_cloze == ele_cloze:
														
 
															+                    cloze_and_cloze_s_list0.append(ele1)
														
 
															+            new_list = analyse_cloze_result(result, cloze_and_cloze_s_list0)
														
 
															+        elif ele['class_name'] == 'choice':
														
 
															+            new_list = analyse_choice_result(result, choice_m_list)
														
 
															+        else:
														
 
															+            new_list = analyse_solve_solve0_result(result, ele)
														
 
															+
														
 
															+    for index0, ele0 in enumerate(regions):
														
 
															+        for index1, ele1 in enumerate(new_list):
														
 
															+            class_name0 = ele0['class_name']
														
 
															+            bounding_box0 = ele0['bounding_box']
														
 
															+
														
 
															+            class_name1 = ele1['class_name']
														
 
															+            bounding_box1 = ele1['bounding_box']
														
 
															+            if class_name0 == class_name1 and bounding_box0 == bounding_box1:
														
 
															+                ele0['default_points'] = ele1['default_points']
														
 
															+
														
 
															+    # pop type_score_ocr
														
 
															+    for ele in regions:
														
 
															+        if 'type_score_ocr' in ele:
														
 
															+            ele.pop('type_score_ocr')
														
 
															+
														
 
															+    sheet_dict.update({'regions': regions})
														
 
															+    return sheet_dict
														
--- a/segment/sheet_resolve/analysis/sheet/sheet_points_total.py
+++ b/segment/sheet_resolve/analysis/sheet/sheet_points_total.py
@@ -1,8 +1,9 @@
 
															 # -*- coding: utf-8 -*-
														
 
															-# @Time : 2020/5/22 0022 17:02
														
 
															+# @Time : 2020/5/28 0022 17:02
														
 
															 # @Author : LF
														
 
															 # @FileName: sheet_points_total.py
														
 
															 # @Software: PyCharm
														
 
															+# local_baidu_OCR
														
 
															 import requests
														
 
															 import base64
														
@@ -14,10 +15,10 @@ from PIL import Image
 
															 from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format
														
 
															 from segment.sheet_resolve.analysis.sheet.ocr_key_words import key_words
														
 
															-try:
														
 
															-    import tr
														
 
															-except Exception:
														
 
															-    pass
														
 
															+# try:
														
 
															+#     import tr
														
 
															+# except Exception:
														
 
															+#     pass
														
 
															 OCR_ACCURACY = 'accurate'
														
@@ -360,7 +361,6 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															     '''解析type_score与对应分割模块的分数'''
														
 
															     for i in range(len(type_score_boxs)):
														
 
															-        type_score_flag = 1
														
 
															         test_result1 = model_type_score(type_score_boxs[i], choice_boxs, cloze_boxs, solve_boxs, composition_boxs)
														
 
															         if test_result1 != -1 and test_result1 != 0:
														
 
															             if type_score_boxs[i][0] - 5 > 0:
														
@@ -380,19 +380,19 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															             else:
														
 
															                 ymaxss = type_score_boxs[i][3]
														
 
															             test_result1['words'] = str()
														
 
															-            try:  # tr_OCR
														
 
															-                print('tr_OCR')
														
 
															-                image_src_type_score = image_src.crop((xminss, yminss, xmaxss, ymaxss))
														
 
															-                type_score_dict_ocr = tr.run(image_src_type_score)
														
 
															-                for t in range(len(type_score_dict_ocr)):
														
 
															-                    test_result1['words'] = test_result1['words'] + type_score_dict_ocr[t][1]
														
 
															-            except Exception as e:  # baidu_OCR
														
 
															-                print('baidu_OCR')
														
 
															-                type_score_dict_ocr = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
														
 
															-                for t in range(len(type_score_dict_ocr['words'])):
														
 
															-                    test_result1['words'] = test_result1['words'] + type_score_dict_ocr['words'][t]
														
 
															-
														
 
															-            test = key_words(test_result1, type_score_flag)
														
 
															+            # try:  # tr_OCR
														
 
															+            #     image_src_type_score = image_src.crop((xminss, yminss, xmaxss, ymaxss))
														
 
															+            #     type_score_dict_ocr = tr.run(image_src_type_score)
														
 
															+            #     print('tr_OCR')
														
 
															+            #     for t in range(len(type_score_dict_ocr)):
														
 
															+            #         test_result1['words'] = test_result1['words'] + type_score_dict_ocr[t][1]
														
 
															+            # except Exception as e:  # baidu_OCR
														
 
															+            #     print('baidu_OCR')
														
 
															+            type_score_dict_ocr = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
														
 
															+            for t in range(len(type_score_dict_ocr['words'])):
														
 
															+                test_result1['words'] = test_result1['words'] + type_score_dict_ocr['words'][t]
														
 
															+
														
 
															+            test = key_words(test_result1)
														
 
															             if test == {}:
														
 
															                 ### 添加返回值OCR结果
														
 
															                 add_ocr = {}
														
@@ -443,7 +443,6 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                 if solve_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
														
 
															                     solve_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
														
 
															     if choice_boxs != []:  # 9月16号修改
														
 
															-        type_score_flag = 0
														
 
															         for ij in range(len(choice_boxs)):
														
 
															             if choice_boxs[ij][1] - 150 > 0:
														
 
															                 yminss = choice_boxs[ij][1] - 150
														
@@ -453,52 +452,59 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                 xminss = choice_boxs[ij][0] - 100
														
 
															             else:
														
 
															                 xminss = choice_boxs[ij][0]
														
 
															-            if yminss + 200 < img_h:
														
 
															-                ymaxss = yminss + 200
														
 
															-            else:
														
 
															-                ymaxss = choice_boxs[ij][3]
														
 
															-            type_score_dict_ocrs = {}
														
 
															-            new_test = {}
														
 
															-
														
 
															-            try:  # tr_OCR
														
 
															-                print('tr_OCR')
														
 
															-                image_choice = image_src.crop((xminss, yminss, choice_boxs[ij][2], ymaxss))
														
 
															-                res1 = tr.run(image_choice)
														
 
															-                for i in range(len(res1)):
														
 
															-                    if res1[i][1].find('分') != -1:
														
 
															-                        type_score_dict_ocrs['words'] = res1[i][1]
														
 
															-                    else:
														
 
															-                        continue
														
 
															-            except Exception as e:  # baidu_OCR
														
 
															-                print('baidu_OCR')
														
 
															-                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:choice_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
														
 
															-                for i in range(len(res1['words'])):
														
 
															-                    if res1['words'][i].find('分') != -1:
														
 
															-                        type_score_dict_ocrs['words'] = res1['words'][i]
														
 
															-                    else:
														
 
															-                        continue
														
 
															-            if type_score_dict_ocrs != {}:
														
 
															-                new_test = key_words(type_score_dict_ocrs, type_score_flag)
														
 
															-            if new_test != {} and new_test['volume_structure'] != -1 and (
														
 
															-                    int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(
														
 
															-                    new_test['volume_structure'][0]['volume_score']) > 4):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															-                if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															-                    new_test['volume_structure'][0]['volume_total_score'] = int(
														
 
															-                        new_test['volume_structure'][0]['volume_total_score']) % 100
														
 
															-                new_test['volume_structure'][0]['bounding_box'] = choice_boxs[ij]
														
 
															-                new_test['volume_structure'][0]['label'] = 'choice'
														
 
															-                all_test.append(new_test)
														
 
															-            elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (
														
 
															-                    int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(
														
 
															-                    new_test['Score_structure'][0]['item_score']) > 4):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															-                if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															-                    new_test['Score_structure'][0]['item_total_score'] = int(
														
 
															-                        new_test['Score_structure'][0]['item_total_score']) % 100
														
 
															-                new_test['Score_structure'][0]['bounding_box'] = choice_boxs[ij]
														
 
															-                new_test['Score_structure'][0]['label'] = 'choice'
														
 
															-                all_test.append(new_test)
														
 
															+            try:
														
 
															+                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:choice_boxs[ij][3], xminss:choice_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
														
 
															+                aa = []
														
 
															+                type_score_dict_ocrs = {}
														
 
															+                for ii in range(len(res1['coordinates'])):
														
 
															+                    xmin11 = res1['coordinates'][ii][0] + choice_boxs[ij][0]
														
 
															+                    ymin11 = res1['coordinates'][ii][1] + choice_boxs[ij][1]
														
 
															+                    xmax11 = res1['coordinates'][ii][2] + choice_boxs[ij][0]
														
 
															+                    ymax11 = res1['coordinates'][ii][3] + choice_boxs[ij][1]
														
 
															+                    aaa = (xmin11, ymin11, xmax11, ymax11)
														
 
															+                    aa.append(aaa)
														
 
															+                res1['coordinates'] = aa
														
 
															+                new_test = {}
														
 
															+                if len(res1['words']) > 0:
														
 
															+                    type_score_dict_ocrs['words'] = res1['words'][0]
														
 
															+                    new_test = key_words(type_score_dict_ocrs)
														
 
															+                    if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                        if len(res1['words']) > 1:
														
 
															+                            type_score_dict_ocrs['words'] = res1['words'][1]
														
 
															+                            new_test = key_words(type_score_dict_ocrs)
														
 
															+                            if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                if len(res1['words']) > 2:
														
 
															+                                    type_score_dict_ocrs['words'] = res1['words'][2]
														
 
															+                                    new_test = key_words(type_score_dict_ocrs)
														
 
															+                                if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                    if len(res1['words']) > 3:
														
 
															+                                        type_score_dict_ocrs['words'] = res1['words'][3]
														
 
															+                                        new_test = key_words(type_score_dict_ocrs)
														
 
															+                                    if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                        if len(res1['words']) > 4:
														
 
															+                                            type_score_dict_ocrs['words'] = res1['words'][4]
														
 
															+                                            new_test = key_words(type_score_dict_ocrs)
														
 
															+                if new_test != {} and new_test['volume_structure'] != -1 and (
														
 
															+                        int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(
														
 
															+                        new_test['volume_structure'][0]['volume_score']) > 4):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															+                    if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															+                        new_test['volume_structure'][0]['volume_total_score'] = int(
														
 
															+                            new_test['volume_structure'][0]['volume_total_score']) % 100
														
 
															+                    new_test['volume_structure'][0]['bounding_box'] = choice_boxs[ij]
														
 
															+                    new_test['volume_structure'][0]['label'] = 'choice'
														
 
															+                    all_test.append(new_test)
														
 
															+                elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (
														
 
															+                        int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(
														
 
															+                        new_test['Score_structure'][0]['item_score']) > 4):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															+                    if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															+                        new_test['Score_structure'][0]['item_total_score'] = int(
														
 
															+                            new_test['Score_structure'][0]['item_total_score']) % 100
														
 
															+                    new_test['Score_structure'][0]['bounding_box'] = choice_boxs[ij]
														
 
															+                    new_test['Score_structure'][0]['label'] = 'choice'
														
 
															+                    all_test.append(new_test)
														
 
															+            except Exception:
														
 
															+                print('choice_boxs_score_NULL_or_error')
														
 
															     if cloze_boxs != []:
														
 
															-        type_score_flag = 0
														
 
															         for ij in range(len(cloze_boxs)):
														
 
															             if cloze_boxs[ij][1] - 100 > 0:
														
 
															                 yminss = cloze_boxs[ij][1] - 100
														
@@ -508,115 +514,107 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                 xminss = cloze_boxs[ij][0] - 100
														
 
															             else:
														
 
															                 xminss = cloze_boxs[ij][0]
														
 
															-            type_score_dict_ocrs = {}
														
 
															-            new_test = {}
														
 
															-
														
 
															-            try:  # tr_OCR
														
 
															-                print('tr_OCR')
														
 
															-                image_choice = image_src.crop((xminss, yminss, cloze_boxs[ij][2], cloze_boxs[ij][3]))
														
 
															-                res1 = tr.run(image_choice)
														
 
															-                for i in range(len(res1)):
														
 
															-                    if res1[i][1].find('分') != -1:
														
 
															-                        type_score_dict_ocrs['words'] = res1[i][1]
														
 
															-                    else:
														
 
															-                        continue
														
 
															-            except Exception as e:  # baidu_OCR
														
 
															-                print('baidu_OCR')
														
 
															-                res1 = get_ocr_text_and_coordinate_in_google_format(
														
 
															-                    img0[yminss:cloze_boxs[ij][3], xminss:cloze_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,
														
 
															-                    language_type='CHN_ENG')
														
 
															-                for i in range(len(res1['words'])):
														
 
															-                    if res1['words'][i].find('分') != -1:
														
 
															-                        type_score_dict_ocrs['words'] = res1['words'][i]
														
 
															-                    else:
														
 
															-                        continue
														
 
															-            if type_score_dict_ocrs != {}:
														
 
															-                new_test = key_words(type_score_dict_ocrs, type_score_flag)
														
 
															-            if new_test != {} and new_test['volume_structure'] != -1 and (int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(new_test['volume_structure'][0]['volume_score']) > 4):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															-                if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															-                    new_test['volume_structure'][0]['volume_total_score'] = int(
														
 
															-                        new_test['volume_structure'][0]['volume_total_score']) % 100
														
 
															-                new_test['volume_structure'][0]['bounding_box'] = cloze_boxs[ij]
														
 
															-                new_test['volume_structure'][0]['label'] = 'cloze'
														
 
															-                all_test.append(new_test)
														
 
															-            elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(new_test['Score_structure'][0]['item_score']) > 4):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															-                if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															-                    new_test['Score_structure'][0]['item_total_score'] = int(
														
 
															-                        new_test['Score_structure'][0]['item_total_score']) % 100
														
 
															-                new_test['Score_structure'][0]['bounding_box'] = cloze_boxs[ij]
														
 
															-                new_test['Score_structure'][0]['label'] = 'cloze'
														
 
															-                all_test.append(new_test)
														
 
															+            try:
														
 
															+                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:cloze_boxs[ij][3], xminss:cloze_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
														
 
															+                aa = []
														
 
															+                type_score_dict_ocrs = {}
														
 
															+                for ii in range(len(res1['coordinates'])):
														
 
															+                    xmin11 = res1['coordinates'][ii][0] + cloze_boxs[ij][0]
														
 
															+                    ymin11 = res1['coordinates'][ii][1] + cloze_boxs[ij][1]
														
 
															+                    xmax11 = res1['coordinates'][ii][2] + cloze_boxs[ij][0]
														
 
															+                    ymax11 = res1['coordinates'][ii][3] + cloze_boxs[ij][1]
														
 
															+                    aaa = (xmin11, ymin11, xmax11, ymax11)
														
 
															+                    aa.append(aaa)
														
 
															+                res1['coordinates'] = aa
														
 
															+                new_test = {}
														
 
															+                if len(res1['words']) > 0:
														
 
															+                    type_score_dict_ocrs['words'] = res1['words'][0]
														
 
															+                    new_test = key_words(type_score_dict_ocrs)
														
 
															+                    if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                        if len(res1['words']) > 1:
														
 
															+                            type_score_dict_ocrs['words'] = res1['words'][1]
														
 
															+                            new_test = key_words(type_score_dict_ocrs)
														
 
															+                            if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                if len(res1['words']) > 2:
														
 
															+                                    type_score_dict_ocrs['words'] = res1['words'][2]
														
 
															+                                    new_test = key_words(type_score_dict_ocrs)
														
 
															+                                if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                    if len(res1['words']) > 3:
														
 
															+                                        type_score_dict_ocrs['words'] = res1['words'][3]
														
 
															+                                        new_test = key_words(type_score_dict_ocrs)
														
 
															+                                    if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                        if len(res1['words']) > 4:
														
 
															+                                            type_score_dict_ocrs['words'] = res1['words'][4]
														
 
															+                                            new_test = key_words(type_score_dict_ocrs)
														
 
															+                if new_test != {} and new_test['volume_structure'] != -1 and (int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(new_test['volume_structure'][0]['volume_score']) > 4):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															+                    if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															+                        new_test['volume_structure'][0]['volume_total_score'] = int(
														
 
															+                            new_test['volume_structure'][0]['volume_total_score']) % 100
														
 
															+                    new_test['volume_structure'][0]['bounding_box'] = cloze_boxs[ij]
														
 
															+                    new_test['volume_structure'][0]['label'] = 'cloze'
														
 
															+                    all_test.append(new_test)
														
 
															+                elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(new_test['Score_structure'][0]['item_score']) > 4):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															+                    if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															+                        new_test['Score_structure'][0]['item_total_score'] = int(
														
 
															+                            new_test['Score_structure'][0]['item_total_score']) % 100
														
 
															+                    new_test['Score_structure'][0]['bounding_box'] = cloze_boxs[ij]
														
 
															+                    new_test['Score_structure'][0]['label'] = 'cloze'
														
 
															+                    all_test.append(new_test)
														
 
															+            except Exception:
														
 
															+                print('cloze_boxs_score_NULL_or_error')
														
 
															     if solve_boxs != []:
														
 
															-        type_score_flag = 0
														
 
															         for ij in range(len(solve_boxs)):
														
 
															-            xminss = solve_boxs[ij][0]
														
 
															             yminss = solve_boxs[ij][1]
														
 
															-            if solve_boxs[ij][2] - xminss > 1000:
														
 
															-                xmaxss = xminss + 1000
														
 
															-            else:
														
 
															-                xmaxss = solve_boxs[ij][2]
														
 
															-            if yminss + 500 > img_h:
														
 
															-                ymaxss = yminss + 500
														
 
															-            else:
														
 
															-                ymaxss = solve_boxs[ij][3]
														
 
															-            type_score_dict_ocrs = {}
														
 
															-            new_test = {}
														
 
															-
														
 
															-            try:  # tr_OCR
														
 
															-                print('tr_OCR')
														
 
															-                image_choice = image_src.crop((xminss, yminss, xmaxss, ymaxss))
														
 
															-                res1 = tr.run(image_choice)
														
 
															-                for i in range(len(res1)):
														
 
															-                    if res1[i][1].find('分') != -1:
														
 
															-                        type_score_dict_ocrs['words'] = res1[i][1]
														
 
															-                    elif i == len(res1)-1:
														
 
															-                        for ii in range(len(res1)):
														
 
															-                            if res1[ii][1].find('题') != -1 or res1[ii][1].find('.') != -1 or res1[ii][1].find('、') != -1:
														
 
															-                                type_score_dict_ocrs['words'] = res1[ii][1]
														
 
															-                            else:
														
 
															-                                continue
														
 
															-                    else:
														
 
															-                        continue
														
 
															-            except Exception as e:  # baidu_OCR
														
 
															-                print('baidu_OCR')
														
 
															-                res1 = get_ocr_text_and_coordinate_in_google_format(
														
 
															-                    img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,
														
 
															-                    language_type='CHN_ENG')
														
 
															-                for i in range(len(res1['words'])):
														
 
															-                    if res1['words'][i].find('分') != -1:
														
 
															-                        type_score_dict_ocrs['words'] = res1['words'][i]
														
 
															-                    elif i == len(res1['words'])-1:
														
 
															-                        for ii in range(len(res1['words'])):
														
 
															-                            if res1['words'][ii].find('题') != -1 or res1['words'][ii][1].find('.') != -1 or res1['words'][ii].find('、') != -1:
														
 
															-                                type_score_dict_ocrs['words'] = res1['words'][ii]
														
 
															-                            else:
														
 
															-                                continue
														
 
															-                    else:
														
 
															-                        continue
														
 
															-            if type_score_dict_ocrs != {}:
														
 
															-                new_test = key_words(type_score_dict_ocrs, type_score_flag)
														
 
															-            if new_test != {} and new_test['volume_structure'] != -1 and int(new_test['volume_structure'][0][
														
 
															-                                                                                 'volume_total_score']) > 5:  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															-                if int(new_test['volume_structure'][0][
														
 
															-                           'volume_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															-                    new_test['volume_structure'][0]['volume_total_score'] = int(
														
 
															-                        new_test['volume_structure'][0]['volume_total_score']) % 100
														
 
															-                new_test['volume_structure'][0]['bounding_box'] = solve_boxs[ij]
														
 
															-                new_test['volume_structure'][0]['label'] = 'solve'
														
 
															-                all_test.append(new_test)
														
 
															-            elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
														
 
															-                'Score_structure'] != -1 and (
														
 
															-                    int(new_test['Score_structure'][0]['item_total_score']) > 5 or int(
														
 
															-                    new_test['Score_structure'][0][
														
 
															-                        'item_total_score']) == -1):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															-                if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															-                    new_test['Score_structure'][0]['item_total_score'] = int(
														
 
															-                        new_test['Score_structure'][0]['item_total_score']) % 100
														
 
															-                new_test['Score_structure'][0]['bounding_box'] = solve_boxs[ij]
														
 
															-                new_test['Score_structure'][0]['label'] = 'solve'
														
 
															-                all_test.append(new_test)
														
 
															+            xminss = solve_boxs[ij][0]
														
 
															+            try:
														
 
															+                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:solve_boxs[ij][3], xminss:solve_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
														
 
															+                aa = []
														
 
															+                type_score_dict_ocrs = {}
														
 
															+                for ii in range(len(res1['coordinates'])):
														
 
															+                    xmin11 = res1['coordinates'][ii][0] + solve_boxs[ij][0]
														
 
															+                    ymin11 = res1['coordinates'][ii][1] + solve_boxs[ij][1]
														
 
															+                    xmax11 = res1['coordinates'][ii][2] + solve_boxs[ij][0]
														
 
															+                    ymax11 = res1['coordinates'][ii][3] + solve_boxs[ij][1]
														
 
															+                    aaa = (xmin11, ymin11, xmax11, ymax11)
														
 
															+                    aa.append(aaa)
														
 
															+                res1['coordinates'] = aa
														
 
															+                new_test = {}
														
 
															+                if len(res1['words']) > 0:
														
 
															+                    type_score_dict_ocrs['words'] = res1['words'][0]
														
 
															+                    new_test = key_words(type_score_dict_ocrs)
														
 
															+                    if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                        if len(res1['words']) > 1:
														
 
															+                            type_score_dict_ocrs['words'] = res1['words'][1]
														
 
															+                            new_test = key_words(type_score_dict_ocrs)
														
 
															+                            if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                if len(res1['words']) > 2:
														
 
															+                                    type_score_dict_ocrs['words'] = res1['words'][2]
														
 
															+                                    new_test = key_words(type_score_dict_ocrs)
														
 
															+                                if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                    if len(res1['words']) > 3:
														
 
															+                                        type_score_dict_ocrs['words'] = res1['words'][3]
														
 
															+                                        new_test = key_words(type_score_dict_ocrs)
														
 
															+                                    if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                        if len(res1['words']) > 4:
														
 
															+                                            type_score_dict_ocrs['words'] = res1['words'][4]
														
 
															+                                            new_test = key_words(type_score_dict_ocrs)
														
 
															+                if new_test != {} and new_test['volume_structure'] != -1 and int(new_test['volume_structure'][0]['volume_total_score']) > 5:  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															+                    if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															+                        new_test['volume_structure'][0]['volume_total_score'] = int(new_test['volume_structure'][0]['volume_total_score']) % 100
														
 
															+                    new_test['volume_structure'][0]['bounding_box'] = solve_boxs[ij]
														
 
															+                    new_test['volume_structure'][0]['label'] = 'solve'
														
 
															+                    all_test.append(new_test)
														
 
															+                elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
														
 
															+                    'Score_structure'] != -1 and (
														
 
															+                        int(new_test['Score_structure'][0]['item_total_score']) > 5 or int(new_test['Score_structure'][0]['item_total_score']) == -1):  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															+                    if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															+                        new_test['Score_structure'][0]['item_total_score'] = int(new_test['Score_structure'][0]['item_total_score']) % 100
														
 
															+                    new_test['Score_structure'][0]['bounding_box'] = solve_boxs[ij]
														
 
															+                    new_test['Score_structure'][0]['label'] = 'solve'
														
 
															+                    all_test.append(new_test)
														
 
															+            except Exception:
														
 
															+                print('solve_boxs_score_NULL_or_error')
														
 
															     if composition_boxs != []:
														
 
															-        type_score_flag = 0
														
 
															         for ij in range(len(composition_boxs)):
														
 
															             if composition_boxs[ij][1] - 250 > 0:
														
 
															                 yminss = composition_boxs[ij][1] - 250
														
@@ -626,55 +624,57 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                 xminss = composition_boxs[ij][0] - 100
														
 
															             else:
														
 
															                 xminss = composition_boxs[ij][0]
														
 
															-            type_score_dict_ocrs = {}
														
 
															-
														
 
															-            try:  # tr_OCR
														
 
															-                print('tr_OCR')
														
 
															-                image_choice = image_src.crop((xminss, yminss, composition_boxs[ij][2], composition_boxs[ij][3]))
														
 
															-                res1 = tr.run(image_choice)
														
 
															-                for i in range(len(res1)):
														
 
															-                    if res1[i][1].find('分') != -1:
														
 
															-                        type_score_dict_ocrs['words'] = res1[i][1]
														
 
															-                    elif i == len(res1):
														
 
															-                        for ii in range(len(res1)):
														
 
															-                            if res1[i][1].find('题') != -1 or res1[i][1].find('.') != -1 or res1[i][1].find('、') != -1:
														
 
															-                                type_score_dict_ocrs['words'] = res1[i][1]
														
 
															-                            else:
														
 
															-                                continue
														
 
															-            except Exception as e:  # baidu_OCR
														
 
															-                print('baidu_OCR')
														
 
															-                res1 = get_ocr_text_and_coordinate_in_google_format(
														
 
															-                    img0[yminss:composition_boxs[ij][3], xminss:composition_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,
														
 
															-                    language_type='CHN_ENG')
														
 
															-                for i in range(len(res1['words'])):
														
 
															-                    if res1['words'][i].find('分') != -1:
														
 
															-                        type_score_dict_ocrs['words'] = res1['words'][i]
														
 
															-                    elif i == len(res1):
														
 
															-                        for ii in range(len(res1['words'])):
														
 
															-                            if res1['words'][i].find('题') != -1 or res1['words'][i][1].find('.') != -1 or res1['words'][
														
 
															-                                i].find('、') != -1:
														
 
															-                                type_score_dict_ocrs['words'] = res1['words'][i]
														
 
															-                            else:
														
 
															-                                continue
														
 
															-            if type_score_dict_ocrs != {}:
														
 
															-                new_test = key_words(type_score_dict_ocrs, type_score_flag)
														
 
															-            if new_test != {} and new_test['volume_structure'] != -1 and int(
														
 
															-                    new_test['volume_structure'][0]['volume_total_score']) > 4:  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															-                if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															-                    new_test['volume_structure'][0]['volume_total_score'] = int(
														
 
															-                        new_test['volume_structure'][0]['volume_total_score']) % 100
														
 
															-                new_test['volume_structure'][0]['bounding_box'] = composition_boxs[ij]
														
 
															-                new_test['volume_structure'][0]['label'] = 'composition'
														
 
															-                all_test.append(new_test)
														
 
															-            elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
														
 
															-                'Score_structure'] != -1 and int(
														
 
															-                new_test['Score_structure'][0]['item_total_score']) > 4:  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															-                if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															-                    new_test['Score_structure'][0]['item_total_score'] = int(
														
 
															-                        new_test['Score_structure'][0]['item_total_score']) % 100
														
 
															-                new_test['Score_structure'][0]['bounding_box'] = composition_boxs[ij]
														
 
															-                new_test['Score_structure'][0]['label'] = 'composition'
														
 
															-                all_test.append(new_test)
														
 
															+            try:
														
 
															+                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:composition_boxs[ij][3], xminss:composition_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
														
 
															+                aa = []
														
 
															+                type_score_dict_ocrs = {}
														
 
															+                for ii in range(len(res1['coordinates'])):
														
 
															+                    xmin11 = res1['coordinates'][ii][0] + composition_boxs[ij][0]
														
 
															+                    ymin11 = res1['coordinates'][ii][1] + composition_boxs[ij][1]
														
 
															+                    xmax11 = res1['coordinates'][ii][2] + composition_boxs[ij][0]
														
 
															+                    ymax11 = res1['coordinates'][ii][3] + composition_boxs[ij][1]
														
 
															+                    aaa = (xmin11, ymin11, xmax11, ymax11)
														
 
															+                    aa.append(aaa)
														
 
															+                res1['coordinates'] = aa
														
 
															+                new_test = {}
														
 
															+                if len(res1['words']) > 0:
														
 
															+                    type_score_dict_ocrs['words'] = res1['words'][0]
														
 
															+                    new_test = key_words(type_score_dict_ocrs)
														
 
															+                    if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                        if len(res1['words']) > 1:
														
 
															+                            type_score_dict_ocrs['words'] = res1['words'][1]
														
 
															+                            new_test = key_words(type_score_dict_ocrs)
														
 
															+                            if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                if len(res1['words']) > 2:
														
 
															+                                    type_score_dict_ocrs['words'] = res1['words'][2]
														
 
															+                                    new_test = key_words(type_score_dict_ocrs)
														
 
															+                                if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                    if len(res1['words']) > 3:
														
 
															+                                        type_score_dict_ocrs['words'] = res1['words'][3]
														
 
															+                                        new_test = key_words(type_score_dict_ocrs)
														
 
															+                                    if new_test == {} or new_test['Score_structure'] == -1:
														
 
															+                                        if len(res1['words']) > 4:
														
 
															+                                            type_score_dict_ocrs['words'] = res1['words'][4]
														
 
															+                                            new_test = key_words(type_score_dict_ocrs)
														
 
															+                if new_test != {} and new_test['volume_structure'] != -1 and int(
														
 
															+                        new_test['volume_structure'][0]['volume_total_score']) > 4:  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															+                    if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															+                        new_test['volume_structure'][0]['volume_total_score'] = int(
														
 
															+                            new_test['volume_structure'][0]['volume_total_score']) % 100
														
 
															+                    new_test['volume_structure'][0]['bounding_box'] = composition_boxs[ij]
														
 
															+                    new_test['volume_structure'][0]['label'] = 'composition'
														
 
															+                    all_test.append(new_test)
														
 
															+                elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
														
 
															+                    'Score_structure'] != -1 and int(
														
 
															+                    new_test['Score_structure'][0]['item_total_score']) > 4:  # 如果识别到分数，添加到输出信息；如果还没有识别到分数，默认没有分数
														
 
															+                    if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内，超过200的表示识别错误
														
 
															+                        new_test['Score_structure'][0]['item_total_score'] = int(
														
 
															+                            new_test['Score_structure'][0]['item_total_score']) % 100
														
 
															+                    new_test['Score_structure'][0]['bounding_box'] = composition_boxs[ij]
														
 
															+                    new_test['Score_structure'][0]['label'] = 'composition'
														
 
															+                    all_test.append(new_test)
														
 
															+            except Exception:
														
 
															+                print('composition_boxs_score_NULL_or_error')
														
 
															     for aaa in range(len(all_test)):
														
 
															         if all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] == -1:
														
 
															             score_last_one = {'model_box': dict(all_test[aaa])['Score_structure'][0]['bounding_box'],
														
@@ -793,17 +793,20 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                                 count_choice_m = count_choice_m + len(answer_sheet['regions'][j]['number'])
														
 
															                                 j_temp.append(j)
														
 
															                         if j == len(answer_sheet['regions']) - 1 and j_temp !=[]:
														
 
															-                            for index, jj in enumerate(j_temp):
														
 
															-                                num_score_m = round(float(Score_last[i]['score'] / count_choice_m),1)
														
 
															-                                answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
														
 
															-                            break
														
 
															-
														
 
															+                            try:
														
 
															+                                for index, jj in enumerate(j_temp):
														
 
															+                                    num_score_m_infer = round(float(Score_last[i]['score'] / count_choice_m), 2)
														
 
															+                                    num_score_m = [str(num_score_m_infer), int(num_score_m_infer)][int(num_score_m_infer) == num_score_m_infer]
														
 
															+                                    answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
														
 
															+                                break
														
 
															+                            except Exception:
														
 
															+                                pass
														
 
															                 elif Score_last[i]['label'] == 'cloze':
														
 
															-                    count_cloze_s = 0
														
 
															                     for j in range(len(answer_sheet['regions'])):
														
 
															                         if answer_sheet['regions'][j]['class_name'] == 'cloze_s':
														
 
															                             if Score_last[i]['number_score'] != -1:
														
 
															                                 answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
														
 
															+
														
 
															         elif num_choice > 1 or num_cloze >1:
														
 
															             for i in range(len(Score_last)):
														
 
															                 if Score_last[i]['label'] == 'choice':
														
@@ -825,12 +828,16 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                                     count_choice_m = count_choice_m + len(answer_sheet['regions'][j]['number'])
														
 
															                                     j_temp.append(j)
														
 
															                         if j == len(answer_sheet['regions']) - 1 and j_temp !=[]:
														
 
															-                            for index ,jj in enumerate(j_temp):
														
 
															-                                num_score_m = round(float(Score_last[i]['score'] / count_choice_m),1)
														
 
															-                                answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
														
 
															-                            break
														
 
															+                            try:
														
 
															+                                for index, jj in enumerate(j_temp):
														
 
															+                                    num_score_m_infer = round(float(Score_last[i]['score'] / count_choice_m), 2)
														
 
															+                                    num_score_m = [str(num_score_m_infer), int(num_score_m_infer)][int(num_score_m_infer) == num_score_m_infer]
														
 
															+                                    answer_sheet['regions'][jj]['default_points'] = len(answer_sheet['regions'][jj]['number']) * [num_score_m]
														
 
															+                                break
														
 
															+                            except Exception:
														
 
															+                                pass
														
 
															+
														
 
															                 elif Score_last[i]['label'] == 'cloze':
														
 
															-                    count_cloze_s = 0
														
 
															                     for j in range(len(answer_sheet['regions'])):
														
 
															                         if answer_sheet['regions'][j]['class_name'] == 'cloze_s':
														
 
															                             xmin_dis = answer_sheet['regions'][j]['bounding_box']['xmin'] - \
														
@@ -844,6 +851,7 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                             if xmin_dis > -30 and ymin_dis > -30 and xmax_dis < 30 and ymax_dis < 30:
														
 
															                                 if Score_last[i]['number_score'] != -1 :
														
 
															                                     answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
														
 
															+
														
 
															         elif choice_m_boxs !=[]:
														
 
															             x_choice_m_min = 10000
														
 
															             y_choice_m_min = 10000
														
@@ -870,63 +878,26 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                                   'bounding_box': choice_m_boxs,
														
 
															                                   'label': 'choice_m',
														
 
															                                   'type_box': type_score_choice_m}
														
 
															-                type_score_flag = 0
														
 
															-                type_score_dict_ocrs = {}
														
 
															-                try:  # tr_OCR
														
 
															-                    print('tr_OCR')
														
 
															-                    image_choice = image_src.crop((type_score_boxs[0][0], type_score_boxs[0][1], type_score_boxs[0][2], type_score_boxs[0][3]))
														
 
															-                    res1 = tr.run(image_choice)
														
 
															-                    for i in range(len(res1)):
														
 
															-                        if res1[i][1].find('分') != -1:
														
 
															-                            type_score_dict_ocrs['words'] = res1[i][1]
														
 
															-                        elif i == len(res1):
														
 
															-                            for ii in range(len(res1)):
														
 
															-                                if res1[i][1].find('题') != -1 or res1[i][1].find('.') != -1 or res1[i][1].find(
														
 
															-                                        '、') != -1:
														
 
															-                                    type_score_dict_ocrs['words'] = res1[i][1]
														
 
															-                                else:
														
 
															-                                    continue
														
 
															-                except Exception as e:  # baidu_OCR
														
 
															-                    print('baidu_OCR')
														
 
															-                    res1 = get_ocr_text_and_coordinate_in_google_format(
														
 
															-                        img0[type_score_boxs[0][1]:type_score_boxs[0][3], type_score_boxs[0][0]:type_score_boxs[0][2]], ocr_accuracy=OCR_ACCURACY,
														
 
															-                        language_type='CHN_ENG')
														
 
															-                    for i in range(len(res1['words'])):
														
 
															-                        if res1['words'][i].find('分') != -1:
														
 
															-                            type_score_dict_ocrs['words'] = res1['words'][i]
														
 
															-                        elif i == len(res1):
														
 
															-                            for ii in range(len(res1['words'])):
														
 
															-                                if res1['words'][i].find('题') != -1 or res1['words'][i][1].find('.') != -1 or \
														
 
															-                                        res1['words'][
														
 
															-                                            i].find('、') != -1:
														
 
															-                                    type_score_dict_ocrs['words'] = res1['words'][i]
														
 
															-                                else:
														
 
															-                                    continue
														
 
															-                if type_score_dict_ocrs != {}:
														
 
															-                    test = key_words(type_score_dict_ocrs, type_score_flag)
														
 
															+                test_result1['words'] = str()
														
 
															+                # try:  # tr_OCR
														
 
															+                #     image_choice = image_src.crop((type_score_choice_m[0], type_score_choice_m[1], type_score_choice_m[2], type_score_choice_m[3]))
														
 
															+                #     res1 = tr.run(image_choice)
														
 
															+                #     print('tr_OCR')
														
 
															+                #     for t in range(len(res1)):
														
 
															+                #         test_result1['words'] = test_result1['words'] + res1[t][1]
														
 
															+                # except Exception as e:  # baidu_OCR
														
 
															+                #     print('baidu_OCR')
														
 
															+                res1 = get_ocr_text_and_coordinate_in_google_format(
														
 
															+                    img0[type_score_choice_m[1]:type_score_choice_m[3], type_score_choice_m[0]:type_score_choice_m[2]], ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG')
														
 
															+                for t in range(len(res1['words'])):
														
 
															+                    test_result1['words'] = test_result1['words'] + res1['words'][t]
														
 
															+                if test_result1['words'] != {}:
														
 
															+                    test = key_words(test_result1)
														
 
															                 choice_m_score = -1
														
 
															                 if test == {}:
														
 
															-                    ### 添加返回值OCR结果
														
 
															-                    add_ocr = {}
														
 
															-                    add_ocr['model_box'] = test_result1['bounding_box']
														
 
															-                    add_ocr['label'] = test_result1['label']
														
 
															-                    add_ocr['number'] = -1
														
 
															-                    add_ocr['score'] = -1
														
 
															-                    add_ocr['number_score'] = -1
														
 
															-                    add_ocr['counts'] = -1
														
 
															-                    add_ocr['ocr'] = test_result1['words']
														
 
															-                    Score_last.append(add_ocr)
														
 
															+                    choice_m_type_score_ocr = test_result1['words']
														
 
															                 elif test['volume_structure'] == -1 and test['Score_structure'] == -1:
														
 
															-                    ### 添加返回值OCR结果
														
 
															-                    add_ocr = {}
														
 
															-                    add_ocr['model_box'] = test_result1['bounding_box']
														
 
															-                    add_ocr['label'] = test_result1['label']
														
 
															-                    add_ocr['number'] = -1
														
 
															-                    add_ocr['score'] = -1
														
 
															-                    add_ocr['number_score'] = -1
														
 
															-                    add_ocr['counts'] = -1
														
 
															-                    add_ocr['ocr'] = test_result1['words']
														
 
															-                    Score_last.append(add_ocr)
														
 
															+                    choice_m_type_score_ocr = test_result1['words']
														
 
															                 else:
														
 
															                     if test['volume_structure'] != -1 and test['volume_structure'][0]['volume_score'] != -1:
														
 
															                         choice_m_score = test['volume_structure'][0]['volume_score']
														
@@ -937,8 +908,14 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                         if answer_sheet['regions'][j]['class_name'] == 'choice_m':
														
 
															                             answer_sheet['regions'][j]['default_points'] = len(
														
 
															                                 answer_sheet['regions'][j]['number']) * [float(choice_m_score)]
														
 
															+                elif test_result1['words'] != {}:
														
 
															+                    for j in range(len(answer_sheet['regions'])):
														
 
															+                        if answer_sheet['regions'][j]['class_name'] == 'choice_m':
														
 
															+                            answer_sheet['regions'][j]['type_score_ocr'] = choice_m_type_score_ocr
														
 
															+
														
 
															         '''分数与模型对应'''
														
 
															+        ocr_flag = 0
														
 
															         for i in range(len(answer_sheet['regions'])):
														
 
															             for j in range(len(Score_last)):
														
 
															                 if (Score_last[j]['model_box'][0] == answer_sheet['regions'][i]['bounding_box']['xmin']
														
@@ -959,8 +936,11 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
															                                 answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i][
														
 
															                             'class_name'] == 'solve0'):
														
 
															                             answer_sheet['regions'][i]['class_name'] = 'optional_solve'
														
 
															+                        ocr_flag = 1
														
 
															+                        if 'type_score_ocr' in answer_sheet['regions'][i].keys():
														
 
															+                            del answer_sheet['regions'][i]['type_score_ocr']
														
 
															                         # answer_sheet['regions'][i]['number_score'] = Score_last[j]['number_score']  # 小题分数
														
 
															                         # answer_sheet['regions'][i]['counts'] = Score_last[j]['counts']  # 小题个数
														
 
															-                    if 'ocr' in Score_last[j]:  # 没有识别到分数的模块添加type_score_ocr结果
														
 
															+                    if ocr_flag == 0 and 'ocr' in Score_last[j]:  # 没有识别到分数的模块添加type_score_ocr结果
														
 
															                         answer_sheet['regions'][i]['type_score_ocr'] = Score_last[j]['ocr']
														
 
															     return answer_sheet
														
--- a/segment/sheet_resolve/analysis/sheet/tag_parse.py
+++ b/segment/sheet_resolve/analysis/sheet/tag_parse.py
@@ -0,0 +1,82 @@
 
															+
														
 
															+# -*- coding:utf-8 -*-
														
 
															+import CRFPP
														
 
															+import re
														
 
															+
														
 
															+
														
 
															+class TagParse:
														
 
															+    def __init__(self, tagger):
														
 
															+        self.tagger = tagger
														
 
															+
														
 
															+    def get_type(self, j):
														
 
															+        if re.match(r'\d', j):
														
 
															+            return 'num'
														
 
															+        elif j in '一二三四五六七八九十':
														
 
															+            return 'cn'
														
 
															+        elif re.match(u'[\u4e00-\u9fa5]', j):
														
 
															+            return 'ch'
														
 
															+        else:
														
 
															+            return 'b'
														
 
															+
														
 
															+    def sequence_init(self, sequence):
														
 
															+        self.tagger.clear()
														
 
															+        for word in sequence.strip():
														
 
															+            word = word.strip()
														
 
															+            if word:
														
 
															+                word = word + '\t' + self.get_type(word)
														
 
															+                self.tagger.add(word + "\t")
														
 
															+        self.tagger.parse()
														
 
															+
														
 
															+    def get_val(self, sequence):
														
 
															+        """
														
 
															+        得到句子的标签
														
 
															+        :param sequence: 一个句子,str
														
 
															+        :return: x_res:句子中一个个字符,list;y_res:每个字符对应的预测标签,list
														
 
															+        """
														
 
															+        self.sequence_init(sequence)
														
 
															+        size = self.tagger.size()
														
 
															+
														
 
															+        x_res = []
														
 
															+        y_res = []
														
 
															+        for i in range(0, size):
														
 
															+            x_res.append(self.tagger.x(i, 0))
														
 
															+            y_res.append(self.tagger.y2(i))
														
 
															+        return x_res, y_res
														
 
															+
														
 
															+    def get_tag_val(self, sequence):
														
 
															+        """
														
 
															+        按标签将句子分块
														
 
															+        :param sequence:一个句子,str
														
 
															+        :return:[["xxx1",标签1],["xxxx2","标签2"],["xxx3",""标签3].....]
														
 
															+        """
														
 
															+        char_list, tag_list = self.get_val(sequence)
														
 
															+        c = [0]
														
 
															+        c1 = []
														
 
															+
														
 
															+        for i in range(len(tag_list) - 1):
														
 
															+            if tag_list[i] != tag_list[i + 1]:
														
 
															+                c.append(i + 1)
														
 
															+                c1.append(i + 1)
														
 
															+        c1.append(len(sequence))
														
 
															+        res = []
														
 
															+        for i, j in zip(c, c1):
														
 
															+            res.append([''.join(char_list[i:j]), tag_list[i]])
														
 
															+            # char_list1 = char_list[i:j]
														
 
															+            # char_list_new = [''.join(char_list1)]
														
 
															+            # tag_list1 = tag_list[i]
														
 
															+            # res_dict = dict(map(lambda x, y: [x, y], tag_list1, char_list_new))
														
 
															+            # res.append(res_dict)
														
 
															+
														
 
															+        return res
														
 
															+
														
 
															+
														
 
if __name__ == '__main__':
    # Manual smoke test: build a CRF++ tagger from the model at the relative
    # path './model/crf2.model' and print the tag-grouped segments of one
    # sample heading.
    # NOTE(review): the relative model path presumably requires running the
    # script from its own directory - confirm.
    taggers = CRFPP.Tagger("-m " + './model/crf2.model')
    tb = TagParse(taggers)
    # The commented-out calls below are alternative sample inputs.
    # print(tb.get_tag_val('二、非选择题(共160分。36-42为必考题,43-47为选考题。)'))
    # print(tb.get_tag_val('一、选择题〈1-20每题1.5分,21-30每题2分,共50分'))
    print(tb.get_tag_val('任务型阅读。（共5小题，每小题2分，计10分）阅读短文，并按照要求完成66~70题'))
    # print(tb.get_tag_val('一、选择题(共25小题,每小题2分,共50分'))
    # print(tb.get_tag_val('27.(按要求给分,共12分)'))
    # print(tb.get_tag_val('53(10分)'))
    # print(tb.get_tag_val('一、选择题(每小题4分,共计40分。1至6小题单选,7至10小题多选,全对得4分,选对但不全得2分,不选或选错不得分)'))
														
--- a/segment/sheet_server.py
+++ b/segment/sheet_server.py
@@ -26,6 +26,8 @@ from segment.sheet_resolve.analysis.sheet.sheet_points import get_sheet_points
 
															 from segment.sheet_resolve.analysis.sheet.sheet_points_total import get_sheet_number_total
														
 
															 from segment.sheet_resolve.tools import utils
														
 
															 from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate, change_format_baidu_to_google
														
 
															+from segment.sheet_resolve.analysis.sheet.sheet_points_by_nlp import get_sheet_points_by_nlp
														
 
															+
														
 
															 logger = logging.getLogger(settings.LOGGING_TYPE)
														
@@ -74,7 +76,7 @@ def convert_pil_to_jpeg(raw_img):
 
															         img.paste(raw_img, mask=raw_img.split()[3])  # 3 is the alpha channel
														
 
															     else:
														
 
															         img = raw_img
														
 
															-    open_cv_image = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
														
 
															+    open_cv_image = np.array(img)
														
 
															     return img, open_cv_image
														
@@ -351,7 +353,13 @@ def sheet_points(sheet_dict_list, image_list, ocr_list, if_ocr=False):
 
															         for index, ele in enumerate(sheet_dict_list):
														
 
															             ocr_res = change_format_baidu_to_google(ocr_list[index])
														
 
															             sheet_dict = get_sheet_number_total(ele, ocr_res, image_list[index])
														
 
															-            sheet_total_list.append(sheet_dict)
														
 
															+            regions_list = sheet_dict['regions']
														
 
															+            type_score_ocr = [ele for ele in regions_list if 'type_score_ocr' in ele]
														
 
															+            if len(type_score_ocr) == 0:
														
 
															+                sheet_total_list.append(sheet_dict)
														
 
															+            else:
														
 
															+                sheet_dict0 = get_sheet_points_by_nlp(sheet_dict)
														
 
															+                sheet_total_list.append(sheet_dict0)
														
 
															     except Exception as e:
														
 
															         traceback.print_exc()
														
 
															         sheet_total_list = sheet_dict_list