Explorar el Código

1.选做
2.排序
3.跨栏合并
4.分空白页
5.补全改进

lighttxu hace 4 años
padre
commit
ba54f8a1bc

+ 1 - 1
exam_segment_django/urls.py

@@ -22,7 +22,7 @@ from django.urls import path
 from django.views.static import serve
 
 urlpatterns = [
-    path('admin/', admin.site.urls),
+    # path('admin/', admin.site.urls),
     path('segment/', include('segment.urls')),
     url(r'^exam_image/(?P<path>.*)$', serve, {'document_root': settings.MEDIA_ROOT}),
 ] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

+ 7 - 5
segment/sheet_resolve/analysis/choice/choice_line_box.py

@@ -434,15 +434,17 @@ def choice_m_adjust(image, choice_m_bbox_list):
     return choice_m_bbox_list
 
 
-def choice_m_row_col(image, choice_m_bbox_list, xml_path):
+def choice_m_row_col(image, choice_m_bbox_list, direction, xml_path):
     a_z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
     choice_m_dict_list = []
 
     # 或在长宽比接近的choice_m中选取
-    random_one = random.randint(0, len(choice_m_bbox_list)-1)
-    choice_m_for_dircetion = utils.crop_region(image, choice_m_bbox_list[random_one]['bounding_box'])
-    res_dict = get_ocr_text_and_coordinate(choice_m_for_dircetion, ocr_accuracy='accurate', language_type='ENG')
-    direction = get_direction(res_dict)
+    if not direction:
+        random_one = random.randint(0, len(choice_m_bbox_list)-1)
+        choice_m_for_dircetion = utils.crop_region(image, choice_m_bbox_list[random_one]['bounding_box'])
+        res_dict = get_ocr_text_and_coordinate(choice_m_for_dircetion, ocr_accuracy='accurate', language_type='ENG')
+        direction = get_direction(res_dict)
+
     for index0, box in enumerate(choice_m_bbox_list):  # rcnn识别的框匹配题号
         box = box['bounding_box']
         m_left, m_top = box['xmin'], box['ymin'],

+ 242 - 31
segment/sheet_resolve/analysis/choice/choice_m_row_column.py

@@ -95,21 +95,184 @@ def image_detect(image_np, category, score_threshold):
     return detections
 
 
+# def get_choice_m_row_and_col(left, top, image):
+#     im_resize = 300
+#     ''' choice_m resize to 300*300'''
+#     image_src = Image.fromarray(image)
+#     if image_src.mode == 'RGB':
+#         image_src = image_src.convert("L")
+#     w, h = image_src.size
+#     if h > w:
+#         image_src = image_src.resize((int(im_resize / h * w), im_resize))
+#     else:
+#         image_src = image_src.resize((im_resize, int(im_resize / w * h)))
+#     w_, h_ = image_src.size
+#     image_300 = Image.new(image_src.mode, (im_resize, im_resize), (255))
+#     image_300.paste(image_src, [0, 0, w_, h_])
+#
+#     category_index = label_map_util.create_category_index_from_labelmap(tf_settings.choice_m_ssd_label,
+#                                                                         use_display_name=True)
+#     detections = image_detect(image_300, category_index, 0.5)
+#     if len(detections) > 1:
+#         box_xmin = []
+#         box_ymin = []
+#         box_xmax = []
+#         box_ymax = []
+#         x_distance_all = []
+#         y_distance_all = []
+#         x_width_all = []
+#         y_height_all = []
+#         all_small_coordinate = []
+#         ssd_column = 1
+#         ssd_row = 1
+#         count_x = 0
+#         count_y = 0
+#         for index, box in enumerate(detections):
+#             if box[-1] != 'T' and box[2] <= w_ and box[3] <= h_:
+#                 box0 = round(box[0] * (w / w_))  # Map to the original image
+#                 box1 = round(box[1] * (h / h_))
+#                 box2 = round(box[2] * (w / w_))
+#                 box3 = round(box[3] * (h / h_))
+#                 box_xmin.append(box0)
+#                 box_ymin.append(box1)
+#                 box_xmax.append(box2)
+#                 box_ymax.append(box3)
+#                 small_coordinate = {'xmin': box0 + left,
+#                                     'ymin': box1 + top,
+#                                     'xmax': box2 + left,
+#                                     'ymax': box3 + top}
+#                 all_small_coordinate.append(small_coordinate)
+#                 x_width = box2 - box0
+#                 y_height = box3 - box1
+#                 x_width_all.append(x_width)
+#                 y_height_all.append(y_height)
+#
+#         sorted_xmin = sorted(box_xmin)
+#         sorted_ymin = sorted(box_ymin)
+#         sorted_xmax = sorted(box_xmax)
+#         sorted_ymax = sorted(box_ymax)
+#
+#         x_width_all_sorted = sorted(x_width_all, reverse=True)
+#         y_height_all_sorted = sorted(y_height_all, reverse=True)
+#         len_x = len(x_width_all)
+#         len_y = len(y_height_all)
+#         x_width_median = np.median(x_width_all_sorted)
+#         y_height_median = np.median(y_height_all_sorted)
+#
+#         for i in range(len(sorted_xmin) - 1):
+#             x_distance = abs(sorted_xmin[i + 1] - sorted_xmin[i])
+#             y_distance = abs(sorted_ymin[i + 1] - sorted_ymin[i])
+#             if x_distance > 20:
+#                 ssd_column = ssd_column + 1
+#                 x_distance_all.append(x_distance)
+#                 if x_distance > 2 * x_width_median + 4:
+#                     count_x = count_x + 1
+#             if y_distance > 10:
+#                 ssd_row = ssd_row + 1
+#                 y_distance_all.append(y_distance)
+#                 if y_distance > 2 * y_height_median + 3:
+#                     count_y = count_y + 1
+#             if x_width_all_sorted[i] - x_width_median > 40:
+#                 ssd_column = ssd_column - 1
+#             elif x_width_median - x_width_all_sorted[i] > 40:
+#                 ssd_column = ssd_column - 1
+#             if y_height_all_sorted[i] - y_height_median > 20:
+#                 ssd_row = ssd_row - 1
+#             elif y_height_median - y_height_all_sorted[i] > 20:
+#                 ssd_row = ssd_row - 1
+#
+#         if count_x < len(x_distance_all) / 2 + 1:
+#             ssd_column = ssd_column + count_x
+#         elif count_y < len(y_distance_all) / 2 + 1:
+#             ssd_row = ssd_row + count_y
+#
+#         average_height = int(np.mean(y_height_all))
+#         average_width = int(np.mean(x_width_all))
+#
+#         # average_height = format(np.mean(y_height_all), '.2f')
+#         # average_width = format(np.mean(x_width_all), '.2f')
+#         # average_height = int(np.mean(y_distance_all))
+#         # average_width = int(np.mean(x_distance_all))
+#         location_ssd = {'xmin': sorted_xmin[0] + left,
+#                         'ymin': sorted_ymin[0] + top,
+#                         'xmax': sorted_xmax[-1] + left,
+#                         'ymax': sorted_ymax[-1] + top}
+#
+#         choice_m_ssd = {'bounding_box': location_ssd,
+#                         "single_height": average_height,
+#                         "single_width": average_width,
+#                         "rows": ssd_row,
+#                         "cols": ssd_column,
+#                         'class_name': 'choice_m',
+#                         'all_small_coordinate': all_small_coordinate
+#                         }
+#     else:
+#         choice_m_ssd = {}
+#     return choice_m_ssd
+
+
+
 def get_choice_m_row_and_col(left, top, image):
     im_resize = 300
+    da_number_h = 0
+    da_number_w = 0
+
+    w_ = 300
+    h_ = 300
     ''' choice_m resize to 300*300'''
     image_src = Image.fromarray(image)
     if image_src.mode == 'RGB':
         image_src = image_src.convert("L")
     w, h = image_src.size
+    bounder_w = w
+    bounder_h = h
+    image_300 = Image.new(image_src.mode, (im_resize, im_resize), (255))
+
     if h > w:
-        image_src = image_src.resize((int(im_resize / h * w), im_resize))
+        if h > 300:
+            # w = int(w/(h/300))
+            h_1 = 300
+            image_src_resize = image_src.resize((w, 300))
+        else:
+            h_1 = h
+            image_src_resize = image_src
+        da_number_h = int((im_resize - w) / (w + 4))
+        if da_number_h > 0:
+            w_ = w
+            h_ = h_1
+            bounder_w = w
+            for idx in range(da_number_h):
+                x0 = idx * w + 4
+                x1 = idx * w + 4 + w
+                image_300.paste(image_src_resize, [x0, 0, x1, h_1])
+
+        else:
+            image_src_resize = image_src.resize((int(im_resize / h * w), im_resize))
+            w_, h_ = image_src_resize.size
+            image_300.paste(image_src_resize, [0, 0, w_, h_])
     else:
-        image_src = image_src.resize((im_resize, int(im_resize / w * h)))
-    w_, h_ = image_src.size
-    image_300 = Image.new(image_src.mode, (im_resize, im_resize), (255))
-    image_300.paste(image_src, [0, 0, w_, h_])
+        if w > 300:
+            # h = int(h/(w/300))
+            w_1 = 300
+            image_src_resize = image_src.resize((300, h))
+        else:
+            w_1 = w
+            image_src_resize = image_src
+        da_number_w = int((im_resize - h) / (h + 4))
+        if da_number_w > 0:
+            h_ = h
+            w_ = w_1
+            bounder_h = h
+            for idx in range(da_number_w):
+                h0 = idx * h + 4
+                h1 = idx * h + 4 + h
+                image_300.paste(image_src_resize, [0, h0, w_1, h1])
+        else:
+            image_src_resize = image_src.resize((im_resize, int(im_resize / w * h)))
+            w_, h_ = image_src_resize.size
+            image_300.paste(image_src_resize, [0, 0, w_, h_])
 
+    w_resize, h_resize = image_src_resize.size
     category_index = label_map_util.create_category_index_from_labelmap(tf_settings.choice_m_ssd_label,
                                                                         use_display_name=True)
     detections = image_detect(image_300, category_index, 0.5)
@@ -128,7 +291,7 @@ def get_choice_m_row_and_col(left, top, image):
         count_x = 0
         count_y = 0
         for index, box in enumerate(detections):
-            if box[-1] != 'T' and box[2] <= w_ and box[3] <= h_:
+            if box[-1] != 'T' and box[2] <= 300 and box[3] <= 300:
                 box0 = round(box[0] * (w / w_))  # Map to the original image
                 box1 = round(box[1] * (h / h_))
                 box2 = round(box[2] * (w / w_))
@@ -137,11 +300,12 @@ def get_choice_m_row_and_col(left, top, image):
                 box_ymin.append(box1)
                 box_xmax.append(box2)
                 box_ymax.append(box3)
-                small_coordinate = {'xmin': box0 + left,
-                                    'ymin': box1 + top,
-                                    'xmax': box2 + left,
-                                    'ymax': box3 + top}
-                all_small_coordinate.append(small_coordinate)
+                if box2 < bounder_w and box3 < bounder_h:
+                    small_coordinate = {'xmin': box0 + left,
+                                        'ymin': box1 + top,
+                                        'xmax': box2 + left,
+                                        'ymax': box3 + top}
+                    all_small_coordinate.append(small_coordinate)
                 x_width = box2 - box0
                 y_height = box3 - box1
                 x_width_all.append(x_width)
@@ -160,8 +324,14 @@ def get_choice_m_row_and_col(left, top, image):
         y_height_median = np.median(y_height_all_sorted)
 
         for i in range(len(sorted_xmin) - 1):
-            x_distance = abs(sorted_xmin[i + 1] - sorted_xmin[i])
-            y_distance = abs(sorted_ymin[i + 1] - sorted_ymin[i])
+            if sorted_xmin[i + 1] < bounder_w:
+                x_distance = abs(sorted_xmin[i + 1] - sorted_xmin[i])
+            else:
+                x_distance = 0
+            if sorted_ymin[i + 1] < bounder_h:
+                y_distance = abs(sorted_ymin[i + 1] - sorted_ymin[i])
+            else:
+                y_distance = 0
             if x_distance > 20:
                 ssd_column = ssd_column + 1
                 x_distance_all.append(x_distance)
@@ -189,23 +359,64 @@ def get_choice_m_row_and_col(left, top, image):
         average_height = int(np.mean(y_height_all))
         average_width = int(np.mean(x_width_all))
 
-        # average_height = format(np.mean(y_height_all), '.2f')
-        # average_width = format(np.mean(x_width_all), '.2f')
-        # average_height = int(np.mean(y_distance_all))
-        # average_width = int(np.mean(x_distance_all))
-        location_ssd = {'xmin': sorted_xmin[0] + left,
-                        'ymin': sorted_ymin[0] + top,
-                        'xmax': sorted_xmax[-1] + left,
-                        'ymax': sorted_ymax[-1] + top}
-
-        choice_m_ssd = {'bounding_box': location_ssd,
-                        "single_height": average_height,
-                        "single_width": average_width,
-                        "rows": ssd_row,
-                        "cols": ssd_column,
-                        'class_name': 'choice_m',
-                        'all_small_coordinate': all_small_coordinate
-                        }
+        if da_number_w > 1 and da_number_h < 1:
+            location_ssd = {'xmin': sorted_xmin[0] + left,
+                            'ymin': sorted_ymin[0] + top,
+                            'xmax': sorted_xmax[-1] + left,
+                            'ymax': h + top}
+
+            choice_m_ssd = {'bounding_box': location_ssd,
+                            "single_height": average_height,
+                            "single_width": average_width,
+                            "rows": ssd_row,
+                            "cols": ssd_column,
+                            'class_name': 'choice_m',
+                            'all_small_coordinate': all_small_coordinate
+                            }
+
+        elif da_number_h > 1 and da_number_w < 1:
+            location_ssd = {'xmin': sorted_xmin[0] + left,
+                            'ymin': sorted_ymin[0] + top,
+                            'xmax': w + left,
+                            'ymax': sorted_ymax[-1] + top}
+
+            choice_m_ssd = {'bounding_box': location_ssd,
+                            "single_height": average_height,
+                            "single_width": average_width,
+                            "rows": ssd_row,
+                            "cols": ssd_column,
+                            'class_name': 'choice_m',
+                            'all_small_coordinate': all_small_coordinate
+                            }
+        elif da_number_h > 1 and da_number_w > 1:
+            location_ssd = {'xmin': sorted_xmin[0] + left,
+                            'ymin': sorted_ymin[0] + top,
+                            'xmax': w + left,
+                            'ymax': h + top}
+
+            choice_m_ssd = {'bounding_box': location_ssd,
+                            "single_height": average_height,
+                            "single_width": average_width,
+                            "rows": ssd_row,
+                            "cols": ssd_column,
+                            'class_name': 'choice_m',
+                            'all_small_coordinate': all_small_coordinate
+                            }
+        else:
+            location_ssd = {'xmin': sorted_xmin[0] + left,
+                            'ymin': sorted_ymin[0] + top,
+                            'xmax': sorted_xmax[-1] + left,
+                            'ymax': sorted_ymax[-1] + top}
+
+            choice_m_ssd = {'bounding_box': location_ssd,
+                            "single_height": average_height,
+                            "single_width": average_width,
+                            "rows": ssd_row,
+                            "cols": ssd_column,
+                            'class_name': 'choice_m',
+                            'all_small_coordinate': all_small_coordinate
+                            }
+
     else:
         choice_m_ssd = {}
-    return choice_m_ssd
+    return choice_m_ssd

+ 89 - 99
segment/sheet_resolve/analysis/resolve.py

@@ -2,13 +2,13 @@
 # @File    : resolve.py
 # @Time    : 2018/12/3 0003 上午 10:16
 
-import time
 import traceback
 import xml.etree.cElementTree as ET
+
 from django.conf import settings
+
 import segment.logging_config as logging
 import segment.sheet_resolve.analysis.choice.analysis_choice as resolve_choice
-import segment.sheet_resolve.analysis.choice.choice_box as choice_box
 import segment.sheet_resolve.analysis.choice.choice_line_box as choice_line_box
 import segment.sheet_resolve.analysis.cloze.analysis_cloze as resolve_cloze
 import segment.sheet_resolve.analysis.cloze.cloze_line_box as resolve_cloze_line_box
@@ -17,19 +17,18 @@ import segment.sheet_resolve.analysis.exam_number.exam_number_row_column as exam
 import segment.sheet_resolve.analysis.sheet.analysis_sheet as resolve_sheet
 import segment.sheet_resolve.analysis.solve.mark_box as resolve_mark_box
 import segment.sheet_resolve.analysis.solve.mark_line_box as resolve_mark_line_box
-from segment.sheet_resolve.tools import utils
-from segment.sheet_resolve.tools.tf_sess import TfSess
-from segment.sheet_resolve.tools.tf_settings import xml_template_path, model_dict
-from segment.sheet_resolve.tools.utils import read_single_img, read_xml_to_json, create_xml
-from segment.sheet_resolve.analysis.sheet.sheet_adjust import adjust_item_edge_by_gray_image
-from segment.sheet_resolve.analysis.sheet.sheet_infer import infer_bar_code, box_infer_and_complete, infer_solve
-from segment.sheet_resolve.analysis.sheet.sheet_infer import infer_exam_number, adjust_exam_number, exam_number_infer_by_s
 from segment.sheet_resolve.analysis.sheet.choice_infer import infer_choice_m
 from segment.sheet_resolve.analysis.sheet.ocr_sheet import tell_columns, sheet_sorted
+from segment.sheet_resolve.analysis.sheet.sheet_adjust import adjust_item_edge_by_gray_image
+from segment.sheet_resolve.analysis.sheet.sheet_infer import infer_bar_code, adjust_exam_number, exam_number_infer_by_s
+from segment.sheet_resolve.analysis.sheet.sheet_infer import infer_exam_number, infer_solve, box_infer_and_complete
+from segment.sheet_resolve.analysis.sheet.sheet_infer import exam_number_adjust_infer
+from segment.sheet_resolve.tools import utils
+from segment.sheet_resolve.tools.tf_settings import xml_template_path, model_dict
+from segment.sheet_resolve.tools.utils import create_xml
 
 logger = logging.getLogger(settings.LOGGING_TYPE)
 
-
 sheet_infer_dict = dict(bar_code=True,
                         choice_m=True,
                         exam_number=True,
@@ -44,9 +43,6 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
     classes = list(model_dict[model_type]['classes'])
     coordinate_bias_dict = model_dict[model_type]['class_coordinate_bias']
 
-    if '_blank' in model_type:
-        model_type = model_type.replace("_blank", "")
-
     sheets_dict = resolve_sheet.get_single_image_sheet_regions(model_type, image_path, image, classes,
                                                                sheet_sess.sess, sheet_sess.net,
                                                                conf_thresh, mns_thresh, coordinate_bias_dict)
@@ -93,6 +89,7 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
                 exam_number_list = infer_exam_number(image, ocr, regions)
                 regions.extend(exam_number_list)
 
+            image, regions = exam_number_adjust_infer(image, regions)
         except Exception as e:
             traceback.print_exc()
             logger.info('试卷:{} 考号推断失败: {}'.format(image_path, e))
@@ -100,8 +97,10 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
     if sheet_infer_dict['choice_m']:
 
         try:
-            choice_m_list = infer_choice_m(image, regions, col_split_x, ocr)
-            #remain_choice_m = []
+            col_split = col_split_x.copy()
+            if not col_split:
+                col_split = [w - 1]
+            choice_m_list = infer_choice_m(image, regions, col_split, ocr)
             if len(choice_m_list) > 0:
                 choice_m_old_list = [ele for ele in regions if 'choice_m' == ele['class_name']]
                 for infer_box in choice_m_list.copy():
@@ -122,7 +121,7 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
                             choice_m_list.remove(infer_box)
                             break
 
-                #remain_choice_m.extend(choice_m_list)
+                # remain_choice_m.extend(choice_m_list)
 
                 # regions = [ele for ele in regions if 'choice_m' != ele['class_name']]
                 # regions.extend(remain_choice_m)
@@ -142,14 +141,24 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
                              'cloze_s',
                              'exam_number',
                              'solve',
+                             'solve0',
                              'composition',
-                             'correction'
+                             'composition0',
+                             'correction',
+                             'alarm_info',
+                             'page'
                              ]
+            if 'math' not in subject:
+                include_class.remove('cloze_s')
             regions_subset = [ele for ele in regions if ele['class_name'] in include_class]
-            col_regions = sheet_sorted(regions_subset, col_split_x)
 
-            top = min([ele['bounding_box']['ymin'] for ele in regions])
-            bottom = max([ele['bounding_box']['ymax'] for ele in regions])
+            col_split = col_split_x.copy()
+            if not col_split:
+                col_split = [w - 1]
+            col_regions = sheet_sorted(regions_subset, col_split.copy())
+
+            top = min([ele['bounding_box']['ymin'] for ele in regions if 'seal' not in ele['class_name']])
+            bottom = max([ele['bounding_box']['ymax'] for ele in regions if 'seal' not in ele['class_name']])
 
             seal_area = [ele for ele in regions if 'seal' in ele['class_name']]
             if len(seal_area) > 0:
@@ -163,8 +172,9 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
                 left = min([ele['bounding_box']['xmin'] for ele in regions])
                 right = max([ele['bounding_box']['xmax'] for ele in regions])
 
-            solve_regions = infer_solve(regions, left, right, top, bottom, col_regions, col_split_x)
-            regions.append(solve_regions)
+            solve_regions = infer_solve(regions, left, right, top, bottom, h, w, col_regions, col_split_x.copy())
+            regions.extend(solve_regions)
+
         except Exception as e:
             traceback.print_exc()
             logger.info('试卷:{} 解答题补全推断失败: {}'.format(image_path, e))
@@ -185,7 +195,7 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
         traceback.print_exc()
         logger.info('试卷:{} 自适应边框失败: {}'.format(image_path, e))
 
-    sheets_dict.update({'regions': adjust_regions})
+    sheets_dict.update({'regions': adjust_regions, 'col_split': col_split_x})
 
     #  generate xml
     tree = ET.parse(xml_template_path)
@@ -206,6 +216,7 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
     else:
         depth.text = '1'
 
+    # 没有adjust 的regions
     for ele in regions:
         name = ele['class_name']
         xmin = ele['bounding_box']['xmin']
@@ -214,6 +225,14 @@ def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sh
         ymax = ele['bounding_box']['ymax']
         tree = create_xml(name, tree, xmin, ymin, xmax, ymax)
 
+    for i, ele in enumerate(sheets_dict['col_split']):
+        name = 'line_{}'.format(i)
+        xmin = ele
+        ymin = 1
+        xmax = ele + 2
+        ymax = img_shape[0]
+        tree = create_xml(name, tree, xmin, ymin, xmax, ymax)
+
     tree.write(xml_save_path)
     return sheets_dict, xml_save_path
 
@@ -226,16 +245,15 @@ def choice(image, regions, xml_path, conf_thresh, mns_thresh, choice_sess):
     choice_list = []
     for ele in regions:
         if ele["class_name"] == 'choice':
-
             choice_bbox = ele['bounding_box']
             left = choice_bbox['xmin']
             top = choice_bbox['ymin']
             choice_img = utils.crop_region(image, choice_bbox)
 
-            choice_dict_tf = resolve_choice. \
-                get_single_image_sheet_regions('choice', choice_img, classes,
-                                               choice_sess.sess, choice_sess.net, conf_thresh, mns_thresh,
-                                               coordinate_bias_dict)
+            choice_dict_tf = resolve_choice.get_single_image_sheet_regions('choice', choice_img, classes,
+                                                                           choice_sess.sess, choice_sess.net,
+                                                                           conf_thresh, mns_thresh,
+                                                                           coordinate_bias_dict)
 
             choice_list = choice_list + choice_line_box.choice_line(left, top, choice_img, choice_dict_tf, xml_path)
 
@@ -250,25 +268,55 @@ def choice_row_col(image, regions, xml_path, conf_thresh, mns_thresh, choice_ses
     choice_list = []
     for ele in regions:
         if ele["class_name"] == 'choice':
-
             choice_box = ele['bounding_box']
             left = choice_box['xmin']
             top = choice_box['ymin']
             choice_img = utils.crop_region(image, choice_box)
 
-            choice_m_dict_tf = resolve_choice. \
-                get_single_image_sheet_regions('choice_m', choice_img, classes,
-                                               choice_sess.sess, choice_sess.net, conf_thresh, mns_thresh,
-                                               coordinate_bias_dict)
+            choice_m_dict_tf = resolve_choice.get_single_image_sheet_regions('choice_m', choice_img, classes,
+                                                                             choice_sess.sess, choice_sess.net,
+                                                                             conf_thresh, mns_thresh,
+                                                                             coordinate_bias_dict)
 
-            choice_list = choice_list + choice_line_box.choice_line_with_number(left, top, choice_img, choice_m_dict_tf, xml_path)
+            choice_list = choice_list + choice_line_box.choice_line_with_number(left, top, choice_img,
+                                                                                choice_m_dict_tf,
+                                                                                xml_path)
 
     return choice_list
 
 
 def choice_m_row_col(image, regions, xml_path):
+    choice_m_dict_tf = []
+    direction_list = []
+    for ele in regions:
+
+        if ele['class_name'] == 'choice_m':
+            choice_m_dict_tf.append(ele)
+        if ele['class_name'] == 'choice_n':
+            loc = ele['bounding_box']
+            xmin, ymin, xmax, ymax = loc['xmin'], loc['ymin'], loc['xmax'], loc['ymax']
+            if ymax - ymin > 2 * (xmax - xmin):
+                direction = 180
+            else:
+                direction = 90
+            direction_list.append(direction)
+        if ele['class_name'] == 'choice_s':
+            loc = ele['bounding_box']
+            xmin, ymin, xmax, ymax = loc['xmin'], loc['ymin'], loc['xmax'], loc['ymax']
+            if ymax - ymin > 2 * (xmax - xmin):
+                direction = 90
+            else:
+                direction = 180
+            direction_list.append(direction)
+
+    c180 = direction_list.count(180)
+    c90 = direction_list.count(90)
+
+    if c180 == c90 == 0:
+        direction = 0
+    else:
+        direction = 180 if c180 >= c90 else 90
 
-    choice_m_dict_tf = [ele for ele in regions if ele['class_name'] == 'choice_m']
     # choice_m_row_col_with_number
     choice_list = []
     try:
@@ -279,7 +327,7 @@ def choice_m_row_col(image, regions, xml_path):
         # else:
         #     choice_list = choice_line_box.choice_m_row_col(image, choice_m_dict_tf, xml_path)  # 找选择题行列、分数
 
-        choice_list = choice_line_box.choice_m_row_col(image, choice_m_dict_tf, xml_path)  # 找选择题行列、分数
+        choice_list = choice_line_box.choice_m_row_col(image, choice_m_dict_tf, direction, xml_path)  # 找选择题行列、分数
         tree = ET.parse(xml_path)  # xml tree
         for index_num, box in enumerate(choice_list):
             if len(box['bounding_box']) > 0:
@@ -366,7 +414,8 @@ def cloze(image, regions, xml_path, conf_thresh, mns_thresh, cloze_sess):
             cloze_dict_tf = resolve_cloze.get_single_image_sheet_regions('cloze', cloze_img, classes,
                                                                          cloze_sess.sess, cloze_sess.net, conf_thresh,
                                                                          mns_thresh, coordinate_bias_dict)
-            cloze_list = cloze_list + resolve_cloze_line_box.cloze_line(left, top, cloze_img, cloze_dict_tf['regions'], xml_path)
+            cloze_list = cloze_list + resolve_cloze_line_box.cloze_line(left, top, cloze_img, cloze_dict_tf['regions'],
+                                                                        xml_path)
 
     return cloze_list
 
@@ -382,7 +431,7 @@ def solve_with_mark(image, regions, xml_path):
             exam_number_img = utils.crop_region(image, exam_number_box)
             solve_mark_dict = resolve_mark_box.solve_mark(left, top, exam_number_img, xml_path)
             if len(solve_mark_dict) > 0:
-                ele['class_name'] = 'solve_'+str(solve_mark_dict['number'])
+                ele['class_name'] = 'solve_' + str(solve_mark_dict['number'])
                 solve_list.append(ele)
                 mark_list.append(solve_mark_dict)
 
@@ -410,8 +459,8 @@ def solve(image, regions, xml_path):
 def solve_with_number(regions, xml_path):
     solve_list = []
     for ele in regions:
-        if 'solve' in ele["class_name"] or 'composition' in ele["class_name"]:
-            solve_dict = {'number': -1, 'default_points': -1}
+        if 'solve' in ele["class_name"] or 'composition' in ele["class_name"] or 'correction' in ele["class_name"]:
+            solve_dict = {'number': -1, 'default_points': -1, 'span': False, 'span_id': 1}
             ele.update(solve_dict)
             solve_list.append(ele)
 
@@ -451,62 +500,3 @@ def cloze_with_number(regions, xml_path):
 
     tree.write(xml_path)
     return cloze_list
-
-
-def make_together(image_path):
-
-    sheet_sess = TfSess('sheet')
-    choice_sess = TfSess('choice')
-    cloze_sess = TfSess('cloze')
-
-    raw_img = read_single_img(image_path)
-    conf_thresh_0 = 0.7
-    mns_thresh_0 = 0.3
-
-    series_number = 123456789
-    subject = 'english'
-    sheets_dict_0, xml_save_path = sheet(series_number, image_path, raw_img, conf_thresh_0, mns_thresh_0, subject, sheet_sess)
-    # 手动修改faster_rcnn识别生成的框
-
-    sheets_dict_0 = read_xml_to_json(xml_save_path)
-    regions = sheets_dict_0['regions']
-    classes_name = str([ele['class_name'] for ele in regions])
-
-    if 'choice' in classes_name:
-        try:
-            sheets_dict_0['choice'] = choice(raw_img, regions, xml_save_path, conf_thresh_0, mns_thresh_0, choice_sess)
-        except Exception:
-            traceback.print_exc()
-
-    if 'exam_number' in classes_name:
-        try:
-            sheets_dict_0['exam_number'] = exam_number(raw_img, regions, xml_save_path)
-        except Exception:
-            traceback.print_exc()
-
-    if 'cloze' in classes_name:
-        try:
-            sheets_dict_0['cloze'] = cloze(raw_img, regions, xml_save_path, conf_thresh_0, mns_thresh_0, cloze_sess)
-        except Exception:
-            traceback.print_exc()
-
-    if 'solve' in classes_name:
-        try:
-            solve_list, mark_list = solve(raw_img, regions, xml_save_path,)
-            sheets_dict_0['solve'] = solve_list
-            sheets_dict_0['mark'] = mark_list
-        except Exception:
-            traceback.print_exc()
-
-    # print(sheets_dict_0)
-    return sheets_dict_0
-
-
-# if __name__ == '__main__':
-#     start_time = time.time()
-#
-#     image_path_0 = os.path.join(r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes\template',
-#                                 '20180719004308818_0020.jpg')
-#     make_together(image_path_0)
-#     end_time = time.time()
-#     print('time cost: ', (end_time - start_time))

+ 170 - 69
segment/sheet_resolve/analysis/sheet/analysis_sheet.py

@@ -6,12 +6,16 @@ import traceback
 
 import numpy as np
 import cv2
-
+from django.conf import settings
+import segment.logging_config as logging
 from segment.sheet_resolve.lib.model.test import im_detect
 from segment.sheet_resolve.lib.model.nms_wrapper import nms
 from segment.sheet_resolve.lib.utils.timer import Timer
 from segment.sheet_resolve.tools import utils
-from segment.sheet_resolve.analysis.solve.optional_solve import find_contours, resolve_optional_choice
+from segment.sheet_resolve.analysis.solve.optional_solve import resolve_optional_choice
+
+
+logger = logging.getLogger(settings.LOGGING_TYPE)
 
 
 def analysis_single_image_with_regions(analysis_type, classes,
@@ -25,13 +29,18 @@ def analysis_single_image_with_regions(analysis_type, classes,
     # Detect all object classes and regress object bounds
     timer = Timer()
     timer.tic()
+    if '_blank' in analysis_type:
+        analysis_type = analysis_type.replace('_blank', '')
     if analysis_type in ['unknown_subject', 'math', 'math_zxhx', 'english', 'chinese',
                          'physics', 'chemistry', 'biology', 'politics', 'history',
                          'geography', 'science_comprehensive', 'arts_comprehensive'
                          ]:
         analysis_type = 'sheet'
-    im, ratio = utils.img_resize(analysis_type, im_raw)
+        # im, ratio = utils.img_resize(analysis_type, im_raw)
+    im, ratio = utils.resize_faster_rcnn(analysis_type, im_raw)
+
     scores, boxes = im_detect(analysis_type, sess, net, im)
+
     timer.toc()
     print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))
 
@@ -47,7 +56,6 @@ def analysis_single_image_with_regions(analysis_type, classes,
                           cls_scores[:, np.newaxis])).astype(np.float32)
         keep = nms(dets, mns_thresh)
         dets = dets[keep, :]
-        # vis_detections(im, cls, dets, ax, thresh=conf_thresh)
         inds = np.where(dets[:, -1] >= conf_thresh)[0]
         if len(inds) > 0:
             if cls in list(coordinate_bias_dict.keys()):
@@ -74,12 +82,11 @@ def analysis_single_image_with_regions(analysis_type, classes,
                 xmax = (xmax if (xmax < size[1]) else size[1] - 1)
                 ymax = (ymax if (ymax < size[0]) else size[0] - 1)
 
-                if cls in ['solve0', ]:
-                    cls = 'solve'
+                if cls in ['solve0', 'composition0']:
+                    cls = cls.replace('0', '')
 
                 bbox_dict = {"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax}
-                # class_dict = {"class_name": cls, "bounding_box": bbox_dict, "score": score}
-                class_dict = {"class_name": cls, "bounding_box": bbox_dict}
+                class_dict = {"class_name": cls, "bounding_box": bbox_dict, "score": score}
 
                 # if cls == 'qr_code':
                 #     qr_img = utils.crop_region(im_raw, bbox_dict)
@@ -118,27 +125,35 @@ def get_single_image_sheet_regions(analysis_type, img_path, img, classes,
     return img_dict
 
 
-def question_number_format(init_number, crt_numbers, sheet_dict):
-    for region in sheet_dict['regions']:
+def question_number_format(init_number, crt_numbers, regions):
+    """Replace unusable question numbers in ``regions`` with fresh ones.
+
+    Regions whose class_name is 'optional_choice' are skipped.  For every other
+    region the "number" entry (a single int/float or a list) is inspected: a
+    value that is <= 0, >= 1000 or already present in ``crt_numbers`` is
+    replaced by ``init_number``, unless the region has a truthy "span" entry,
+    in which case the value is kept.  A falsy "number" (missing, 0, empty list)
+    is left untouched.
+
+    init_number: next candidate number to hand out.
+    crt_numbers: list of numbers already in use; it is appended to in place.
+    regions: list of region dicts; their "number" entries are updated in place.
+
+    Returns the tuple (regions, init_number, crt_numbers).
+    """
+    logger.info('regions: {}'.format(regions))
+    for region in regions:
+        logger.info('region: {}'.format(region))
+        if region['class_name'] == 'optional_choice':
+            continue
+        # Move the candidate past every number that is already in use.
+        # NOTE(review): this happens once per region, so several replacements
+        # inside one list are not re-checked against crt_numbers - confirm.
+        while init_number in crt_numbers:
+            init_number += 1
         numbers = region.get("number")
-        if numbers and isinstance(numbers, int):
+        if numbers and (isinstance(numbers, int) or isinstance(numbers, float)):
             if numbers <= 0 or numbers in crt_numbers or numbers >= 1000:
-                numbers = init_number
-                crt_numbers.append(numbers)
-                init_number += 1
+                if not region.get("span"):
+                    numbers = init_number
+                    crt_numbers.append(numbers)
+                    init_number += 1
             region.update({"number": numbers})
+            # NOTE(review): a number that was just reassigned above is appended
+            # to crt_numbers a second time here.
             crt_numbers.append(numbers)
         if numbers and isinstance(numbers, list):
             for i, num in enumerate(numbers):
                 if num <= 0 or num in crt_numbers or num >= 1000:
-                    numbers[i] = init_number
-                    crt_numbers.append(init_number)
-                    init_number += 1
+                    if not region.get("span"):
+                        numbers[i] = init_number
+                        crt_numbers.append(init_number)
+                        init_number += 1
 
             region.update({"number": numbers})
             crt_numbers.extend(numbers)
 
-    return sheet_dict, init_number, crt_numbers
+    return regions, init_number, crt_numbers
 
 
 def box_region_format(sheet_dict, image, subject, shrink=True):
@@ -158,10 +173,12 @@ def box_region_format(sheet_dict, image, subject, shrink=True):
                      ]
 
     sheet_regions = sheet_dict['regions']
-    optional_solve_tmp = []
-    default_points_dict = {'choice_m': 5, "cloze": 5, 'solve': 12, 'cloze_s': 5, "composition": 60}
+    optional_choice_tmp = []
+    default_points_dict = {'choice_m': 5, "cloze": 5, 'solve': 12, 'optional_solve': 10, 'cloze_s': 5,
+                           "composition": 60}
     if subject == "english":
-        default_points_dict = {'choice_m': 2, "cloze": 2, 'solve': 2, 'cloze_s': 2, "composition": 25}
+        default_points_dict = {'choice_m': 2, "cloze": 2, 'solve': 2, 'optional_solve': 10, 'cloze_s': 2,
+                               "composition": 25}
 
     for i in range(len(sheet_regions) - 1, -1, -1):
         if subject == "math":
@@ -182,8 +199,36 @@ def box_region_format(sheet_dict, image, subject, shrink=True):
             sheet_regions[i]['class_name'] = 'composition'
 
         if sheet_regions[i]['class_name'] == 'select_s':
-            sheet_regions[i]['class_name'] = 'optional_choice'
-            optional_solve_tmp.append(sheet_regions[i])
+            # sheet_regions[i]['class_name'] = 'optional_choice'
+            # optional_solve_tmp.append(sheet_regions[i])
+
+            bbox = sheet_regions[i]['bounding_box']
+            box_region = utils.crop_region(image, bbox)
+            left = bbox['xmin']
+            top = bbox['ymin']
+            right = bbox['xmax']
+            bottom = bbox['ymax']
+
+            if (right - left) >= (bottom - top):
+                direction = 180
+            else:
+                direction = 90
+
+            try:
+                res = resolve_optional_choice(left, top, direction, box_region)
+            except Exception as e:
+                res = {'class_name': 'optional_choice',
+                       'rows': 1, 'cols': 1,
+                       'number': [501],
+                       'single_width': right - left,
+                       'single_height': bottom - top,
+                       'bounding_box': {'xmin': left,
+                                        'ymin': top,
+                                        'xmax': right,
+                                        'ymax': bottom}}
+
+            optional_choice_tmp.append(res)
+
             sheet_regions.pop(i)
 
         if shrink:
@@ -194,63 +239,74 @@ def box_region_format(sheet_dict, image, subject, shrink=True):
         if ele['class_name'] == 'solve':
             solve_box = (ele['bounding_box']['xmin'], ele['bounding_box']['ymin'],
                          ele['bounding_box']['xmax'], ele['bounding_box']['ymax'])
-            for optional_solve in optional_solve_tmp:
-                optional_solve_box = (optional_solve['bounding_box']['xmin'], optional_solve['bounding_box']['ymin'],
-                                      optional_solve['bounding_box']['xmax'], optional_solve['bounding_box']['ymax'])
-                if utils.decide_coordinate_contains(optional_solve_box, solve_box):
+            for optional_choice in optional_choice_tmp:
+                optional_choice_box = (optional_choice['bounding_box']['xmin'], optional_choice['bounding_box']['ymin'],
+                                       optional_choice['bounding_box']['xmax'], optional_choice['bounding_box']['ymax'])
+                if utils.decide_coordinate_contains(optional_choice_box, solve_box):
                     ele['class_name'] = 'optional_solve'
+                    choice_numbers = optional_choice['number']
+                    solve_points = ele['number']
+                    if choice_numbers[0] < 500:
+                        ele['number'] = choice_numbers
+                        ele['default_points'] = [ele['default_points']] * len(choice_numbers)
+                    else:
+                        ele['number'] = [solve_points] * len(choice_numbers)
+                        optional_choice['numbers'] = [solve_points] * len(choice_numbers)
+                        ele['default_points'] = [ele['default_points']] * len(choice_numbers)
                     break
                 else:
                     continue
 
-        if ele['class_name'] == "composition":
+        # 设置默认分数
+        # if ele['class_name'] == "composition":
+        #     if isinstance(ele['default_points'], list):
+        #         for i, dp in enumerate(ele['default_points']):
+        #             if dp != default_points_dict[ele['class_name']]:
+        #                 ele['default_points'][i] = default_points_dict[ele['class_name']]
+        #
+        #     if isinstance(ele['default_points'], int):
+        #         if ele['default_points'] != default_points_dict[ele['class_name']]:
+        #             ele['default_points'] = default_points_dict[ele['class_name']]
+
+        if ele['class_name'] in ["choice_m", "cloze", "cloze_s", "solve", "optional_solve", "composition"]:
             if isinstance(ele['default_points'], list):
                 for i, dp in enumerate(ele['default_points']):
-                    if dp != default_points_dict[ele['class_name']]:
+                    if dp <= -1:
                         ele['default_points'][i] = default_points_dict[ele['class_name']]
 
-            if isinstance(ele['default_points'], int):
-                if ele['default_points'] != default_points_dict[ele['class_name']]:
+            if isinstance(ele['default_points'], int) or isinstance(ele['default_points'], float):
+                if ele['default_points'] <= -1:
                     ele['default_points'] = default_points_dict[ele['class_name']]
 
-        if ele['class_name'] in ["choice_m", "cloze", "cloze_s", "solve"]:
-            if isinstance(ele['default_points'], list):
-                for i, dp in enumerate(ele['default_points']):
-                    if dp == -1:
-                        ele['default_points'][i] = default_points_dict[ele['class_name']]
-
-            if isinstance(ele['default_points'], int):
-                if ele['default_points'] == -1:
-                    ele['default_points'] = default_points_dict[ele['class_name']]
-
-    for ele in optional_solve_tmp:  # 选做题
-        bbox = ele['bounding_box']
-        box_region = utils.crop_region(image, bbox)
-        left = bbox['xmin']
-        top = bbox['ymin']
-        right = bbox['xmax']
-        bottom = bbox['ymax']
-
-        if (right - left) >= (bottom-top):
-            direction = 180
-        else:
-            direction = 90
-
-        # res = find_contours(left, top, box_region)
-        try:
-            res = resolve_optional_choice(left, top, direction, box_region)
-        except Exception as e:
-            res = {'rows': 1, 'cols': 2,
-                   'option': 'A, B',
-                   'single_width': (right - left) // 3,
-                   'single_height': bottom - top,
-                   'bounding_box': {'xmin': left,
-                                    'ymin': top,
-                                    'xmax': right,
-                                    'ymax': bottom}}
-        res['class_name'] = 'optional_choice'
-
-        sheet_regions.append(res)
+    sheet_regions.extend(optional_choice_tmp)
+    # for ele in optional_choice_tmp:  # 选做题
+    #     bbox = ele['bounding_box']
+    #     box_region = utils.crop_region(image, bbox)
+    #     left = bbox['xmin']
+    #     top = bbox['ymin']
+    #     right = bbox['xmax']
+    #     bottom = bbox['ymax']
+    #
+    #     if (right - left) >= (bottom - top):
+    #         direction = 180
+    #     else:
+    #         direction = 90
+    #
+    #     # res = find_contours(left, top, box_region)
+    #     try:
+    #         res = resolve_optional_choice(left, top, direction, box_region)
+    #     except Exception as e:
+    #         res = {'class_name': 'optional_choice',
+    #                'rows': 1, 'cols': 1,
+    #                'numbers': [501],
+    #                'single_width': right - left,
+    #                'single_height': bottom - top,
+    #                'bounding_box': {'xmin': left,
+    #                                 'ymin': top,
+    #                                 'xmax': right,
+    #                                 'ymax': bottom}}
+    #
+    #     sheet_regions.append(res)
 
     # iou
     sheet_tmp = sheet_regions.copy()
@@ -260,6 +316,7 @@ def box_region_format(sheet_dict, image, subject, shrink=True):
             box = region['bounding_box']
             for j, region_in in enumerate(sheet_tmp):
                 box_in = region_in['bounding_box']
+                # TODO 根据大小
                 iou = utils.cal_iou(box, box_in)
                 if iou[0] > 0.75 and i != j:
                     sheet_regions.remove(region)
@@ -268,3 +325,47 @@ def box_region_format(sheet_dict, image, subject, shrink=True):
 
     sheet_dict.update({'regions': sheet_regions})
     return sheet_dict
+
+
+def merge_span_boxes(col_sheets):
+    """Link answer boxes that continue from the bottom of one column to the top of the next.
+
+    ``col_sheets`` is a list of columns, each a list of region dicts.  The last
+    region of a column is compared with the first region of the following
+    column; empty columns are skipped.
+
+    * Two 'solve' boxes are linked when either number is >= 500 (presumably a
+      placeholder for an unrecognised number - confirm) or both numbers are
+      equal.  The linked pair keeps the default_points of the upper-column box.
+    * Two 'composition' boxes are always linked and take the larger
+      default_points of the two.
+
+    Linked boxes share the smaller number and get ``span=True``; the box in the
+    following column also gets ``span_id`` = the previous box's span_id + 1.
+    The region dicts are modified in place and ``col_sheets`` is returned.
+    """
+    # zip() over the shifted list yields nothing for zero or one column.
+    for cur_col, next_col in zip(col_sheets, col_sheets[1:]):
+        if not cur_col or not next_col:
+            continue
+
+        bottom_box, top_box = cur_col[-1], next_col[0]
+        name = bottom_box['class_name']
+        if name != top_box['class_name'] or name not in ('solve', 'composition'):
+            continue
+
+        b_number, t_number = bottom_box['number'], top_box['number']
+        if name == 'solve':
+            if not (b_number >= 500 or t_number >= 500 or b_number == t_number):
+                continue
+            default_points = bottom_box['default_points']
+        else:
+            default_points = max(bottom_box['default_points'], top_box['default_points'])
+
+        shared = {'number': min(b_number, t_number), 'default_points': default_points, "span": True}
+        bottom_box.update(shared)
+        # The first box of a span may not carry a "span_id" yet; count from 0
+        # in that case instead of raising KeyError.
+        top_box.update(dict(shared, span_id=bottom_box.get("span_id", 0) + 1))
+
+    return col_sheets

+ 224 - 176
segment/sheet_resolve/analysis/sheet/choice_infer.py

@@ -69,14 +69,14 @@ def adjust_choice_m(image, xe, ye):
     return right_limit, bottom_limit
 
 
-def find_digital(ocr_raw_list):
+def find_digital(ocr_raw_list, left, top):
     pattern = r'\d+'
     x_list = []
     y_list = []
     digital_list = list()
     chars_list = list()
     height_list, width_list = list(), list()
-    ocr_dict_list = combine_char_in_raw_format(ocr_raw_list)
+    ocr_dict_list = combine_char_in_raw_format(ocr_raw_list, left, top)
     for i, ele in enumerate(ocr_dict_list):
         words = ele['words']
         words = words.replace(' ', '').upper()  # 去除空格
@@ -117,7 +117,7 @@ def find_digital(ocr_raw_list):
             digital_list.append({"digital": int(number), "loc": number_loc})
 
         current_chars = [char for index, char in enumerate(ele['chars'])
-                         if index not in digital_index_detail_list and char['char'] not in ['.', ',', '。', '、']]
+                         if index not in digital_index_detail_list and char['char'].encode('utf-8').isalpha()]
 
         chars_list += current_chars
 
@@ -164,7 +164,7 @@ def cluster2choice_m_(cluster_list, m_h, m_w):
     return block_list
 
 
-def cluster2choice_m(cluster_list, mean_width):
+def cluster2choice_m(cluster_list, mean_width, tf_box=False):
     # 比较x坐标,去掉误差值
     numbers_x = [ele['loc'][4] for ele in cluster_list]
     numbers_x_array = np.array(numbers_x)
@@ -232,7 +232,8 @@ def cluster2choice_m(cluster_list, mean_width):
             else:
                 numbers_array[suspect] = -1
 
-        numbers_array = infer_number(numbers_array, number_interval_mode_value)  # 推断题号
+        times = 0
+        numbers_array = infer_number(numbers_array, times, number_interval_mode_value)  # 推断题号
         numbers_array = np.array(numbers_array)
 
     numbers_interval = np.abs(numbers_array[1:] - numbers_array[:-1])
@@ -246,6 +247,9 @@ def cluster2choice_m(cluster_list, mean_width):
     split_index = sorted(list(set(split_index)))
     block_list = []
 
+    if tf_box:
+        split_index = [0, len(cluster_list)]
+
     for i in range(len(split_index) - 1):
         block = cluster_list[split_index[i]: split_index[i + 1]]
         block_numbers = numbers_array[split_index[i]: split_index[i + 1]]
@@ -263,115 +267,156 @@ def cluster2choice_m(cluster_list, mean_width):
     return block_list
 
 
-def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
+def point_in_polygon(point, polygon):
+    """Return whether ``point`` (x, y) is inside the ``polygon`` box dict, borders included."""
+    left, top, right, bottom = polygon['xmin'], polygon['ymin'], polygon['xmax'], polygon['ymax']
+    if not (left <= point[0] <= right):
+        return False
+    return bool(top <= point[1] <= bottom)
+
+
+def cluster_and_anti_abnormal(image, xml_path, choice_n_list, digital_list, chars_list,
                               mean_height, mean_width, choice_s_height, choice_s_width, limit_loc):
     limit_left, limit_top, limit_right, limit_bottom = limit_loc
     limit_width, limit_height = limit_right - limit_left, limit_bottom - limit_top
-    digital_loc_arr = np.ones((len(digital_list), 2))
+    digital_loc_arr = []
+    digital_list_to_cluster = []
+    # 在choice_n 的数字不进行聚类
     for i, ele in enumerate(digital_list):
-        digital_loc_arr[i] = np.array([ele["loc"][-2], ele["loc"][-1]])
+        point = [ele["loc"][-2], ele["loc"][-1]]
+        contain = False
+        for choice_n in choice_n_list:
+            choice_n_loc = choice_n['bounding_box']
+
+            numbers = choice_n.get('numbers')
+            if not numbers:
+                choice_n.update({"numbers": []})
+            if point_in_polygon(point, choice_n_loc):
+                contain = True
+                choice_n["numbers"].append(digital_list[i])
+                break
+        if not contain:
+            digital_list_to_cluster.append(digital_list[i])
+            digital_loc_arr.append(point)
 
-    if choice_s_height != 0:
-        eps = int(choice_s_height * 2.5)
-    else:
-        eps = int(mean_height * 3)
-    print("eps: ", eps)
-    db = DBSCAN(eps=eps, min_samples=2, metric='chebyshev').fit(digital_loc_arr)
+    choice_m_numbers_list = []
+    for ele in choice_n_list:
+        loc = ele['bounding_box']
+        xmin, ymin, xmax, ymax = loc['xmin'], loc['ymin'], loc['xmax'], loc['ymax']
+        mid_x, mid_y = (xmax - xmin) // 2 + xmin, (ymax - ymin) // 2 + ymin
+
+        cluster = ele.get('numbers')
+        if not cluster:
+            block_list = [{"numbers": [-1], "loc": [xmin, ymin, xmax, ymax, mid_x, mid_y]}]
+        else:
+            block_list = cluster2choice_m(cluster, mean_width, tf_box=True)
+            block_list[0].update({"loc": [xmin, ymin, xmax, ymax, mid_x, mid_y]})
 
-    labels = db.labels_
-    # print(labels)
+        choice_m_numbers_list += block_list
 
-    cluster_label = []
-    for ele in labels:
-        if ele not in cluster_label and ele != -1:
-            cluster_label.append(ele)
+    if digital_loc_arr:
+        digital_loc_arr = np.array(digital_loc_arr)
+        if choice_s_height != 0:
+            eps = int(choice_s_height * 2.5)
+        else:
+            eps = int(mean_height * 3)
+        print("eps: ", eps)
+        db = DBSCAN(eps=eps, min_samples=2, metric='chebyshev').fit(digital_loc_arr)
 
-    a_e_dict = {k: [] for k in cluster_label}
-    choice_m_numbers_list = []
-    for index, ele in enumerate(labels):
-        if ele != -1:
-            a_e_dict[ele].append(digital_list[index])
+        labels = db.labels_
+        print(labels)
+
+        cluster_label = []
+        for ele in labels:
+            if ele not in cluster_label and ele != -1:
+                cluster_label.append(ele)
+
+        a_e_dict = {k: [] for k in cluster_label}
+
+        for index, ele in enumerate(labels):
+            if ele != -1:
+                a_e_dict[ele].append(digital_list_to_cluster[index])
 
-    for ele in cluster_label:
-        cluster = a_e_dict[ele]
-        choice_m_numbers_list += cluster2choice_m(cluster, mean_width)
+        for ele in cluster_label:
+            cluster = a_e_dict[ele]
+            choice_m_numbers_list += cluster2choice_m(cluster, mean_width)
 
     all_list_nums = [ele["numbers"] for ele in choice_m_numbers_list]
     all_nums_len = [len(ele) for ele in all_list_nums]
     all_nums = list(chain.from_iterable(all_list_nums))
 
-    counts = np.bincount(np.array(all_nums_len))
-    if np.max(counts) < 2:
-        mode_value = max(all_nums_len)
-    else:
-        mode_value = np.argmax(counts)
-        mode_value = all_nums_len[np.where(np.array(all_nums_len) == mode_value)[0][-1]]
-
-    if mode_value > 1:  # 缺失补全
-        error_index_list = list(np.where(np.array(all_nums_len) != mode_value)[0])
-
-        all_height = [ele["loc"][3] - ele["loc"][1] for index, ele
-                      in enumerate(choice_m_numbers_list) if index not in error_index_list]
-        choice_m_mean_height = int(sum(all_height) / len(all_height))
-
-        for e_index in list(error_index_list):
-            current_choice_m = choice_m_numbers_list[e_index]
-            current_numbers_list = list(all_list_nums[e_index])
-            current_len = all_nums_len[e_index]
-            dif = mode_value - current_len
-
-            if 1 in current_numbers_list:
-                t2 = current_numbers_list + [-1] * dif
-                infer_t1_list = infer_number(t2)  # 后补
-                infer_t2_list = infer_number(t2)  # 后补
-                cond1 = False
-                cond2 = True
-            else:
-                t1_cond = [True] * dif
-                t2_cond = [True] * dif
-
-                t1 = [-1] * dif + current_numbers_list
-                infer_t1_list = infer_number(t1)  # 前补
-                t2 = current_numbers_list + [-1] * dif
-                infer_t2_list = infer_number(t2)  # 后补
-
-                for i in range(0, dif):
-                    t1_infer = infer_t1_list[i]
-                    t2_infer = infer_t2_list[-i - 1]
-                    if t1_infer == 0 or t1_infer in all_nums:
-                        t1_cond[i] = False
-                    if t2_infer in all_nums:
-                        t2_cond[i] = False
-                cond1 = not (False in t1_cond)
-                cond2 = not (False in t2_cond)
-
-            if cond1 and not cond2:
-                current_loc = current_choice_m["loc"]
-                current_height = current_loc[3] - current_loc[1]
-
-                infer_height = max((choice_m_mean_height - current_height), int(dif * current_height / current_len))
-                choice_m_numbers_list[e_index]["loc"][1] = current_loc[1] - infer_height
-                choice_m_numbers_list[e_index]["loc"][5] = (choice_m_numbers_list[e_index]["loc"][1] +
-                                                            (choice_m_numbers_list[e_index]["loc"][3] -
-                                                             choice_m_numbers_list[e_index]["loc"][1]) // 2)
-                choice_m_numbers_list[e_index]["numbers"] = infer_t1_list
-                all_nums.extend(infer_t1_list[:dif])
-            if not cond1 and cond2:
-                current_loc = current_choice_m["loc"]
-                current_height = current_loc[3] - current_loc[1]
-
-                infer_height = max((choice_m_mean_height - current_height), int(dif * current_height / current_len))
-                infer_bottom = min(current_loc[3] + infer_height, limit_height - 1)
-                if infer_bottom <= limit_height:
-                    choice_m_numbers_list[e_index]["loc"][3] = infer_bottom
-                    choice_m_numbers_list[e_index]["loc"][5] = (choice_m_numbers_list[e_index]["loc"][1] +
-                                                                (choice_m_numbers_list[e_index]["loc"][3] -
-                                                                 choice_m_numbers_list[e_index]["loc"][1]) // 2)
-                    choice_m_numbers_list[e_index]["numbers"] = infer_t2_list
-                    all_nums.extend(infer_t2_list[-dif:])
-            else:
-                # cond1 = cond2 = true, 因为infer选择题时已横向排序, 默认这种情况不会出现
-                pass
+    # counts = np.bincount(np.array(all_nums_len))
+    # if np.max(counts) < 2:
+    #     mode_value = max(all_nums_len)
+    # else:
+    #     mode_value = np.argmax(counts)
+    #     mode_value = all_nums_len[np.where(np.array(all_nums_len) == mode_value)[0][-1]]
+    #
+    # if mode_value > 1:  # 缺失补全
+    #     error_index_list = list(np.where(np.array(all_nums_len) != mode_value)[0])
+    #
+    #     all_height = [ele["loc"][3] - ele["loc"][1] for index, ele
+    #                   in enumerate(choice_m_numbers_list) if index not in error_index_list]
+    #     choice_m_mean_height = int(sum(all_height) / len(all_height))
+    #
+    #     for e_index in list(error_index_list):
+    #         current_choice_m = choice_m_numbers_list[e_index]
+    #         current_numbers_list = list(all_list_nums[e_index])
+    #         current_len = all_nums_len[e_index]
+    #         dif = mode_value - current_len
+    #
+    #         if 1 in current_numbers_list:
+    #             t2 = current_numbers_list + [-1] * dif
+    #             infer_t1_list = infer_number(t2)  # 后补
+    #             infer_t2_list = infer_number(t2)  # 后补
+    #             cond1 = False
+    #             cond2 = True
+    #         else:
+    #             t1_cond = [True] * dif
+    #             t2_cond = [True] * dif
+    #
+    #             t1 = [-1] * dif + current_numbers_list
+    #             infer_t1_list = infer_number(t1)  # 前补
+    #             t2 = current_numbers_list + [-1] * dif
+    #             infer_t2_list = infer_number(t2)  # 后补
+    #
+    #             for i in range(0, dif):
+    #                 t1_infer = infer_t1_list[i]
+    #                 t2_infer = infer_t2_list[-i - 1]
+    #                 if t1_infer == 0 or t1_infer in all_nums:
+    #                     t1_cond[i] = False
+    #                 if t2_infer in all_nums:
+    #                     t2_cond[i] = False
+    #             cond1 = not (False in t1_cond)
+    #             cond2 = not (False in t2_cond)
+    #
+    #         if cond1 and not cond2:
+    #             current_loc = current_choice_m["loc"]
+    #             current_height = current_loc[3] - current_loc[1]
+    #
+    #             infer_height = max((choice_m_mean_height - current_height), int(dif * current_height / current_len))
+    #             choice_m_numbers_list[e_index]["loc"][1] = current_loc[1] - infer_height
+    #             choice_m_numbers_list[e_index]["loc"][5] = (choice_m_numbers_list[e_index]["loc"][1] +
+    #                                                         (choice_m_numbers_list[e_index]["loc"][3] -
+    #                                                          choice_m_numbers_list[e_index]["loc"][1]) // 2)
+    #             choice_m_numbers_list[e_index]["numbers"] = infer_t1_list
+    #             all_nums.extend(infer_t1_list[:dif])
+    #         if not cond1 and cond2:
+    #             current_loc = current_choice_m["loc"]
+    #             current_height = current_loc[3] - current_loc[1]
+    #
+    #             infer_height = max((choice_m_mean_height - current_height), int(dif * current_height / current_len))
+    #             infer_bottom = min(current_loc[3] + infer_height, limit_height - 1)
+    #             if infer_bottom <= limit_height:
+    #                 choice_m_numbers_list[e_index]["loc"][3] = infer_bottom
+    #                 choice_m_numbers_list[e_index]["loc"][5] = (choice_m_numbers_list[e_index]["loc"][1] +
+    #                                                             (choice_m_numbers_list[e_index]["loc"][3] -
+    #                                                              choice_m_numbers_list[e_index]["loc"][1]) // 2)
+    #                 choice_m_numbers_list[e_index]["numbers"] = infer_t2_list
+    #                 all_nums.extend(infer_t2_list[-dif:])
+    #         else:
+    #             # cond1 = cond2 = true, 因为infer选择题时已横向排序, 默认这种情况不会出现
+    #             pass
 
     for ele in choice_m_numbers_list:
         loc = ele["loc"]
@@ -387,7 +432,6 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
     #     tree = create_xml(name, tree, str(xmin + limit_left), str(ymin + limit_top), str(xmax + limit_left), str(ymax + limit_top))
     #
     # tree.write(xml_path)
-
     choice_m_numbers_list = sorted(choice_m_numbers_list, key=lambda x: x['loc'][3] - x['loc'][1], reverse=True)
     choice_m_numbers_right_limit = max([ele['loc'][2] for ele in choice_m_numbers_list])
     remain_len = len(choice_m_numbers_list)
@@ -439,30 +483,15 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
         current_row_chars = [ele for ele in chars_list
                              if ymin_limit < (ele["location"]["top"] + ele["location"]["height"] // 2) < ymax_limit]
 
-        # if not current_row_chars:
-        #     max_char_width = choice_s_width // 4
-        #     row_chars_xmax = choice_m_numbers_right_limit + int(choice_s_width * 1.5)
-        # else:
-        #     max_char_width = max([ele["location"]["width"] for ele in current_row_chars]) // 2
-        #     row_chars_xmax = max(
-        #         [ele["location"]["left"] + ele["location"]["width"] for ele in current_row_chars]) + max_char_width * 2
-
         # split_index.append(row_chars_xmax)  # 边界
-        split_pix.append(round(split_pix[-1] + choice_s_width * 1.2))
+        split_pix.append(round(split_pix[-1] + choice_s_width * 1.75))
         for i in range(0, len(split_pix) - 1):
             left_limit = split_pix[i]
             right_limit = split_pix[i + 1]
             block_chars = [ele for ele in current_row_chars
                            if left_limit < (ele["location"]["left"] + ele["location"]["width"] // 2) < right_limit]
 
-            # chars_xmin = min([ele["location"]["left"] for ele in block_chars]) - max_char_width
-            # chars_xmax = max(
-            #     [ele["location"]["left"] + ele["location"]["width"] for ele in block_chars]) + max_char_width
-
-            # a_z = '_ABCD_FGH__K_MNOPQRSTUVWXYZ'  EIJL -> _
-            # a_z = '_ABCDEFGHI_K_MNOPQRSTUVWXYZ'
             a_z = '_ABCD_FGHT'
-            # letter_text = set([ele['char'].upper() for ele in block_chars if ele['char'].upper() in a_z])
             letter_index = [a_z.index(ele['char'].upper()) for ele in block_chars if ele['char'].upper() in a_z]
 
             letter_index_times = {ele: 0 for ele in set(letter_index)}
@@ -478,26 +507,24 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
                     choice_option = 'A,B,C,D'
                 else:
                     tmp = max(set(letter_index))
-                    # while letter_index_times[tmp] < 2 and tmp > 3:
-                    #     t_list = list(set(letter_index))
-                    #     t_list.remove(tmp)
-                    #     tmp = max(t_list)
 
                     choice_option = ",".join(a_z[min(letter_index):tmp + 1])
                 cols = tmp
 
             bias = 3  # pix
             current_loc = current_row_choice_m_d[i]["loc"]
-            location = dict(xmin=(current_loc[2] + bias) + limit_left,  # 当前数字xmax右边
+            location = dict(xmin=(current_loc[2] + bias),  # 当前数字xmax右边
                             # xmin=max(current_loc[2] + bias, chars_xmin) + limit_left,
-                            ymin=current_loc[1] + limit_top,
+                            ymin=current_loc[1],
 
-                            xmax=(right_limit - bias) + limit_left,
+                            xmax=(right_limit - bias),
                             # xmax=min(chars_xmax, right_limit - bias) + limit_left,
-                            ymax=current_loc[3] + limit_top)
+                            ymax=current_loc[3])
 
             try:
                 choice_m_img = utils.crop_region(image, location)
+                if 0 in choice_m_img.shape[:2]:
+                    continue
                 right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
                 if right_loc > 0:
                     location.update(dict(xmax=right_loc + location['xmin']))
@@ -519,9 +546,7 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
                                 default_points=[5] * len(numbers),
                                 direction=direction,
                                 cols=cols,
-                                rows=len(numbers),
-                                single_width=tmp_w // cols,
-                                single_height=tmp_h // len(numbers))
+                                rows=len(numbers))
             else:
                 choice_m = dict(class_name='choice_m',
                                 number=numbers,
@@ -530,10 +555,7 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
                                 default_points=[5] * len(numbers),
                                 direction=direction,
                                 cols=len(numbers),
-                                rows=cols,
-                                single_width=tmp_w // len(numbers),
-                                single_height=tmp_h // cols
-                                )
+                                rows=cols)
 
             if tmp_w > 2 * choice_s_width:
                 need_revised_choice_m_list.append(choice_m)
@@ -553,26 +575,25 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
         crt_right_max = max([int(ele['bounding_box']['xmax']) for ele in choice_m_list])
         if limit_right - crt_right_max > choice_s_width:
             # 存在区域
-            region_loc = {'xmin': crt_right_max + 10, 'ymin': choice_m_list[0]['bounding_box']['ymin'],
-                          'xmax': limit_right, 'ymax': choice_m_list[0]['bounding_box']['ymax']}
+            region_loc = {'xmin': crt_right_max + 10,
+                          'ymin': choice_m_list[0]['bounding_box']['ymin'],
+                          'xmax': limit_right,
+                          'ymax': choice_m_list[0]['bounding_box']['ymax']}
 
             contain_dig = []
             for i, ele in enumerate(digital_loc_arr):
-                if (region_loc['xmin'] < ele[0] + limit_left < region_loc['xmax']
-                        and region_loc['ymin'] < ele[1] + limit_top < region_loc['ymax']):
+                if region_loc['xmin'] < ele[0] < region_loc['xmax'] and region_loc['ymin'] < ele[1] < region_loc['ymax']:
                     contain_dig.append(digital_list[i])
 
             contain_chars = [ele for ele in chars_list
-                             if region_loc['xmin'] < (
-                                     ele["location"]["left"] + ele["location"]["width"] // 2) + limit_left <
-                             region_loc['xmax']
+                             if region_loc['xmin'] < (ele["location"]["left"] + ele["location"]["width"] // 2) < region_loc['xmax']
                              and
-                             region_loc['xmin'] < (
-                                     ele["location"]["top"] + ele["location"]["height"] // 2) + limit_top <
-                             region_loc['ymax']]
+                             region_loc['xmin'] < (ele["location"]["top"] + ele["location"]["height"] // 2) < region_loc['ymax']]
+            numbers = [-1]
             if contain_dig or contain_chars:
                 d_ymin, d_ymax, d_xmin, d_xmax = 9999, 0, 9999, 0
                 if contain_dig:
+                    numbers = [ele["digital"] for ele in contain_dig]
                     d_ymin = min([ele['loc'][1] for ele in contain_dig])
                     d_ymax = max([ele['loc'][3] for ele in contain_dig])
                     d_xmin = min([ele['loc'][0] for ele in contain_dig])
@@ -588,44 +609,55 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
                 r_ymin, r_ymax = min(d_ymin, c_ymin), max(d_ymax, c_ymax)
                 r_xmin, r_xmax = min(d_xmin, c_xmin), max(d_xmax, c_xmax)
 
-                region_loc['ymin'] = r_ymin - 10 + limit_top
-                region_loc['ymax'] = r_ymax + 10 + limit_top
+                region_loc['ymin'] = r_ymin - 10
+                region_loc['ymax'] = r_ymax + 10
                 if d_xmin == r_xmin:
-                    region_loc['xmin'] = d_xmax + 5 + limit_left
-                    region_loc['xmax'] = d_xmax + 5 + limit_left + int(1.2 * choice_s_width)
+                    region_loc['xmin'] = d_xmax + 5
+                    region_loc['xmax'] = d_xmax + 5 + int(1.2 * choice_s_width)
                 else:
                     if 1.2 * (r_xmax - r_xmin) > choice_s_width:
-                        region_loc['xmin'] = r_xmin - 10 + limit_left
-                        region_loc['xmax'] = r_xmax + 10 + limit_left
+                        region_loc['xmin'] = r_xmin - 10
+                        region_loc['xmax'] = r_xmax + 10
                     else:
-                        region_loc['xmin'] = max((r_xmax - r_xmin) // 2 + r_xmin - choice_s_width + limit_left,
+                        region_loc['xmin'] = max((r_xmax - r_xmin) // 2 + r_xmin - choice_s_width,
                                                  crt_right_max + 10)
-                        region_loc['xmax'] = min((r_xmax - r_xmin) // 2 + r_xmin + choice_s_width + limit_left,
+                        region_loc['xmax'] = min((r_xmax - r_xmin) // 2 + r_xmin + choice_s_width ,
                                                  limit_right)
 
-                try:
-                    choice_m_img = utils.crop_region(image, region_loc)
-                    right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
-                    if right_loc > 0:
-                        region_loc.update(dict(xmax=right_loc + region_loc['xmin']))
-                    if bottom_loc > 0:
-                        region_loc.update(dict(ymax=bottom_loc + region_loc['ymin']))
-                except Exception as e:
-                    print(e)
-                    traceback.print_exc()
+            else:
+                # 默认这种情况只有1行或2行
+                numbers = [-1]
+                region_xmin = crt_right_max + 5
+                region_xmax = int(region_xmin + 1.2 * choice_s_width)
+                region_ymin = min([int(ele['bounding_box']['ymin']) for ele in choice_m_list])
+                region_ymax = max([int(ele['bounding_box']['ymax']) for ele in choice_m_list])
+                region_ymax = region_ymin + (region_ymax - region_ymin) // 2  # 默认这种情况只有1行或2行
+                region_loc = {'xmin': region_xmin, 'ymin': region_ymin, 'xmax': region_xmax, 'ymax': region_ymax}
 
-                choice_m = dict(class_name='choice_m',
-                                number=[-1],
-                                bounding_box=region_loc,
-                                choice_option='A,B,C,D',
-                                default_points=[5],
-                                direction=180,
-                                cols=4,
-                                rows=1,
-                                single_width=(region_loc['xmax'] - region_loc['xmin']) // 4,
-                                single_height=r_ymax - r_ymin
-                                )
-                choice_m_list.append(choice_m)
+            try:
+                choice_m_img = utils.crop_region(image, region_loc)
+                if 0 in choice_m_img.shape[:2]:
+                    continue
+                right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
+                if right_loc > 0:
+                    region_loc.update(dict(xmax=right_loc + region_loc['xmin']))
+                if bottom_loc > 0:
+                    region_loc.update(dict(ymax=bottom_loc + region_loc['ymin']))
+            except Exception as e:
+                print(e)
+                traceback.print_exc()
+
+            choice_m = dict(class_name='choice_m',
+                            number=numbers,
+                            bounding_box=region_loc,
+                            choice_option='A,B,C,D',
+                            default_points=[5],
+                            direction=180,
+                            cols=4,
+                            rows=1,
+                            single_width=(region_loc['xmax'] - region_loc['xmin']) // 4,
+                            )
+            choice_m_list.append(choice_m)
 
     # 单独一行不聚类(理论上不会再到这一步了, 上个block解决)
     for i, revised_choice_m in enumerate(need_revised_choice_m_list):
@@ -675,8 +707,20 @@ def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
 
 def infer_choice_m(image, tf_sheet, col_split_x, ocr, xml=None):
     infer_box_list = ocr2sheet(image, col_split_x, ocr, xml)
-    # print(sheet_region_list)
-    choice_m_list = []
+    if not infer_box_list:
+        for ele in tf_sheet:
+            if ele['class_name'] == 'choice':
+                choice_xmin = ele['bounding_box']['xmin']
+                choice_ymin = ele['bounding_box']['ymin']
+                choice_xmax = ele['bounding_box']['xmax']
+                choice_ymax = ele['bounding_box']['ymax']
+
+                mid_x = choice_xmin + (choice_xmax-choice_xmin)//2
+                for i in range(len(col_split_x)-1):
+                    if col_split_x[i] < mid_x < col_split_x[i+1]:
+                        choice_xmax = col_split_x[i+1] - 5
+                        infer_box_list.append({'loc': [choice_xmin, choice_ymin, choice_xmax, choice_ymax]})
+                        break
 
     choice_s_h_list = [int(ele['bounding_box']['ymax']) - int(ele['bounding_box']['ymin']) for ele in tf_sheet
                        if ele['class_name'] == 'choice_s']
@@ -693,6 +737,9 @@ def infer_choice_m(image, tf_sheet, col_split_x, ocr, xml=None):
     else:
         choice_s_width = 0
 
+    choice_n_list = [ele for ele in tf_sheet if ele['class_name'] == 'choice_n']
+
+    choice_m_list = []
     for infer_box in infer_box_list:
         # {'loc': [240, 786, 1569, 1368]}
         loc = infer_box['loc']
@@ -726,16 +773,17 @@ def infer_choice_m(image, tf_sheet, col_split_x, ocr, xml=None):
             #     ocr = brain_api.get_ocr_text_and_coordinate(infer_image, 'accurate', 'CHN_ENG')
 
             try:
-                digital_list, chars_list, digital_mean_h, digital_mean_w = find_digital(ocr)
+                digital_list, chars_list, digital_mean_h, digital_mean_w = find_digital(ocr, xmin, ymin)
                 if not digital_list:
                     continue
 
-                choice_m = cluster_and_anti_abnormal(image, xml, digital_list, chars_list,
+                choice_m = cluster_and_anti_abnormal(image, xml, choice_n_list, digital_list, chars_list,
                                                      digital_mean_h, digital_mean_w,
                                                      choice_s_height, choice_s_width, loc)
 
                 choice_m_list.extend(choice_m)
             except Exception as e:
+                choice_m_numbers_res = []
                 traceback.print_exc()
                 print('not found choice feature')
                 pass

+ 78 - 0
segment/sheet_resolve/analysis/sheet/decide_blank.py

@@ -0,0 +1,78 @@
+# @Author  : lightXu
+# @File    : predict.py
+import time
+
+import cv2
+import numpy as np
+
+from segment.sheet_resolve.tools.tf_settings import decide_blank_model
+
+# Classifier parameters, loaded once when this module is imported.
+# svm_predict reads column 0 as the alpha*y coefficients, element [0, 1] as the
+# bias term and columns 2 onward as the support vectors.
+model = np.load(decide_blank_model)
+
+
+def round_float(value):
+    """Round ``value`` to six decimal places."""
+    decimals = 6
+    return round(value, decimals)
+
+
+def gen_feature(img):
+    """Build the feature list for a 2-D image.
+
+    :param img: 2-D array (``img.shape`` must unpack into height and width)
+    :return: list of 18 rounded values: aspect ratio (h / w), overall mean / 255,
+             then the mean / 255 of each cell of a 4 x 4 grid, row by row
+    """
+    height, width = img.shape
+    overall_mean = round_float(np.mean(img) / 255)
+    aspect_ratio = round_float(height / width)
+
+    grid_rows, grid_cols = 4, 4
+    cell_h, cell_w = height // grid_rows, width // grid_cols
+
+    features = [aspect_ratio, overall_mean]
+    # Cells are visited row-major; pixels beyond grid * cell size are ignored.
+    features.extend(
+        round_float(np.mean(img[r * cell_h:(r + 1) * cell_h, c * cell_w:(c + 1) * cell_w]) / 255)
+        for r in range(grid_rows)
+        for c in range(grid_cols)
+    )
+    return features
+
+
+def kernel_trans(x1, array, k_tup=('rbf', 1.3)):
+    """Evaluate the kernel between ``x1`` and ``array``.
+
+    :param x1: a 1-D vector, or an array whose rows are compared with ``array``
+    :param array: vector combined with ``x1``
+    :param k_tup: (kernel name, parameter); 'lin' is a plain dot product,
+                  'rbf' uses the parameter as sigma
+    :return: the kernel value(s); for 'rbf' a scalar when ``x1`` is 1-D,
+             otherwise the result of summing squared differences over axis 1
+    :raises NameError: when the kernel name is neither 'lin' nor 'rbf'
+    """
+    kernel_name = k_tup[0]
+    if kernel_name == 'lin':
+        return np.dot(x1, array)
+    if kernel_name != 'rbf':
+        raise NameError('核函数无法识别')
+
+    sigma = k_tup[1]
+    if np.ndim(x1) == 1:
+        # exp(-|x1 - array|^2 / sigma^2) for a single vector
+        return np.exp(np.sum(np.square(x1 - array)) / (-1 * np.square(sigma)))
+
+    # Same formula, evaluated once per row of x1.
+    squared_dist = np.sum(np.square(x1 - array), axis=1)
+    return np.exp(-squared_dist / np.square(sigma))
+
+
+def svm_predict(img, subject_id=3):
+    """Classify an image as blank or not blank with the preloaded SVM parameters.
+
+    :param img: 2-D image array, passed to gen_feature
+    :param subject_id: value prepended to the feature vector (default 3)
+    :return: True when the sign of the decision value is 1 (blank), otherwise False
+    """
+    feature = gen_feature(img)
+    feature.insert(0, subject_id)
+
+    # np.float was only an alias of the builtin float and was removed in
+    # NumPy 1.24; the builtin gives the same dtype on every NumPy version.
+    feature = np.array(feature, dtype=float)
+
+    alpha_y = model[:, 0].T
+    b = model[0, 1]
+    sVs = model[:, 2:]
+
+    # Decision value: sum_i(alpha_i * y_i * K(sv_i, feature)) + b
+    fxk = np.dot(alpha_y, kernel_trans(sVs, feature)) + b
+    # sign == 1 -> blank; 0 or -1 -> not blank
+    return bool(np.sign(fxk) == 1)
+
+
+if __name__ == '__main__':
+    # Manual timing check: run one prediction on a local sample and print the seconds taken.
+    file = r'E:\math_svm\test\202005060540_0001_214731356c.jpg'
+    im = cv2.imread(file, 0)  # flag 0: read as a single-channel grayscale image
+    started = time.time()
+    svm_predict(im)
+    print(time.time() - started)

+ 8 - 7
segment/sheet_resolve/analysis/sheet/ocr_sheet.py

@@ -96,9 +96,6 @@ def tell_columns(image, sheet_dict):
 
     split_x = [px for px in [x1, x2] if px != 0]
 
-    if not split_x:
-        split_x = [w-1]
-
     return split_x
 
 
@@ -196,8 +193,10 @@ def ocr2sheet(image, col_split_list, raw_ocr, xml_path=None):
                 # 下分界行的上一行
                 bottom_limit = chn_index[split_index[i] + 1]
                 if bottom_limit in raw_chn_index:
-                    while int(ocr_res[bottom_limit - 1]["location"]['height']) >= int(
-                            ocr_res[bottom_limit - 1]["location"]['width']):
+                    while (int(ocr_res[bottom_limit - 1]["location"]['height'])
+                           >= int(ocr_res[bottom_limit - 1]["location"]['width'])
+                           or
+                           ocr_res[bottom_limit - 1]["words"] in punctuation_p):
                         bottom_limit = bottom_limit - 1
 
                     bottom = int(
@@ -233,7 +232,7 @@ def sheet_sorted(regions, split_x):
     自顶向下排序,答题卡依次分块,每块区域内排序:第一次分栏,第二次栏内分块
     """
 
-    region_contain_set = [('choice_m', 'solve'), ('choice_m', 'cloze'),]
+    region_contain_set = [('choice_m', 'solve'), ('choice_m', 'cloze')]
     region_contain_set_ = [(ele[1], ele[0]) for ele in region_contain_set]
     region_contain_set.extend(region_contain_set_)
 
@@ -269,9 +268,11 @@ def sheet_sorted(regions, split_x):
             for j, region_ in enumerate(col_regions):
                 name_ = region_['class_name']
                 y_min_ = region_['bounding_box']['ymin']
+                y_max_ = region_['bounding_box']['ymax']
+                y_mid_ = y_min_ + (y_max_ - y_min_)//2
                 if j > 0:
                     # 栏内分块
-                    if ymin <= y_min_ < ymax and (name, name_) not in region_contain_set:
+                    if ymin <= y_mid_ < ymax and (name, name_) not in region_contain_set:
                         row_regions.append(region_)
                     if ymax <= y_min_:
                         break

+ 140 - 52
segment/sheet_resolve/analysis/sheet/sheet_infer.py

@@ -117,11 +117,10 @@ def infer_bar_code(image, ocr_dict_list, attention_region):
                     xmax = right_board_location['left'] + right_board_location['width']
                     ymax = down_board_location['top'] + down_board_location['height']
 
-                    xmin = int(xmin) if xmin >= 1 else 1
-                    ymin = int(ymin) if ymin >= 1 else 1
-                    xmax = int(xmax) if xmax <= img_cols - 1 else img_cols - 1
-                    ymax = int(ymax) if ymax <= img_rows - 1 else img_rows - 1
-
+                    xmin = max(1, int(xmin)-5)
+                    ymin = max(1, int(ymin)-5)
+                    xmax = min(int(xmax), img_rows - 1)
+                    ymax = min(int(ymax), img_cols - 1 )
                     bar_code_dict = {'class_name': 'bar_code',
                                      'bounding_box': {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}}
                     bar_code_dict_list.append(bar_code_dict)
@@ -371,6 +370,59 @@ def exam_number_infer_by_s(image, regions):
     return exam_dict_list
 
 
+def draw_blank(image, infer_box):
+    """Paint over the Chinese characters that OCR finds inside ``infer_box``.
+
+    The box is cropped from ``image`` and sent to OCR. For every recognised
+    character in the range U+4E00..U+9FA5, a patch starting at the character's
+    top-left corner, as wide as the character and twice as tall, is set to 255.
+
+    :param image: 3-channel image array; it is modified in place
+    :param infer_box: box whose first two entries are the x / y offset of the crop
+    :return: the same ``image`` object
+    """
+    x_offset, y_offset = infer_box[:2]
+    cropped = crop_region_direct(image, infer_box)
+    chinese_char = r'[\u4e00-\u9fa5]'
+    for line in get_ocr_text_and_coordinate(cropped):
+        for item in line['chars']:
+            if not re.match(chinese_char, item['char']):
+                continue
+            where = item['location']
+            # OCR coordinates are relative to the crop; shift back to the full image.
+            x0 = where['left'] + x_offset
+            y0 = where['top'] + y_offset
+            image[y0:y0 + 2 * where['height'], x0:x0 + where['width'], :] = 255
+
+    return image
+
+
+def exam_number_adjust_infer(image, regions):
+    """Replace detected exam-number regions with one merged 'exam_number' region.
+
+    Boxes of 'exam_number_s' and 'exam_number' regions (plus 'exam_number_w'
+    regions, see note) are merged into their common bounding box. Existing
+    'exam_number' entries are removed from ``regions`` and the merged one is
+    appended; draw_blank is run on the merged box.
+
+    :param image: page image, passed to draw_blank
+    :param regions: list of region dicts; modified in place
+    :return: (image, regions); both unchanged when no 'exam_number_s' or
+             'exam_number' region exists
+    """
+    number_boxes = []
+    word_boxes = []
+    # Walk backwards so popping an entry does not shift the indices still to visit.
+    for idx in reversed(range(len(regions))):
+        region = regions[idx]
+        name = region['class_name']
+        if name in ('exam_number_s', 'exam_number'):
+            bbox = region['bounding_box']
+            number_boxes.append([bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']])
+            if name == 'exam_number':
+                regions.pop(idx)
+        elif name == 'exam_number_w':
+            bbox = region['bounding_box']
+            # NOTE(review): ymax is used in the ymin slot, so only the bottom edge of
+            # this box takes part in the merge - confirm this is intended.
+            word_boxes.append([bbox['xmin'], bbox['ymax'], bbox['xmax'], bbox['ymax']])
+
+    if not number_boxes:
+        return image, regions
+
+    stacked = np.array(number_boxes + word_boxes)
+    xmin, ymin = stacked.min(axis=0)[:2]
+    xmax, ymax = stacked.max(axis=0)[2:]
+
+    images = draw_blank(image, (xmin, ymin, xmax, ymax))
+
+    regions.append({'class_name': 'exam_number',
+                    'bounding_box': {'xmin': xmin, 'ymin': ymin,
+                                     'xmax': xmax, 'ymax': ymax}})
+
+    return images, regions
+
+
 def gen_xml_new(path, ocr_list):
     tree = ET.parse(r'../../tools/000000-template.xml')  # xml tree
     for index, ele in enumerate(ocr_list):
@@ -1071,52 +1123,60 @@ def box_infer_and_complete(image, sheet_region_dict, ocr=''):
     return sheet_region_dict
 
 
-def infer_solve(sheet_dict_list, left, right, top, bottom, col_regions, col_split):
-    if len(col_split) == 1:
-        col_split.insert(0, left)
-    else:
-        col_split.insert(0, left)
-        col_split.append(right)
+def infer_solve(sheet_dict_list, left, right, top, bottom, h, w, col_regions, col_split, subject='math'):
+    col_split.insert(0, left)
+    col_split.append(right)
 
-    boundary_list = [(split, top, col_split[i+1]-1, bottom) for i, split in enumerate(col_split[:-1])]
+    boundary_list = [(split, 1, col_split[i+1]-1, h-1) for i, split in enumerate(col_split[:-1])]
 
     infer_polygon = []
-    tmp = []
-    for i, col in enumerate(col_regions):
-        if i == 0:
+    for col_i, col in enumerate(col_regions):
+        if not col:
+            continue
+        class_names = [ele['class_name'] for ele in col]
+
+        # 确定是正面第一栏
+        if ((subject != 'chinese')
+            and
+           (any([ele in class_names for ele in ['infer_title', 'bar_code', 'choice_m',  'cloze_s']]))):
             bottom_box = col[-1]
             bottom_box_ymax = bottom_box['bounding_box']['ymax']
-            infer_loc = {'xmin': boundary_list[i][0], 'ymin': bottom_box_ymax+5,
-                         'xmax': boundary_list[i][2], 'ymax': boundary_list[i][3]}
+            infer_loc = {'xmin': boundary_list[col_i][0], 'ymin': bottom_box_ymax+5,
+                         'xmax': boundary_list[col_i][2], 'ymax': boundary_list[col_i][3]}
             box_polygon = Polygon([(infer_loc['xmin'], infer_loc['ymin']),
                                    (infer_loc['xmax'], infer_loc['ymin']),
                                    (infer_loc['xmax'], infer_loc['ymax']),
                                    (infer_loc['xmin'], infer_loc['ymax']),])
             infer_polygon.append(box_polygon)
-            tmp.append(infer_loc)
 
         else:
-            top_box = col[0]
-            bottom_box_ymin = top_box['bounding_box']['ymin']
-            infer_loc = {'xmin': boundary_list[i][0], 'ymin': boundary_list[i][1],
-                         'xmax': boundary_list[i][2], 'ymax': bottom_box_ymin-5}
-            box_polygon = Polygon([(infer_loc['xmin'], infer_loc['ymin']),
-                                   (infer_loc['xmax'], infer_loc['ymin']),
-                                   (infer_loc['xmax'], infer_loc['ymax']),
-                                   (infer_loc['xmin'], infer_loc['ymax']),])
-            infer_polygon.append(box_polygon)
-            tmp.append(infer_loc)
-
-            bottom_box = col[-1]
-            bottom_box_ymax = bottom_box['bounding_box']['ymax']
-            infer_loc = {'xmin': boundary_list[i][0], 'ymin': bottom_box_ymax,
-                         'xmax': boundary_list[i][2], 'ymax': boundary_list[i][3]}
-            box_polygon = Polygon([(infer_loc['xmin'], infer_loc['ymin']),
-                                   (infer_loc['xmax'], infer_loc['ymin']),
-                                   (infer_loc['xmax'], infer_loc['ymax']),
-                                   (infer_loc['xmin'], infer_loc['ymax']),])
-            infer_polygon.append(box_polygon)
-            tmp.append(infer_loc)
+            y_axis = np.zeros(h)
+            y_axis[top:top+1] = 1
+            y_axis[bottom-1:bottom] = 1
+            for ele in col:
+                ymin = ele['bounding_box']['ymin']
+                ymax = ele['bounding_box']['ymax']
+                y_axis[ymin:ymax+1] = 1
+
+            split_index_list = []
+            split_index = 0
+            for i, ele in enumerate(y_axis):
+                split_index = split_index % 2
+                if ele == split_index:
+                    # print(i)
+                    split_index = split_index + 1
+                    split_index_list.append(i)
+
+            for i in range(0, len(split_index_list)-1, 2):
+                ymin = split_index_list[i] + 3
+                ymax = split_index_list[i+1] - 3
+                infer_loc = {'xmin': boundary_list[col_i][0], 'ymin': ymin,
+                             'xmax': boundary_list[col_i][2], 'ymax': ymax}
+                box_polygon = Polygon([(infer_loc['xmin'], infer_loc['ymin']),
+                                       (infer_loc['xmax'], infer_loc['ymin']),
+                                       (infer_loc['xmax'], infer_loc['ymax']),
+                                       (infer_loc['xmin'], infer_loc['ymax']), ])
+                infer_polygon.append(box_polygon)
 
     res = []
     all_type_score_polygon = []
@@ -1156,26 +1216,54 @@ def infer_solve(sheet_dict_list, left, right, top, bottom, col_regions, col_spli
                     type_score_ymin.append(p_ymin)
 
         if type_score_num == 1:
-            solve_box = {'class_name': 'solve',
-                         'bounding_box': {'xmin': int(p_xmin), 'ymin': int(p_ymin),
-                                          'xmax': int(p_xmax), 'ymax': int(p_ymax)}}
+            w, h = p_xmax-p_xmin, p_ymax-p_ymin
+            w, h = max(w, 0.1), max(h, 0.1)
+            aspect_flag = max(w / h, h / w) < ASPECT_FLAG
 
-            sheet_dict_list.append(solve_box)
-            infer_polygon.remove(poly)
-            res.append(solve_box)
+            if aspect_flag:
+                solve_box = {'class_name': 'solve',
+                             'bounding_box': {'xmin': int(p_xmin), 'ymin': int(p_ymin),
+                                              'xmax': int(p_xmax), 'ymax': int(p_ymax)}}
+
+                infer_polygon.remove(poly)
+                res.append(solve_box)
+            else:
+                solve_box = {'class_name': 'solve_with_type_score_without_aspect',
+                             'bounding_box': {'xmin': int(p_xmin), 'ymin': int(p_ymin),
+                                              'xmax': int(p_xmax), 'ymax': int(p_ymax)}}
+
+                infer_polygon.remove(poly)
+                res.append(solve_box)
         if type_score_num > 1:  # 多type_score
             type_score_ymin = sorted(type_score_ymin)
             type_score_ymin[0] = min(p_ymin, type_score_ymin[0])
             type_score_ymin.append(p_ymax)
-            for i in range(0, len(type_score_ymin) - 1):
+            for col_i in range(0, len(type_score_ymin) - 1):
                 w = p_xmax - p_xmin
-                h = type_score_ymin[i + 1] - type_score_ymin[i]
-                if max(w / h, h / w) < ASPECT_FLAG:
+                h = type_score_ymin[col_i + 1] - type_score_ymin[col_i]
+                w, h = max(w, 0.1), max(h, 0.1)
+                aspect_flag = max(w / h, h / w) < ASPECT_FLAG
+                if aspect_flag:
                     solve_box = {'class_name': 'solve',
-                                 'bounding_box': {'xmin': int(p_xmin), 'ymin': int(type_score_ymin[i]-5),
-                                                  'xmax': int(p_xmax), 'ymax': int(type_score_ymin[i + 1])}}
-                    sheet_dict_list.append(solve_box)
+                                 'bounding_box': {'xmin': int(p_xmin), 'ymin': int(type_score_ymin[col_i]-5),
+                                                  'xmax': int(p_xmax), 'ymax': int(type_score_ymin[col_i + 1])}}
                     res.append(solve_box)
             infer_polygon.remove(poly)
 
-    return res
+    for poly in infer_polygon.copy():  # infer solve
+        in_xmin, in_ymin, in_xmax, in_ymax = poly.bounds
+        w, h = in_xmax - in_xmin, in_ymax - in_ymin
+        w, h = max(w, 0.1), max(h, 0.1)
+        aspect_flag = max(w / h, h / w) < ASPECT_FLAG
+        if aspect_flag:
+            solve_box = {'class_name': 'solve_without_type_score',
+                         'bounding_box': {'xmin': int(in_xmin), 'ymin': int(in_ymin),
+                                          'xmax': int(in_xmax), 'ymax': int(in_ymax)}}
+        else:
+            solve_box = {'class_name': 'w_h_blank',
+                         'bounding_box': {'xmin': int(in_xmin), 'ymin': int(in_ymin),
+                                          'xmax': int(in_xmax), 'ymax': int(in_ymax)}}
+
+        infer_polygon.remove(poly)
+        res.append(solve_box)
+    return res

+ 2 - 2
segment/sheet_resolve/analysis/sheet/sheet_points.py

@@ -82,7 +82,7 @@ def change_box(cloze_s_res, cloze_s_region):
 
 
 def get_cloze_number_and_value(cloze_s_box_with_content):
-    print(cloze_s_box_with_content)
+    # print(cloze_s_box_with_content)
     list_of_cloze_s = []
     number_value = []
     for words_index, words_str in enumerate(cloze_s_box_with_content):
@@ -154,7 +154,7 @@ def get_cloze_number_and_value(cloze_s_box_with_content):
             else:
                 title_number = -1
             total_score = -1
-            words_str.update({'title_number': title_number, 'total_score': int(total_score)})
+            words_str.update({'title_number': int(title_number), 'total_score': int(total_score)})
             list_of_cloze_s.append(words_str)
             number_value.append(title_number)
             number_value.append(total_score)

+ 466 - 329
segment/sheet_resolve/analysis/sheet/sheet_points_total.py

@@ -1,24 +1,26 @@
 # -*- coding: utf-8 -*-
-# @Time : 2020/5/28 0022 17:02
+# @Time : 2020/6/15 0015 10:10
 # @Author : LF
-# @FileName: sheet_points_total.py
+# @FileName: sheet_point_total.py
 # @Software: PyCharm
-# local_baidu_OCR
+
 
 import requests
 import base64
 from urllib import parse, request
 import cv2
 import re
-
+from threading import Thread
+import copy
+from collections import OrderedDict
 from PIL import Image
 from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format
 from segment.sheet_resolve.analysis.sheet.ocr_key_words import key_words
 
-# try:
-#     import tr
-# except Exception:
-#     pass
+try:
+    import tr.tr as tr
+except Exception:
+    pass
 
 OCR_ACCURACY = 'accurate'
 
@@ -260,6 +262,64 @@ def model_type_score(all_type_score_one, choice_box, cloze_box, solve_box,compos
 
     return test_result1
 
+def module_type_score(all_type_score_one, choice_box, cloze_box, solve_box, composition_box):
+    '''
+    Find the question module that a single type_score box belongs to.
+
+    Modules are tried in the order choice, cloze, solve, composition; inside
+    each kind the first matching box wins. A box matches when all four
+    type_score coordinates lie inside the module box widened by per-kind margins.
+
+    :param all_type_score_one: coordinates (xmin, ymin, xmax, ymax) of one type_score
+    :param choice_box: list of choice boxes (xmin, ymin, xmax, ymax)
+    :param cloze_box: list of cloze boxes
+    :param solve_box: list of solve boxes
+    :param composition_box: list of composition boxes
+    :return: dict with 'bounding_box', 'label' and 'type_box', or -1 when nothing matches
+    '''
+    # (label, candidate boxes, margins): the accepted window is
+    # [xmin - left, xmax + right) horizontally and [ymin - up, ymax + down) vertically.
+    candidates = (
+        ('choice', choice_box, (100, 50, 150, -50)),
+        ('cloze', cloze_box, (100, 50, 100, -50)),
+        ('solve', solve_box, (50, 50, 50, 0)),
+        ('composition', composition_box, (100, 50, 200, -50)),
+    )
+    for label, boxes, (left, right, up, down) in candidates:
+        for box in boxes:
+            ts = list(all_type_score_one)
+            x_lo, x_hi = box[0] - left, box[2] + right
+            y_lo, y_hi = box[1] - up, box[3] + down
+            # The previous "(a and b) in range(...)" form only tested one operand;
+            # every coordinate is checked explicitly here.
+            if (x_lo <= ts[0] < x_hi and x_lo <= ts[2] < x_hi
+                    and y_lo <= ts[1] < y_hi and y_lo <= ts[3] < y_hi):
+                return {'bounding_box': box,
+                        'label': label,
+                        'type_box': all_type_score_one}
+    return -1
+
 
 def ocr_key_words(rect, type_score_dict):  # 将ocr识别得到的文字与模型得到的type_score对应
     '''
@@ -274,8 +334,7 @@ def ocr_key_words(rect, type_score_dict):  # 将ocr识别得到的文字与模
     ymax = type_score_dict['type_box'][3]
     words = []
     for j in range(len_ocr):
-        if rect['coordinates'][j][0] - xmin > -30 and rect['coordinates'][j][1] - ymin > -30 and rect['coordinates'][j][
-            2] - xmax < 30 and rect['coordinates'][j][3] - ymax < 30:
+        if rect['coordinates'][j][0] - xmin > -30 and rect['coordinates'][j][1] - ymin > -30 and rect['coordinates'][j][2] - xmax < 30 and rect['coordinates'][j][3] - ymax < 30:
             word = rect['chars'][j]
             words.append(word)
     type_score_dict['words'] = words
@@ -284,8 +343,57 @@ def ocr_key_words(rect, type_score_dict):  # 将ocr识别得到的文字与模
     return type_score_dict_ocr
 
 
-
-
+def big_block_score(img0, xmins_b, ymins_b, xmaxs_b, ymaxs_b):
+    """OCR one rectangular region of the sheet and parse a score description from it.
+
+    The crop ``img0[ymins_b:ymaxs_b, xmins_b:xmaxs_b]`` is sent to the OCR helper.
+    The returned boxes are shifted by ``(xmins_b, ymins_b)`` (the crop origin) and
+    the first recognised text entries (five at most) are handed to ``key_words``
+    one at a time until a result with a usable 'Score_structure' is obtained.
+
+    :param img0: image supporting 2-D slicing ``[rows, cols]``
+    :param xmins_b: left edge of the region
+    :param ymins_b: top edge of the region
+    :param xmaxs_b: right edge of the region
+    :param ymaxs_b: bottom edge of the region
+    :return: the ``key_words`` result (updated in place), or -1 when no
+        volume score or item score could be extracted
+    """
+    ocr_result = get_ocr_text_and_coordinate_in_google_format(
+        img0[ymins_b:ymaxs_b, xmins_b:xmaxs_b],
+        ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG')
+    # Move every OCR box from crop-local coordinates to img0 coordinates.
+    ocr_result['coordinates'] = [
+        (box[0] + xmins_b, box[1] + ymins_b, box[2] + xmins_b, box[3] + ymins_b)
+        for box in ocr_result['coordinates']]
+    query = {}
+    parsed = {}
+    matched_box = 0
+    texts = ocr_result['words']
+    for idx in range(min(len(texts), 5)):
+        query['words'] = texts[idx]
+        matched_box = ocr_result['coordinates'][idx]
+        parsed = key_words(query)
+        if idx == 4:
+            # The fifth candidate is the last one tried; it is kept without a further check.
+            break
+        if parsed != {} and parsed['Score_structure'] != -1:
+            break
+    if parsed == {}:
+        # Nothing recognised: report "no score".
+        return -1
+    volume = parsed['volume_structure']
+    if volume != -1 and volume != 1:
+        # A volume-level score was recognised. Paper scores are provisionally assumed
+        # to stay within 200; anything larger is treated as an OCR error and reduced mod 100.
+        # NOTE(review): 'type_box' is not attached on this path - confirm callers do not need it.
+        volume_head = volume[0]
+        if int(volume_head['volume_total_score']) > 200:
+            volume_head['volume_total_score'] = int(volume_head['volume_total_score']) % 100
+        return parsed
+    if parsed['Score_structure'] != -1:
+        # Only an item-level score was recognised; same 200-point sanity rule applies.
+        item_head = parsed['Score_structure'][0]
+        if int(item_head['item_total_score']) > 200:
+            item_head['item_total_score'] = int(item_head['item_total_score']) % 100
+        # Remember where on the page the matching text was found.
+        item_head['type_box'] = matched_box
+        return parsed
+    return -1
 
 def get_sheet_number_total(answer_sheet, res, img0):
 
@@ -311,13 +419,14 @@ def get_sheet_number_total(answer_sheet, res, img0):
     Score_last = []
     score_last_one = 0
     volume_last_one = 0
-    model_box2 = []
     composition_boxs = []
-    score2 = []
-    num_redundance = 0
     num_composition = 0
     j_temp = []
     jj_temp =[]
+    eles = []
+    yy_max = []
+    score_del = []
+    key_modules_classes = ['choice', 'cloze', 'solve', 'solve0', 'composition0', 'composition', 'correction', 'type_score']
 
     for ele in answer_sheet["regions"]:  # 从模型输出获取对应标签的边框信息
         if ele["class_name"] == 'choice':
@@ -359,18 +468,13 @@ def get_sheet_number_total(answer_sheet, res, img0):
             type_score_boxs.append(type_score_one)
             num_type_score = num_type_score + 1
 
+
     '''解析type_score与对应分割模块的分数'''
     for i in range(len(type_score_boxs)):
-        test_result1 = model_type_score(type_score_boxs[i], choice_boxs, cloze_boxs, solve_boxs, composition_boxs)
+        test_result1 = module_type_score(type_score_boxs[i], choice_boxs, cloze_boxs, solve_boxs, composition_boxs)
         if test_result1 != -1 and test_result1 != 0:
-            if type_score_boxs[i][0] - 5 > 0:
-                xminss = type_score_boxs[i][0]-5
-            else:
-                xminss = type_score_boxs[i][0]
-            if type_score_boxs[i][1] - 5 > 0:
-                yminss = type_score_boxs[i][1] - 5
-            else:
-                yminss = type_score_boxs[i][1]
+            xminss = (type_score_boxs[i][0] - 5) if type_score_boxs[i][0] - 5 > 0 else type_score_boxs[i][0]
+            yminss = (type_score_boxs[i][1] - 5) if type_score_boxs[i][1] - 5 > 0 else type_score_boxs[i][1]
             if type_score_boxs[i][2] + 5 < img_w:
                 xmaxss = type_score_boxs[i][2] + 5
             else:
@@ -380,17 +484,28 @@ def get_sheet_number_total(answer_sheet, res, img0):
             else:
                 ymaxss = type_score_boxs[i][3]
             test_result1['words'] = str()
-            # try:  # tr_OCR
-            #     image_src_type_score = image_src.crop((xminss, yminss, xmaxss, ymaxss))
-            #     type_score_dict_ocr = tr.run(image_src_type_score)
-            #     print('tr_OCR')
-            #     for t in range(len(type_score_dict_ocr)):
-            #         test_result1['words'] = test_result1['words'] + type_score_dict_ocr[t][1]
-            # except Exception as e:  # baidu_OCR
-            #     print('baidu_OCR')
-            type_score_dict_ocr = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
-            for t in range(len(type_score_dict_ocr['words'])):
-                test_result1['words'] = test_result1['words'] + type_score_dict_ocr['words'][t]
+            try:  # tr_OCR
+                image_src_type_score = image_src.crop((xminss, yminss, xmaxss, ymaxss))
+                w_small = xmaxss - xminss
+                h_small = ymaxss - yminss
+                if h_small < 100 and w_small > 100:
+                    image_src_type_score = Image.new(image_src.mode, (w_small, 100), (255))
+                    image_src_type_score.paste(image_src, [0, 0, w_small, h_small])
+                elif w_small < 100 and h_small > 100:
+                    image_src_type_score = Image.new(image_src.mode, (100, h_small), (255))
+                    image_src_type_score.paste(image_src, [0, 0, w_small, h_small])
+                elif w_small < 100 and h_small < 100:
+                    image_src_type_score = Image.new(image_src.mode, (100, 100), (255))
+                    image_src_type_score.paste(image_src, [0, 0, w_small, h_small])
+                type_score_dict_ocr = tr.run(image_src_type_score)
+                print('tr_OCR')
+                for t in range(len(type_score_dict_ocr)):
+                    test_result1['words'] = test_result1['words'] + type_score_dict_ocr[t][1]
+            except Exception as e:  # baidu_OCR
+                print('baidu_OCR')
+                type_score_dict_ocr = get_ocr_text_and_coordinate_in_google_format(img0[yminss:ymaxss, xminss:xmaxss], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
+                for t in range(len(type_score_dict_ocr['words'])):
+                    test_result1['words'] = test_result1['words'] + type_score_dict_ocr['words'][t]
 
             test = key_words(test_result1)
             if test == {}:
@@ -402,6 +517,7 @@ def get_sheet_number_total(answer_sheet, res, img0):
                 add_ocr['score'] = -1
                 add_ocr['number_score'] = -1
                 add_ocr['counts'] = -1
+                add_ocr['type_score_box'] = type_score_boxs[i]
                 add_ocr['ocr'] = test_result1['words']
                 Score_last.append(add_ocr)
             elif test['volume_structure'] == -1 and test['Score_structure'] == -1:
@@ -413,30 +529,45 @@ def get_sheet_number_total(answer_sheet, res, img0):
                 add_ocr['score'] = -1
                 add_ocr['number_score'] = -1
                 add_ocr['counts'] = -1
+                add_ocr['type_score_box'] = type_score_boxs[i]
                 add_ocr['ocr'] = test_result1['words']
                 Score_last.append(add_ocr)
             elif test != {}:
-                if test['volume_structure'] != -1 and int(
+                if test['volume_structure'] != -1 and test['volume_structure'] != 1 and int(
                         test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
                     test['volume_structure'][0]['volume_total_score'] = int(
                         test['volume_structure'][0]['volume_total_score']) % 100
-                elif test['volume_structure'] == -1 and test['Score_structure'] != -1 and int(
+                elif (test['volume_structure'] == -1 or test['volume_structure'] == 1) and test['Score_structure'] != -1 and int(
                         test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
                     test['Score_structure'][0]['item_total_score'] = int(
                         test['Score_structure'][0]['item_total_score']) % 100
                 all_test.append(test)
+            else:
+                ### 添加返回值OCR结果
+                add_ocr = {}
+                add_ocr['model_box'] = test_result1['bounding_box']
+                add_ocr['label'] = test_result1['label']
+                add_ocr['number'] = -1
+                add_ocr['score'] = -1
+                add_ocr['number_score'] = -1
+                add_ocr['counts'] = -1
+                add_ocr['type_score_box'] = type_score_boxs[i]
+                add_ocr['ocr'] = test_result1['words']
+                Score_last.append(add_ocr)
+
 
     ''' 解析模型分割模块没有对应的type_score时的分数'''
     for jjjj in range(len(all_test)):
         if all_test[jjjj]['Score_structure'] != -1:
             label_1 = all_test[jjjj]['Score_structure'][0]['label']
-            if label_1 == 'choice':
+            num_1 = all_test[jjjj]['Score_structure'][0]['item_N']
+            if label_1 == 'choice' :
                 if choice_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
                     choice_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
             elif label_1 == 'cloze':
                 if cloze_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
                     cloze_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
-            elif label_1 == 'solve':
+            elif label_1 == 'solve' and num_1 != 10000 and num_1 != -1:
                 if solve_boxs.count(all_test[jjjj]['Score_structure'][0]['bounding_box']):
                     solve_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
             elif label_1 == 'composition':
@@ -444,61 +575,11 @@ def get_sheet_number_total(answer_sheet, res, img0):
                     solve_boxs.remove(all_test[jjjj]['Score_structure'][0]['bounding_box'])
     if choice_boxs != []:  # 9月16号修改
         for ij in range(len(choice_boxs)):
-            if choice_boxs[ij][1] - 150 > 0:
-                yminss = choice_boxs[ij][1] - 150
-            else:
-                yminss = choice_boxs[ij][1]
-            if choice_boxs[ij][0] - 100 > 0:
-                xminss = choice_boxs[ij][0] - 100
-            else:
-                xminss = choice_boxs[ij][0]
+            yminss = choice_boxs[ij][1] - 150 if choice_boxs[ij][1] - 150 > 0 else choice_boxs[ij][1]
+            xminss = choice_boxs[ij][0] - 100 if choice_boxs[ij][0] - 100 > 0 else choice_boxs[ij][0]
             try:
-                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:choice_boxs[ij][3], xminss:choice_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
-                aa = []
-                type_score_dict_ocrs = {}
-                for ii in range(len(res1['coordinates'])):
-                    xmin11 = res1['coordinates'][ii][0] + choice_boxs[ij][0]
-                    ymin11 = res1['coordinates'][ii][1] + choice_boxs[ij][1]
-                    xmax11 = res1['coordinates'][ii][2] + choice_boxs[ij][0]
-                    ymax11 = res1['coordinates'][ii][3] + choice_boxs[ij][1]
-                    aaa = (xmin11, ymin11, xmax11, ymax11)
-                    aa.append(aaa)
-                res1['coordinates'] = aa
-                new_test = {}
-                if len(res1['words']) > 0:
-                    type_score_dict_ocrs['words'] = res1['words'][0]
-                    new_test = key_words(type_score_dict_ocrs)
-                    if new_test == {} or new_test['Score_structure'] == -1:
-                        if len(res1['words']) > 1:
-                            type_score_dict_ocrs['words'] = res1['words'][1]
-                            new_test = key_words(type_score_dict_ocrs)
-                            if new_test == {} or new_test['Score_structure'] == -1:
-                                if len(res1['words']) > 2:
-                                    type_score_dict_ocrs['words'] = res1['words'][2]
-                                    new_test = key_words(type_score_dict_ocrs)
-                                if new_test == {} or new_test['Score_structure'] == -1:
-                                    if len(res1['words']) > 3:
-                                        type_score_dict_ocrs['words'] = res1['words'][3]
-                                        new_test = key_words(type_score_dict_ocrs)
-                                    if new_test == {} or new_test['Score_structure'] == -1:
-                                        if len(res1['words']) > 4:
-                                            type_score_dict_ocrs['words'] = res1['words'][4]
-                                            new_test = key_words(type_score_dict_ocrs)
-                if new_test != {} and new_test['volume_structure'] != -1 and (
-                        int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(
-                        new_test['volume_structure'][0]['volume_score']) > 4):  # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
-                    if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
-                        new_test['volume_structure'][0]['volume_total_score'] = int(
-                            new_test['volume_structure'][0]['volume_total_score']) % 100
-                    new_test['volume_structure'][0]['bounding_box'] = choice_boxs[ij]
-                    new_test['volume_structure'][0]['label'] = 'choice'
-                    all_test.append(new_test)
-                elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (
-                        int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(
-                        new_test['Score_structure'][0]['item_score']) > 4):  # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
-                    if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
-                        new_test['Score_structure'][0]['item_total_score'] = int(
-                            new_test['Score_structure'][0]['item_total_score']) % 100
+                new_test = big_block_score(img0, xminss, yminss, choice_boxs[ij][2], choice_boxs[ij][3])
+                if new_test != -1:
                     new_test['Score_structure'][0]['bounding_box'] = choice_boxs[ij]
                     new_test['Score_structure'][0]['label'] = 'choice'
                     all_test.append(new_test)
@@ -506,57 +587,11 @@ def get_sheet_number_total(answer_sheet, res, img0):
                 print('choice_boxs_score_NULL_or_error')
     if cloze_boxs != []:
         for ij in range(len(cloze_boxs)):
-            if cloze_boxs[ij][1] - 100 > 0:
-                yminss = cloze_boxs[ij][1] - 100
-            else:
-                yminss = cloze_boxs[ij][1]
-            if cloze_boxs[ij][0] - 100 > 0:
-                xminss = cloze_boxs[ij][0] - 100
-            else:
-                xminss = cloze_boxs[ij][0]
+            yminss = cloze_boxs[ij][1] - 100 if cloze_boxs[ij][1] - 100 > 0 else cloze_boxs[ij][1]
+            xminss = cloze_boxs[ij][0] - 100 if cloze_boxs[ij][0] - 100 > 0 else cloze_boxs[ij][0]
             try:
-                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:cloze_boxs[ij][3], xminss:cloze_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
-                aa = []
-                type_score_dict_ocrs = {}
-                for ii in range(len(res1['coordinates'])):
-                    xmin11 = res1['coordinates'][ii][0] + cloze_boxs[ij][0]
-                    ymin11 = res1['coordinates'][ii][1] + cloze_boxs[ij][1]
-                    xmax11 = res1['coordinates'][ii][2] + cloze_boxs[ij][0]
-                    ymax11 = res1['coordinates'][ii][3] + cloze_boxs[ij][1]
-                    aaa = (xmin11, ymin11, xmax11, ymax11)
-                    aa.append(aaa)
-                res1['coordinates'] = aa
-                new_test = {}
-                if len(res1['words']) > 0:
-                    type_score_dict_ocrs['words'] = res1['words'][0]
-                    new_test = key_words(type_score_dict_ocrs)
-                    if new_test == {} or new_test['Score_structure'] == -1:
-                        if len(res1['words']) > 1:
-                            type_score_dict_ocrs['words'] = res1['words'][1]
-                            new_test = key_words(type_score_dict_ocrs)
-                            if new_test == {} or new_test['Score_structure'] == -1:
-                                if len(res1['words']) > 2:
-                                    type_score_dict_ocrs['words'] = res1['words'][2]
-                                    new_test = key_words(type_score_dict_ocrs)
-                                if new_test == {} or new_test['Score_structure'] == -1:
-                                    if len(res1['words']) > 3:
-                                        type_score_dict_ocrs['words'] = res1['words'][3]
-                                        new_test = key_words(type_score_dict_ocrs)
-                                    if new_test == {} or new_test['Score_structure'] == -1:
-                                        if len(res1['words']) > 4:
-                                            type_score_dict_ocrs['words'] = res1['words'][4]
-                                            new_test = key_words(type_score_dict_ocrs)
-                if new_test != {} and new_test['volume_structure'] != -1 and (int(new_test['volume_structure'][0]['volume_total_score']) > 4 or int(new_test['volume_structure'][0]['volume_score']) > 4):  # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
-                    if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
-                        new_test['volume_structure'][0]['volume_total_score'] = int(
-                            new_test['volume_structure'][0]['volume_total_score']) % 100
-                    new_test['volume_structure'][0]['bounding_box'] = cloze_boxs[ij]
-                    new_test['volume_structure'][0]['label'] = 'cloze'
-                    all_test.append(new_test)
-                elif new_test != {} and new_test['volume_structure'] == -1 and new_test['Score_structure'] != -1 and (int(new_test['Score_structure'][0]['item_total_score']) > 4 or int(new_test['Score_structure'][0]['item_score']) > 4):  # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
-                    if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
-                        new_test['Score_structure'][0]['item_total_score'] = int(
-                            new_test['Score_structure'][0]['item_total_score']) % 100
+                new_test = big_block_score(img0, xminss, yminss, cloze_boxs[ij][2], cloze_boxs[ij][3])
+                if new_test != -1:
                     new_test['Score_structure'][0]['bounding_box'] = cloze_boxs[ij]
                     new_test['Score_structure'][0]['label'] = 'cloze'
                     all_test.append(new_test)
@@ -564,51 +599,11 @@ def get_sheet_number_total(answer_sheet, res, img0):
                 print('cloze_boxs_score_NULL_or_error')
     if solve_boxs != []:
         for ij in range(len(solve_boxs)):
-            yminss = solve_boxs[ij][1]
-            xminss = solve_boxs[ij][0]
+            yminss = solve_boxs[ij][1] - 50 if solve_boxs[ij][1] - 50 > 0 else solve_boxs[ij][1]
+            xminss = solve_boxs[ij][0] - 50 if solve_boxs[ij][0] - 50 > 0 else solve_boxs[ij][0]
             try:
-                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:solve_boxs[ij][3], xminss:solve_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
-                aa = []
-                type_score_dict_ocrs = {}
-                for ii in range(len(res1['coordinates'])):
-                    xmin11 = res1['coordinates'][ii][0] + solve_boxs[ij][0]
-                    ymin11 = res1['coordinates'][ii][1] + solve_boxs[ij][1]
-                    xmax11 = res1['coordinates'][ii][2] + solve_boxs[ij][0]
-                    ymax11 = res1['coordinates'][ii][3] + solve_boxs[ij][1]
-                    aaa = (xmin11, ymin11, xmax11, ymax11)
-                    aa.append(aaa)
-                res1['coordinates'] = aa
-                new_test = {}
-                if len(res1['words']) > 0:
-                    type_score_dict_ocrs['words'] = res1['words'][0]
-                    new_test = key_words(type_score_dict_ocrs)
-                    if new_test == {} or new_test['Score_structure'] == -1:
-                        if len(res1['words']) > 1:
-                            type_score_dict_ocrs['words'] = res1['words'][1]
-                            new_test = key_words(type_score_dict_ocrs)
-                            if new_test == {} or new_test['Score_structure'] == -1:
-                                if len(res1['words']) > 2:
-                                    type_score_dict_ocrs['words'] = res1['words'][2]
-                                    new_test = key_words(type_score_dict_ocrs)
-                                if new_test == {} or new_test['Score_structure'] == -1:
-                                    if len(res1['words']) > 3:
-                                        type_score_dict_ocrs['words'] = res1['words'][3]
-                                        new_test = key_words(type_score_dict_ocrs)
-                                    if new_test == {} or new_test['Score_structure'] == -1:
-                                        if len(res1['words']) > 4:
-                                            type_score_dict_ocrs['words'] = res1['words'][4]
-                                            new_test = key_words(type_score_dict_ocrs)
-                if new_test != {} and new_test['volume_structure'] != -1 and int(new_test['volume_structure'][0]['volume_total_score']) > 5:  # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
-                    if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
-                        new_test['volume_structure'][0]['volume_total_score'] = int(new_test['volume_structure'][0]['volume_total_score']) % 100
-                    new_test['volume_structure'][0]['bounding_box'] = solve_boxs[ij]
-                    new_test['volume_structure'][0]['label'] = 'solve'
-                    all_test.append(new_test)
-                elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
-                    'Score_structure'] != -1 and (
-                        int(new_test['Score_structure'][0]['item_total_score']) > 5 or int(new_test['Score_structure'][0]['item_total_score']) == -1):  # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
-                    if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
-                        new_test['Score_structure'][0]['item_total_score'] = int(new_test['Score_structure'][0]['item_total_score']) % 100
+                new_test = big_block_score(img0, xminss, yminss, solve_boxs[ij][2], solve_boxs[ij][3])
+                if new_test != -1:
                     new_test['Score_structure'][0]['bounding_box'] = solve_boxs[ij]
                     new_test['Score_structure'][0]['label'] = 'solve'
                     all_test.append(new_test)
@@ -616,60 +611,11 @@ def get_sheet_number_total(answer_sheet, res, img0):
                 print('solve_boxs_score_NULL_or_error')
     if composition_boxs != []:
         for ij in range(len(composition_boxs)):
-            if composition_boxs[ij][1] - 250 > 0:
-                yminss = composition_boxs[ij][1] - 250
-            else:
-                yminss = composition_boxs[ij][1]
-            if composition_boxs[ij][0] - 100 > 0:
-                xminss = composition_boxs[ij][0] - 100
-            else:
-                xminss = composition_boxs[ij][0]
+            yminss = composition_boxs[ij][1] - 240 if composition_boxs[ij][1] - 240 > 0 else composition_boxs[ij][1]
+            xminss = composition_boxs[ij][0] - 100 if composition_boxs[ij][0] - 100 > 0 else composition_boxs[ij][0]
             try:
-                res1 = get_ocr_text_and_coordinate_in_google_format(img0[yminss:composition_boxs[ij][3], xminss:composition_boxs[ij][2]], ocr_accuracy=OCR_ACCURACY,language_type='CHN_ENG')
-                aa = []
-                type_score_dict_ocrs = {}
-                for ii in range(len(res1['coordinates'])):
-                    xmin11 = res1['coordinates'][ii][0] + composition_boxs[ij][0]
-                    ymin11 = res1['coordinates'][ii][1] + composition_boxs[ij][1]
-                    xmax11 = res1['coordinates'][ii][2] + composition_boxs[ij][0]
-                    ymax11 = res1['coordinates'][ii][3] + composition_boxs[ij][1]
-                    aaa = (xmin11, ymin11, xmax11, ymax11)
-                    aa.append(aaa)
-                res1['coordinates'] = aa
-                new_test = {}
-                if len(res1['words']) > 0:
-                    type_score_dict_ocrs['words'] = res1['words'][0]
-                    new_test = key_words(type_score_dict_ocrs)
-                    if new_test == {} or new_test['Score_structure'] == -1:
-                        if len(res1['words']) > 1:
-                            type_score_dict_ocrs['words'] = res1['words'][1]
-                            new_test = key_words(type_score_dict_ocrs)
-                            if new_test == {} or new_test['Score_structure'] == -1:
-                                if len(res1['words']) > 2:
-                                    type_score_dict_ocrs['words'] = res1['words'][2]
-                                    new_test = key_words(type_score_dict_ocrs)
-                                if new_test == {} or new_test['Score_structure'] == -1:
-                                    if len(res1['words']) > 3:
-                                        type_score_dict_ocrs['words'] = res1['words'][3]
-                                        new_test = key_words(type_score_dict_ocrs)
-                                    if new_test == {} or new_test['Score_structure'] == -1:
-                                        if len(res1['words']) > 4:
-                                            type_score_dict_ocrs['words'] = res1['words'][4]
-                                            new_test = key_words(type_score_dict_ocrs)
-                if new_test != {} and new_test['volume_structure'] != -1 and int(
-                        new_test['volume_structure'][0]['volume_total_score']) > 4:  # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
-                    if int(new_test['volume_structure'][0]['volume_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
-                        new_test['volume_structure'][0]['volume_total_score'] = int(
-                            new_test['volume_structure'][0]['volume_total_score']) % 100
-                    new_test['volume_structure'][0]['bounding_box'] = composition_boxs[ij]
-                    new_test['volume_structure'][0]['label'] = 'composition'
-                    all_test.append(new_test)
-                elif new_test != {} and new_test['volume_structure'] == -1 and new_test[
-                    'Score_structure'] != -1 and int(
-                    new_test['Score_structure'][0]['item_total_score']) > 4:  # 如果识别到分数,添加到输出信息;如果还没有识别到分数,默认没有分数
-                    if int(new_test['Score_structure'][0]['item_total_score']) > 200:  # 暂定试卷分数都在200以内,超过200的表示识别错误
-                        new_test['Score_structure'][0]['item_total_score'] = int(
-                            new_test['Score_structure'][0]['item_total_score']) % 100
+                new_test = big_block_score(img0, xminss, yminss, composition_boxs[ij][2], composition_boxs[ij][3])
+                if new_test != -1:
                     new_test['Score_structure'][0]['bounding_box'] = composition_boxs[ij]
                     new_test['Score_structure'][0]['label'] = 'composition'
                     all_test.append(new_test)
@@ -682,7 +628,18 @@ def get_sheet_number_total(answer_sheet, res, img0):
                               'number': dict(all_test[aaa])['Score_structure'][0]['item_N'],
                               'score': dict(all_test[aaa])['Score_structure'][0]['item_total_score'],
                               'number_score': dict(all_test[aaa])['Score_structure'][0]['item_score'],
-                              'counts': dict(all_test[aaa])['Score_structure'][0]['item_count']}
+                              'counts': dict(all_test[aaa])['Score_structure'][0]['item_count'],
+                              'type_score_box': dict(all_test[aaa])['Score_structure'][0]['type_box']}
+            Score_last.append(score_last_one)
+            continue
+        elif all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] == 1:
+            score_last_one = {'model_box': dict(all_test[aaa])['Score_structure'][0]['bounding_box'],
+                              'label': dict(all_test[aaa])['Score_structure'][0]['label'],
+                              'number': 10000,
+                              'score': dict(all_test[aaa])['Score_structure'][0]['item_total_score'],
+                              'number_score': dict(all_test[aaa])['Score_structure'][0]['item_score'],
+                              'counts': dict(all_test[aaa])['Score_structure'][0]['item_count'],
+                              'type_score_box': dict(all_test[aaa])['Score_structure'][0]['type_box']}
             Score_last.append(score_last_one)
             continue
         elif all_test[aaa]['Score_structure'] != -1 and all_test[aaa]['volume_structure'] != -1:
@@ -691,7 +648,8 @@ def get_sheet_number_total(answer_sheet, res, img0):
                               'number': -1,
                               'score': dict(all_test[aaa])['Score_structure'][0]['volume_total_score'],
                               'number_score': dict(all_test[aaa])['Score_structure'][0]['volume_score'],
-                              'counts': dict(all_test[aaa])['Score_structure'][0]['volume_count']}
+                              'counts': dict(all_test[aaa])['Score_structure'][0]['volume_count'],
+                              'type_score_box': dict(all_test[aaa])['Score_structure'][0]['type_box']}
             Score_last.append(score_last_one)
 
             volume_last_one = {'volume_N': dict(all_test[aaa])['volume_structure'][0]['volume_N'],
@@ -709,70 +667,202 @@ def get_sheet_number_total(answer_sheet, res, img0):
                                'keyword_type': dict(all_test[aaa])['volume_structure'][0]['keyword_type']}
             volume_last.append(volume_last_one)
             continue
-    # Score_last = sorted(Score_last, key=lambda x: (
-    # x['model_box'][0], x['model_box'][0] + x['model_box'][1], -x['score']))  # 按答题卡顺序输出
 
     '''去重一个边框可能对应多个type_score的情况,英语单独解析'''
-    len_Score_last = len(Score_last)
-    if answer_sheet['subject'] == 'english':
-        for i in range(len_Score_last):
-            if Score_last[i]['label'] == 'cloze':
-                if Score_last[i]['model_box'] in model_box2:
-                    index2 = model_box2.index(Score_last[i]['model_box'])
-                    score = Score_last[i]['score']
-                    if score < score2[index2] and score2[index2] < 20:  # 去重,type_score多余的包含小题分数
-                        Score_last[i] = -1
-                    elif score < score2[index2] and score2[index2] > 20:  # 去重,type_score多余的包含分卷分数
-                        Score_last[index2] = -1
-                    elif score > score2[index2] and score < 20:  # 去重,type_score在不大于30分的情况下,暂定保留更大的分数
-                        Score_last[index2] = -1
-                    elif score > score2[index2] and score > 20:  # 去重,type_score去除大于30分的重复分数
-                        Score_last[i] = -1
+    Score_last_Remove_Duplicates = OrderedDict()
+    for item in Score_last:
+        Score_last_Remove_Duplicates.setdefault(item['model_box'], {**item, })
+    Score_last_Remove_Duplicates = list(Score_last_Remove_Duplicates.values())
+    if len(Score_last_Remove_Duplicates) != len(Score_last):
+        len_Score_last = len(Score_last)
+        Score_last = sorted(Score_last, key=lambda x: (x['model_box'][0] + x['model_box'][1] + x['type_score_box'][0] + x['type_score_box'][1]),reverse=True)
+        if answer_sheet['subject'] == 'english':  # 暂定英语只去除重复分数,不修改主观题边框
+            for i in range(len_Score_last-1, -1, -1):
+                if Score_last[i]['label'] == 'cloze':
+                    model_box2 = []; score2 = []; num2 = []; type_score2 = []; temp22 = []
+                    if Score_last[i]['model_box'] in model_box2:
+                        index21 = model_box2.index(Score_last[i]['model_box'])
+                        index2 = temp22[index21]
+                        score = Score_last[i]['score']
+                        if score < score2[index21] and score2[index21] < 20:  # 去重,type_score多余的包含小题分数
+                            Score_last[i] = -1
+                        elif score < score2[index21] and score2[index21] > 20:  # 去重,type_score多余的包含分卷分数
+                            Score_last[index2] = -1
+                            temp22[index21] = i
+                            score2[index21] = Score_last[i]['score']
+                            num2[index21] = Score_last[i]['number']
+                            type_score2[index21] = Score_last[i]['type_score_box']
+                        elif score > score2[index21] and score < 20:  # 去重,type_score在不大于20分的情况下,暂定保留更大的分数
+                            Score_last[index2] = -1
+                            temp22[index21] = i
+                            score2[index21] = Score_last[i]['score']
+                            num2[index21] = Score_last[i]['number']
+                            type_score2[index21] = Score_last[i]['type_score_box']
+                        elif score > score2[index21] and score > 20:  # 去重,type_score去除大于20分的重复分数
+                            Score_last[i] = -1
+                        else:
+                            Score_last[i] = -1
                     else:
-                        Score_last[i] = -1
+                        model_box2.append(Score_last[i]['model_box'])
+                        score2.append(Score_last[i]['score'])
+                        num2.append(Score_last[i]['number'])
+                        type_score2.append(Score_last[i]['type_score_box'])
+                        temp22.append(i)
                 else:
-                    model_box2.append(Score_last[i]['model_box'])
-                    score2.append(Score_last[i]['score'])
-            else:
-                if Score_last[i]['model_box'] in model_box2:
-                    index2 = model_box2.index(Score_last[i]['model_box'])
-                    score = Score_last[i]['score']
-                    if score < score2[index2]:  # 去重,type_score多余的包含小题分数
-                        Score_last[i] = -1
-                    elif score > score2[index2]:  # 去重,type_score在不大于30分的情况下,暂定保留更大的分数
-                        Score_last[index2] = -1
+                    model_box2 = []; score2 = []; num2 = []; type_score2 = []; temp22 = []
+                    if Score_last[i]['model_box'] in model_box2:
+                        index21 = model_box2.index(Score_last[i]['model_box'])
+                        index2 = temp22[index21]
+                        score = Score_last[i]['score']
+                        if score < score2[index21]:  # 去重,暂定保留更大的分数
+                            Score_last[i] = -1
+                        elif score > score2[index21]:  # 去重,暂定保留更大的分数
+                            Score_last[index2] = -1
+                            temp22[index21] = i
+                            score2[index21] = Score_last[i]['score']
+                            num2[index21] = Score_last[i]['number']
+                            type_score2[index21] = Score_last[i]['type_score_box']
+                        else:
+                            Score_last[i] = -1
                     else:
-                        Score_last[i] = -1
-                else:
-                    model_box2.append(Score_last[i]['model_box'])
-                    score2.append(Score_last[i]['score'])
-    else:
-        for i in range(len_Score_last):
-            if Score_last[i]['model_box'] in model_box2:
-                index2 = model_box2.index(Score_last[i]['model_box'])
-                score = Score_last[i]['score']
-                if score < score2[index2] and score2[index2] < 30:  # 去重,type_score多余的包含小题分数
-                    Score_last[i] = -1
-                elif score < score2[index2] and score2[index2] > 30:  # 去重,type_score多余的包含分卷分数
-                    Score_last[index2] = -1
-                elif score > score2[index2] and score < 30:  # 去重,type_score在不大于30分的情况下,暂定保留更大的分数
-                    Score_last[index2] = -1
-                elif score > score2[index2] and score > 30:  # 去重,type_score去除大于30分的重复分数
-                    Score_last[i] = -1
-                else:
-                    Score_last[i] = -1
-            else:
-                model_box2.append(Score_last[i]['model_box'])
-                score2.append(Score_last[i]['score'])
-    while num_redundance < len_Score_last:  # 去重一个边框可能对应多个type_score的情况
-        if Score_last[num_redundance] == -1:
-            del Score_last[num_redundance]
-            len_Score_last = len_Score_last - 1
-        else:
-            num_redundance = num_redundance + 1
-    # print(Score_last)
-    # print(volume_last)  # 分卷信息,暂不输出
-    # print(answer_sheet['regions'])
+                        model_box2.append(Score_last[i]['model_box'])
+                        score2.append(Score_last[i]['score'])
+                        num2.append(Score_last[i]['number'])
+                        type_score2.append(Score_last[i]['type_score_box'])
+                        temp22.append(i)
+        else:  # 除去英语外的主观题边框修正
+            model_box2 = []; score2 = []; num2 = []; type_score2 = []; temp22 = []
+            for i in range(len_Score_last - 1, -1, -1):  # 根据type_score切分
+                if Score_last[i]['label'] == 'solve' or Score_last[i]['label'] == 'solve0':
+                    if Score_last[i]['model_box'] in model_box2 and type(Score_last[i]['number']) is not list:
+                        index21 = model_box2.index(Score_last[i]['model_box'])
+                        index2 = temp22[index21]
+                        score = Score_last[i]['score']
+                        num = Score_last[i]['number']
+                        type_score = Score_last[i]['type_score_box']
+                        del_box = 0
+                        if num == -1:  # 去除同一主观题内对应的多个边框内的小项分数
+                            if 'ocr' in Score_last[index2]:
+                                Score_last[index2] = -1
+                                temp22[index21] = i
+                                score2[index21] = Score_last[i]['score']
+                                num2[index21] = Score_last[i]['number']
+                                type_score2[index21] = Score_last[i]['type_score_box']
+                            elif 'ocr' in Score_last[i]:
+                                Score_last[i] = -1
+
+                            else:
+                                Score_last[i] = -1
+
+                        elif num == 10000:
+                            Score_last[i] = -1
+                        elif num2[index21] == 10000 or num2[index21] == -1 and type_score[1] - type_score2[index21][1] > 100:  # 同一主观题包含大题分数和小项分数,且大题分数位于边框中间,切分为两个主观题
+                            yy_max.append(Score_last[i]['model_box'][3])
+                            del_box = copy.deepcopy(Score_last[index2]['model_box'])
+                            score_del.append(del_box)
+                            Score_last[index2]['model_box'] = (
+                            Score_last[index2]['model_box'][0], Score_last[index2]['model_box'][1],
+                            Score_last[index2]['model_box'][2], type_score[1])
+                            Score_last[i]['model_box'] = (
+                            Score_last[i]['model_box'][0], type_score[1], Score_last[i]['model_box'][2],
+                            Score_last[i]['model_box'][3])
+                            score2[index21] = Score_last[i]['score']
+                            num2[index21] = Score_last[i]['number']
+                            type_score2[index21] = Score_last[i]['type_score_box']
+                            temp22[index21] = i
+                        elif score < score2[index21] and score2[index21] > 30:  # 默认有效的分数值小于30分
+                            Score_last[index2] = -1
+                            temp22[index21] = i
+                            score2[index21] = Score_last[i]['score']
+                            num2[index21] = Score_last[i]['number']
+                            type_score2[index21] = Score_last[i]['type_score_box']
+                        elif score < 30 and score2[index21] < 30 and type_score[1] - type_score2[index21][1] > 100:  # 同一主观题包含两个大题分数,且距离 >100 的情况下切分为两个主观题
+                            yy_max.append(Score_last[i]['model_box'][3])
+                            del_box = copy.deepcopy(Score_last[index2]['model_box'])
+                            score_del.append(del_box)
+                            Score_last[index2]['model_box'] = (
+                            Score_last[index2]['model_box'][0], Score_last[index2]['model_box'][1],
+                            Score_last[index2]['model_box'][2], type_score[1])
+                            Score_last[i]['model_box'] = (
+                            Score_last[i]['model_box'][0], type_score[1], Score_last[i]['model_box'][2],
+                            Score_last[i]['model_box'][3])
+                            score2[index21] = Score_last[i]['score']
+                            num2[index21] = Score_last[i]['number']
+                            type_score2[index21] = Score_last[i]['type_score_box']
+                            temp22[index21] = i
+                        elif score > 30 and score2[index21] > 30:  # 默认有效的分数值小于30分
+                            temp_del = i if score > score2[index21] else index2
+                            Score_last[temp_del] = -1
+                            if temp_del == index2:
+                                temp22[index21] = i
+                                score2[index21] = Score_last[i]['score']
+                                num2[index21] = Score_last[i]['number']
+                                type_score2[index21] = Score_last[i]['type_score_box']
+                        elif score < 30 and score2[index21] < 30:  # 多个分数小于30,且距离 <100 的情况下,删除
+                            temp_del = i if score < score2[index21] else index2
+                            Score_last[temp_del] = -1
+                            if temp_del == index2:
+                                temp22[index21] = i
+                                score2[index21] = Score_last[i]['score']
+                                num2[index21] = Score_last[i]['number']
+                                type_score2[index21] = Score_last[i]['type_score_box']
+                        else:
+                            Score_last[i] = -1
+                    else:
+                        model_box2.append(Score_last[i]['model_box'])
+                        score2.append(Score_last[i]['score'])
+                        num2.append(Score_last[i]['number'])
+                        type_score2.append(Score_last[i]['type_score_box'])
+                        temp22.append(i)
+        for del_i in range(len_Score_last - 1, -1, -1):
+            if Score_last[del_i] == -1:
+                del Score_last[del_i]
+    if answer_sheet['subject'] != 'english':
+        Score_last = sorted(Score_last, key=lambda x: (x['model_box'][0], x['model_box'][0] + x['model_box'][1]),reverse=True)
+        temp33 = 0
+        len3 = len(Score_last)
+        for i in range(len3 - 1, -1, -1):  # 根据type_score合并
+            if Score_last[i]['label'] == 'solve' or Score_last[i]['label'] == 'solve0':
+                num = Score_last[i]['number']
+                type_score = Score_last[i]['type_score_box']
+                if type(Score_last[i]['number']) is not list:
+                    if num == 10000 or num == -1 or num < 4 and (Score_last[i]['label'] == 'solve' or Score_last[i]['label'] == 'solve0'):  # 小题题号
+                        temp3 = 100000
+                        for indexi, model_box31 in enumerate(Score_last):
+                            if indexi != i and ((type_score[0] and type_score[2]) in range(model_box31['model_box'][0] - 30,model_box31['model_box'][2])) and ((type_score[1] and type_score[3]) in range(model_box31['model_box'][1],model_box31['model_box'][3]+30)):  # 根据小题边框的xmin与主观题xmin的差值判断为一栏,以及纵坐标差值判断条件
+                                del_box = copy.deepcopy(Score_last[i]['model_box'])
+                                score_del.append(del_box)
+                                score_del.append(Score_last[indexi]['model_box'])
+                                Score_last[indexi]['model_box'] = (
+                                Score_last[indexi]['model_box'][0], Score_last[indexi]['model_box'][1],
+                                Score_last[indexi]['model_box'][2], Score_last[i]['model_box'][3])
+                                del Score_last[i]
+                                break
+                            temp31 = int(type_score[1] - model_box31['model_box'][3])  # 计算小题边框的ymin与主观题边框ymax的距离
+                            if (type_score[0] in range(model_box31['model_box'][0] - 30,model_box31['model_box'][2])) and temp31 > -20 and temp31 < temp3:  # 根据小题边框的xmin与主观题xmin的差值判断为一栏,以及纵坐标差值判断条件
+                                temp3 = temp31
+                                temp33 = indexi
+                                yy_max.append(Score_last[i]['model_box'][3])
+                                if indexi == len(Score_last) - 1:
+                                    del_box = copy.deepcopy(Score_last[temp33]['model_box'])
+                                    score_del.append(del_box)
+                                    score_del.append(Score_last[i]['model_box'])
+                                    Score_last[temp33]['model_box'] = (
+                                    Score_last[temp33]['model_box'][0], Score_last[temp33]['model_box'][1],
+                                    Score_last[temp33]['model_box'][2], Score_last[i]['model_box'][3])
+                                    del Score_last[i]
+                                    break
+                            elif indexi == len(Score_last) - 1 and temp3 != 100000:
+                                del_box = copy.deepcopy(Score_last[temp33]['model_box'])
+                                score_del.append(del_box)
+                                score_del.append(Score_last[i]['model_box'])
+                                Score_last[temp33]['model_box'] = (
+                                Score_last[temp33]['model_box'][0], Score_last[temp33]['model_box'][1],
+                                Score_last[temp33]['model_box'][2], Score_last[i]['model_box'][3])
+                                del Score_last[i]
+                                break
+
+
 
     if Score_last != []:
         for i in range(len(Score_last)): # 多选题题号和分数逐个显示
@@ -806,7 +896,7 @@ def get_sheet_number_total(answer_sheet, res, img0):
                             if Score_last[i]['number_score'] != -1:
                                 answer_sheet['regions'][j]['default_points'] = Score_last[i]['number_score']
 
-        elif num_choice > 1 or num_cloze >1:
+        elif num_choice > 1 or num_cloze > 1:
             for i in range(len(Score_last)):
                 if Score_last[i]['label'] == 'choice':
                     count_choice_m = 0
@@ -877,18 +967,18 @@ def get_sheet_number_total(answer_sheet, res, img0):
                                   'label': 'choice_m',
                                   'type_box': type_score_choice_m}
                 test_result1['words'] = str()
-                # try:  # tr_OCR
-                #     image_choice = image_src.crop((type_score_choice_m[0], type_score_choice_m[1], type_score_choice_m[2], type_score_choice_m[3]))
-                #     res1 = tr.run(image_choice)
-                #     print('tr_OCR')
-                #     for t in range(len(res1)):
-                #         test_result1['words'] = test_result1['words'] + res1[t][1]
-                # except Exception as e:  # baidu_OCR
-                #     print('baidu_OCR')
-                res1 = get_ocr_text_and_coordinate_in_google_format(
-                    img0[type_score_choice_m[1]:type_score_choice_m[3], type_score_choice_m[0]:type_score_choice_m[2]], ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG')
-                for t in range(len(res1['words'])):
-                    test_result1['words'] = test_result1['words'] + res1['words'][t]
+                try:  # tr_OCR
+                    image_choice = image_src.crop((type_score_choice_m[0], type_score_choice_m[1], type_score_choice_m[2], type_score_choice_m[3]))
+                    res1 = tr.run(image_choice)
+                    print('tr_OCR')
+                    for t in range(len(res1)):
+                        test_result1['words'] = test_result1['words'] + res1[t][1]
+                except Exception as e:  # baidu_OCR
+                    print('baidu_OCR')
+                    res1 = get_ocr_text_and_coordinate_in_google_format(
+                        img0[type_score_choice_m[1]:type_score_choice_m[3], type_score_choice_m[0]:type_score_choice_m[2]], ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG')
+                    for t in range(len(res1['words'])):
+                        test_result1['words'] = test_result1['words'] + res1['words'][t]
                 if test_result1['words'] != {}:
                     test = key_words(test_result1)
                 choice_m_score = -1
@@ -914,21 +1004,39 @@ def get_sheet_number_total(answer_sheet, res, img0):
 
         '''分数与模型对应'''
         ocr_flag = 0
-        for i in range(len(answer_sheet['regions'])):
-            for j in range(len(Score_last)):
-                if (Score_last[j]['model_box'][0] == answer_sheet['regions'][i]['bounding_box']['xmin']
-                        and Score_last[j]['model_box'][1] == answer_sheet['regions'][i]['bounding_box']['ymin']
-                        and Score_last[j]['model_box'][2] == answer_sheet['regions'][i]['bounding_box']['xmax']
-                        and Score_last[j]['model_box'][3] == answer_sheet['regions'][i]['bounding_box']['ymax']):
-                    if Score_last[j]['number'] != -1:
+        for i in range(len(answer_sheet['regions'])-1, -1, -1):
+            for j in range(len(Score_last)-1, -1, -1):
+                if (int(Score_last[j]['model_box'][0]) == int(answer_sheet['regions'][i]['bounding_box']['xmin']) and
+                    int(Score_last[j]['model_box'][1]) == int(answer_sheet['regions'][i]['bounding_box']['ymin']) and
+                    int(Score_last[j]['model_box'][2]) == int(answer_sheet['regions'][i]['bounding_box']['xmax']) and
+                    int(Score_last[j]['model_box'][3]) != int(answer_sheet['regions'][i]['bounding_box']['ymax'])) and (answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i]['class_name'] == 'solve0'):
+                    answer_sheet['regions'][i]['bounding_box']['ymax'] = int(Score_last[j]['model_box'][3])
+                    if Score_last[j]['number'] == 10000:
+                        answer_sheet['regions'][i]['number'] = -1  # 题号
+                    elif Score_last[j]['number'] != -1 and Score_last[j]['number'] != 10000:
+                        answer_sheet['regions'][i]['number'] = Score_last[j]['number']  # 题号
+                    else:
+                        answer_sheet['regions'][i]['number'] = -1  # 题号
+                    if Score_last[j]['score'] != -1:
+                        answer_sheet['regions'][i]['default_points'] = Score_last[j]['score']
+                        if type(answer_sheet['regions'][i]['default_points']) is list and (
+                                answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i][
+                            'class_name'] == 'solve0'):
+                            answer_sheet['regions'][i]['class_name'] = 'optional_solve'
+                    elif 'default_points' not in answer_sheet['regions'][i].keys():
+                        answer_sheet['regions'][i]['default_points'] = -1
+                    del Score_last[j]
+                elif (int(Score_last[j]['model_box'][0]) == int(answer_sheet['regions'][i]['bounding_box']['xmin']) and
+                    int(Score_last[j]['model_box'][1]) == int(answer_sheet['regions'][i]['bounding_box']['ymin']) and
+                    int(Score_last[j]['model_box'][2]) == int(answer_sheet['regions'][i]['bounding_box']['xmax']) and
+                    int(Score_last[j]['model_box'][3]) == int(answer_sheet['regions'][i]['bounding_box']['ymax'])):
+                    if Score_last[j]['number'] == 10000:
+                        answer_sheet['regions'][i]['number'] = -1  # 题号
+                    elif Score_last[j]['number'] != -1 and Score_last[j]['number'] != 10000:
                         answer_sheet['regions'][i]['number'] = Score_last[j]['number']  # 题号
+                    else:
+                        answer_sheet['regions'][i]['number'] = -1  # 题号
                     if Score_last[j]['score'] != -1:
-                        # score = Score_last[j]['score']
-                        # try:
-                        #     length = len(answer_sheet['regions'][i]['number'])
-                        #     answer_sheet['regions'][i]['default_points'] = length * [score]
-                        # except Exception:
-                        #     answer_sheet['regions'][i]['default_points'] = score
                         answer_sheet['regions'][i]['default_points'] = Score_last[j]['score']
                         if type(answer_sheet['regions'][i]['default_points']) is list and (
                                 answer_sheet['regions'][i]['class_name'] == 'solve' or answer_sheet['regions'][i][
@@ -937,8 +1045,37 @@ def get_sheet_number_total(answer_sheet, res, img0):
                         ocr_flag = 1
                         if 'type_score_ocr' in answer_sheet['regions'][i].keys():
                             del answer_sheet['regions'][i]['type_score_ocr']
-                        # answer_sheet['regions'][i]['number_score'] = Score_last[j]['number_score']  # 小题分数
-                        # answer_sheet['regions'][i]['counts'] = Score_last[j]['counts']  # 小题个数
+                    elif 'default_points' not in answer_sheet['regions'][i].keys():
+                        answer_sheet['regions'][i]['default_points'] = -1
                     if ocr_flag == 0 and 'ocr' in Score_last[j]:  # 没有识别到分数的模块添加type_score_ocr结果
                         answer_sheet['regions'][i]['type_score_ocr'] = Score_last[j]['ocr']
+                    del Score_last[j]
+
+            if score_del != []:  # del_model_boxs
+                for j in range(len(score_del)):
+                    if score_del != [] and (score_del[j][0] == answer_sheet['regions'][i]['bounding_box']['xmin'] and score_del[j][1] ==
+                            answer_sheet['regions'][i]['bounding_box']['ymin'] and score_del[j][2] ==
+                            answer_sheet['regions'][i]['bounding_box']['xmax'] and score_del[j][3] ==
+                            answer_sheet['regions'][i]['bounding_box']['ymax']):
+                        del answer_sheet['regions'][i]
+        for jj in range(len(Score_last)):   # add_model_boxs
+            answer_sheet_one = {}
+            answer_sheet_one['class_name'] = Score_last[jj]['label']
+            box_one = {}
+            box_one['xmin'] = int(Score_last[jj]['model_box'][0])
+            box_one['ymin'] = int(Score_last[jj]['model_box'][1])
+            box_one['xmax'] = int(Score_last[jj]['model_box'][2])
+            box_one['ymax'] = int(Score_last[jj]['model_box'][3])
+            answer_sheet_one['bounding_box'] = box_one
+            if Score_last[jj]['number'] == 10000:
+                answer_sheet_one['number'] = -1  # 题号
+            elif Score_last[jj]['number'] != -1 and Score_last[jj]['number'] != 10000:
+                answer_sheet_one['number'] = Score_last[jj]['number']  # 题号
+            else:
+                answer_sheet_one['number'] = -1  # 题号
+            if Score_last[jj]['score'] != -1:
+                answer_sheet_one['default_points'] = Score_last[jj]['score']
+            else:
+                answer_sheet_one['default_points'] = -1
+            answer_sheet['regions'].append(answer_sheet_one)
     return answer_sheet

+ 80 - 88
segment/sheet_resolve/analysis/solve/optional_solve.py

@@ -4,6 +4,7 @@
 import cv2
 import re
 from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
+from segment.sheet_resolve.analysis.sheet.choice_infer import find_digital
 
 
 def rgb2binary(im):
@@ -23,96 +24,87 @@ def find_contours(left, top, image, ex_x=30, ex_y=1):
     cnts = contours[0] if int(major) > 3 else contours[1]
 
     cnt = sorted(cnts, key=cv2.contourArea)
-    l, t, r, b = 9999, 9999, 0, 0
-    sum_w, sum_h = 0, 0
+    boxes = []
     for ele in cnt:
         x, y, w, h = cv2.boundingRect(ele)
-        xm = x + w
-        ym = y + h
-        l, t, r, b = min(l, x), min(t, y), max(r, xm), max(b, ym)
-        sum_w, sum_h = sum_w + w, sum_h + h
-
-    cols = len(cnt)
-    if cols > 4:
-        cols = 4
-    single_width, single_height = int(sum_w / len(cnt)), int(sum_h / len(cnt))
-    optional_solve_dict = {'rows': 1, 'cols': cols,
-                           'single_width': single_width,
-                           'single_height': single_height,
-                           'bounding_box': {'xmin': l + left + single_width,
-                                            'ymin': t + top,
-                                            'xmax': r + left,
-                                            'ymax': b + top}
-                           }
-    return optional_solve_dict
-
-
-def resolve_optional_choice(ll, tt, direction, image):
-    ocr_res = get_ocr_text_and_coordinate(image)
-    # ocr_res = [{'chars': [{'char': '[', 'location': {'width': 16, 'top': 12, 'left': 11, 'height': 32}}, {'char': '4', 'location': {'width': 16, 'top': 12, 'left': 27, 'height': 32}}, {'char': '5', 'location': {'width': 16, 'top': 12, 'left': 36, 'height': 32}}, {'char': ']', 'location': {'width': 16, 'top': 12, 'left': 55, 'height': 32}}, {'char': '[', 'location': {'width': 16, 'top': 12, 'left': 74, 'height': 32}}, {'char': '4', 'location': {'width': 16, 'top': 12, 'left': 93, 'height': 32}}, {'char': '6', 'location': {'width': 16, 'top': 12, 'left': 102, 'height': 32}}, {'char': ']', 'location': {'width': 16, 'top': 12, 'left': 121, 'height': 32}}, {'char': '[', 'location': {'width': 16, 'top': 12, 'left': 140, 'height': 32}}, {'char': '4', 'location': {'width': 16, 'top': 12, 'left': 159, 'height': 32}}, {'char': '7', 'location': {'width': 16, 'top': 12, 'left': 178, 'height': 32}}, {'char': ']', 'location': {'width': 14, 'top': 12, 'left': 188, 'height': 32}}], 'location': {'width': 191, 'top': 12, 'left': 11, 'height': 32}, 'words': '[45][46][47]'}]
-    digital_p = r'[\[*|【*]\d+[\]*|]*]'
-    eng_char_p = '[[*|【*][A|B|C|D|E|F|G|T|F][]*|】*]'  # english
-
-    pattern_list = [digital_p, eng_char_p]
-
-    option_list = []
-    mean_width_list = []
-    mean_height_list = []
-    for i, words_line in enumerate(ocr_res):
-        words = words_line['words']
-        words = words.replace(' ', '').upper()  # 去除空格
-        loc = words_line['location']
-        top = int(loc['top'])
-        left = int(loc['left'])
-        width = int(loc['width'])
-        height = int(loc['height'])
-        loc.update({'right': left + width, 'bottom': top + height,
-                    'mid_x': left + width // 2, 'mid_y': top + height // 2})
-
-        for p in pattern_list:
-            words_m = re.finditer(p, words)
-            match_index_list = [(m.group(), m.span()) for m in words_m if m]
-
-            option_list += [ele[0].replace('[', '').replace(']', '').replace('【', ']').replace('】', '')
-                            for ele in match_index_list]
-
-            for letter_info in match_index_list:
-                index_start = letter_info[1][0]
-                index_end = letter_info[1][1] - 1
-                char_start = words_line['chars'][index_start]
-                char_end = words_line['chars'][index_end]
-
-                letter_loc_xmin = int(char_start['location']['left'])
-                letter_loc_ymin = min(int(char_start['location']['top']), int(char_end['location']['top']))
-                letter_loc_xmax = int(char_end['location']['left']) + int(char_end['location']['width'])
-                letter_loc_ymax = max(int(char_start['location']['top']) + int(char_start['location']['height']),
-                                      int(char_end['location']['top']) + int(char_end['location']['height']))
-
-                mean_width_list.append(letter_loc_xmax-letter_loc_xmin)
-                mean_height_list.append(letter_loc_ymax-letter_loc_ymin)
-
-    if not option_list:
-        option_list = 'A,B'
-    left = min([int(ele['location']['left']) for ele in ocr_res])
-    top = min([int(ele['location']['top']) for ele in ocr_res])
-    right = max([int(ele['location']['left']) + int(ele['location']['width']) for ele in ocr_res])
-    bottom = max([int(ele['location']['top']) + int(ele['location']['height']) for ele in ocr_res])
-
-    if direction == 180:
-        rows, cols = 1, len(option_list)
+        x_mid = x + w//2
+        y_mid = y + h//2
+        xmax = x+w
+        ymax = y+h
+        boxes.append((x, y, xmax, ymax, x_mid,y_mid))
+
+    return boxes
+
+
+def point_in_polygon(point, polygon):
+    """
+    Return True when ``point`` lies inside, or on the border of, an axis-aligned box.
+
+    :param point: sequence whose first two items are the x and y coordinates.
+    :param polygon: sequence whose first four items are xmin, ymin, xmax, ymax;
+        despite the name, only this rectangle is tested.
+    :return: bool
+    """
+    xmin, ymin, xmax, ymax = polygon[0], polygon[1], polygon[2], polygon[3]
+    if xmin <= point[0] <= xmax and ymin <= point[1] <= ymax:
+        return True
     else:
-        rows, cols = len(option_list), 1
-
-    mean_width = sum(mean_width_list) // len(mean_width_list)
-    mean_height = sum(mean_height_list) // len(mean_height_list)
-    optional_choice_dict = {'rows': rows, 'cols': cols,
-                            'option': ','.join(option_list),
-                            'single_width': mean_width,
-                            'single_height': mean_height,
-                            'direction': direction,
-                            'bounding_box': {'xmin': ll + left,
-                                             'ymin': tt + top,
-                                             'xmax': ll + right,
-                                             'ymax': tt + bottom}}
+        return False
 
+
+def resolve_optional_choice(l_, t_, direction, image):
+    """
+    Resolve the mark boxes of an optional-question region.
+
+    OCR is run on the crop to find the question numbers. Every number whose centre
+    falls inside a contour returned by find_contours yields one candidate mark box;
+    the output box is the union of those candidates.
+
+    :param l_: x offset added to the resulting bounding box.
+    :param t_: y offset added to the resulting bounding box.
+    :param direction: 180 lays the numbers out in one row, any other value in one column.
+    :param image: cropped region as an array (2-D or 3-D).
+    :return: dict with the keys class_name, rows, cols, single_width, single_height,
+        direction, bounding_box and number.
+    """
+    ocr_res = get_ocr_text_and_coordinate(image)
+    digital_list, _chars_list, d_mean_height, d_mean_width = find_digital(ocr_res, 0, 0)
+
+    # shape[:2] also works for colour (3-D) crops; unpacking the full shape
+    # raised ValueError on them.
+    h, w = image.shape[:2]
+    whole_crop_box = {'xmin': l_,
+                      'ymin': t_,
+                      'xmax': l_ + w,
+                      'ymax': t_ + h}
+
+    if not digital_list:
+        # No question number recognised: report the whole crop as a single box
+        # carrying the placeholder number 501.
+        numbers = [501]
+        optional_choice_dict = {'class_name': 'optional_choice',
+                                'rows': 1, 'cols': 1,
+                                'single_width': w,
+                                'single_height': h,
+                                'direction': direction,
+                                'bounding_box': whole_crop_box,
+                                'number': numbers}
+    else:
+        numbers = sorted([ele['digital'] for ele in digital_list])
+
+        if direction == 180:
+            rows, cols = 1, len(numbers)
+        else:
+            rows, cols = len(numbers), 1
+
+        contours = find_contours(l_, t_, image, d_mean_width//2, d_mean_height//2)
+
+        res_region = []
+        for contour in contours:  # (xmin, ymin, xmax, ymax, x_mid, y_mid)
+            for num in digital_list:
+                # presumably loc is (xmin, ymin, xmax, ymax, ..., mid_x, mid_y) -- confirm in find_digital
+                loc = num['loc']
+                num_center = (loc[-2], loc[-1])
+                if not point_in_polygon(num_center, contour):
+                    continue
+                if num_center[0] < contour[-2] - d_mean_width//3:
+                    # Number sits in the left part of the box: the mark area runs
+                    # from the right edge of the number to the right edge of the box.
+                    region = (loc[2]+1, loc[1]-1, contour[2]-3, loc[3]+1)
+                else:
+                    # Number sits around the middle of the box: use the full box width.
+                    region = (contour[0], loc[1] - 1, contour[2], loc[3] + 1)
+                res_region.append(region)
+
+        if res_region:
+            l, t, r, b = 9999, 9999, 0, 0
+            sum_w, sum_h = 0, 0
+            for region in res_region:
+                l, t, r, b = min(l, region[0]), min(t, region[1]), max(r, region[2]), max(b, region[3])
+                sum_w += region[2] - region[0]
+                sum_h += region[3] - region[1]
+
+            # Average over the regions that were actually summed; dividing by
+            # len(numbers) skewed the mean whenever the two counts differed.
+            mean_w, mean_h = sum_w//len(res_region), sum_h//len(res_region)
+            bounding_box = {'xmin': l_ + l,
+                            'ymin': t_ + t,
+                            'xmax': l_ + r,
+                            'ymax': t_ + b}
+        else:
+            # No number fell inside any contour: fall back to the whole crop
+            # instead of emitting the 9999/0 sentinels as coordinates.
+            mean_w, mean_h = w//cols, h//rows
+            bounding_box = whole_crop_box
+
+        optional_choice_dict = {'class_name': 'optional_choice',
+                                'rows': rows, 'cols': cols,
+                                'single_width': mean_w,
+                                'single_height': mean_h,
+                                'direction': direction,
+                                'bounding_box': bounding_box,
+                                'number': numbers}
+
     return optional_choice_dict

+ 2 - 2
segment/sheet_resolve/lib/model/test.py

@@ -43,8 +43,8 @@ def _get_image_blob(analysis_type, im):
     processed_ims = []
     im_scale_factors = []
 
-    scales = (375,)
-    max_size = 500
+    scales = (1500,)
+    max_size = 2000
     if 'sheet' in analysis_type:
         scales = cfg.SHEET.SCALES
         max_size = cfg.SHEET.MAX_SIZE

+ 1 - 0
segment/sheet_resolve/tools/tf_settings.py

@@ -9,6 +9,7 @@ subject_list = ['math', 'math_zxhx', 'english', 'chinese',
                 'geography', 'science_comprehensive', 'arts_comprehensive', 'cloze', 'choice']
 
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+decide_blank_model = model_dir_path = os.path.join(BASE_DIR, 'model', 'decide_blank', 'model.npy')
 
 xml_template_path = os.path.join(BASE_DIR, 'labels', '000000-template.xml')
 images_dir_path = os.path.join(BASE_DIR, 'images')

+ 30 - 9
segment/sheet_resolve/tools/utils.py

@@ -146,18 +146,35 @@ def img_resize(analysis_type, im):
     if ycv > xcv:
         # 使用cv2.resize时,参数输入是 宽×高×通道
         resize = cv2.resize(im, (min_size, max_size), interpolation=cv2.INTER_AREA)
-        # cv2.imshow("image", resize)
-        # cv2.waitKey(100000)
         ratio = (float(xcv / min_size), float(ycv / max_size))
         return resize, ratio
     if ycv <= xcv:
         resize = cv2.resize(im, (max_size, min_size), interpolation=cv2.INTER_AREA)
-        # cv2.imshow("image", resize)
-        # cv2.waitKey(100000)
         ratio = (float(xcv / max_size), float(ycv / min_size))
         return resize, ratio
 
 
+def resize_faster_rcnn(analysis_type, im_orig):
+    """
+    Rescale an image with one uniform factor, keeping its aspect ratio.
+
+    The shorter side is scaled to ``min_size`` unless that would push the longer
+    side past ``max_size``; in that case the longer side is scaled to ``max_size``.
+    The limits are 1500/2000 for 'math_blank' and 375/500 for everything else.
+
+    :param analysis_type: analysis name used to select the size limits.
+    :param im_orig: image array whose first two dimensions are read as its size.
+    :return: (resized image, (scale, scale))
+    """
+    if analysis_type == 'math_blank':
+        min_size, max_size = 1500, 2000
+    else:
+        min_size, max_size = 375, 500
+
+    side_lengths = im_orig.shape[0:2]
+    short_side = np.min(side_lengths)
+    long_side = np.max(side_lengths)
+
+    im_scale = float(min_size) / float(short_side)
+    # Cap the scale so the longer side never exceeds max_size
+    if np.round(im_scale * long_side) > max_size:
+        im_scale = float(max_size) / float(long_side)
+
+    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
+                    interpolation=cv2.INTER_LINEAR)
+    return im, (im_scale, im_scale)
+
+
 def resize_by_percent(im, percent):
     """
     :param im:
@@ -654,7 +671,9 @@ def list_to_dict(box_list):
     return location_s_box
 
 
-def infer_number(number_list, interval=1):
+def infer_number(number_list, times=0, interval=1):
+    if times > 30:
+        return number_list
     # 默认题号间隔为1
     if number_list[-1] != -1 and len(list(set(number_list[:-1]))) == 1:
         new_number_list = []
@@ -678,10 +697,12 @@ def infer_number(number_list, interval=1):
                     number_list[n_index - 1] = number_list[n_index] - interval
                 if number_list[n_index + 1] == -1:
                     number_list[n_index + 1] = number_list[n_index] + interval
-        return infer_number(number_list)
+
+        times += 1
+        return infer_number(number_list, times)
 
 
-def combine_char_in_raw_format(word_result_list):
+def combine_char_in_raw_format(word_result_list, left_boundary=0, top_boundary=0):
     new_all_word_list = []
     for index, chars_dict in enumerate(word_result_list):
         chars_list = chars_dict['chars']
@@ -689,7 +710,7 @@ def combine_char_in_raw_format(word_result_list):
         char_str = ''
         for ele in chars_list:
             location = ele['location']
-            left, top, width, height = location['left'], location['top'], location['width'], location['height']
+            left, top, width, height = location['left']+left_boundary, location['top']+top_boundary, location['width'], location['height']
             right, bottom = left + width, top + height
 
             box = (left, top, right, bottom, width, height)
@@ -720,7 +741,7 @@ def combine_char_in_raw_format(word_result_list):
             min_arr = location_arr.min(axis=0)
             max_arr = location_arr.max(axis=0)
             location = {'left': min_arr[0], 'top': min_arr[1],
-                        'width': max_arr[2]-min_arr[0], 'height': max_arr[3]-min_arr[1]}
+                        'width': max_arr[2] - min_arr[0], 'height': max_arr[3] - min_arr[1]}
             new_chars_list = []
             for ii, loc in enumerate(location_arr):
                 char = combine_str[ii]

+ 59 - 29
segment/sheet_server.py

@@ -1,6 +1,7 @@
 # @Author  : lightXu
 # @File    : sheet_server.py
 # @Time    : 2018/12/19 0019 下午 14:33
+import itertools
 import json
 import os
 import shutil
@@ -21,47 +22,61 @@ from segment.sheet_resolve.analysis.resolve import cloze
 from segment.sheet_resolve.analysis.resolve import exam_number_row_col
 from segment.sheet_resolve.analysis.resolve import sheet
 from segment.sheet_resolve.analysis.resolve import solve, solve_with_number, cloze_with_number
-from segment.sheet_resolve.analysis.sheet.analysis_sheet import box_region_format, question_number_format
+from segment.sheet_resolve.analysis.sheet.analysis_sheet import box_region_format, question_number_format, merge_span_boxes
 from segment.sheet_resolve.analysis.sheet.sheet_points import get_sheet_points
 from segment.sheet_resolve.analysis.sheet.sheet_points_total import get_sheet_number_total
 from segment.sheet_resolve.tools import utils
 from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate, change_format_baidu_to_google
 from segment.sheet_resolve.analysis.sheet.sheet_points_by_nlp import get_sheet_points_by_nlp
+from segment.sheet_resolve.analysis.sheet.ocr_sheet import sheet_sorted
+from segment.sheet_resolve.analysis.sheet.decide_blank import svm_predict
 
 
 logger = logging.getLogger(settings.LOGGING_TYPE)
 
 
-def decide_blank_sheet(image):
+def decide_blank_sheet(image, subject):
+    """
+    Decide whether a sheet image is blank.
+
+    'math' sheets are passed to svm_predict; every other subject uses a pixel-mean
+    heuristic on one half of the grayscale image.
+
+    :param image: sheet image array; converted to grayscale when it has more than two dimensions.
+    :param subject: subject name used to pick the decision method.
+    :return: true:blank, false:unblank
+    """
     if len(image.shape) <= 2:
         gray_image = image
     else:
         gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-    height = gray_image.shape[0]
-    width = gray_image.shape[1]
-    if max(height, width) > 800:
-        percent = max(height, width) / 800
-        new_x = int(width * percent)
-        new_y = int(height * percent)
-
-        gray_image = cv2.resize(gray_image, (new_x, new_y), interpolation=cv2.INTER_AREA)
-
-    if height > width:  # 纵向
-        image = gray_image[height//2:, :]
-        PIX_VALUE_LOW = 25.0  # 二进制参数
-        PIX_VALUE_HIGH = 220  # 原始图像参数
-
-    else:  # 横向
-        image = gray_image[:, width // 2:]
-        PIX_VALUE_LOW = 15.0
-        PIX_VALUE_HIGH = 250
-
-    bin_img = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
-    bin_img_mean = np.mean(bin_img)
-    img_raw_mean = np.mean(image)
-
-    print(bin_img_mean, img_raw_mean)
-    blank_cond = bin_img_mean < PIX_VALUE_LOW or img_raw_mean > PIX_VALUE_HIGH
+
+    if subject == 'math':
+        # NOTE(review): 3 is presumably the id svm_predict expects for math -- confirm.
+        subject_id = 3
+        blank_cond = svm_predict(gray_image, subject_id)
+    else:
+
+        height = gray_image.shape[0]
+        width = gray_image.shape[1]
+        if max(height, width) > 800:
+            # NOTE(review): percent is > 1 in this branch, so the resize below enlarges
+            # the image rather than shrinking it to 800 px -- confirm the intent.
+            percent = max(height, width) / 800
+            new_x = int(width * percent)
+            new_y = int(height * percent)
+
+            gray_image = cv2.resize(gray_image, (new_x, new_y), interpolation=cv2.INTER_AREA)
+
+        # NOTE(review): height and width still hold the pre-resize size, so the
+        # crops below are offset by the original dimensions, not the resized ones.
+        if height > width:  # portrait
+            image = gray_image[height//2:, :]
+            PIX_VALUE_LOW = 25.0  # threshold for the mean of the binarized image
+            PIX_VALUE_HIGH = 220  # threshold for the mean of the raw image
+
+        else:  # landscape
+            image = gray_image[:, width // 2:]
+            PIX_VALUE_LOW = 15.0
+            PIX_VALUE_HIGH = 250
+
+        # Inverted Otsu binarization of the cropped part
+        bin_img = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+        bin_img_mean = np.mean(bin_img)
+        img_raw_mean = np.mean(image)
+
+        print(bin_img_mean, img_raw_mean)
+        # Blank when the binarized mean is low or the raw mean is high
+        blank_cond = bin_img_mean < PIX_VALUE_LOW or img_raw_mean > PIX_VALUE_HIGH
+
     return blank_cond
 
 
@@ -309,11 +324,12 @@ def sheet_detail_resolve(raw_img, sheet_dict, xml_save_path, shrink=True):
             traceback.print_exc()
             logger.info('试卷:{} 考号区域解析失败: {}'.format(xml_save_path.replace('xml', '.jpg'), e))
 
-    if 'solve' or 'solve0' or 'composition' or 'composition0' in classes_names_list:
+    if 'solve' or 'solve0' or 'composition' or 'composition0' or 'correction' in classes_names_list:
         try:
             solve_number = solve_with_number(region_tmp, xml_save_path)
             region_tmp = [ele for ele in region_tmp if 'solve' not in ele['class_name']]  # 重名
             region_tmp = [ele for ele in region_tmp if 'composition' not in ele['class_name']]
+            region_tmp = [ele for ele in region_tmp if 'correction' not in ele['class_name']]
             if len(solve_number) > 0:
                 region_tmp.extend(solve_number)
         except Exception as e:
@@ -372,8 +388,22 @@ def sheet_points(sheet_dict_list, image_list, ocr_list, if_ocr=False):
 
 
 def sheet_format_output(init_number, crt_numbers, sheet_dict, image, subject, shrink):
+    """
+    Format, sort and renumber the regions of one sheet for output.
+
+    The regions are grouped by sheet_sorted (presumably one list per column, given
+    the "col_split" input), renumbered group by group, passed to merge_span_boxes,
+    flattened back into a single list and tagged with a 1-based 'sort_id'.
+
+    :return: (sheet_dict, init_number, crt_numbers)
+    """
+    # Drop unused classes, rename classes, add optional ("choose to answer") questions
     sheet_dict = box_region_format(sheet_dict, image, subject, shrink)
-    sheet_dict, init_number, crt_numbers = question_number_format(init_number, crt_numbers, sheet_dict)
+    # Sort
+    col_regions_list = sheet_sorted(sheet_dict["regions"], sheet_dict["col_split"])
+
+    # Renumber the questions
+    # NOTE(review): the first return value is discarded, so this presumably relies on
+    # question_number_format updating col_regions in place -- confirm.
+    for col_regions in col_regions_list:
+        _, init_number, crt_numbers = question_number_format(init_number, crt_numbers, col_regions)
+
+    # NOTE(review): the return value is ignored; presumably col_regions_list is
+    # modified in place -- confirm.
+    merge_span_boxes(col_regions_list)
+
+    # Flatten the groups and record each box's position in the final order
+    regions = list(itertools.chain(*col_regions_list))
+    for i, box in enumerate(regions, 1):
+        box.update({'sort_id': i})
+
+    sheet_dict.update({"regions": regions})
     return sheet_dict, init_number, crt_numbers
 
 

+ 5 - 2
segment/sheet_views.py

@@ -63,11 +63,14 @@ tf_sess_dict = {
     # 'geography': TfSess('geography'),
     # 'science_comprehensive': TfSess('science_comprehensive'),
     # 'arts_comprehensive': TfSess('arts_comprehensive'),
+    # 'math_blank': TfSess('math_blank'),
     # 'chinese_blank': TfSess('chinese_blank'),
     # 'science_comprehensive_blank': TfSess('science_comprehensive_blank'),
     # 'arts_comprehensive_blank': TfSess('arts_comprehensive_blank'),
 }
 
+with_blank_subjects = ["math", "chinese", "science_comprehensive", "arts_comprehensive"]
+
 
 # Create your views here.
 def index(request):
@@ -411,10 +414,10 @@ def analysis_box_once_with_multiple_img(request):
 
                     ocr_list.append(ocr_res)
 
-                    if subject not in ["chinese", "science_comprehensive", "arts_comprehensive"]:
+                    if subject not in with_blank_subjects:
                         sheet_sess = tf_sess_dict[subject]
                         answered = "fixed"
-                    elif decide_blank_sheet(image):
+                    elif decide_blank_sheet(image, subject):
                         sheet_sess = tf_sess_dict[subject + '_blank']
                         subject = subject + '_blank'
                         answered = "blank"