# -*- coding: utf-8 -*-
# @Time : 2020/5/28 0022 17:04
# @Author : LF
# @FileName: ocr_key_words.py
# @Software: PyCharm
import re


def find_repeat(source, elmt):
    """Return every index at which ``elmt`` occurs in ``source``.

    The search is restarted one position after each hit, so overlapping
    occurrences are all reported (``find_repeat("aaa", "aa") == [0, 1]``).

    :param source: any sequence whose ``index`` method accepts
        ``(value, start, stop)``, e.g. a ``str`` or a ``list``.
    :param elmt: the value (or substring) to look for.
    :return: list of matching start indices in ascending order; empty when
        ``elmt`` never occurs.
    """
    positions = []
    start = 0
    end = len(source)
    while start < end:
        try:
            hit = source.index(elmt, start, end)
        except ValueError:
            # No further occurrence in the remaining slice.
            break
        positions.append(hit)
        start = hit + 1
    return positions


def ocr_key_words(rect, type_score_dict, margin=30):
    """Attach the OCR characters that fall inside a detected box to its record.

    A character is kept when its coordinate tuple lies inside the box
    ``type_score_dict['type_box']`` (read as xmin, ymin, xmax, ymax) enlarged
    by ``margin`` on every side; the comparisons are strict.

    :param rect: OCR result mapping; ``rect['chars']`` and
        ``rect['coordinates']`` are read in lockstep, and each coordinate
        entry is indexed ``[0]..[3]`` against xmin, ymin, xmax, ymax.
    :param type_score_dict: model result for one box; must contain
        ``'type_box'`` with at least four entries.
    :param margin: tolerance added around the box on each side
        (defaults to 30, the previously hard-coded value).
    :return: the same ``type_score_dict`` object, mutated in place with a new
        ``'words'`` key holding the list of matching characters.
    """
    box = type_score_dict['type_box']
    xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]

    # zip() stops at the shorter of the two lists, so a chars/coordinates
    # length mismatch no longer raises IndexError.
    words = [
        char
        for char, coord in zip(rect['chars'], rect['coordinates'])
        if (coord[0] - xmin > -margin
            and coord[1] - ymin > -margin
            and coord[2] - xmax < margin
            and coord[3] - ymax < margin)
    ]

    type_score_dict['words'] = words
    return type_score_dict


def key_words(type_score_dict_ocr):  # Parse the OCR result using keyword matching
    # NOTE(review): only the prologue of this function sat on the replaced
    # source line; its statements are reproduced unchanged and the remainder
    # of the function continues on the following source lines.
    total_score = 0
    volume_score = 0
    volume_structure_item = 0
    volume_structure = []
    Score_structure_item = 0
    Score_structure = []
    all_structure = {}
    keyword_volume = re.compile(r'第卷|第部')
    keyword_type = re.compile(r'选择|非选择题|综合题|问答题|主观题|客观题|解答题|计算题')
    keyword_type1 = ['选择', '非选择题', '综合题', '问答题', '主观题', '客观题', '解答题','计算题']
    len_keyword_type1 = len(keyword_type1)
    keyword_item1 = re.compile(r'共分|合计分|总共分|总计分|小题满分|本小题|满分|共计|共.分|合计.分|总共.分|总计.分|小题满分.|本小题.|满分.|共计.')
    keyword_item2 = re.compile(r'每题分|每小题分|空分|每小题.分|每题.分|空.分|个分')  # the '分/题' form is not handled yet
    keyword_item3 = re.compile(r'共题|共小题|分小题|本题小题|共个小题|分为小题|分个小题|本大题共小题')
    keyword_item4 = ['分']
    keyword_item5 = re.compile(r'题|.|、')
    keyword_item6 = re.compile(r'分/题|分')
    if 'words' in type_score_dict_ocr.keys():
        ocr_1 = type_score_dict_ocr['words']
    else:
        return all_structure
s = ''.join((str(x) for x in ocr_1)) # 合并为一个字符串 if s.find('IV') != -1 or s.find('Ⅳ') != -1: s = s.replace('Ⅳ', '4') s = s.replace('IV', '4') elif s.find('III') != -1 or s.find('Ⅲ') != -1: s = s.replace('Ⅲ', '3') s = s.replace('III', '3') elif s.find('II') != -1 or s.find('Ⅱ') != -1: s = s.replace('Ⅱ', '2') s = s.replace('II', '2') elif s.find('VI') != -1 or s.find('Ⅵ') != -1: s = s.replace('Ⅵ', '6') s = s.replace('VI', '6') elif s.find('VII') != -1 or s.find('Ⅶ') != -1: s = s.replace('Ⅶ', '7') s = s.replace('VII', '7') elif s.find('VIII') != -1 or s.find('Ⅷ') != -1: s = s.replace('Ⅷ', '8') s = s.replace('VIII', '8') elif s.find('IX') != -1 or s.find('Ⅸ') != -1: s = s.replace('Ⅸ', '9') s = s.replace('IX', '9') elif s.find('X') != -1 or s.find('Ⅹ') != -1: s = s.replace('Ⅹ', '10') s = s.replace('X', '10') elif s.find('I') != -1 or s.find('Ⅰ') != -1: s = s.replace('Ⅰ', '1') s = s.replace('I', '1') elif s.find('V') != -1 or s.find('Ⅴ') != -1: s = s.replace('Ⅴ', '5') s = s.replace('V', '5') C_s = re.sub("[A-Za-z0-9\!\%\[\]\,\。]", "", s) # 提取汉字 E_s = ''.join(re.findall(r'[A-Za-z]', s)) # 提取英文字符 N_s = re.findall('\d+', s) # 提取阿拉伯数字 try: if len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (keyword_item6.search(C_s)): type_score_dict_ocr['item_N'] = -1 type_score_dict_ocr['item_total_score'] = int(N_s[0]) type_score_dict_ocr['item_count'] = -1 type_score_dict_ocr['item_score'] = -1 Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) all_structure = {'volume_structure': -1, 'Score_structure': Score_structure} elif len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (keyword_item5.search(C_s) or len(C_s) == 0): type_score_dict_ocr['item_N'] = int(N_s[0]) type_score_dict_ocr['item_total_score'] = -1 type_score_dict_ocr['item_count'] = -1 type_score_dict_ocr['item_score'] = -1 Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) all_structure = {'volume_structure': -1, 'Score_structure': 
Score_structure} elif N_s != []: if keyword_volume.search(C_s): ''' 对应试卷中存在分卷信息的情况,根据包含数字的个数分为5类,暂定包含信息的有效数字个数小于5,并处理小题分数和总分可能包含小数点的情况 暂定小题个数不包含小数 暂定总分数中不存在有意义的小数位 ''' if len(N_s) == 1: num_index = s.index(N_s[0]) num_infer = s[num_index - len(N_s[0])] num_back = s[num_index + len(N_s[0])] if num_back == '分': # 第卷/部*分 volume_score = int(N_s[0]) type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = -1 elif num_back == '卷' or num_back == '部': # 第*卷 volume_N = int(N_s[0]) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = -1 type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = -1 else: return all_structure elif len(N_s) == 2: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if keyword_item1.search(C_s): if keyword_item2.search(C_s): if num_back0 == '分' and num_back1 == '分': if N_s[0] < N_s[1]: # 第卷,每小题*分,共*分 volume_score = N_s[1] item_score = N_s[0] item_count = int(volume_score / item_score) else: # 第卷,共*分 ,每小题*分 volume_score = N_s[0] item_score = N_s[1] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure elif keyword_item3.search(C_s): if num_back1 == '分': # 第卷,共*小题,共*分 volume_score = N_s[1] item_count = N_s[0] item_score = volume_score / item_count type_score_dict_ocr['volume_N'] = -1 
type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '分': # 第卷,共*分 ,共*小题 volume_score = N_s[0] item_count = N_s[1] item_score = volume_score / item_count type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if (num_back1 == '卷' or num_back1 == '部') and num_back1 == '分': # 第*卷*分 volume_N = N_s[0] volume_score = N_s[1] type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = -1 elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分': # 第卷,共*.*分 volume_N = -1 volume_score = N_s[0] type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = -1 else: return all_structure else: if keyword_item2.search(C_s): if keyword_item3.search(C_s): if num_back1 == '分': # 第卷,共*小题,每小题*分 item_count = N_s[0] item_score = N_s[1] volume_score = item_score * item_count type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '分': # 第卷,每小题*分 ,共*小题 item_count = N_s[1] item_score = N_s[0] volume_score = item_count * item_score type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back1 == '分': # 第卷,每小题*.*分 volume_score = -1 item_count = -1 item_score = float(str(N_s[0]) + '.' 
+ str(N_s[1])) type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: return all_structure elif len(N_s) == 3: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] all_2 = find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer2 = s[num_index3 - len(N_s[2])] num_back2 = s[num_index3 + len(N_s[2])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if keyword_item1.search(C_s): if keyword_item2.search(C_s): if keyword_item3.search(C_s): if num_back0 == '分' and num_back1 == '分': if N_s[0] > N_s[1]: # 第卷,共*分,每题*分,共*题 volume_score = N_s[0] item_count = N_s[2] item_score = N_s[1] else: # 第卷,每题*分,共*分,共*题 volume_score = N_s[1] item_count = N_s[2] item_score = N_s[0] type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '分' and num_back2 == '分': if N_s[0] > N_s[2]: # 第卷,共*分,共*题,每题*分 volume_score = N_s[0] item_count = N_s[1] item_score = N_s[2] else: # 第卷,每题*分,共*题,共*分 volume_score = N_s[2] item_count = N_s[1] item_score = N_s[0] type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 
== '分' and num_back2 == '分': # 第卷,共*题,共*分,每题*分 if N_s[1] > N_s[2]: volume_score = N_s[1] item_count = N_s[0] item_score = N_s[2] else: volume_score = N_s[2] item_count = N_s[0] item_score = N_s[1] type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back1 == '分' and num_back2 == '分': # 第*卷,共*分,每题*分 / 第*卷,每题*分,共*分 volume_N = int(N_s[0]) if N_s[1] > N_s[2]: volume_score = N_s[1] item_score = N_s[2] item_count = int(volume_score / item_score) else: volume_score = N_s[2] item_score = N_s[1] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分' and num_back2 == '分': # 第卷,共*.*分,每题*分 / 第卷,每题*.*分,共*分 volume_N = -1 if N_s[0] > N_s[2]: volume_score = N_s[0] item_score = N_s[2] item_count = int(volume_score / item_score) else: volume_score = N_s[2] item_score = float(str(N_s[0]) + '.' + str(N_s[1])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '.' and num_infer2 == '.' and num_back0 == '分' and num_back2 == '分': # 第卷,共*分,每题*.*分 / 第卷,每题*分,共*.*分 volume_N = -1 if N_s[0] > N_s[1]: volume_score = N_s[0] item_score = float(str(N_s[1]) + '.' 
+ str(N_s[2])) item_count = int(volume_score / item_score) else: volume_score = N_s[1] item_score = N_s[0] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if keyword_item3.search(C_s): if (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '题' or num_back1 == '小') and num_back2 == '分': # 第*卷,共*题,共*分 volume_N = N_s[0] volume_score = N_s[2] item_count = N_s[1] item_score = volume_score / item_count type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第*卷,共*分,共*题 volume_N = N_s[0] volume_score = N_s[1] item_count = N_s[2] item_score = volume_score / item_count type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '.' and num_infer1 == '.' and (num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第卷,共*.*分,共*题 volume_N = -1 volume_score = N_s[0] item_count = N_s[2] item_score = volume_score / item_count type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '.' and num_infer2 == '.' 
and (num_back0 == '题' or num_back0 == '小') and num_back2 == '分': # 第卷,共*题,共*.*分 volume_N = -1 volume_score = N_s[1] item_count = N_s[0] item_score = volume_score / item_count type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back2 == '分': # 第*卷,共*.*分 volume_N = int(N_s[0]) volume_score = N_s[1] item_score = -1 item_count = -1 type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if keyword_item2.search(C_s): if keyword_item3.search(C_s): if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,共*题,每题*分 volume_N = N_s[0] item_count = N_s[1] item_score = N_s[2] volume_score = item_count * item_score type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分': # 第*卷,每题*分,共*题 volume_N = N_s[0] item_count = N_s[2] item_score = N_s[1] volume_score = item_count * item_score type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分': # 第卷,每题*.*分,共*题 volume_N = -1 item_score = float(str(N_s[0]) + '.' 
+ str(N_s[1])) item_count = N_s[2] volume_score = item_score * item_count type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第卷,共*题,每题*.*分 volume_N = -1 item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = N_s[0] volume_score = item_score * item_count type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back2 == '分': # 第*卷,小题*.*分 volume_N = int(N_s[0]) volume_score = -1 item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = -1 type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure elif len(N_s) == 4: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] all_2 = find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer2 = s[num_index3 - len(N_s[2])] num_back2 = s[num_index3 + len(N_s[2])] all_3 = find_repeat(s, N_s[3]) temp3 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[3]: temp3 = temp3 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[3]: temp3 = temp3 + 1 for kk in range(len(N_s[2])): if N_s[2][kk] == N_s[3]: temp3 = temp3 + 1 num_index4 = 
all_3[temp3] num_infer3 = s[num_index4 - len(N_s[3])] num_back3 = s[num_index4 + len(N_s[3])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if isinstance(N_s[3], str): N_s[3] = int(N_s[3]) if keyword_item1.search(C_s): if keyword_item2.search(C_s): if keyword_item3.search(C_s): if (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分' and num_back3 == '分': if N_s[3] > N_s[1]: # 第*卷,每题*分,共*题,共*分 volume_N = N_s[0] volume_score = N_s[3] item_count = N_s[2] item_score = N_s[1] else: # 第*卷,共*分,共*题,每题*分 volume_N = N_s[0] volume_score = N_s[1] item_count = N_s[2] item_score = N_s[3] type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分' and num_back3 == '分': if N_s[3] > N_s[2]: # 第*卷,共*题,每题*分,共*分 volume_N = N_s[0] volume_score = N_s[3] item_count = N_s[1] item_score = N_s[2] else: # 第*卷,共*题,共*分 , 每题*分 volume_N = N_s[0] volume_score = N_s[2] item_count = N_s[1] item_score = N_s[3] type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分' and num_back1 == '分': if N_s[1] > N_s[2]: # 第*卷,共*分,每题*分,共*题 volume_N = N_s[0] volume_score = N_s[1] item_count = N_s[3] item_score = N_s[2] else: # 第*卷,每题*分,共*分,共*题 volume_N = N_s[0] volume_score = N_s[2] item_count = N_s[3] item_score = N_s[1] type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '.' 
and num_infer1 == '.') and num_back1 == '分' and num_back2 == '分': # 第卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题 volume_N = -1 if int(N_s[0]) > int(N_s[2]): volume_score = N_s[0] item_score = N_s[2] item_count = int(volume_score / item_score) else: volume_score = N_s[2] item_score = float(str(N_s[0]) + '.' + str(N_s[1])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back1 == '.' and num_infer2 == '.') and num_back0 == '分' and num_back2 == '分': # 第卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题 volume_N = -1 if int(N_s[0]) > int(N_s[1]): volume_score = N_s[0] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = int(volume_score / item_score) else: volume_score = N_s[1] item_score = N_s[0] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分 volume_N = -1 if N_s[1] > N_s[3]: volume_score = N_s[1] item_score = N_s[3] item_count = int(volume_score / item_score) else: volume_score = N_s[4] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分 volume_N = -1 if int(N_s[1]) > int(N_s[2]): volume_score = N_s[1] item_score = float(str(N_s[2]) + '.' 
+ str(N_s[3])) item_count = int(volume_score / item_score) else: volume_score = N_s[2] item_score = N_s[1] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*.*分,共*题,每题*分/第卷,每题*.*分,共*题,共*分 volume_N = -1 if int(N_s[0]) > int(N_s[3]): volume_score = N_s[0] item_score = N_s[3] item_count = int(volume_score / item_score) else: volume_score = N_s[3] item_score = float(str(N_s[0]) + '.' + str(N_s[1])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back2 == '.' and num_infer3 == '.') and num_back0 == '分' and num_back3 == '分': # 第卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分 volume_N = -1 if int(N_s[0]) > int(N_s[2]): volume_score = N_s[0] item_score = N_s[2] + '.' + N_s[3] item_count = int(volume_score / item_score) else: volume_score = N_s[2] item_score = N_s[0] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: if (num_back0 == '卷' or num_back0 == '部') and num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分/第*卷,共*.*分,每题*分 volume_N = int(N_s[0]) if N_s[1] > N_s[3]: volume_score = N_s[1] item_score = N_s[3] item_count = int(volume_score / item_score) else: volume_score = N_s[3] item_score = float(str(N_s[1]) + '.' 
+ str(N_s[2])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分/第*卷,共*分,每题*.*分 volume_N = int(N_s[0]) if int(N_s[1]) > int(N_s[2]): volume_score = N_s[1] item_score = float(str(N_s[2]) + '.' + str(N_s[3])) item_count = int(volume_score / item_score) else: volume_score = N_s[2] item_score = N_s[1] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分' and num_back2 == '.' and num_infer3 == '.' and num_back3 == '分': # 第卷,每题*.*分,共*.*分/第卷,共*.*分,每题*.*分 volume_N = -1 if N_s[0] > N_s[2]: volume_score = float(str(N_s[2]) + '.' + str(N_s[3])) item_score = N_s[3] item_count = int(volume_score / item_score) else: volume_score = N_s[3] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分' and num_back2 == '分': if N_s[3] > N_s[1]: # 第*卷,每题*.*分,共*分 volume_N = N_s[0] volume_score = N_s[3] item_count = -1 item_score = float(str(N_s[1]) + '.' 
+ str(N_s[2])) else: # 第*卷,共*.*分,每题*分 volume_N = N_s[0] volume_score = N_s[1] item_count = -1 item_score = N_s[3] type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if keyword_item3.search(C_s): if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,共*.*分,*小题 volume_N = N_s[0] volume_score = N_s[1] item_count = N_s[3] item_score = -1 type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back3 == '分': # 第*卷,*小题 ,共*.*分 volume_N = N_s[0] volume_score = N_s[2] item_count = N_s[1] item_score = -1 type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: return all_structure else: if keyword_item2.search(C_s): if keyword_item3.search(C_s): if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,小题*.*分,*小题 volume_N = N_s[0] volume_score = -1 item_count = N_s[3] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and num_back3 == '分': # 第*卷,*小题 ,小题*.*分 volume_N = N_s[0] volume_score = -1 item_count = N_s[1] item_score = float(str(N_s[2]) + '.' 
+ str(N_s[3])) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: return all_structure else: return all_structure elif len(N_s) == 5: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] all_2 = find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer2 = s[num_index3 - len(N_s[2])] num_back2 = s[num_index3 + len(N_s[2])] all_3 = find_repeat(s, N_s[3]) temp3 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[3]: temp3 = temp3 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[3]: temp3 = temp3 + 1 for kk in range(len(N_s[2])): if N_s[2][kk] == N_s[3]: temp3 = temp3 + 1 num_index4 = all_3[temp3] num_infer3 = s[num_index4 - len(N_s[3])] num_back3 = s[num_index4 + len(N_s[3])] all_4 = find_repeat(s, N_s[4]) temp4 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[4]: temp4 = temp4 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[4]: temp4 = temp4 + 1 for kk in range(len(N_s[2])): if N_s[2][kk] == N_s[4]: temp4 = temp4 + 1 for ll in range(len(N_s[3])): if N_s[3][ll] == N_s[4]: temp4 = temp4 + 1 num_index5 = all_4[temp4] num_infer4 = s[num_index5 - len(N_s[4])] num_back4 = s[num_index5 + len(N_s[4])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if isinstance(N_s[3], str): N_s[3] = int(N_s[3]) if isinstance(N_s[4], str): N_s[4] = int(N_s[4]) if 
keyword_item1.search(C_s): if keyword_item2.search(C_s): if keyword_item3.search(C_s): if (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题 volume_N = N_s[0] if N_s[1] > N_s[3]: volume_score = N_s[1] item_score = N_s[3] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: volume_score = N_s[3] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题 volume_N = N_s[0] if N_s[1] > N_s[2]: volume_score = N_s[1] item_score = float(str(N_s[2]) + '.' + str(N_s[3])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: volume_score = N_s[2] item_score = N_s[1] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '.' 
and num_infer3 == '.') and num_back3 == '分' and num_back4 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分 volume_N = N_s[0] if N_s[2] > N_s[4]: volume_score = N_s[2] item_score = N_s[4] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: volume_score = N_s[4] item_score = float(str(N_s[2]) + '.' + str(N_s[3])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分 volume_N = N_s[0] if N_s[2] > N_s[3]: volume_score = N_s[2] item_score = float(str(N_s[3]) + '.' + str(N_s[4])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: volume_score = N_s[3] item_score = N_s[2] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '.' 
and num_infer2 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*.*分,共*题,每题*分/第*卷,每题*.*分,共*题,共*分 volume_N = N_s[0] if N_s[1] > N_s[4]: volume_score = N_s[1] item_score = N_s[4] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: volume_score = N_s[4] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_back0 == '卷' or num_back0 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back1 == '分' and num_back4 == '分': # 第*卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分 volume_N = N_s[0] if N_s[1] > N_s[3]: volume_score = N_s[0] item_score = float(str(N_s[4]) + '.' 
+ str(N_s[4])) item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: volume_score = N_s[3] item_score = N_s[1] item_count = int(volume_score / item_score) type_score_dict_ocr['volume_N'] = volume_N type_score_dict_ocr['volume_total_score'] = volume_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: return all_structure else: return all_structure else: return all_structure if 'volume_N' not in type_score_dict_ocr.keys(): all_structure = {'volume_structure': -1, 'Score_structure': -1} return all_structure else: for i in range(len_keyword_type1): if C_s.find(keyword_type1[i]) != -1 and C_s.find('非') != -1: type_score_dict_ocr['keyword_type'] = keyword_type1[1] break elif C_s.find(keyword_type1[0]) != -1: type_score_dict_ocr['keyword_type'] = keyword_type1[0] Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) break elif C_s.find(keyword_type1[i]) != -1: type_score_dict_ocr['keyword_type'] = keyword_type1[i] break elif i == len_keyword_type1 - 1: type_score_dict_ocr['keyword_type'] = keyword_type1[0] Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) break volume_structure_item = type_score_dict_ocr volume_structure.append(volume_structure_item) if Score_structure == []: all_structure = {'volume_structure': volume_structure, 'Score_structure': -1} else: all_structure = {'volume_structure': volume_structure, 'Score_structure': Score_structure} else: ''' 对应试卷中不存在分卷信息的情况,根据包含数字的个数分为4类,暂定包含信息的有效数字个数小于4,并处理小题分数和总分可能包含小数点的情况 暂定小题个数不包含小数 暂定总分数中不存在有意义的小数位 ''' if keyword_item1.search(C_s): if len(N_s) == 1: num_index = s.index(N_s[0]) num_infer = s[num_index - len(N_s[0])] num_back = s[num_index + len(N_s[0])] if isinstance(N_s[0], 
str): N_s[0] = int(N_s[0]) if num_back == '分': # 选择题/主观题,共*分 item_total_score = N_s[0] type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = int(item_total_score) type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = -1 elif len(N_s) == 2: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if keyword_item2.search(C_s): if N_s[0] > N_s[1]: # 选择题/主观题/客观题,共*分,每题*分 item_total_score = int(N_s[0]) item_count = int(N_s[0] / N_s[1]) item_score = N_s[1] else: # 选择题/主观题/客观题,每题*分,共*分 item_total_score = int(N_s[1]) item_count = int(N_s[1] / N_s[0]) item_score = N_s[0] type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: if keyword_item3.search(C_s): if num_back0 == '分': # 选择题/主观题,共*分,共*题 item_total_score = N_s[0] item_count = N_s[1] item_score = N_s[0] / N_s[1] type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '分': # 选择题/主观题,共*题,共*分 item_total_score = N_s[1] item_count = N_s[0] item_score = N_s[1] / N_s[0] type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back0 == '.' and num_infer1 == '.' 
and num_back1 == '分': # *.*分 item_N = -1 item_total_score = N_s[0] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = -1 elif num_back1 == '分': # *,*分 item_N = N_s[0] item_total_score = int(N_s[1]) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = -1 else: return all_structure elif len(N_s) == 3: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] all_2 = find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer2 = s[num_index3 - len(N_s[2])] num_back2 = s[num_index3 + len(N_s[2])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if keyword_item2.search(C_s): if keyword_item3.search(C_s): if num_back0 == '分' and num_back2 == '分': if N_s[2] > N_s[0]: # 每题*分,共*题,共*分 item_total_score = N_s[2] item_count = N_s[1] item_score = N_s[0] else: # 共*分,共*题,每题*分 item_total_score = N_s[0] item_count = N_s[1] item_score = N_s[2] if item_total_score < item_count * item_score: item_total_score = item_count * item_score type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif (num_infer0 == '题' or num_infer0 == '空') and num_back0 == '分' and num_back1 == '分': if 
N_s[1] > N_s[0]: # 每题*分,共*分 ,共*题 item_total_score = N_s[1] item_count = N_s[2] item_score = N_s[0] else: # 共*分,每题*分 ,共*题 item_total_score = N_s[0] item_count = N_s[2] item_score = N_s[1] if item_total_score < item_count * item_score: item_total_score = item_count * item_score type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '分' and num_back2 == '分': if N_s[2] > N_s[1]: # 共*题,每题*分,共*分 item_total_score = N_s[2] item_count = N_s[0] item_score = N_s[1] else: # 共*题,共*分,每题*分 item_total_score = N_s[1] item_count = N_s[0] item_score = N_s[2] if item_total_score < item_count * item_score: item_total_score = item_count * item_score type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back0 != '.' and num_back1 == '分' and num_back2 == '分': if N_s[1] > N_s[2]: # *,共*分,每题*分 item_N = N_s[0] item_total_score = N_s[1] item_count = int(N_s[1] / N_s[2]) item_score = N_s[2] else: # *,每题*分 ,共*分 item_N = N_s[0] item_total_score = N_s[2] item_count = int(N_s[2] / N_s[1]) item_score = N_s[1] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '.' and num_infer1 == '.' 
and num_back2 == '分' and num_back1 == '分': # 每题*.*分,共*分/共*.*分,每题*分 item_N = -1 if int(N_s[0]) > int(N_s[2]): item_total_score = N_s[0] item_score = N_s[2] item_count = int(item_total_score/item_score) else: item_total_score = N_s[2] item_score = float(str(N_s[0])+'.'+str(N_s[1])) item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '.' and num_infer2 == '.' and num_back0 == '分' and num_back2 == '分': # 每题*分,共*.*分/共*分,每题*.*分 item_N = -1 if int(N_s[0]) > int(N_s[1]): item_total_score = N_s[0] item_score = float(str(N_s[1])+'.'+str(N_s[2])) item_count = int(item_total_score/item_score) else: item_total_score = N_s[1] item_score = N_s[0] item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if keyword_item3.search(C_s): if num_back2 == '分' and num_infer2 =='.' 
and num_back1 =='.': # *小题,共*.*分, item_N = -1 item_total_score = N_s[1] item_count = N_s[0] item_score = N_s[1]/N_s[0] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '分' and num_infer1 =='.'and num_back0 =='.': # 共*.*分,*小题 item_N = -1 item_total_score = N_s[0] item_count = N_s[2] item_score = N_s[0]/N_s[2] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back2 == '分' and num_infer2 !='.': # *,*小题,共*分, item_N = N_s[0] item_total_score = N_s[2] item_count = N_s[1] item_score = N_s[2]/N_s[1] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '分' and num_infer1 !='.': # *,共*分,共*小题 item_N = N_s[0] item_total_score = N_s[1] item_count = N_s[2] item_score = N_s[1] / N_s[2] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back2 == '分' and num_infer2 =='.' 
and num_back1 =='.': # *,共*.*分, item_N = N_s[0] item_total_score = N_s[1] item_count = -1 item_score = -1 type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back2 == '分': item_total_score = N_s[1] item_N = -1 item_count = -1 item_score = -1 type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure elif len(N_s) == 4: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] all_2 = find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer2 = s[num_index3 - len(N_s[2])] num_back2 = s[num_index3 + len(N_s[2])] all_3 = find_repeat(s, N_s[3]) temp3 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[3]: temp3 = temp3 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[3]: temp3 = temp3 + 1 for kk in range(len(N_s[2])): if N_s[2][kk] == N_s[3]: temp3 = temp3 + 1 num_index4 = all_3[temp3] num_infer3 = s[num_index4 - len(N_s[3])] num_back3 = s[num_index4 + len(N_s[3])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if isinstance(N_s[3], str): N_s[3] = int(N_s[3]) if keyword_item2.search(C_s): if keyword_item3.search(C_s): if num_back1 == '分' and num_back3 == '分': if N_s[3] > N_s[1]: # *,每题*分,共*题,共*分 item_N = N_s[0] 
item_total_score = N_s[3] item_count = N_s[2] item_score = N_s[1] else: # *,共*分,共*题,每题*分 item_N = N_s[0] item_total_score = N_s[1] item_count = N_s[2] item_score = N_s[3] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr[ 'volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '分' and num_back2 == '分': if N_s[2] > N_s[1]: # *,每题*分,共*分,共*题 item_N = N_s[0] item_total_score = N_s[2] item_count = N_s[3] item_score = N_s[1] else: # *,共*分,每题*分,共*题 item_N = N_s[0] item_total_score = N_s[1] item_count = N_s[3] item_score = N_s[2] type_score_dict_ocr['volume_N'] = item_N if item_total_score < item_count * item_score: item_total_score = item_count * item_score type_score_dict_ocr[ 'volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back2 == '分' and num_back3 == '分': if N_s[3] > N_s[2]: # *,共*题,每题*分,共*分 item_N = N_s[0] item_total_score = N_s[3] item_count = N_s[1] item_score = N_s[2] else: item_N = N_s[0] item_total_score = N_s[2] item_count = N_s[1] item_score = N_s[3] if item_total_score < item_count * item_score: item_total_score = item_count * item_score type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr[ 'volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分'and num_back3 == '分' : # 共*.*分,共*题, 每题*分/每题*.*分,共*题,共*分 item_N = -1 if N_s[0] > N_s[3]: item_total_score = N_s[0] item_score = N_s[3] item_count = int(item_total_score / item_score) else: item_total_score = N_s[3] item_score = float(str(N_s[0]) + '.' 
+ str(N_s[1])) item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr[ 'item_total_score'] = item_total_score type_score_dict_ocr['item_count'] = item_count type_score_dict_ocr['item_score'] = item_score elif num_back2 == '.' and num_infer3 == '.' and num_back0 == '分'and num_back3 == '分': # 共*分,共*题, 每题*.*分/每题*分,共*题,共*.*分 item_N = -1 if N_s[0] > N_s[2]: item_total_score = N_s[0] item_score = float(str(N_s[2]) + '.' + str(N_s[3])) item_count = int(item_total_score / item_score) else: item_total_score = N_s[2] item_score = N_s[0] item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr[ 'item_total_score'] = item_total_score type_score_dict_ocr['item_count'] = item_count type_score_dict_ocr['item_score'] = item_score elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back3 == '分': # 共*题,共*.*分,每题*分/共*题,每题*.*分,共*分 item_N = -1 if N_s[1] > N_s[3]: item_total_score = N_s[1] item_score = N_s[3] item_count = int(item_total_score / item_score) else: item_total_score = N_s[3] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back2 == '.' and num_infer3 == '.' and num_back3 == '分'and num_back1 == '分' : # 共*题,共*分,每题*.*分/共*题,每题*分,共*.*分 item_N = -1 if N_s[1] > N_s[2]: item_total_score = N_s[1] item_score = float(str(N_s[2]) + '.' 
+ str(N_s[3])) item_count = int(item_total_score / item_score) else: item_total_score = N_s[2] item_score = N_s[1] item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分'and num_back2 == '分' : # 每题*.*分,共*分,共*题/共*.*分,每题*分,共*题 item_N = -1 if N_s[0] > N_s[2]: item_total_score = N_s[0] item_score = N_s[2] item_count = int(item_total_score / item_score) else: item_total_score = N_s[2] item_score = float(str(N_s[0]) + '.' + str(N_s[1])) item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back0 == '分' : # 每题*分,共*.*分,共*题/共*分,每题*.*分,共*题 item_N = -1 if N_s[0] > N_s[1]: item_total_score = N_s[0] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = int(item_total_score / item_score) else: item_total_score = N_s[1] item_score = N_s[0] item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back3 == '分' : # *,共*.*分, 每题*分/每题*.*分,共*分 item_N = N_s[0] if N_s[1] > N_s[3]: item_total_score = N_s[1] item_score = N_s[3] item_count = int(item_total_score / item_score) else: item_total_score = N_s[3] item_score = float(str(N_s[1]) + '.' 
+ str(N_s[2])) item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr[ 'item_total_score'] = item_total_score type_score_dict_ocr['item_count'] = item_count type_score_dict_ocr['item_score'] = item_score elif num_back2== '.' and num_infer3== '.' and num_back1 == '分'and num_back3 == '分' : # *,共*分, 每题*.*分/*,每题*分,共*.*分 item_N = int(N_s[0]) if N_s[1] > N_s[2]: item_total_score = N_s[1] item_score = float(str(N_s[2]) + '.' + str(N_s[3])) item_count = int(item_total_score / item_score) else: item_total_score = N_s[2] item_score = N_s[1] item_count = int(item_total_score / item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr[ 'item_total_score'] = item_total_score type_score_dict_ocr['item_count'] = item_count type_score_dict_ocr['item_score'] = item_score else: return all_structure else: if keyword_item3.search(C_s): if num_back3 == '分': # *,*小题,共*.*分 item_total_score = N_s[2] item_N = N_s[0] item_count = N_s[1] item_score = item_total_score / item_count type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back2 == '分': # *,共*.*分,*小题 item_total_score = N_s[1] item_N = N_s[0] item_count = N_s[3] item_score = item_total_score / item_count type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: return all_structure elif len(N_s) == 5: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] all_2 = 
find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer2 = s[num_index3 - len(N_s[2])] num_back2 = s[num_index3 + len(N_s[2])] all_3 = find_repeat(s, N_s[3]) temp3 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[3]: temp3 = temp3 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[3]: temp3 = temp3 + 1 for kk in range(len(N_s[2])): if N_s[2][kk] == N_s[3]: temp3 = temp3 + 1 num_index4 = all_3[temp3] num_infer3 = s[num_index4 - len(N_s[3])] num_back3 = s[num_index4 + len(N_s[3])] all_4 = find_repeat(s, N_s[4]) temp4 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[4]: temp4 = temp4 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[4]: temp4 = temp4 + 1 for kk in range(len(N_s[2])): if N_s[2][kk] == N_s[4]: temp4 = temp4 + 1 for ll in range(len(N_s[3])): if N_s[3][ll] == N_s[4]: temp4 = temp4 + 1 num_index5 = all_4[temp4] num_infer4 = s[num_index5 - len(N_s[4])] num_back4 = s[num_index5 + len(N_s[4])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if isinstance(N_s[3], str): N_s[3] = int(N_s[3]) if isinstance(N_s[4], str): N_s[4] = int(N_s[4]) if keyword_item2.search(C_s): if keyword_item3.search(C_s): if num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back3 == '分' : # *,每题*.*分,共*分,*小题/*,共*.*分,每题*分,共*小题 item_N = N_s[0] if N_s[1] > N_s[3]: item_total_score = N_s[1] item_score = N_s[3] item_count = N_s[4] else: item_total_score = N_s[3] item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_count = N_s[4] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back2== '.' and num_infer3== '.' 
and num_back1 == '分'and num_back3 == '分' : # *,每题*分,共*.*分,*小题/*,共*分,每题*.*分,共*小题 item_N = N_s[0] if N_s[1] > N_s[2]: item_total_score = N_s[1] item_score = float(str(N_s[2]) + '.' + str(N_s[3])) item_count = N_s[4] else: item_total_score = N_s[2] item_score = N_s[1] item_count = N_s[4] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back4 == '分' : # *,*小题,每题*.*分,共*分/*,*小题,共*.*分,每题*分 item_N = N_s[0] if N_s[2] > N_s[4]: item_total_score = N_s[2] item_score = N_s[4] item_count = N_s[1] else: item_total_score = N_s[4] item_score = float(str(N_s[2]) + '.' + str(N_s[3])) item_count = N_s[1] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back3== '.' and num_infer4== '.' and num_back2 == '分'and num_back4 == '分' : # *,*小题,每题*分,共*.*分/*,*小题,共*分,每题*.*分 item_N = N_s[0] if N_s[2] > N_s[3]: item_total_score = N_s[2] item_score = float(str(N_s[3]) + '.' + str(N_s[3])) item_count = N_s[1] else: item_total_score = N_s[3] item_score = N_s[2] item_count = N_s[1] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back4 == '分' : # *,每题*.*分,*小题,共*分/*,共*.*分,*小题,每题*分 item_N = N_s[0] if N_s[1] > N_s[4]: item_total_score = N_s[1] item_score = N_s[4] item_count = N_s[3] else: item_total_score = N_s[4] item_score = float(str(N_s[1]) + '.' 
+ str(N_s[2])) item_count = N_s[3] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back3== '.' and num_infer4== '.' and num_back1 == '分'and num_back4 == '分' : # *,每题*分,*小题,共*.*分/*,共*分,*小题,每题*.*分 item_N = N_s[0] if N_s[1] > N_s[3]: item_total_score = N_s[1] item_score = float(str(N_s[3]) + '.' + str(N_s[4])) item_count = N_s[2] else: item_total_score = N_s[3] item_score = N_s[1] item_count = N_s[2] type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: # 暂定len=5时不判断不存在题号的情况 return all_structure else: # 暂定len=5时不判断不存在题目个数的情况 return all_structure else: # 暂定len=5时不判断不存在小项分数的情况 return all_structure else: return all_structure else: if keyword_item2.search(C_s): if len(N_s) == 1: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if num_back0 == '分': # 每题*分 item_score = N_s[0] type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = -1 type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = item_score else: return all_structure elif len(N_s) == 2: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if keyword_item3.search(C_s): if num_back1 == '分': # 共*题,每题*分 item_total_score = N_s[0] * N_s[1] item_count = N_s[0] item_score = N_s[1] 
type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back0 == '分': # 每题*分,共*题 item_total_score = int(N_s[0]) * int(N_s[1]) item_count = int(N_s[1]) item_score = int(N_s[0]) type_score_dict_ocr['volume_N'] = -1 type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: if num_back1 == '分' and num_back0 == '.' and num_infer1 == '.': # *.*分 item_N = -1 item_score = float(str(N_s[0])+'.'+str(N_s[1])) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = -1 type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '分': # *,*分 item_N = int(N_s[0]) item_score = int(N_s[1]) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = -1 type_score_dict_ocr['volume_count'] = -1 type_score_dict_ocr['volume_score'] = item_score else: return all_structure elif len(N_s) == 3: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] all_2 = find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer2 = s[num_index3 - len(N_s[2])] num_back2 = s[num_index3 + len(N_s[2])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if num_back2 == '分' and (num_back1 == '题' or num_back1 == '小' 
or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*分 item_N = int(N_s[0]) item_total_score = int(N_s[1]) * int(N_s[2]) item_count = int(N_s[1]) item_score = int(N_s[2]) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '分' and (num_back2 == '题' or num_back2 == '小' or num_back2 == '空') and num_back0 != '分': # *,每题*分,共*题 item_N = int(N_s[0]) item_total_score = int(N_s[1]) * int(N_s[2]) item_count = int(N_s[2]) item_score = int(N_s[1]) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_infer2 == '.' and num_back2 == '分' and num_back1 == '.': # 共*题,每题*.*分 item_N = -1 item_count = int(N_s[0]) item_score = float(str(N_s[1])+'.'+str(N_s[2])) item_total_score = int(item_count * item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_infer1 == '.' and num_back1 == '分' and num_back0 == '.' : # 每题*.*分,共*题 item_N = -1 item_count = int(N_s[2]) item_score = float(str(N_s[0]) + '.' 
+ str(N_s[1])) item_total_score = int(item_count * item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back2 == '分': # * * ,每题*分 item_N = -1 item_count = -1 item_score = -1 item_total_score = int(N_s[2]) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure elif len(N_s) == 4: num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] all_2 = find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer2 = s[num_index3 - len(N_s[2])] num_back2 = s[num_index3 + len(N_s[2])] all_3 = find_repeat(s, N_s[3]) temp3 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[3]: temp3 = temp3 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[3]: temp3 = temp3 + 1 num_index4 = all_3[temp3] num_infer3 = s[num_index4 - len(N_s[3])] num_back3 = s[num_index4 + len(N_s[3])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if isinstance(N_s[3], str): N_s[3] = int(N_s[3]) if num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and (num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*.*分 item_N = int(N_s[0]) item_count = int(N_s[1]) item_score = float(str(N_s[2]) + '.' 
+ str(N_s[3])) item_total_score = int(item_count * item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and (num_back3 == '题' or num_back3 == '小' or num_back3 == '空') and num_back0 != '分': # *,每题*.*分,共*题 item_N = int(N_s[0]) item_count = int(N_s[3]) item_score = float(str(N_s[1]) + '.' + str(N_s[2])) item_total_score = int(item_count * item_score) type_score_dict_ocr['volume_N'] = item_N type_score_dict_ocr['volume_total_score'] = item_total_score type_score_dict_ocr['volume_count'] = item_count type_score_dict_ocr['volume_score'] = item_score else: return all_structure else: return all_structure else: if C_s.find(keyword_item4[0]) != -1: if len(N_s) == 2: # *,*分 num_index1 = s.index(N_s[0]) num_infer0 = s[num_index1 - len(N_s[0])] num_back0 = s[num_index1 + len(N_s[0])] if num_infer0 == '( ' or num_back0 == ')': return all_structure else: all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer1 = s[num_index2 - len(N_s[1])] num_back1 = s[num_index2 + len(N_s[1])] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if int(N_s[0]) > 1000: item_N =0 item_N1 = int(N_s[0][-4] + N_s[0][-3]) item_N2 = int(N_s[0][-2] + N_s[0][-1]) if item_N2 - item_N1 == 1: item_N = [0, 0] item_N = [item_N1, item_N2] elif item_N2 - item_N1 == 2: item_N = [0, 0, 0] item_N = [item_N1, item_N1 + 1, item_N2] elif item_N2 - item_N1 == 3: item_N = [0, 0, 0, 0] item_N = [item_N1, item_N1 + 1, item_N1 + 2, item_N2] type_score_dict_ocr['item_N'] = item_N item_total_score = int(N_s[1]) type_score_dict_ocr['item_total_score'] = item_total_score type_score_dict_ocr['item_count'] = -1 type_score_dict_ocr['item_score'] = -1 Score_structure_item 
= type_score_dict_ocr Score_structure.append(Score_structure_item) all_structure = {'volume_structure': -1, 'Score_structure': Score_structure} return all_structure else: item_N = int(N_s[0]) item_total_score = int(N_s[1]) type_score_dict_ocr['item_N'] = item_N type_score_dict_ocr['item_total_score'] = item_total_score type_score_dict_ocr['item_count'] = -1 type_score_dict_ocr['item_score'] = -1 Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) all_structure = {'volume_structure': -1, 'Score_structure': Score_structure} return all_structure elif len(N_s) == 3: # *,*分 num_index1 = s.index(N_s[0]) num_infer1 = s[num_index1 - len(N_s[0])] num_back1 = s[num_index1 + len(N_s[0])] all_1 = find_repeat(s, N_s[1]) temp1 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[1]: temp1 = temp1 + 1 num_index2 = all_1[temp1] num_infer2 = s[num_index2 - len(N_s[1])] num_back2 = s[num_index2 + len(N_s[1])] all_2 = find_repeat(s, N_s[2]) temp2 = 0 for ii in range(len(N_s[0])): if N_s[0][ii] == N_s[2]: temp2 = temp2 + 1 for jj in range(len(N_s[1])): if N_s[1][jj] == N_s[2]: temp2 = temp2 + 1 num_index3 = all_2[temp2] num_infer3 = s[num_index3 - len(N_s[2])] if num_index3 + len(N_s[2]) < len(s): num_back3 = s[num_index3 + len(N_s[2])] else: num_back3 = [] if isinstance(N_s[0], str): N_s[0] = int(N_s[0]) if isinstance(N_s[1], str): N_s[1] = int(N_s[1]) if isinstance(N_s[2], str): N_s[2] = int(N_s[2]) if num_back3 == '分' and num_infer3 == '.' 
and num_back2 == '分': # *,*.*分 item_N = N_s[0] item_total_score = N_s[1] type_score_dict_ocr['item_total_score'] = item_total_score type_score_dict_ocr['item_N'] = item_N type_score_dict_ocr['item_count'] = -1 type_score_dict_ocr['item_score'] = -1 Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) all_structure = {'volume_structure': -1, 'Score_structure': Score_structure} return all_structure elif num_back3 == '分': if int(N_s[1]) - int(N_s[0]) == 1: item_N = [0, 0] item_N = [int(N_s[0]), int(N_s[1])] elif int(N_s[1]) - int(N_s[0]) == 2: item_N = [0, 0, 0] item_N = [int(N_s[0]), int(N_s[0]) + 1, int(N_s[1])] elif int(N_s[1]) - int(N_s[0]) == 3: item_N = [0, 0, 0, 0] item_N = [int(N_s[0]), int(N_s[0]) + 1, int(N_s[0]) + 2, int(N_s[1])] else: return all_structure item_total_score = int(N_s[2]) type_score_dict_ocr['item_total_score'] = item_total_score type_score_dict_ocr['item_N'] = item_N type_score_dict_ocr['item_count'] = -1 type_score_dict_ocr['item_score'] = -1 Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) all_structure = {'volume_structure': -1, 'Score_structure': Score_structure} return all_structure elif len(N_s) == 1: num_index1 = s.index(N_s[0]) num_infer1 = s[num_index1 - len(N_s[0])] if num_index1 + len(N_s[0]) < len(s): num_back1 = s[num_index1 + len(N_s[0])] item_total_score = int(N_s[0]) type_score_dict_ocr['item_N'] = -1 type_score_dict_ocr['item_total_score'] = item_total_score type_score_dict_ocr['item_count'] = -1 type_score_dict_ocr['item_score'] = -1 if num_back1 == '分': # *分 Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) all_structure = {'volume_structure': -1, 'Score_structure': Score_structure} return all_structure else: return all_structure if 'volume_N' not in type_score_dict_ocr.keys(): all_structure = {'volume_structure': -1, 'Score_structure': -1} return all_structure else: for xxx in range(len_keyword_type1): if 
C_s.find(keyword_type1[1]) != -1: type_score_dict_ocr['keyword_type'] = keyword_type1[1] break elif C_s.find(keyword_type1[0]) != -1: type_score_dict_ocr['keyword_type'] = keyword_type1[0] Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) break elif C_s.find(keyword_type1[xxx]) != -1: type_score_dict_ocr['keyword_type'] = keyword_type1[xxx] break elif xxx == len_keyword_type1 - 1: type_score_dict_ocr['keyword_type'] = -2 type_score_dict_ocr['item_N'] = type_score_dict_ocr.pop('volume_N') type_score_dict_ocr['item_total_score'] = type_score_dict_ocr.pop('volume_total_score') type_score_dict_ocr['item_count'] = type_score_dict_ocr.pop('volume_count') type_score_dict_ocr['item_score'] = type_score_dict_ocr.pop('volume_score') Score_structure_item = type_score_dict_ocr Score_structure.append(Score_structure_item) break volume_structure_item = type_score_dict_ocr volume_structure.append(volume_structure_item) if Score_structure == []: all_structure = {'volume_structure': volume_structure, 'Score_structure': -1} elif Score_structure[0]['keyword_type'] != -2: all_structure = {'volume_structure': volume_structure, 'Score_structure': Score_structure} else: all_structure = {'volume_structure': -1, 'Score_structure': Score_structure} return all_structure except Exception as e: print('Skip ocr_key_words') return all_structure