|
@@ -1,10 +1,11 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
-# @Time : 2020/5/14 0014 13:37
|
|
|
+# @Time : 2020/5/22 0022 17:04
|
|
|
# @Author : LF
|
|
|
-# @FileName: ocr_key_words_2020_5_14.py
|
|
|
+# @FileName: ocr_key_words.py
|
|
|
# @Software: PyCharm
|
|
|
-
|
|
|
import re
|
|
|
+
|
|
|
+
|
|
|
def find_repeat(source, elmt): # 去重后重新定位数字索引
|
|
|
elmt_index = []
|
|
|
s_index = 0
|
|
@@ -19,7 +20,7 @@ def find_repeat(source, elmt): # 去重后重新定位数字索引
|
|
|
return elmt_index
|
|
|
|
|
|
|
|
|
-def ocr_key_words(rect,type_score_dict): # 将ocr识别得到的文字与模型得到的type_score对应
|
|
|
+def ocr_key_words(rect, type_score_dict): # 将ocr识别得到的文字与模型得到的type_score对应
|
|
|
'''
|
|
|
:param rect: OCR识别结果数组,格式:res = {'chars': [},'coordinates': [(),()},'words': []}
|
|
|
:param type_score_dict: 模型得到的type_score(与模型得到的边框相对应)
|
|
@@ -30,19 +31,20 @@ def ocr_key_words(rect,type_score_dict): # 将ocr识别得到的文字与模型
|
|
|
ymin = type_score_dict['type_box'][1]
|
|
|
xmax = type_score_dict['type_box'][2]
|
|
|
ymax = type_score_dict['type_box'][3]
|
|
|
- words=[]
|
|
|
+ words = []
|
|
|
|
|
|
for j in range(len_ocr):
|
|
|
- if rect['coordinates'][j][0] - xmin > -30 and rect['coordinates'][j][1] - ymin > -30 and rect['coordinates'][j][2] - xmax < 30 and rect['coordinates'][j][3] - ymax < 30:
|
|
|
+ if rect['coordinates'][j][0] - xmin > -30 and rect['coordinates'][j][1] - ymin > -30 and rect['coordinates'][j][
|
|
|
+ 2] - xmax < 30 and rect['coordinates'][j][3] - ymax < 30:
|
|
|
word = rect['chars'][j]
|
|
|
words.append(word)
|
|
|
- type_score_dict['words']= words
|
|
|
+ type_score_dict['words'] = words
|
|
|
type_score_dict_ocr = type_score_dict
|
|
|
|
|
|
return type_score_dict_ocr
|
|
|
|
|
|
|
|
|
-def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
+def key_words(type_score_dict_ocr, type_score_flag): # 根据OCR结果结合关键字解析
|
|
|
|
|
|
total_score = 0
|
|
|
volume_score = 0
|
|
@@ -53,52 +55,82 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
all_structure = {}
|
|
|
keyword_volume = re.compile(r'第卷|第部')
|
|
|
keyword_type = re.compile(r'选择|非选择题|综合题|问答题|主观题|客观题|解答题|计算题')
|
|
|
- keyword_type1 = ['选择', '非选择题', '综合题', '问答题', '主观题', '客观题', '解答题','计算题']
|
|
|
+ keyword_type1 = ['选择', '非选择题', '综合题', '问答题', '主观题', '客观题', '解答题', '计算题']
|
|
|
len_keyword_type1 = len(keyword_type1)
|
|
|
keyword_item1 = re.compile(r'共分|合计分|总共分|总计分|小题满分|本小题|满分|共计|共.分|合计.分|总共.分|总计.分|小题满分.|本小题.|满分.|共计.')
|
|
|
- keyword_item2 = re.compile(r'每题分|每小题分|空分|每小题.分|每题.分|空.分') # '分/题'暂未考虑
|
|
|
+ keyword_item2 = re.compile(r'每题分|每小题分|空分|每小题.分|每题.分|空.分|个分') # '分/题'暂未考虑
|
|
|
keyword_item3 = re.compile(r'共题|共小题|分小题|本题小题|共个小题|分为小题|分个小题|本大题共小题')
|
|
|
keyword_item4 = ['分']
|
|
|
- keyword_item5 = re.compile(r'分/题|题|.|、')
|
|
|
+ keyword_item5 = re.compile(r'题|.|、')
|
|
|
+ keyword_item6 = re.compile(r'分/题|分')
|
|
|
|
|
|
ocr_1 = type_score_dict_ocr['words']
|
|
|
s = ''.join((str(x) for x in ocr_1)) # 合并为一个字符串
|
|
|
- if s.find('IV') != -1 or s.find('Ⅳ') != -1:
|
|
|
+ if s.find('IV') != -1 or s.find('Ⅳ') != -1 or s.find('四') != -1:
|
|
|
s = s.replace('Ⅳ', '4')
|
|
|
s = s.replace('IV', '4')
|
|
|
- elif s.find('III') != -1 or s.find('Ⅲ') != -1:
|
|
|
+ s = s.replace('四', '4')
|
|
|
+ elif s.find('III') != -1 or s.find('Ⅲ') != -1 or s.find('三') != -1:
|
|
|
s = s.replace('Ⅲ', '3')
|
|
|
s = s.replace('III', '3')
|
|
|
- elif s.find('II') != -1 or s.find('Ⅱ') != -1:
|
|
|
+ s = s.replace('三', '3')
|
|
|
+ elif s.find('II') != -1 or s.find('Ⅱ') != -1 or s.find('二') != -1:
|
|
|
s = s.replace('Ⅱ', '2')
|
|
|
s = s.replace('II', '2')
|
|
|
- elif s.find('VI') != -1 or s.find('Ⅵ') != -1:
|
|
|
+ s = s.replace('二', '2')
|
|
|
+ elif s.find('VI') != -1 or s.find('Ⅵ') != -1 or s.find('六') != -1:
|
|
|
s = s.replace('Ⅵ', '6')
|
|
|
s = s.replace('VI', '6')
|
|
|
- elif s.find('VII') != -1 or s.find('Ⅶ') != -1:
|
|
|
+ s = s.replace('六', '6')
|
|
|
+ elif s.find('VII') != -1 or s.find('Ⅶ') != -1 or s.find('七') != -1:
|
|
|
s = s.replace('Ⅶ', '7')
|
|
|
s = s.replace('VII', '7')
|
|
|
- elif s.find('VIII') != -1 or s.find('Ⅷ') != -1:
|
|
|
+ s = s.replace('七', '7')
|
|
|
+ elif s.find('VIII') != -1 or s.find('Ⅷ') != -1 or s.find('八') != -1:
|
|
|
s = s.replace('Ⅷ', '8')
|
|
|
s = s.replace('VIII', '8')
|
|
|
- elif s.find('IX') != -1 or s.find('Ⅸ') != -1:
|
|
|
+ s = s.replace('八', '8')
|
|
|
+ elif s.find('IX') != -1 or s.find('Ⅸ') != -1 or s.find('九') != -1:
|
|
|
s = s.replace('Ⅸ', '9')
|
|
|
s = s.replace('IX', '9')
|
|
|
- elif s.find('X') != -1 or s.find('Ⅹ') != -1:
|
|
|
+ s = s.replace('九', '9')
|
|
|
+ elif s.find('X') != -1 or s.find('Ⅹ') != -1 or s.find('十') != -1:
|
|
|
s = s.replace('Ⅹ', '10')
|
|
|
s = s.replace('X', '10')
|
|
|
- elif s.find('I') != -1 or s.find('Ⅰ') != -1:
|
|
|
+ s = s.replace('十', '10')
|
|
|
+ elif s.find('I') != -1 or s.find('Ⅰ') != -1 or s.find('一') != -1:
|
|
|
s = s.replace('Ⅰ', '1')
|
|
|
s = s.replace('I', '1')
|
|
|
- elif s.find('V') != -1 or s.find('Ⅴ') != -1:
|
|
|
+ s = s.replace('一', '1')
|
|
|
+ elif s.find('V') != -1 or s.find('Ⅴ') != -1 or s.find('五') != -1:
|
|
|
s = s.replace('Ⅴ', '5')
|
|
|
s = s.replace('V', '5')
|
|
|
+ s = s.replace('五', '5')
|
|
|
|
|
|
C_s = re.sub("[A-Za-z0-9\!\%\[\]\,\。]", "", s) # 提取汉字
|
|
|
E_s = ''.join(re.findall(r'[A-Za-z]', s)) # 提取英文字符
|
|
|
N_s = re.findall('\d+', s) # 提取阿拉伯数字
|
|
|
|
|
|
- if len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (keyword_item5.search(C_s) or len(C_s) == 0):
|
|
|
+ if len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (keyword_item6.search(C_s)):
|
|
|
+ type_score_dict_ocr['item_N'] = -1
|
|
|
+ type_score_dict_ocr['item_total_score'] = int(N_s[0])
|
|
|
+ type_score_dict_ocr['item_count'] = -1
|
|
|
+ type_score_dict_ocr['item_score'] = -1
|
|
|
+ Score_structure_item = type_score_dict_ocr
|
|
|
+ Score_structure.append(Score_structure_item)
|
|
|
+ all_structure = {'volume_structure': -1,
|
|
|
+ 'Score_structure': Score_structure}
|
|
|
+ elif type_score_flag == 1 and len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (
|
|
|
+ keyword_item5.search(C_s) or len(C_s) == 0):
|
|
|
+ type_score_dict_ocr['item_N'] = int(N_s[0])
|
|
|
+ type_score_dict_ocr['item_total_score'] = -1
|
|
|
+ type_score_dict_ocr['item_count'] = -1
|
|
|
+ type_score_dict_ocr['item_score'] = -1
|
|
|
+ Score_structure_item = type_score_dict_ocr
|
|
|
+ Score_structure.append(Score_structure_item)
|
|
|
+ all_structure = {'volume_structure': -1,
|
|
|
+ 'Score_structure': Score_structure}
|
|
|
+ elif type_score_flag == 0 and len(N_s) == 1 and len(E_s) == 0 and (keyword_item5.search(C_s)):
|
|
|
type_score_dict_ocr['item_N'] = int(N_s[0])
|
|
|
type_score_dict_ocr['item_total_score'] = -1
|
|
|
type_score_dict_ocr['item_count'] = -1
|
|
@@ -151,7 +183,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
if keyword_item1.search(C_s):
|
|
|
if keyword_item2.search(C_s):
|
|
|
if num_back0 == '分' and num_back1 == '分':
|
|
|
- if N_s[0] < N_s[1]: # 第卷,每小题*分,共*分
|
|
|
+ if N_s[0] < N_s[1]: # 第卷,每小题*分,共*分
|
|
|
volume_score = N_s[1]
|
|
|
item_score = N_s[0]
|
|
|
item_count = int(volume_score / item_score)
|
|
@@ -272,7 +304,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
volume_score = N_s[0]
|
|
|
item_count = N_s[2]
|
|
|
item_score = N_s[1]
|
|
|
- else: # 第卷,每题*分,共*分,共*题
|
|
|
+ else: # 第卷,每题*分,共*分,共*题
|
|
|
volume_score = N_s[1]
|
|
|
item_count = N_s[2]
|
|
|
item_score = N_s[0]
|
|
@@ -281,11 +313,11 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
elif num_back0 == '分' and num_back2 == '分':
|
|
|
- if N_s[0] > N_s[2]: # 第卷,共*分,共*题,每题*分
|
|
|
+ if N_s[0] > N_s[2]: # 第卷,共*分,共*题,每题*分
|
|
|
volume_score = N_s[0]
|
|
|
item_count = N_s[1]
|
|
|
item_score = N_s[2]
|
|
|
- else: # 第卷,每题*分,共*题,共*分
|
|
|
+ else: # 第卷,每题*分,共*题,共*分
|
|
|
volume_score = N_s[2]
|
|
|
item_count = N_s[1]
|
|
|
item_score = N_s[0]
|
|
@@ -355,7 +387,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
return all_structure
|
|
|
else:
|
|
|
if keyword_item3.search(C_s):
|
|
|
- if (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '题' or num_back1 == '小') and num_back2 == '分': # 第*卷,共*题,共*分
|
|
|
+ if (num_back0 == '卷' or num_back0 == '部') and (
|
|
|
+ num_back1 == '题' or num_back1 == '小') and num_back2 == '分': # 第*卷,共*题,共*分
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[2]
|
|
|
item_count = N_s[1]
|
|
@@ -364,7 +397,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第*卷,共*分,共*题
|
|
|
+ elif (num_back0 == '卷' or num_back0 == '部') and (
|
|
|
+ num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第*卷,共*分,共*题
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[1]
|
|
|
item_count = N_s[2]
|
|
@@ -373,7 +407,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back0 == '.' and num_infer1 == '.' and (num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第卷,共*.*分,共*题
|
|
|
+ elif num_back0 == '.' and num_infer1 == '.' and (
|
|
|
+ num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第卷,共*.*分,共*题
|
|
|
volume_N = -1
|
|
|
volume_score = N_s[0]
|
|
|
item_count = N_s[2]
|
|
@@ -382,7 +417,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back1 == '.' and num_infer2 == '.' and (num_back0 == '题' or num_back0 == '小') and num_back2 == '分': # 第卷,共*题,共*.*分
|
|
|
+ elif num_back1 == '.' and num_infer2 == '.' and (
|
|
|
+ num_back0 == '题' or num_back0 == '小') and num_back2 == '分': # 第卷,共*题,共*.*分
|
|
|
volume_N = -1
|
|
|
volume_score = N_s[1]
|
|
|
item_count = N_s[0]
|
|
@@ -507,12 +543,12 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
if keyword_item2.search(C_s):
|
|
|
if keyword_item3.search(C_s):
|
|
|
if (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分' and num_back3 == '分':
|
|
|
- if N_s[3] > N_s[1]: # 第*卷,每题*分,共*题,共*分
|
|
|
+ if N_s[3] > N_s[1]: # 第*卷,每题*分,共*题,共*分
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[3]
|
|
|
item_count = N_s[2]
|
|
|
item_score = N_s[1]
|
|
|
- else: # 第*卷,共*分,共*题,每题*分
|
|
|
+ else: # 第*卷,共*分,共*题,每题*分
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[1]
|
|
|
item_count = N_s[2]
|
|
@@ -522,12 +558,12 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分' and num_back3 == '分':
|
|
|
- if N_s[3] > N_s[2]: # 第*卷,共*题,每题*分,共*分
|
|
|
+ if N_s[3] > N_s[2]: # 第*卷,共*题,每题*分,共*分
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[3]
|
|
|
item_count = N_s[1]
|
|
|
item_score = N_s[2]
|
|
|
- else: # 第*卷,共*题,共*分 , 每题*分
|
|
|
+ else: # 第*卷,共*题,共*分 , 每题*分
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[2]
|
|
|
item_count = N_s[1]
|
|
@@ -537,12 +573,12 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分' and num_back1 == '分':
|
|
|
- if N_s[1] > N_s[2]: # 第*卷,共*分,每题*分,共*题
|
|
|
+ if N_s[1] > N_s[2]: # 第*卷,共*分,每题*分,共*题
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[1]
|
|
|
item_count = N_s[3]
|
|
|
item_score = N_s[2]
|
|
|
- else: # 第*卷,每题*分,共*分,共*题
|
|
|
+ else: # 第*卷,每题*分,共*分,共*题
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[2]
|
|
|
item_count = N_s[3]
|
|
@@ -551,7 +587,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back2 == '分': # 第卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题
|
|
|
+ elif (
|
|
|
+ num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back2 == '分': # 第卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题
|
|
|
volume_N = -1
|
|
|
if int(N_s[0]) > int(N_s[2]):
|
|
|
volume_score = N_s[0]
|
|
@@ -565,7 +602,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back1 == '.' and num_infer2 == '.') and num_back0 == '分' and num_back2 == '分': # 第卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题
|
|
|
+ elif (
|
|
|
+ num_back1 == '.' and num_infer2 == '.') and num_back0 == '分' and num_back2 == '分': # 第卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题
|
|
|
volume_N = -1
|
|
|
if int(N_s[0]) > int(N_s[1]):
|
|
|
volume_score = N_s[0]
|
|
@@ -579,7 +617,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分
|
|
|
+ elif (
|
|
|
+ num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分
|
|
|
volume_N = -1
|
|
|
if N_s[1] > N_s[3]:
|
|
|
volume_score = N_s[1]
|
|
@@ -593,7 +632,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分
|
|
|
+ elif (
|
|
|
+ num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分
|
|
|
volume_N = -1
|
|
|
if int(N_s[1]) > int(N_s[2]):
|
|
|
volume_score = N_s[1]
|
|
@@ -607,7 +647,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*.*分,共*题,每题*分/第卷,每题*.*分,共*题,共*分
|
|
|
+ elif (
|
|
|
+ num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*.*分,共*题,每题*分/第卷,每题*.*分,共*题,共*分
|
|
|
volume_N = -1
|
|
|
if int(N_s[0]) > int(N_s[3]):
|
|
|
volume_score = N_s[0]
|
|
@@ -621,7 +662,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back2 == '.' and num_infer3 == '.') and num_back0 == '分' and num_back3 == '分': # 第卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分
|
|
|
+ elif (
|
|
|
+ num_back2 == '.' and num_infer3 == '.') and num_back0 == '分' and num_back3 == '分': # 第卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分
|
|
|
volume_N = -1
|
|
|
if int(N_s[0]) > int(N_s[2]):
|
|
|
volume_score = N_s[0]
|
|
@@ -636,7 +678,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
else:
|
|
|
- if (num_back0 == '卷' or num_back0 == '部') and num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分/第*卷,共*.*分,每题*分
|
|
|
+ if (
|
|
|
+ num_back0 == '卷' or num_back0 == '部') and num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分/第*卷,共*.*分,每题*分
|
|
|
volume_N = int(N_s[0])
|
|
|
if N_s[1] > N_s[3]:
|
|
|
volume_score = N_s[1]
|
|
@@ -650,7 +693,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分/第*卷,共*分,每题*.*分
|
|
|
+ elif (
|
|
|
+ num_back0 == '卷' or num_back0 == '部') and num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分/第*卷,共*分,每题*.*分
|
|
|
volume_N = int(N_s[0])
|
|
|
if int(N_s[1]) > int(N_s[2]):
|
|
|
volume_score = N_s[1]
|
|
@@ -679,12 +723,12 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分' and num_back2 == '分':
|
|
|
- if N_s[3] > N_s[1]: # 第*卷,每题*.*分,共*分
|
|
|
+ if N_s[3] > N_s[1]: # 第*卷,每题*.*分,共*分
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[3]
|
|
|
item_count = -1
|
|
|
item_score = float(N_s[1] + '.' + N_s[2])
|
|
|
- else: # 第*卷,共*.*分,每题*分
|
|
|
+ else: # 第*卷,共*.*分,每题*分
|
|
|
volume_N = N_s[0]
|
|
|
volume_score = N_s[1]
|
|
|
item_count = -1
|
|
@@ -813,7 +857,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
if keyword_item1.search(C_s):
|
|
|
if keyword_item2.search(C_s):
|
|
|
if keyword_item3.search(C_s):
|
|
|
- if (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题
|
|
|
+ if (num_back0 == '卷' or num_back0 == '部') and (
|
|
|
+ num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题
|
|
|
volume_N = N_s[0]
|
|
|
if N_s[1] > N_s[3]:
|
|
|
volume_score = N_s[1]
|
|
@@ -831,7 +876,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题
|
|
|
+ elif (num_back0 == '卷' or num_back0 == '部') and (
|
|
|
+ num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题
|
|
|
volume_N = N_s[0]
|
|
|
if N_s[1] > N_s[2]:
|
|
|
volume_score = N_s[1]
|
|
@@ -849,7 +895,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back3 == '分' and num_back4 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分
|
|
|
+ elif (num_back0 == '卷' or num_back0 == '部') and (
|
|
|
+ num_back2 == '.' and num_infer3 == '.') and num_back3 == '分' and num_back4 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分
|
|
|
volume_N = N_s[0]
|
|
|
if N_s[2] > N_s[4]:
|
|
|
volume_score = N_s[2]
|
|
@@ -867,7 +914,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分
|
|
|
+ elif (num_back0 == '卷' or num_back0 == '部') and (
|
|
|
+ num_back3 == '.' and num_infer4 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分
|
|
|
volume_N = N_s[0]
|
|
|
if N_s[2] > N_s[3]:
|
|
|
volume_score = N_s[2]
|
|
@@ -885,7 +933,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*.*分,共*题,每题*分/第*卷,每题*.*分,共*题,共*分
|
|
|
+ elif (num_back0 == '卷' or num_back0 == '部') and (
|
|
|
+ num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*.*分,共*题,每题*分/第*卷,每题*.*分,共*题,共*分
|
|
|
volume_N = N_s[0]
|
|
|
if N_s[1] > N_s[4]:
|
|
|
volume_score = N_s[1]
|
|
@@ -903,7 +952,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = volume_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back1 == '分' and num_back4 == '分': # 第*卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分
|
|
|
+ elif (num_back0 == '卷' or num_back0 == '部') and (
|
|
|
+ num_back3 == '.' and num_infer4 == '.') and num_back1 == '分' and num_back4 == '分': # 第*卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分
|
|
|
volume_N = N_s[0]
|
|
|
if N_s[1] > N_s[3]:
|
|
|
volume_score = N_s[0]
|
|
@@ -995,11 +1045,11 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
if isinstance(N_s[1], str):
|
|
|
N_s[1] = int(N_s[1])
|
|
|
if keyword_item2.search(C_s):
|
|
|
- if N_s[0] > N_s[1]: # 选择题/主观题/客观题,共*分,每题*分
|
|
|
+ if N_s[0] > N_s[1]: # 选择题/主观题/客观题,共*分,每题*分
|
|
|
item_total_score = int(N_s[0])
|
|
|
item_count = int(N_s[0] / N_s[1])
|
|
|
item_score = N_s[1]
|
|
|
- else: # 选择题/主观题/客观题,每题*分,共*分
|
|
|
+ else: # 选择题/主观题/客观题,每题*分,共*分
|
|
|
item_total_score = int(N_s[1])
|
|
|
item_count = int(N_s[1] / N_s[0])
|
|
|
item_score = N_s[0]
|
|
@@ -1029,7 +1079,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
else:
|
|
|
return all_structure
|
|
|
else:
|
|
|
- if num_back0 == '.' and num_infer1 == '.' and num_back1 == '分': # *.*分
|
|
|
+ if num_back0 == '.' and num_infer1 == '.' and num_back1 == '分': # *.*分
|
|
|
item_N = -1
|
|
|
item_total_score = N_s[0]
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
@@ -1081,7 +1131,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
item_total_score = N_s[2]
|
|
|
item_count = N_s[1]
|
|
|
item_score = N_s[0]
|
|
|
- else: # 共*分,共*题,每题*分
|
|
|
+ else: # 共*分,共*题,每题*分
|
|
|
item_total_score = N_s[0]
|
|
|
item_count = N_s[1]
|
|
|
item_score = N_s[2]
|
|
@@ -1096,7 +1146,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
item_total_score = N_s[1]
|
|
|
item_count = N_s[2]
|
|
|
item_score = N_s[0]
|
|
|
- else: # 共*分,每题*分 ,共*题
|
|
|
+ else: # 共*分,每题*分 ,共*题
|
|
|
item_total_score = N_s[0]
|
|
|
item_count = N_s[2]
|
|
|
item_score = N_s[1]
|
|
@@ -1107,11 +1157,11 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
elif num_back1 == '分' and num_back2 == '分':
|
|
|
- if N_s[2] > N_s[1]: # 共*题,每题*分,共*分
|
|
|
+ if N_s[2] > N_s[1]: # 共*题,每题*分,共*分
|
|
|
item_total_score = N_s[2]
|
|
|
item_count = N_s[0]
|
|
|
item_score = N_s[1]
|
|
|
- else: # 共*题,共*分,每题*分
|
|
|
+ else: # 共*题,共*分,每题*分
|
|
|
item_total_score = N_s[1]
|
|
|
item_count = N_s[0]
|
|
|
item_score = N_s[2]
|
|
@@ -1125,12 +1175,12 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
return all_structure
|
|
|
else:
|
|
|
if num_back0 != '.' and num_back1 == '分' and num_back2 == '分':
|
|
|
- if N_s[1] > N_s[2]: # *,共*分,每题*分
|
|
|
+ if N_s[1] > N_s[2]: # *,共*分,每题*分
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[1]
|
|
|
item_count = int(N_s[1] / N_s[2])
|
|
|
item_score = N_s[2]
|
|
|
- else: # *,每题*分 ,共*分
|
|
|
+ else: # *,每题*分 ,共*分
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[2]
|
|
|
item_count = int(N_s[2] / N_s[1])
|
|
@@ -1144,10 +1194,10 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
if int(N_s[0]) > int(N_s[2]):
|
|
|
item_total_score = N_s[0]
|
|
|
item_score = N_s[2]
|
|
|
- item_count = int(item_total_score/item_score)
|
|
|
+ item_count = int(item_total_score / item_score)
|
|
|
else:
|
|
|
item_total_score = N_s[2]
|
|
|
- item_score = float(N_s[0]+'.'+N_s[1])
|
|
|
+ item_score = float(N_s[0] + '.' + N_s[1])
|
|
|
item_count = int(item_total_score / item_score)
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
@@ -1157,8 +1207,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
item_N = -1
|
|
|
if int(N_s[0]) > int(N_s[1]):
|
|
|
item_total_score = N_s[0]
|
|
|
- item_score = float(N_s[1]+'.'+N_s[2])
|
|
|
- item_count = int(item_total_score/item_score)
|
|
|
+ item_score = float(N_s[1] + '.' + N_s[2])
|
|
|
+ item_count = int(item_total_score / item_score)
|
|
|
else:
|
|
|
item_total_score = N_s[1]
|
|
|
item_score = N_s[0]
|
|
@@ -1171,34 +1221,34 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
return all_structure
|
|
|
else:
|
|
|
if keyword_item3.search(C_s):
|
|
|
- if num_back2 == '分' and num_infer2 =='.' and num_back1 =='.': # *小题,共*.*分,
|
|
|
+ if num_back2 == '分' and num_infer2 == '.' and num_back1 == '.': # *小题,共*.*分,
|
|
|
item_N = -1
|
|
|
item_total_score = N_s[1]
|
|
|
item_count = N_s[0]
|
|
|
- item_score = N_s[1]/N_s[0]
|
|
|
+ item_score = N_s[1] / N_s[0]
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back1 == '分' and num_infer1 =='.'and num_back0 =='.': # 共*.*分,*小题
|
|
|
+ elif num_back1 == '分' and num_infer1 == '.' and num_back0 == '.': # 共*.*分,*小题
|
|
|
item_N = -1
|
|
|
item_total_score = N_s[0]
|
|
|
item_count = N_s[2]
|
|
|
- item_score = N_s[0]/N_s[2]
|
|
|
+ item_score = N_s[0] / N_s[2]
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back2 == '分' and num_infer2 !='.': # *,*小题,共*分,
|
|
|
+ elif num_back2 == '分' and num_infer2 != '.': # *,*小题,共*分,
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[2]
|
|
|
item_count = N_s[1]
|
|
|
- item_score = N_s[2]/N_s[1]
|
|
|
+ item_score = N_s[2] / N_s[1]
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back1 == '分' and num_infer1 !='.': # *,共*分,共*小题
|
|
|
+ elif num_back1 == '分' and num_infer1 != '.': # *,共*分,共*小题
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[1]
|
|
|
item_count = N_s[2]
|
|
@@ -1210,7 +1260,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
else:
|
|
|
return all_structure
|
|
|
else:
|
|
|
- if num_back2 == '分' and num_infer2 =='.' and num_back1 =='.': # *,共*.*分,
|
|
|
+ if num_back2 == '分' and num_infer2 == '.' and num_back1 == '.': # *,共*.*分,
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[1]
|
|
|
item_count = -1
|
|
@@ -1229,7 +1279,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
else:
|
|
|
- return all_structure
|
|
|
+ return all_structure
|
|
|
elif len(N_s) == 4:
|
|
|
num_index1 = s.index(N_s[0])
|
|
|
num_infer0 = s[num_index1 - len(N_s[0])]
|
|
@@ -1278,12 +1328,12 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
if keyword_item2.search(C_s):
|
|
|
if keyword_item3.search(C_s):
|
|
|
if num_back1 == '分' and num_back3 == '分':
|
|
|
- if N_s[3] > N_s[1]: # *,每题*分,共*题,共*分
|
|
|
+ if N_s[3] > N_s[1]: # *,每题*分,共*题,共*分
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[3]
|
|
|
item_count = N_s[2]
|
|
|
item_score = N_s[1]
|
|
|
- else: # *,共*分,共*题,每题*分
|
|
|
+ else: # *,共*分,共*题,每题*分
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[1]
|
|
|
item_count = N_s[2]
|
|
@@ -1294,12 +1344,12 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
elif num_back1 == '分' and num_back2 == '分':
|
|
|
- if N_s[2] > N_s[1]: # *,每题*分,共*分,共*题
|
|
|
+ if N_s[2] > N_s[1]: # *,每题*分,共*分,共*题
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[2]
|
|
|
item_count = N_s[3]
|
|
|
item_score = N_s[1]
|
|
|
- else: # *,共*分,每题*分,共*题
|
|
|
+ else: # *,共*分,每题*分,共*题
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[1]
|
|
|
item_count = N_s[3]
|
|
@@ -1312,7 +1362,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
elif num_back2 == '分' and num_back3 == '分':
|
|
|
- if N_s[3] > N_s[2]: # *,共*题,每题*分,共*分
|
|
|
+ if N_s[3] > N_s[2]: # *,共*题,每题*分,共*分
|
|
|
item_N = N_s[0]
|
|
|
item_total_score = N_s[3]
|
|
|
item_count = N_s[1]
|
|
@@ -1329,7 +1379,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
'volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分'and num_back3 == '分' : # 共*.*分,共*题, 每题*分/每题*.*分,共*题,共*分
|
|
|
+ elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分' and num_back3 == '分': # 共*.*分,共*题, 每题*分/每题*.*分,共*题,共*分
|
|
|
item_N = -1
|
|
|
if N_s[0] > N_s[3]:
|
|
|
item_total_score = N_s[0]
|
|
@@ -1340,10 +1390,10 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
item_score = float(N_s[0] + '.' + N_s[1])
|
|
|
item_count = int(item_total_score / item_score)
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
- type_score_dict_ocr[ 'item_total_score'] = item_total_score
|
|
|
+ type_score_dict_ocr['item_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['item_count'] = item_count
|
|
|
type_score_dict_ocr['item_score'] = item_score
|
|
|
- elif num_back2 == '.' and num_infer3 == '.' and num_back0 == '分'and num_back3 == '分': # 共*分,共*题, 每题*.*分/每题*分,共*题,共*.*分
|
|
|
+ elif num_back2 == '.' and num_infer3 == '.' and num_back0 == '分' and num_back3 == '分': # 共*分,共*题, 每题*.*分/每题*分,共*题,共*.*分
|
|
|
item_N = -1
|
|
|
if N_s[0] > N_s[2]:
|
|
|
item_total_score = N_s[0]
|
|
@@ -1354,10 +1404,10 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
item_score = N_s[0]
|
|
|
item_count = int(item_total_score / item_score)
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
- type_score_dict_ocr[ 'item_total_score'] = item_total_score
|
|
|
+ type_score_dict_ocr['item_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['item_count'] = item_count
|
|
|
type_score_dict_ocr['item_score'] = item_score
|
|
|
- elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back3 == '分': # 共*题,共*.*分,每题*分/共*题,每题*.*分,共*分
|
|
|
+ elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分': # 共*题,共*.*分,每题*分/共*题,每题*.*分,共*分
|
|
|
item_N = -1
|
|
|
if N_s[1] > N_s[3]:
|
|
|
item_total_score = N_s[1]
|
|
@@ -1371,7 +1421,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back2 == '.' and num_infer3 == '.' and num_back3 == '分'and num_back1 == '分' : # 共*题,共*分,每题*.*分/共*题,每题*分,共*.*分
|
|
|
+ elif num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and num_back1 == '分': # 共*题,共*分,每题*.*分/共*题,每题*分,共*.*分
|
|
|
item_N = -1
|
|
|
if N_s[1] > N_s[2]:
|
|
|
item_total_score = N_s[1]
|
|
@@ -1385,7 +1435,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分'and num_back2 == '分' : # 每题*.*分,共*分,共*题/共*.*分,每题*分,共*题
|
|
|
+ elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分' and num_back2 == '分': # 每题*.*分,共*分,共*题/共*.*分,每题*分,共*题
|
|
|
item_N = -1
|
|
|
if N_s[0] > N_s[2]:
|
|
|
item_total_score = N_s[0]
|
|
@@ -1399,7 +1449,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back0 == '分' : # 每题*分,共*.*分,共*题/共*分,每题*.*分,共*题
|
|
|
+ elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back0 == '分': # 每题*分,共*.*分,共*题/共*分,每题*.*分,共*题
|
|
|
item_N = -1
|
|
|
if N_s[0] > N_s[1]:
|
|
|
item_total_score = N_s[0]
|
|
@@ -1416,7 +1466,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
else:
|
|
|
return all_structure
|
|
|
else:
|
|
|
- if num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back3 == '分' : # *,共*.*分, 每题*分/每题*.*分,共*分
|
|
|
+ if num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分': # *,共*.*分, 每题*分/每题*.*分,共*分
|
|
|
item_N = N_s[0]
|
|
|
if N_s[1] > N_s[3]:
|
|
|
item_total_score = N_s[1]
|
|
@@ -1427,10 +1477,10 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
item_score = float(N_s[1] + '.' + N_s[2])
|
|
|
item_count = int(item_total_score / item_score)
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
- type_score_dict_ocr[ 'item_total_score'] = item_total_score
|
|
|
+ type_score_dict_ocr['item_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['item_count'] = item_count
|
|
|
type_score_dict_ocr['item_score'] = item_score
|
|
|
- elif num_back2== '.' and num_infer3== '.' and num_back1 == '分'and num_back3 == '分' : # *,共*分, 每题*.*分/*,每题*分,共*.*分
|
|
|
+ elif num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分': # *,共*分, 每题*.*分/*,每题*分,共*.*分
|
|
|
item_N = int(N_s[0])
|
|
|
if N_s[1] > N_s[2]:
|
|
|
item_total_score = N_s[1]
|
|
@@ -1441,7 +1491,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
item_score = N_s[1]
|
|
|
item_count = int(item_total_score / item_score)
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
- type_score_dict_ocr[ 'item_total_score'] = item_total_score
|
|
|
+ type_score_dict_ocr['item_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['item_count'] = item_count
|
|
|
type_score_dict_ocr['item_score'] = item_score
|
|
|
else:
|
|
@@ -1457,7 +1507,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back2 == '分': # *,共*.*分,*小题
|
|
|
+ elif num_back2 == '分': # *,共*.*分,*小题
|
|
|
item_total_score = N_s[1]
|
|
|
item_N = N_s[0]
|
|
|
item_count = N_s[3]
|
|
@@ -1536,7 +1586,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
N_s[4] = int(N_s[4])
|
|
|
if keyword_item2.search(C_s):
|
|
|
if keyword_item3.search(C_s):
|
|
|
- if num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back3 == '分' : # *,每题*.*分,共*分,*小题/*,共*.*分,每题*分,共*小题
|
|
|
+ if num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分': # *,每题*.*分,共*分,*小题/*,共*.*分,每题*分,共*小题
|
|
|
item_N = N_s[0]
|
|
|
if N_s[1] > N_s[3]:
|
|
|
item_total_score = N_s[1]
|
|
@@ -1550,7 +1600,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back2== '.' and num_infer3== '.' and num_back1 == '分'and num_back3 == '分' : # *,每题*分,共*.*分,*小题/*,共*分,每题*.*分,共*小题
|
|
|
+ elif num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分': # *,每题*分,共*.*分,*小题/*,共*分,每题*.*分,共*小题
|
|
|
item_N = N_s[0]
|
|
|
if N_s[1] > N_s[2]:
|
|
|
item_total_score = N_s[1]
|
|
@@ -1564,7 +1614,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back4 == '分' : # *,*小题,每题*.*分,共*分/*,*小题,共*.*分,每题*分
|
|
|
+ elif num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and num_back4 == '分': # *,*小题,每题*.*分,共*分/*,*小题,共*.*分,每题*分
|
|
|
item_N = N_s[0]
|
|
|
if N_s[2] > N_s[4]:
|
|
|
item_total_score = N_s[2]
|
|
@@ -1578,7 +1628,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back3== '.' and num_infer4== '.' and num_back2 == '分'and num_back4 == '分' : # *,*小题,每题*分,共*.*分/*,*小题,共*分,每题*.*分
|
|
|
+ elif num_back3 == '.' and num_infer4 == '.' and num_back2 == '分' and num_back4 == '分': # *,*小题,每题*分,共*.*分/*,*小题,共*分,每题*.*分
|
|
|
item_N = N_s[0]
|
|
|
if N_s[2] > N_s[3]:
|
|
|
item_total_score = N_s[2]
|
|
@@ -1592,7 +1642,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back4 == '分' : # *,每题*.*分,*小题,共*分/*,共*.*分,*小题,每题*分
|
|
|
+ elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back4 == '分': # *,每题*.*分,*小题,共*分/*,共*.*分,*小题,每题*分
|
|
|
item_N = N_s[0]
|
|
|
if N_s[1] > N_s[4]:
|
|
|
item_total_score = N_s[1]
|
|
@@ -1606,7 +1656,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back3== '.' and num_infer4== '.' and num_back1 == '分'and num_back4 == '分' : # *,每题*分,*小题,共*.*分/*,共*分,*小题,每题*.*分
|
|
|
+ elif num_back3 == '.' and num_infer4 == '.' and num_back1 == '分' and num_back4 == '分': # *,每题*分,*小题,共*.*分/*,共*分,*小题,每题*.*分
|
|
|
item_N = N_s[0]
|
|
|
if N_s[1] > N_s[3]:
|
|
|
item_total_score = N_s[1]
|
|
@@ -1685,7 +1735,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
else:
|
|
|
if num_back1 == '分' and num_back0 == '.' and num_infer1 == '.': # *.*分
|
|
|
item_N = -1
|
|
|
- item_score = float(N_s[0]+'.'+N_s[1])
|
|
|
+ item_score = float(N_s[0] + '.' + N_s[1])
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
type_score_dict_ocr['volume_total_score'] = -1
|
|
|
type_score_dict_ocr['volume_count'] = -1
|
|
@@ -1698,7 +1748,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_count'] = -1
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
else:
|
|
|
- return all_structure
|
|
|
+ return all_structure
|
|
|
elif len(N_s) == 3:
|
|
|
num_index1 = s.index(N_s[0])
|
|
|
num_infer0 = s[num_index1 - len(N_s[0])]
|
|
@@ -1728,7 +1778,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
N_s[1] = int(N_s[1])
|
|
|
if isinstance(N_s[2], str):
|
|
|
N_s[2] = int(N_s[2])
|
|
|
- if num_back2 == '分' and (num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*分
|
|
|
+ if num_back2 == '分' and (
|
|
|
+ num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*分
|
|
|
item_N = int(N_s[0])
|
|
|
item_total_score = int(N_s[1]) * int(N_s[2])
|
|
|
item_count = int(N_s[1])
|
|
@@ -1737,7 +1788,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back1 == '分' and (num_back2 == '题' or num_back2 == '小' or num_back2 == '空') and num_back0 != '分': # *,每题*分,共*题
|
|
|
+ elif num_back1 == '分' and (
|
|
|
+ num_back2 == '题' or num_back2 == '小' or num_back2 == '空') and num_back0 != '分': # *,每题*分,共*题
|
|
|
item_N = int(N_s[0])
|
|
|
item_total_score = int(N_s[1]) * int(N_s[2])
|
|
|
item_count = int(N_s[2])
|
|
@@ -1749,13 +1801,13 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
elif num_infer2 == '.' and num_back2 == '分' and num_back1 == '.': # 共*题,每题*.*分
|
|
|
item_N = -1
|
|
|
item_count = int(N_s[0])
|
|
|
- item_score = float(N_s[1]+'.'+N_s[2])
|
|
|
+ item_score = float(N_s[1] + '.' + N_s[2])
|
|
|
item_total_score = int(item_count * item_score)
|
|
|
type_score_dict_ocr['volume_N'] = item_N
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_infer1 == '.' and num_back1 == '分' and num_back0 == '.' : # 每题*.*分,共*题
|
|
|
+ elif num_infer1 == '.' and num_back1 == '分' and num_back0 == '.': # 每题*.*分,共*题
|
|
|
item_N = -1
|
|
|
item_count = int(N_s[2])
|
|
|
item_score = float(N_s[0] + '.' + N_s[1])
|
|
@@ -1764,7 +1816,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back2 == '分': # * * ,每题*分
|
|
|
+ elif num_back2 == '分': # * * ,每题*分
|
|
|
item_N = -1
|
|
|
item_count = -1
|
|
|
item_score = -1
|
|
@@ -1817,7 +1869,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
N_s[2] = int(N_s[2])
|
|
|
if isinstance(N_s[3], str):
|
|
|
N_s[3] = int(N_s[3])
|
|
|
- if num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and (num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*.*分
|
|
|
+ if num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and (
|
|
|
+ num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*.*分
|
|
|
item_N = int(N_s[0])
|
|
|
item_count = int(N_s[1])
|
|
|
item_score = float(N_s[2] + '.' + N_s[3])
|
|
@@ -1826,7 +1879,8 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
type_score_dict_ocr['volume_total_score'] = item_total_score
|
|
|
type_score_dict_ocr['volume_count'] = item_count
|
|
|
type_score_dict_ocr['volume_score'] = item_score
|
|
|
- elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and (num_back3 == '题' or num_back3 == '小' or num_back3 == '空') and num_back0 != '分': # *,每题*.*分,共*题
|
|
|
+ elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and (
|
|
|
+ num_back3 == '题' or num_back3 == '小' or num_back3 == '空') and num_back0 != '分': # *,每题*.*分,共*题
|
|
|
item_N = int(N_s[0])
|
|
|
item_count = int(N_s[3])
|
|
|
item_score = float(N_s[1] + '.' + N_s[2])
|
|
@@ -1861,7 +1915,7 @@ def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
|
|
|
if isinstance(N_s[1], str):
|
|
|
N_s[1] = int(N_s[1])
|
|
|
if int(N_s[0]) > 1000:
|
|
|
- item_N =0
|
|
|
+ item_N = 0
|
|
|
item_N1 = int(N_s[0][-4] + N_s[0][-3])
|
|
|
item_N2 = int(N_s[0][-2] + N_s[0][-1])
|
|
|
if item_N2 - item_N1 == 1:
|