# -*- coding: utf-8 -*-
# @Time : 2020/5/28 0022 17:04
# @Author : LF
# @FileName: ocr_key_words.py
# @Software: PyCharm
  6. import re
  7. def find_repeat(source, elmt): # 去重后重新定位数字索引
  8. elmt_index = []
  9. s_index = 0
  10. e_index = len(source)
  11. while (s_index < e_index):
  12. try:
  13. temp = source.index(elmt, s_index, e_index)
  14. elmt_index.append(temp)
  15. s_index = temp + 1
  16. except ValueError:
  17. break
  18. return elmt_index
  19. def ocr_key_words(rect,type_score_dict): # 将ocr识别得到的文字与模型得到的type_score对应
  20. '''
  21. :param rect: OCR识别结果数组,格式:res = {'chars': [},'coordinates': [(),()},'words': []}
  22. :param type_score_dict: 模型得到的type_score(与模型得到的边框相对应)
  23. :return: 字典中添加word
  24. '''
  25. len_ocr = len(rect['chars'])
  26. xmin = type_score_dict['type_box'][0]
  27. ymin = type_score_dict['type_box'][1]
  28. xmax = type_score_dict['type_box'][2]
  29. ymax = type_score_dict['type_box'][3]
  30. words=[]
  31. for j in range(len_ocr):
  32. if rect['coordinates'][j][0] - xmin > -30 and rect['coordinates'][j][1] - ymin > -30 and rect['coordinates'][j][2] - xmax < 30 and rect['coordinates'][j][3] - ymax < 30:
  33. word = rect['chars'][j]
  34. words.append(word)
  35. type_score_dict['words']= words
  36. type_score_dict_ocr = type_score_dict
  37. return type_score_dict_ocr
  38. def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
  39. total_score = 0
  40. volume_score = 0
  41. volume_structure_item = 0
  42. volume_structure = []
  43. Score_structure_item = 0
  44. Score_structure = []
  45. all_structure = {}
  46. keyword_volume = re.compile(r'第卷|第部')
  47. keyword_type = re.compile(r'选择|非选择题|综合题|问答题|主观题|客观题|解答题|计算题')
  48. keyword_type1 = ['选择', '非选择题', '综合题', '问答题', '主观题', '客观题', '解答题','计算题']
  49. len_keyword_type1 = len(keyword_type1)
  50. keyword_item1 = re.compile(r'共分|合计分|总共分|总计分|小题满分|本小题|满分|共计|共.分|合计.分|总共.分|总计.分|小题满分.|本小题.|满分.|共计.')
  51. keyword_item2 = re.compile(r'每题分|每小题分|空分|每小题.分|每题.分|空.分|个分') # '分/题'暂未考虑
  52. keyword_item3 = re.compile(r'共题|共小题|分小题|本题小题|共个小题|分为小题|分个小题|本大题共小题')
  53. keyword_item4 = ['分']
  54. keyword_item5 = re.compile(r'题|.|、')
  55. keyword_item6 = re.compile(r'分/题|分')
  56. if 'words' in type_score_dict_ocr.keys():
  57. ocr_1 = type_score_dict_ocr['words']
  58. else:
  59. return all_structure
  60. s = ''.join((str(x) for x in ocr_1)) # 合并为一个字符串
  61. if s.find('IV') != -1 or s.find('Ⅳ') != -1:
  62. s = s.replace('Ⅳ', '4')
  63. s = s.replace('IV', '4')
  64. elif s.find('III') != -1 or s.find('Ⅲ') != -1:
  65. s = s.replace('Ⅲ', '3')
  66. s = s.replace('III', '3')
  67. elif s.find('II') != -1 or s.find('Ⅱ') != -1:
  68. s = s.replace('Ⅱ', '2')
  69. s = s.replace('II', '2')
  70. elif s.find('VI') != -1 or s.find('Ⅵ') != -1:
  71. s = s.replace('Ⅵ', '6')
  72. s = s.replace('VI', '6')
  73. elif s.find('VII') != -1 or s.find('Ⅶ') != -1:
  74. s = s.replace('Ⅶ', '7')
  75. s = s.replace('VII', '7')
  76. elif s.find('VIII') != -1 or s.find('Ⅷ') != -1:
  77. s = s.replace('Ⅷ', '8')
  78. s = s.replace('VIII', '8')
  79. elif s.find('IX') != -1 or s.find('Ⅸ') != -1:
  80. s = s.replace('Ⅸ', '9')
  81. s = s.replace('IX', '9')
  82. elif s.find('X') != -1 or s.find('Ⅹ') != -1:
  83. s = s.replace('Ⅹ', '10')
  84. s = s.replace('X', '10')
  85. elif s.find('I') != -1 or s.find('Ⅰ') != -1:
  86. s = s.replace('Ⅰ', '1')
  87. s = s.replace('I', '1')
  88. elif s.find('V') != -1 or s.find('Ⅴ') != -1:
  89. s = s.replace('Ⅴ', '5')
  90. s = s.replace('V', '5')
  91. C_s = re.sub("[A-Za-z0-9\!\%\[\]\,\。]", "", s) # 提取汉字
  92. E_s = ''.join(re.findall(r'[A-Za-z]', s)) # 提取英文字符
  93. N_s = re.findall('\d+', s) # 提取阿拉伯数字
  94. try:
  95. if len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (keyword_item6.search(C_s)):
  96. type_score_dict_ocr['item_N'] = -1
  97. type_score_dict_ocr['item_total_score'] = int(N_s[0])
  98. type_score_dict_ocr['item_count'] = -1
  99. type_score_dict_ocr['item_score'] = -1
  100. Score_structure_item = type_score_dict_ocr
  101. Score_structure.append(Score_structure_item)
  102. all_structure = {'volume_structure': -1,
  103. 'Score_structure': Score_structure}
  104. elif len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (keyword_item5.search(C_s) or len(C_s) == 0):
  105. type_score_dict_ocr['item_N'] = int(N_s[0])
  106. type_score_dict_ocr['item_total_score'] = -1
  107. type_score_dict_ocr['item_count'] = -1
  108. type_score_dict_ocr['item_score'] = -1
  109. Score_structure_item = type_score_dict_ocr
  110. Score_structure.append(Score_structure_item)
  111. all_structure = {'volume_structure': -1,
  112. 'Score_structure': Score_structure}
  113. elif N_s != []:
  114. if keyword_volume.search(C_s):
  115. '''
  116. 对应试卷中存在分卷信息的情况,根据包含数字的个数分为5类,暂定包含信息的有效数字个数小于5,并处理小题分数和总分可能包含小数点的情况
  117. 暂定小题个数不包含小数
  118. 暂定总分数中不存在有意义的小数位
  119. '''
  120. if len(N_s) == 1:
  121. num_index = s.index(N_s[0])
  122. num_infer = s[num_index - len(N_s[0])]
  123. num_back = s[num_index + len(N_s[0])]
  124. if num_back == '分': # 第卷/部*分
  125. volume_score = int(N_s[0])
  126. type_score_dict_ocr['volume_N'] = -1
  127. type_score_dict_ocr['volume_total_score'] = volume_score
  128. type_score_dict_ocr['volume_count'] = -1
  129. type_score_dict_ocr['volume_score'] = -1
  130. elif num_back == '卷' or num_back == '部': # 第*卷
  131. volume_N = int(N_s[0])
  132. type_score_dict_ocr['volume_N'] = volume_N
  133. type_score_dict_ocr['volume_total_score'] = -1
  134. type_score_dict_ocr['volume_count'] = -1
  135. type_score_dict_ocr['volume_score'] = -1
  136. else:
  137. return all_structure
  138. elif len(N_s) == 2:
  139. num_index1 = s.index(N_s[0])
  140. num_infer0 = s[num_index1 - len(N_s[0])]
  141. num_back0 = s[num_index1 + len(N_s[0])]
  142. all_1 = find_repeat(s, N_s[1])
  143. temp1 = 0
  144. for ii in range(len(N_s[0])):
  145. if N_s[0][ii] == N_s[1]:
  146. temp1 = temp1 + 1
  147. num_index2 = all_1[temp1]
  148. num_infer1 = s[num_index2 - len(N_s[1])]
  149. num_back1 = s[num_index2 + len(N_s[1])]
  150. if isinstance(N_s[0], str):
  151. N_s[0] = int(N_s[0])
  152. if isinstance(N_s[1], str):
  153. N_s[1] = int(N_s[1])
  154. if keyword_item1.search(C_s):
  155. if keyword_item2.search(C_s):
  156. if num_back0 == '分' and num_back1 == '分':
  157. if N_s[0] < N_s[1]: # 第卷,每小题*分,共*分
  158. volume_score = N_s[1]
  159. item_score = N_s[0]
  160. item_count = int(volume_score / item_score)
  161. else: # 第卷,共*分 ,每小题*分
  162. volume_score = N_s[0]
  163. item_score = N_s[1]
  164. item_count = int(volume_score / item_score)
  165. type_score_dict_ocr['volume_N'] = -1
  166. type_score_dict_ocr['volume_total_score'] = volume_score
  167. type_score_dict_ocr['volume_count'] = item_count
  168. type_score_dict_ocr['volume_score'] = item_score
  169. else:
  170. return all_structure
  171. elif keyword_item3.search(C_s):
  172. if num_back1 == '分': # 第卷,共*小题,共*分
  173. volume_score = N_s[1]
  174. item_count = N_s[0]
  175. item_score = volume_score / item_count
  176. type_score_dict_ocr['volume_N'] = -1
  177. type_score_dict_ocr['volume_total_score'] = volume_score
  178. type_score_dict_ocr['volume_count'] = item_count
  179. type_score_dict_ocr['volume_score'] = item_score
  180. elif num_back0 == '分': # 第卷,共*分 ,共*小题
  181. volume_score = N_s[0]
  182. item_count = N_s[1]
  183. item_score = volume_score / item_count
  184. type_score_dict_ocr['volume_N'] = -1
  185. type_score_dict_ocr['volume_total_score'] = volume_score
  186. type_score_dict_ocr['volume_count'] = item_count
  187. type_score_dict_ocr['volume_score'] = item_score
  188. else:
  189. return all_structure
  190. else:
  191. if (num_back1 == '卷' or num_back1 == '部') and num_back1 == '分': # 第*卷*分
  192. volume_N = N_s[0]
  193. volume_score = N_s[1]
  194. type_score_dict_ocr['volume_N'] = volume_N
  195. type_score_dict_ocr['volume_total_score'] = volume_score
  196. type_score_dict_ocr['volume_count'] = -1
  197. type_score_dict_ocr['volume_score'] = -1
  198. elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分': # 第卷,共*.*分
  199. volume_N = -1
  200. volume_score = N_s[0]
  201. type_score_dict_ocr['volume_N'] = volume_N
  202. type_score_dict_ocr['volume_total_score'] = volume_score
  203. type_score_dict_ocr['volume_count'] = -1
  204. type_score_dict_ocr['volume_score'] = -1
  205. else:
  206. return all_structure
  207. else:
  208. if keyword_item2.search(C_s):
  209. if keyword_item3.search(C_s):
  210. if num_back1 == '分': # 第卷,共*小题,每小题*分
  211. item_count = N_s[0]
  212. item_score = N_s[1]
  213. volume_score = item_score * item_count
  214. type_score_dict_ocr['volume_N'] = -1
  215. type_score_dict_ocr['volume_total_score'] = volume_score
  216. type_score_dict_ocr['volume_count'] = item_count
  217. type_score_dict_ocr['volume_score'] = item_score
  218. elif num_back0 == '分': # 第卷,每小题*分 ,共*小题
  219. item_count = N_s[1]
  220. item_score = N_s[0]
  221. volume_score = item_count * item_score
  222. type_score_dict_ocr['volume_N'] = -1
  223. type_score_dict_ocr['volume_total_score'] = volume_score
  224. type_score_dict_ocr['volume_count'] = item_count
  225. type_score_dict_ocr['volume_score'] = item_score
  226. else:
  227. return all_structure
  228. else:
  229. if num_back1 == '分': # 第卷,每小题*.*分
  230. volume_score = -1
  231. item_count = -1
  232. item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
  233. type_score_dict_ocr['volume_N'] = -1
  234. type_score_dict_ocr['volume_total_score'] = volume_score
  235. type_score_dict_ocr['volume_count'] = item_count
  236. type_score_dict_ocr['volume_score'] = item_score
  237. else:
  238. return all_structure
  239. else:
  240. return all_structure
  241. elif len(N_s) == 3:
  242. num_index1 = s.index(N_s[0])
  243. num_infer0 = s[num_index1 - len(N_s[0])]
  244. num_back0 = s[num_index1 + len(N_s[0])]
  245. all_1 = find_repeat(s, N_s[1])
  246. temp1 = 0
  247. for ii in range(len(N_s[0])):
  248. if N_s[0][ii] == N_s[1]:
  249. temp1 = temp1 + 1
  250. num_index2 = all_1[temp1]
  251. num_infer1 = s[num_index2 - len(N_s[1])]
  252. num_back1 = s[num_index2 + len(N_s[1])]
  253. all_2 = find_repeat(s, N_s[2])
  254. temp2 = 0
  255. for ii in range(len(N_s[0])):
  256. if N_s[0][ii] == N_s[2]:
  257. temp2 = temp2 + 1
  258. for jj in range(len(N_s[1])):
  259. if N_s[1][jj] == N_s[2]:
  260. temp2 = temp2 + 1
  261. num_index3 = all_2[temp2]
  262. num_infer2 = s[num_index3 - len(N_s[2])]
  263. num_back2 = s[num_index3 + len(N_s[2])]
  264. if isinstance(N_s[0], str):
  265. N_s[0] = int(N_s[0])
  266. if isinstance(N_s[1], str):
  267. N_s[1] = int(N_s[1])
  268. if isinstance(N_s[2], str):
  269. N_s[2] = int(N_s[2])
  270. if keyword_item1.search(C_s):
  271. if keyword_item2.search(C_s):
  272. if keyword_item3.search(C_s):
  273. if num_back0 == '分' and num_back1 == '分':
  274. if N_s[0] > N_s[1]: # 第卷,共*分,每题*分,共*题
  275. volume_score = N_s[0]
  276. item_count = N_s[2]
  277. item_score = N_s[1]
  278. else: # 第卷,每题*分,共*分,共*题
  279. volume_score = N_s[1]
  280. item_count = N_s[2]
  281. item_score = N_s[0]
  282. type_score_dict_ocr['volume_N'] = -1
  283. type_score_dict_ocr['volume_total_score'] = volume_score
  284. type_score_dict_ocr['volume_count'] = item_count
  285. type_score_dict_ocr['volume_score'] = item_score
  286. elif num_back0 == '分' and num_back2 == '分':
  287. if N_s[0] > N_s[2]: # 第卷,共*分,共*题,每题*分
  288. volume_score = N_s[0]
  289. item_count = N_s[1]
  290. item_score = N_s[2]
  291. else: # 第卷,每题*分,共*题,共*分
  292. volume_score = N_s[2]
  293. item_count = N_s[1]
  294. item_score = N_s[0]
  295. type_score_dict_ocr['volume_N'] = -1
  296. type_score_dict_ocr['volume_total_score'] = volume_score
  297. type_score_dict_ocr['volume_count'] = item_count
  298. type_score_dict_ocr['volume_score'] = item_score
  299. elif num_back1 == '分' and num_back2 == '分': # 第卷,共*题,共*分,每题*分
  300. if N_s[1] > N_s[2]:
  301. volume_score = N_s[1]
  302. item_count = N_s[0]
  303. item_score = N_s[2]
  304. else:
  305. volume_score = N_s[2]
  306. item_count = N_s[0]
  307. item_score = N_s[1]
  308. type_score_dict_ocr['volume_N'] = -1
  309. type_score_dict_ocr['volume_total_score'] = volume_score
  310. type_score_dict_ocr['volume_count'] = item_count
  311. type_score_dict_ocr['volume_score'] = item_score
  312. else:
  313. return all_structure
  314. else:
  315. if num_back1 == '分' and num_back2 == '分': # 第*卷,共*分,每题*分 / 第*卷,每题*分,共*分
  316. volume_N = int(N_s[0])
  317. if N_s[1] > N_s[2]:
  318. volume_score = N_s[1]
  319. item_score = N_s[2]
  320. item_count = int(volume_score / item_score)
  321. else:
  322. volume_score = N_s[2]
  323. item_score = N_s[1]
  324. item_count = int(volume_score / item_score)
  325. type_score_dict_ocr['volume_N'] = volume_N
  326. type_score_dict_ocr['volume_total_score'] = volume_score
  327. type_score_dict_ocr['volume_count'] = item_count
  328. type_score_dict_ocr['volume_score'] = item_score
  329. elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分' and num_back2 == '分': # 第卷,共*.*分,每题*分 / 第卷,每题*.*分,共*分
  330. volume_N = -1
  331. if N_s[0] > N_s[2]:
  332. volume_score = N_s[0]
  333. item_score = N_s[2]
  334. item_count = int(volume_score / item_score)
  335. else:
  336. volume_score = N_s[2]
  337. item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
  338. item_count = int(volume_score / item_score)
  339. type_score_dict_ocr['volume_N'] = volume_N
  340. type_score_dict_ocr['volume_total_score'] = volume_score
  341. type_score_dict_ocr['volume_count'] = item_count
  342. type_score_dict_ocr['volume_score'] = item_score
  343. elif num_back1 == '.' and num_infer2 == '.' and num_back0 == '分' and num_back2 == '分': # 第卷,共*分,每题*.*分 / 第卷,每题*分,共*.*分
  344. volume_N = -1
  345. if N_s[0] > N_s[1]:
  346. volume_score = N_s[0]
  347. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  348. item_count = int(volume_score / item_score)
  349. else:
  350. volume_score = N_s[1]
  351. item_score = N_s[0]
  352. item_count = int(volume_score / item_score)
  353. type_score_dict_ocr['volume_N'] = volume_N
  354. type_score_dict_ocr['volume_total_score'] = volume_score
  355. type_score_dict_ocr['volume_count'] = item_count
  356. type_score_dict_ocr['volume_score'] = item_score
  357. else:
  358. return all_structure
  359. else:
  360. if keyword_item3.search(C_s):
  361. if (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '题' or num_back1 == '小') and num_back2 == '分': # 第*卷,共*题,共*分
  362. volume_N = N_s[0]
  363. volume_score = N_s[2]
  364. item_count = N_s[1]
  365. item_score = volume_score / item_count
  366. type_score_dict_ocr['volume_N'] = volume_N
  367. type_score_dict_ocr['volume_total_score'] = volume_score
  368. type_score_dict_ocr['volume_count'] = item_count
  369. type_score_dict_ocr['volume_score'] = item_score
  370. elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第*卷,共*分,共*题
  371. volume_N = N_s[0]
  372. volume_score = N_s[1]
  373. item_count = N_s[2]
  374. item_score = volume_score / item_count
  375. type_score_dict_ocr['volume_N'] = volume_N
  376. type_score_dict_ocr['volume_total_score'] = volume_score
  377. type_score_dict_ocr['volume_count'] = item_count
  378. type_score_dict_ocr['volume_score'] = item_score
  379. elif num_back0 == '.' and num_infer1 == '.' and (num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第卷,共*.*分,共*题
  380. volume_N = -1
  381. volume_score = N_s[0]
  382. item_count = N_s[2]
  383. item_score = volume_score / item_count
  384. type_score_dict_ocr['volume_N'] = volume_N
  385. type_score_dict_ocr['volume_total_score'] = volume_score
  386. type_score_dict_ocr['volume_count'] = item_count
  387. type_score_dict_ocr['volume_score'] = item_score
  388. elif num_back1 == '.' and num_infer2 == '.' and (num_back0 == '题' or num_back0 == '小') and num_back2 == '分': # 第卷,共*题,共*.*分
  389. volume_N = -1
  390. volume_score = N_s[1]
  391. item_count = N_s[0]
  392. item_score = volume_score / item_count
  393. type_score_dict_ocr['volume_N'] = volume_N
  394. type_score_dict_ocr['volume_total_score'] = volume_score
  395. type_score_dict_ocr['volume_count'] = item_count
  396. type_score_dict_ocr['volume_score'] = item_score
  397. else:
  398. return all_structure
  399. else:
  400. if num_back2 == '分': # 第*卷,共*.*分
  401. volume_N = int(N_s[0])
  402. volume_score = N_s[1]
  403. item_score = -1
  404. item_count = -1
  405. type_score_dict_ocr['volume_N'] = volume_N
  406. type_score_dict_ocr['volume_total_score'] = volume_score
  407. type_score_dict_ocr['volume_count'] = item_count
  408. type_score_dict_ocr['volume_score'] = item_score
  409. else:
  410. return all_structure
  411. else:
  412. if keyword_item2.search(C_s):
  413. if keyword_item3.search(C_s):
  414. if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,共*题,每题*分
  415. volume_N = N_s[0]
  416. item_count = N_s[1]
  417. item_score = N_s[2]
  418. volume_score = item_count * item_score
  419. type_score_dict_ocr['volume_N'] = volume_N
  420. type_score_dict_ocr['volume_total_score'] = volume_score
  421. type_score_dict_ocr['volume_count'] = item_count
  422. type_score_dict_ocr['volume_score'] = item_score
  423. elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分': # 第*卷,每题*分,共*题
  424. volume_N = N_s[0]
  425. item_count = N_s[2]
  426. item_score = N_s[1]
  427. volume_score = item_count * item_score
  428. type_score_dict_ocr['volume_N'] = volume_N
  429. type_score_dict_ocr['volume_total_score'] = volume_score
  430. type_score_dict_ocr['volume_count'] = item_count
  431. type_score_dict_ocr['volume_score'] = item_score
  432. elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分': # 第卷,每题*.*分,共*题
  433. volume_N = -1
  434. item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
  435. item_count = N_s[2]
  436. volume_score = item_score * item_count
  437. type_score_dict_ocr['volume_N'] = volume_N
  438. type_score_dict_ocr['volume_total_score'] = volume_score
  439. type_score_dict_ocr['volume_count'] = item_count
  440. type_score_dict_ocr['volume_score'] = item_score
  441. elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第卷,共*题,每题*.*分
  442. volume_N = -1
  443. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  444. item_count = N_s[0]
  445. volume_score = item_score * item_count
  446. type_score_dict_ocr['volume_N'] = volume_N
  447. type_score_dict_ocr['volume_total_score'] = volume_score
  448. type_score_dict_ocr['volume_count'] = item_count
  449. type_score_dict_ocr['volume_score'] = item_score
  450. else:
  451. return all_structure
  452. else:
  453. if num_back2 == '分': # 第*卷,小题*.*分
  454. volume_N = int(N_s[0])
  455. volume_score = -1
  456. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  457. item_count = -1
  458. type_score_dict_ocr['volume_N'] = volume_N
  459. type_score_dict_ocr['volume_total_score'] = volume_score
  460. type_score_dict_ocr['volume_count'] = item_count
  461. type_score_dict_ocr['volume_score'] = item_score
  462. else:
  463. return all_structure
  464. elif len(N_s) == 4:
  465. num_index1 = s.index(N_s[0])
  466. num_infer0 = s[num_index1 - len(N_s[0])]
  467. num_back0 = s[num_index1 + len(N_s[0])]
  468. all_1 = find_repeat(s, N_s[1])
  469. temp1 = 0
  470. for ii in range(len(N_s[0])):
  471. if N_s[0][ii] == N_s[1]:
  472. temp1 = temp1 + 1
  473. num_index2 = all_1[temp1]
  474. num_infer1 = s[num_index2 - len(N_s[1])]
  475. num_back1 = s[num_index2 + len(N_s[1])]
  476. all_2 = find_repeat(s, N_s[2])
  477. temp2 = 0
  478. for ii in range(len(N_s[0])):
  479. if N_s[0][ii] == N_s[2]:
  480. temp2 = temp2 + 1
  481. for jj in range(len(N_s[1])):
  482. if N_s[1][jj] == N_s[2]:
  483. temp2 = temp2 + 1
  484. num_index3 = all_2[temp2]
  485. num_infer2 = s[num_index3 - len(N_s[2])]
  486. num_back2 = s[num_index3 + len(N_s[2])]
  487. all_3 = find_repeat(s, N_s[3])
  488. temp3 = 0
  489. for ii in range(len(N_s[0])):
  490. if N_s[0][ii] == N_s[3]:
  491. temp3 = temp3 + 1
  492. for jj in range(len(N_s[1])):
  493. if N_s[1][jj] == N_s[3]:
  494. temp3 = temp3 + 1
  495. for kk in range(len(N_s[2])):
  496. if N_s[2][kk] == N_s[3]:
  497. temp3 = temp3 + 1
  498. num_index4 = all_3[temp3]
  499. num_infer3 = s[num_index4 - len(N_s[3])]
  500. num_back3 = s[num_index4 + len(N_s[3])]
  501. if isinstance(N_s[0], str):
  502. N_s[0] = int(N_s[0])
  503. if isinstance(N_s[1], str):
  504. N_s[1] = int(N_s[1])
  505. if isinstance(N_s[2], str):
  506. N_s[2] = int(N_s[2])
  507. if isinstance(N_s[3], str):
  508. N_s[3] = int(N_s[3])
  509. if keyword_item1.search(C_s):
  510. if keyword_item2.search(C_s):
  511. if keyword_item3.search(C_s):
  512. if (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分' and num_back3 == '分':
  513. if N_s[3] > N_s[1]: # 第*卷,每题*分,共*题,共*分
  514. volume_N = N_s[0]
  515. volume_score = N_s[3]
  516. item_count = N_s[2]
  517. item_score = N_s[1]
  518. else: # 第*卷,共*分,共*题,每题*分
  519. volume_N = N_s[0]
  520. volume_score = N_s[1]
  521. item_count = N_s[2]
  522. item_score = N_s[3]
  523. type_score_dict_ocr['volume_N'] = volume_N
  524. type_score_dict_ocr['volume_total_score'] = volume_score
  525. type_score_dict_ocr['volume_count'] = item_count
  526. type_score_dict_ocr['volume_score'] = item_score
  527. elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分' and num_back3 == '分':
  528. if N_s[3] > N_s[2]: # 第*卷,共*题,每题*分,共*分
  529. volume_N = N_s[0]
  530. volume_score = N_s[3]
  531. item_count = N_s[1]
  532. item_score = N_s[2]
  533. else: # 第*卷,共*题,共*分 , 每题*分
  534. volume_N = N_s[0]
  535. volume_score = N_s[2]
  536. item_count = N_s[1]
  537. item_score = N_s[3]
  538. type_score_dict_ocr['volume_N'] = volume_N
  539. type_score_dict_ocr['volume_total_score'] = volume_score
  540. type_score_dict_ocr['volume_count'] = item_count
  541. type_score_dict_ocr['volume_score'] = item_score
  542. elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分' and num_back1 == '分':
  543. if N_s[1] > N_s[2]: # 第*卷,共*分,每题*分,共*题
  544. volume_N = N_s[0]
  545. volume_score = N_s[1]
  546. item_count = N_s[3]
  547. item_score = N_s[2]
  548. else: # 第*卷,每题*分,共*分,共*题
  549. volume_N = N_s[0]
  550. volume_score = N_s[2]
  551. item_count = N_s[3]
  552. item_score = N_s[1]
  553. type_score_dict_ocr['volume_N'] = volume_N
  554. type_score_dict_ocr['volume_total_score'] = volume_score
  555. type_score_dict_ocr['volume_count'] = item_count
  556. type_score_dict_ocr['volume_score'] = item_score
  557. elif (num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back2 == '分': # 第卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题
  558. volume_N = -1
  559. if int(N_s[0]) > int(N_s[2]):
  560. volume_score = N_s[0]
  561. item_score = N_s[2]
  562. item_count = int(volume_score / item_score)
  563. else:
  564. volume_score = N_s[2]
  565. item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
  566. item_count = int(volume_score / item_score)
  567. type_score_dict_ocr['volume_N'] = volume_N
  568. type_score_dict_ocr['volume_total_score'] = volume_score
  569. type_score_dict_ocr['volume_count'] = item_count
  570. type_score_dict_ocr['volume_score'] = item_score
  571. elif (num_back1 == '.' and num_infer2 == '.') and num_back0 == '分' and num_back2 == '分': # 第卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题
  572. volume_N = -1
  573. if int(N_s[0]) > int(N_s[1]):
  574. volume_score = N_s[0]
  575. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  576. item_count = int(volume_score / item_score)
  577. else:
  578. volume_score = N_s[1]
  579. item_score = N_s[0]
  580. item_count = int(volume_score / item_score)
  581. type_score_dict_ocr['volume_N'] = volume_N
  582. type_score_dict_ocr['volume_total_score'] = volume_score
  583. type_score_dict_ocr['volume_count'] = item_count
  584. type_score_dict_ocr['volume_score'] = item_score
  585. elif (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分
  586. volume_N = -1
  587. if N_s[1] > N_s[3]:
  588. volume_score = N_s[1]
  589. item_score = N_s[3]
  590. item_count = int(volume_score / item_score)
  591. else:
  592. volume_score = N_s[4]
  593. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  594. item_count = int(volume_score / item_score)
  595. type_score_dict_ocr['volume_N'] = volume_N
  596. type_score_dict_ocr['volume_total_score'] = volume_score
  597. type_score_dict_ocr['volume_count'] = item_count
  598. type_score_dict_ocr['volume_score'] = item_score
  599. elif (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分
  600. volume_N = -1
  601. if int(N_s[1]) > int(N_s[2]):
  602. volume_score = N_s[1]
  603. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  604. item_count = int(volume_score / item_score)
  605. else:
  606. volume_score = N_s[2]
  607. item_score = N_s[1]
  608. item_count = int(volume_score / item_score)
  609. type_score_dict_ocr['volume_N'] = volume_N
  610. type_score_dict_ocr['volume_total_score'] = volume_score
  611. type_score_dict_ocr['volume_count'] = item_count
  612. type_score_dict_ocr['volume_score'] = item_score
  613. elif (num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*.*分,共*题,每题*分/第卷,每题*.*分,共*题,共*分
  614. volume_N = -1
  615. if int(N_s[0]) > int(N_s[3]):
  616. volume_score = N_s[0]
  617. item_score = N_s[3]
  618. item_count = int(volume_score / item_score)
  619. else:
  620. volume_score = N_s[3]
  621. item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
  622. item_count = int(volume_score / item_score)
  623. type_score_dict_ocr['volume_N'] = volume_N
  624. type_score_dict_ocr['volume_total_score'] = volume_score
  625. type_score_dict_ocr['volume_count'] = item_count
  626. type_score_dict_ocr['volume_score'] = item_score
  627. elif (num_back2 == '.' and num_infer3 == '.') and num_back0 == '分' and num_back3 == '分': # 第卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分
  628. volume_N = -1
  629. if int(N_s[0]) > int(N_s[2]):
  630. volume_score = N_s[0]
  631. item_score = N_s[2] + '.' + N_s[3]
  632. item_count = int(volume_score / item_score)
  633. else:
  634. volume_score = N_s[2]
  635. item_score = N_s[0]
  636. item_count = int(volume_score / item_score)
  637. type_score_dict_ocr['volume_N'] = volume_N
  638. type_score_dict_ocr['volume_total_score'] = volume_score
  639. type_score_dict_ocr['volume_count'] = item_count
  640. type_score_dict_ocr['volume_score'] = item_score
  641. else:
  642. if (num_back0 == '卷' or num_back0 == '部') and num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分/第*卷,共*.*分,每题*分
  643. volume_N = int(N_s[0])
  644. if N_s[1] > N_s[3]:
  645. volume_score = N_s[1]
  646. item_score = N_s[3]
  647. item_count = int(volume_score / item_score)
  648. else:
  649. volume_score = N_s[3]
  650. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  651. item_count = int(volume_score / item_score)
  652. type_score_dict_ocr['volume_N'] = volume_N
  653. type_score_dict_ocr['volume_total_score'] = volume_score
  654. type_score_dict_ocr['volume_count'] = item_count
  655. type_score_dict_ocr['volume_score'] = item_score
  656. elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分/第*卷,共*分,每题*.*分
  657. volume_N = int(N_s[0])
  658. if int(N_s[1]) > int(N_s[2]):
  659. volume_score = N_s[1]
  660. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  661. item_count = int(volume_score / item_score)
  662. else:
  663. volume_score = N_s[2]
  664. item_score = N_s[1]
  665. item_count = int(volume_score / item_score)
  666. type_score_dict_ocr['volume_N'] = volume_N
  667. type_score_dict_ocr['volume_total_score'] = volume_score
  668. type_score_dict_ocr['volume_count'] = item_count
  669. type_score_dict_ocr['volume_score'] = item_score
  670. elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分' and num_back2 == '.' and num_infer3 == '.' and num_back3 == '分': # 第卷,每题*.*分,共*.*分/第卷,共*.*分,每题*.*分
  671. volume_N = -1
  672. if N_s[0] > N_s[2]:
  673. volume_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  674. item_score = N_s[3]
  675. item_count = int(volume_score / item_score)
  676. else:
  677. volume_score = N_s[3]
  678. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  679. item_count = int(volume_score / item_score)
  680. type_score_dict_ocr['volume_N'] = volume_N
  681. type_score_dict_ocr['volume_total_score'] = volume_score
  682. type_score_dict_ocr['volume_count'] = item_count
  683. type_score_dict_ocr['volume_score'] = item_score
  684. elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分' and num_back2 == '分':
  685. if N_s[3] > N_s[1]: # 第*卷,每题*.*分,共*分
  686. volume_N = N_s[0]
  687. volume_score = N_s[3]
  688. item_count = -1
  689. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  690. else: # 第*卷,共*.*分,每题*分
  691. volume_N = N_s[0]
  692. volume_score = N_s[1]
  693. item_count = -1
  694. item_score = N_s[3]
  695. type_score_dict_ocr['volume_N'] = volume_N
  696. type_score_dict_ocr['volume_total_score'] = volume_score
  697. type_score_dict_ocr['volume_count'] = item_count
  698. type_score_dict_ocr['volume_score'] = item_score
  699. else:
  700. return all_structure
  701. else:
  702. if keyword_item3.search(C_s):
  703. if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,共*.*分,*小题
  704. volume_N = N_s[0]
  705. volume_score = N_s[1]
  706. item_count = N_s[3]
  707. item_score = -1
  708. type_score_dict_ocr['volume_N'] = volume_N
  709. type_score_dict_ocr['volume_total_score'] = volume_score
  710. type_score_dict_ocr['volume_count'] = item_count
  711. type_score_dict_ocr['volume_score'] = item_score
  712. elif (num_back0 == '卷' or num_back0 == '部') and num_back3 == '分': # 第*卷,*小题 ,共*.*分
  713. volume_N = N_s[0]
  714. volume_score = N_s[2]
  715. item_count = N_s[1]
  716. item_score = -1
  717. type_score_dict_ocr['volume_N'] = volume_N
  718. type_score_dict_ocr['volume_total_score'] = volume_score
  719. type_score_dict_ocr['volume_count'] = item_count
  720. type_score_dict_ocr['volume_score'] = item_score
  721. else:
  722. return all_structure
  723. else:
  724. return all_structure
  725. else:
  726. if keyword_item2.search(C_s):
  727. if keyword_item3.search(C_s):
  728. if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,小题*.*分,*小题
  729. volume_N = N_s[0]
  730. volume_score = -1
  731. item_count = N_s[3]
  732. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  733. type_score_dict_ocr['volume_N'] = volume_N
  734. type_score_dict_ocr['volume_total_score'] = volume_score
  735. type_score_dict_ocr['volume_count'] = item_count
  736. type_score_dict_ocr['volume_score'] = item_score
  737. elif (num_back0 == '卷' or num_back0 == '部') and num_back3 == '分': # 第*卷,*小题 ,小题*.*分
  738. volume_N = N_s[0]
  739. volume_score = -1
  740. item_count = N_s[1]
  741. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  742. type_score_dict_ocr['volume_N'] = volume_N
  743. type_score_dict_ocr['volume_total_score'] = volume_score
  744. type_score_dict_ocr['volume_count'] = item_count
  745. type_score_dict_ocr['volume_score'] = item_score
  746. else:
  747. return all_structure
  748. else:
  749. return all_structure
  750. else:
  751. return all_structure
  752. elif len(N_s) == 5:
  753. num_index1 = s.index(N_s[0])
  754. num_infer0 = s[num_index1 - len(N_s[0])]
  755. num_back0 = s[num_index1 + len(N_s[0])]
  756. all_1 = find_repeat(s, N_s[1])
  757. temp1 = 0
  758. for ii in range(len(N_s[0])):
  759. if N_s[0][ii] == N_s[1]:
  760. temp1 = temp1 + 1
  761. num_index2 = all_1[temp1]
  762. num_infer1 = s[num_index2 - len(N_s[1])]
  763. num_back1 = s[num_index2 + len(N_s[1])]
  764. all_2 = find_repeat(s, N_s[2])
  765. temp2 = 0
  766. for ii in range(len(N_s[0])):
  767. if N_s[0][ii] == N_s[2]:
  768. temp2 = temp2 + 1
  769. for jj in range(len(N_s[1])):
  770. if N_s[1][jj] == N_s[2]:
  771. temp2 = temp2 + 1
  772. num_index3 = all_2[temp2]
  773. num_infer2 = s[num_index3 - len(N_s[2])]
  774. num_back2 = s[num_index3 + len(N_s[2])]
  775. all_3 = find_repeat(s, N_s[3])
  776. temp3 = 0
  777. for ii in range(len(N_s[0])):
  778. if N_s[0][ii] == N_s[3]:
  779. temp3 = temp3 + 1
  780. for jj in range(len(N_s[1])):
  781. if N_s[1][jj] == N_s[3]:
  782. temp3 = temp3 + 1
  783. for kk in range(len(N_s[2])):
  784. if N_s[2][kk] == N_s[3]:
  785. temp3 = temp3 + 1
  786. num_index4 = all_3[temp3]
  787. num_infer3 = s[num_index4 - len(N_s[3])]
  788. num_back3 = s[num_index4 + len(N_s[3])]
  789. all_4 = find_repeat(s, N_s[4])
  790. temp4 = 0
  791. for ii in range(len(N_s[0])):
  792. if N_s[0][ii] == N_s[4]:
  793. temp4 = temp4 + 1
  794. for jj in range(len(N_s[1])):
  795. if N_s[1][jj] == N_s[4]:
  796. temp4 = temp4 + 1
  797. for kk in range(len(N_s[2])):
  798. if N_s[2][kk] == N_s[4]:
  799. temp4 = temp4 + 1
  800. for ll in range(len(N_s[3])):
  801. if N_s[3][ll] == N_s[4]:
  802. temp4 = temp4 + 1
  803. num_index5 = all_4[temp4]
  804. num_infer4 = s[num_index5 - len(N_s[4])]
  805. num_back4 = s[num_index5 + len(N_s[4])]
  806. if isinstance(N_s[0], str):
  807. N_s[0] = int(N_s[0])
  808. if isinstance(N_s[1], str):
  809. N_s[1] = int(N_s[1])
  810. if isinstance(N_s[2], str):
  811. N_s[2] = int(N_s[2])
  812. if isinstance(N_s[3], str):
  813. N_s[3] = int(N_s[3])
  814. if isinstance(N_s[4], str):
  815. N_s[4] = int(N_s[4])
  816. if keyword_item1.search(C_s):
  817. if keyword_item2.search(C_s):
  818. if keyword_item3.search(C_s):
  819. if (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题
  820. volume_N = N_s[0]
  821. if N_s[1] > N_s[3]:
  822. volume_score = N_s[1]
  823. item_score = N_s[3]
  824. item_count = int(volume_score / item_score)
  825. type_score_dict_ocr['volume_N'] = volume_N
  826. type_score_dict_ocr['volume_total_score'] = volume_score
  827. type_score_dict_ocr['volume_count'] = item_count
  828. type_score_dict_ocr['volume_score'] = item_score
  829. else:
  830. volume_score = N_s[3]
  831. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  832. item_count = int(volume_score / item_score)
  833. type_score_dict_ocr['volume_N'] = volume_N
  834. type_score_dict_ocr['volume_total_score'] = volume_score
  835. type_score_dict_ocr['volume_count'] = item_count
  836. type_score_dict_ocr['volume_score'] = item_score
  837. elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题
  838. volume_N = N_s[0]
  839. if N_s[1] > N_s[2]:
  840. volume_score = N_s[1]
  841. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  842. item_count = int(volume_score / item_score)
  843. type_score_dict_ocr['volume_N'] = volume_N
  844. type_score_dict_ocr['volume_total_score'] = volume_score
  845. type_score_dict_ocr['volume_count'] = item_count
  846. type_score_dict_ocr['volume_score'] = item_score
  847. else:
  848. volume_score = N_s[2]
  849. item_score = N_s[1]
  850. item_count = int(volume_score / item_score)
  851. type_score_dict_ocr['volume_N'] = volume_N
  852. type_score_dict_ocr['volume_total_score'] = volume_score
  853. type_score_dict_ocr['volume_count'] = item_count
  854. type_score_dict_ocr['volume_score'] = item_score
  855. elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back3 == '分' and num_back4 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分
  856. volume_N = N_s[0]
  857. if N_s[2] > N_s[4]:
  858. volume_score = N_s[2]
  859. item_score = N_s[4]
  860. item_count = int(volume_score / item_score)
  861. type_score_dict_ocr['volume_N'] = volume_N
  862. type_score_dict_ocr['volume_total_score'] = volume_score
  863. type_score_dict_ocr['volume_count'] = item_count
  864. type_score_dict_ocr['volume_score'] = item_score
  865. else:
  866. volume_score = N_s[4]
  867. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  868. item_count = int(volume_score / item_score)
  869. type_score_dict_ocr['volume_N'] = volume_N
  870. type_score_dict_ocr['volume_total_score'] = volume_score
  871. type_score_dict_ocr['volume_count'] = item_count
  872. type_score_dict_ocr['volume_score'] = item_score
  873. elif (num_back0 == '卷' or num_back0 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分
  874. volume_N = N_s[0]
  875. if N_s[2] > N_s[3]:
  876. volume_score = N_s[2]
  877. item_score = float(str(N_s[3]) + '.' + str(N_s[4]))
  878. item_count = int(volume_score / item_score)
  879. type_score_dict_ocr['volume_N'] = volume_N
  880. type_score_dict_ocr['volume_total_score'] = volume_score
  881. type_score_dict_ocr['volume_count'] = item_count
  882. type_score_dict_ocr['volume_score'] = item_score
  883. else:
  884. volume_score = N_s[3]
  885. item_score = N_s[2]
  886. item_count = int(volume_score / item_score)
  887. type_score_dict_ocr['volume_N'] = volume_N
  888. type_score_dict_ocr['volume_total_score'] = volume_score
  889. type_score_dict_ocr['volume_count'] = item_count
  890. type_score_dict_ocr['volume_score'] = item_score
  891. elif (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*.*分,共*题,每题*分/第*卷,每题*.*分,共*题,共*分
  892. volume_N = N_s[0]
  893. if N_s[1] > N_s[4]:
  894. volume_score = N_s[1]
  895. item_score = N_s[4]
  896. item_count = int(volume_score / item_score)
  897. type_score_dict_ocr['volume_N'] = volume_N
  898. type_score_dict_ocr['volume_total_score'] = volume_score
  899. type_score_dict_ocr['volume_count'] = item_count
  900. type_score_dict_ocr['volume_score'] = item_score
  901. else:
  902. volume_score = N_s[4]
  903. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  904. item_count = int(volume_score / item_score)
  905. type_score_dict_ocr['volume_N'] = volume_N
  906. type_score_dict_ocr['volume_total_score'] = volume_score
  907. type_score_dict_ocr['volume_count'] = item_count
  908. type_score_dict_ocr['volume_score'] = item_score
  909. elif (num_back0 == '卷' or num_back0 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back1 == '分' and num_back4 == '分': # 第*卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分
  910. volume_N = N_s[0]
  911. if N_s[1] > N_s[3]:
  912. volume_score = N_s[0]
  913. item_score = float(str(N_s[4]) + '.' + str(N_s[4]))
  914. item_count = int(volume_score / item_score)
  915. type_score_dict_ocr['volume_N'] = volume_N
  916. type_score_dict_ocr['volume_total_score'] = volume_score
  917. type_score_dict_ocr['volume_count'] = item_count
  918. type_score_dict_ocr['volume_score'] = item_score
  919. else:
  920. volume_score = N_s[3]
  921. item_score = N_s[1]
  922. item_count = int(volume_score / item_score)
  923. type_score_dict_ocr['volume_N'] = volume_N
  924. type_score_dict_ocr['volume_total_score'] = volume_score
  925. type_score_dict_ocr['volume_count'] = item_count
  926. type_score_dict_ocr['volume_score'] = item_score
  927. else:
  928. return all_structure
  929. else:
  930. return all_structure
  931. else:
  932. return all_structure
  933. else:
  934. return all_structure
  935. if 'volume_N' not in type_score_dict_ocr.keys():
  936. all_structure = {'volume_structure': -1,
  937. 'Score_structure': -1}
  938. return all_structure
  939. else:
  940. for i in range(len_keyword_type1):
  941. if C_s.find(keyword_type1[i]) != -1 and C_s.find('非') != -1:
  942. type_score_dict_ocr['keyword_type'] = keyword_type1[1]
  943. break
  944. elif C_s.find(keyword_type1[0]) != -1:
  945. type_score_dict_ocr['keyword_type'] = keyword_type1[0]
  946. Score_structure_item = type_score_dict_ocr
  947. Score_structure.append(Score_structure_item)
  948. break
  949. elif C_s.find(keyword_type1[i]) != -1:
  950. type_score_dict_ocr['keyword_type'] = keyword_type1[i]
  951. break
  952. elif i == len_keyword_type1 - 1:
  953. type_score_dict_ocr['keyword_type'] = keyword_type1[0]
  954. Score_structure_item = type_score_dict_ocr
  955. Score_structure.append(Score_structure_item)
  956. break
  957. volume_structure_item = type_score_dict_ocr
  958. volume_structure.append(volume_structure_item)
  959. if Score_structure == []:
  960. all_structure = {'volume_structure': volume_structure,
  961. 'Score_structure': -1}
  962. else:
  963. all_structure = {'volume_structure': volume_structure,
  964. 'Score_structure': Score_structure}
  965. else:
  966. '''
  967. 对应试卷中不存在分卷信息的情况,根据包含数字的个数分为4类,暂定包含信息的有效数字个数小于4,并处理小题分数和总分可能包含小数点的情况
  968. 暂定小题个数不包含小数
  969. 暂定总分数中不存在有意义的小数位
  970. '''
  971. if keyword_item1.search(C_s):
  972. if len(N_s) == 1:
  973. num_index = s.index(N_s[0])
  974. num_infer = s[num_index - len(N_s[0])]
  975. num_back = s[num_index + len(N_s[0])]
  976. if isinstance(N_s[0], str):
  977. N_s[0] = int(N_s[0])
  978. if num_back == '分': # 选择题/主观题,共*分
  979. item_total_score = N_s[0]
  980. type_score_dict_ocr['volume_N'] = -1
  981. type_score_dict_ocr['volume_total_score'] = int(item_total_score)
  982. type_score_dict_ocr['volume_count'] = -1
  983. type_score_dict_ocr['volume_score'] = -1
  984. elif len(N_s) == 2:
  985. num_index1 = s.index(N_s[0])
  986. num_infer0 = s[num_index1 - len(N_s[0])]
  987. num_back0 = s[num_index1 + len(N_s[0])]
  988. all_1 = find_repeat(s, N_s[1])
  989. temp1 = 0
  990. for ii in range(len(N_s[0])):
  991. if N_s[0][ii] == N_s[1]:
  992. temp1 = temp1 + 1
  993. num_index2 = all_1[temp1]
  994. num_infer1 = s[num_index2 - len(N_s[1])]
  995. num_back1 = s[num_index2 + len(N_s[1])]
  996. if isinstance(N_s[0], str):
  997. N_s[0] = int(N_s[0])
  998. if isinstance(N_s[1], str):
  999. N_s[1] = int(N_s[1])
  1000. if keyword_item2.search(C_s):
  1001. if N_s[0] > N_s[1]: # 选择题/主观题/客观题,共*分,每题*分
  1002. item_total_score = int(N_s[0])
  1003. item_count = int(N_s[0] / N_s[1])
  1004. item_score = N_s[1]
  1005. else: # 选择题/主观题/客观题,每题*分,共*分
  1006. item_total_score = int(N_s[1])
  1007. item_count = int(N_s[1] / N_s[0])
  1008. item_score = N_s[0]
  1009. type_score_dict_ocr['volume_N'] = -1
  1010. type_score_dict_ocr['volume_total_score'] = item_total_score
  1011. type_score_dict_ocr['volume_count'] = item_count
  1012. type_score_dict_ocr['volume_score'] = item_score
  1013. else:
  1014. if keyword_item3.search(C_s):
  1015. if num_back0 == '分': # 选择题/主观题,共*分,共*题
  1016. item_total_score = N_s[0]
  1017. item_count = N_s[1]
  1018. item_score = N_s[0] / N_s[1]
  1019. type_score_dict_ocr['volume_N'] = -1
  1020. type_score_dict_ocr['volume_total_score'] = item_total_score
  1021. type_score_dict_ocr['volume_count'] = item_count
  1022. type_score_dict_ocr['volume_score'] = item_score
  1023. elif num_back1 == '分': # 选择题/主观题,共*题,共*分
  1024. item_total_score = N_s[1]
  1025. item_count = N_s[0]
  1026. item_score = N_s[1] / N_s[0]
  1027. type_score_dict_ocr['volume_N'] = -1
  1028. type_score_dict_ocr['volume_total_score'] = item_total_score
  1029. type_score_dict_ocr['volume_count'] = item_count
  1030. type_score_dict_ocr['volume_score'] = item_score
  1031. else:
  1032. return all_structure
  1033. else:
  1034. if num_back0 == '.' and num_infer1 == '.' and num_back1 == '分': # *.*分
  1035. item_N = -1
  1036. item_total_score = N_s[0]
  1037. type_score_dict_ocr['volume_N'] = item_N
  1038. type_score_dict_ocr['volume_total_score'] = item_total_score
  1039. type_score_dict_ocr['volume_count'] = -1
  1040. type_score_dict_ocr['volume_score'] = -1
  1041. elif num_back1 == '分': # *,*分
  1042. item_N = N_s[0]
  1043. item_total_score = int(N_s[1])
  1044. type_score_dict_ocr['volume_N'] = item_N
  1045. type_score_dict_ocr['volume_total_score'] = item_total_score
  1046. type_score_dict_ocr['volume_count'] = -1
  1047. type_score_dict_ocr['volume_score'] = -1
  1048. else:
  1049. return all_structure
  1050. elif len(N_s) == 3:
  1051. num_index1 = s.index(N_s[0])
  1052. num_infer0 = s[num_index1 - len(N_s[0])]
  1053. num_back0 = s[num_index1 + len(N_s[0])]
  1054. all_1 = find_repeat(s, N_s[1])
  1055. temp1 = 0
  1056. for ii in range(len(N_s[0])):
  1057. if N_s[0][ii] == N_s[1]:
  1058. temp1 = temp1 + 1
  1059. num_index2 = all_1[temp1]
  1060. num_infer1 = s[num_index2 - len(N_s[1])]
  1061. num_back1 = s[num_index2 + len(N_s[1])]
  1062. all_2 = find_repeat(s, N_s[2])
  1063. temp2 = 0
  1064. for ii in range(len(N_s[0])):
  1065. if N_s[0][ii] == N_s[2]:
  1066. temp2 = temp2 + 1
  1067. for jj in range(len(N_s[1])):
  1068. if N_s[1][jj] == N_s[2]:
  1069. temp2 = temp2 + 1
  1070. num_index3 = all_2[temp2]
  1071. num_infer2 = s[num_index3 - len(N_s[2])]
  1072. num_back2 = s[num_index3 + len(N_s[2])]
  1073. if isinstance(N_s[0], str):
  1074. N_s[0] = int(N_s[0])
  1075. if isinstance(N_s[1], str):
  1076. N_s[1] = int(N_s[1])
  1077. if isinstance(N_s[2], str):
  1078. N_s[2] = int(N_s[2])
  1079. if keyword_item2.search(C_s):
  1080. if keyword_item3.search(C_s):
  1081. if num_back0 == '分' and num_back2 == '分':
  1082. if N_s[2] > N_s[0]: # 每题*分,共*题,共*分
  1083. item_total_score = N_s[2]
  1084. item_count = N_s[1]
  1085. item_score = N_s[0]
  1086. else: # 共*分,共*题,每题*分
  1087. item_total_score = N_s[0]
  1088. item_count = N_s[1]
  1089. item_score = N_s[2]
  1090. if item_total_score < item_count * item_score:
  1091. item_total_score = item_count * item_score
  1092. type_score_dict_ocr['volume_N'] = -1
  1093. type_score_dict_ocr['volume_total_score'] = item_total_score
  1094. type_score_dict_ocr['volume_count'] = item_count
  1095. type_score_dict_ocr['volume_score'] = item_score
  1096. elif (num_infer0 == '题' or num_infer0 == '空') and num_back0 == '分' and num_back1 == '分':
  1097. if N_s[1] > N_s[0]: # 每题*分,共*分 ,共*题
  1098. item_total_score = N_s[1]
  1099. item_count = N_s[2]
  1100. item_score = N_s[0]
  1101. else: # 共*分,每题*分 ,共*题
  1102. item_total_score = N_s[0]
  1103. item_count = N_s[2]
  1104. item_score = N_s[1]
  1105. if item_total_score < item_count * item_score:
  1106. item_total_score = item_count * item_score
  1107. type_score_dict_ocr['volume_N'] = -1
  1108. type_score_dict_ocr['volume_total_score'] = item_total_score
  1109. type_score_dict_ocr['volume_count'] = item_count
  1110. type_score_dict_ocr['volume_score'] = item_score
  1111. elif num_back1 == '分' and num_back2 == '分':
  1112. if N_s[2] > N_s[1]: # 共*题,每题*分,共*分
  1113. item_total_score = N_s[2]
  1114. item_count = N_s[0]
  1115. item_score = N_s[1]
  1116. else: # 共*题,共*分,每题*分
  1117. item_total_score = N_s[1]
  1118. item_count = N_s[0]
  1119. item_score = N_s[2]
  1120. if item_total_score < item_count * item_score:
  1121. item_total_score = item_count * item_score
  1122. type_score_dict_ocr['volume_N'] = -1
  1123. type_score_dict_ocr['volume_total_score'] = item_total_score
  1124. type_score_dict_ocr['volume_count'] = item_count
  1125. type_score_dict_ocr['volume_score'] = item_score
  1126. else:
  1127. return all_structure
  1128. else:
  1129. if num_back0 != '.' and num_back1 == '分' and num_back2 == '分':
  1130. if N_s[1] > N_s[2]: # *,共*分,每题*分
  1131. item_N = N_s[0]
  1132. item_total_score = N_s[1]
  1133. item_count = int(N_s[1] / N_s[2])
  1134. item_score = N_s[2]
  1135. else: # *,每题*分 ,共*分
  1136. item_N = N_s[0]
  1137. item_total_score = N_s[2]
  1138. item_count = int(N_s[2] / N_s[1])
  1139. item_score = N_s[1]
  1140. type_score_dict_ocr['volume_N'] = item_N
  1141. type_score_dict_ocr['volume_total_score'] = item_total_score
  1142. type_score_dict_ocr['volume_count'] = item_count
  1143. type_score_dict_ocr['volume_score'] = item_score
  1144. elif num_back0 == '.' and num_infer1 == '.' and num_back2 == '分' and num_back1 == '分': # 每题*.*分,共*分/共*.*分,每题*分
  1145. item_N = -1
  1146. if int(N_s[0]) > int(N_s[2]):
  1147. item_total_score = N_s[0]
  1148. item_score = N_s[2]
  1149. item_count = int(item_total_score/item_score)
  1150. else:
  1151. item_total_score = N_s[2]
  1152. item_score = float(str(N_s[0])+'.'+str(N_s[1]))
  1153. item_count = int(item_total_score / item_score)
  1154. type_score_dict_ocr['volume_N'] = item_N
  1155. type_score_dict_ocr['volume_total_score'] = item_total_score
  1156. type_score_dict_ocr['volume_count'] = item_count
  1157. type_score_dict_ocr['volume_score'] = item_score
  1158. elif num_back1 == '.' and num_infer2 == '.' and num_back0 == '分' and num_back2 == '分': # 每题*分,共*.*分/共*分,每题*.*分
  1159. item_N = -1
  1160. if int(N_s[0]) > int(N_s[1]):
  1161. item_total_score = N_s[0]
  1162. item_score = float(str(N_s[1])+'.'+str(N_s[2]))
  1163. item_count = int(item_total_score/item_score)
  1164. else:
  1165. item_total_score = N_s[1]
  1166. item_score = N_s[0]
  1167. item_count = int(item_total_score / item_score)
  1168. type_score_dict_ocr['volume_N'] = item_N
  1169. type_score_dict_ocr['volume_total_score'] = item_total_score
  1170. type_score_dict_ocr['volume_count'] = item_count
  1171. type_score_dict_ocr['volume_score'] = item_score
  1172. else:
  1173. return all_structure
  1174. else:
  1175. if keyword_item3.search(C_s):
  1176. if num_back2 == '分' and num_infer2 =='.' and num_back1 =='.': # *小题,共*.*分,
  1177. item_N = -1
  1178. item_total_score = N_s[1]
  1179. item_count = N_s[0]
  1180. item_score = N_s[1]/N_s[0]
  1181. type_score_dict_ocr['volume_N'] = item_N
  1182. type_score_dict_ocr['volume_total_score'] = item_total_score
  1183. type_score_dict_ocr['volume_count'] = item_count
  1184. type_score_dict_ocr['volume_score'] = item_score
  1185. elif num_back1 == '分' and num_infer1 =='.'and num_back0 =='.': # 共*.*分,*小题
  1186. item_N = -1
  1187. item_total_score = N_s[0]
  1188. item_count = N_s[2]
  1189. item_score = N_s[0]/N_s[2]
  1190. type_score_dict_ocr['volume_N'] = item_N
  1191. type_score_dict_ocr['volume_total_score'] = item_total_score
  1192. type_score_dict_ocr['volume_count'] = item_count
  1193. type_score_dict_ocr['volume_score'] = item_score
  1194. elif num_back2 == '分' and num_infer2 !='.': # *,*小题,共*分,
  1195. item_N = N_s[0]
  1196. item_total_score = N_s[2]
  1197. item_count = N_s[1]
  1198. item_score = N_s[2]/N_s[1]
  1199. type_score_dict_ocr['volume_N'] = item_N
  1200. type_score_dict_ocr['volume_total_score'] = item_total_score
  1201. type_score_dict_ocr['volume_count'] = item_count
  1202. type_score_dict_ocr['volume_score'] = item_score
  1203. elif num_back1 == '分' and num_infer1 !='.': # *,共*分,共*小题
  1204. item_N = N_s[0]
  1205. item_total_score = N_s[1]
  1206. item_count = N_s[2]
  1207. item_score = N_s[1] / N_s[2]
  1208. type_score_dict_ocr['volume_N'] = item_N
  1209. type_score_dict_ocr['volume_total_score'] = item_total_score
  1210. type_score_dict_ocr['volume_count'] = item_count
  1211. type_score_dict_ocr['volume_score'] = item_score
  1212. else:
  1213. return all_structure
  1214. else:
  1215. if num_back2 == '分' and num_infer2 =='.' and num_back1 =='.': # *,共*.*分,
  1216. item_N = N_s[0]
  1217. item_total_score = N_s[1]
  1218. item_count = -1
  1219. item_score = -1
  1220. type_score_dict_ocr['volume_N'] = item_N
  1221. type_score_dict_ocr['volume_total_score'] = item_total_score
  1222. type_score_dict_ocr['volume_count'] = item_count
  1223. type_score_dict_ocr['volume_score'] = item_score
  1224. elif num_back2 == '分':
  1225. item_total_score = N_s[1]
  1226. item_N = -1
  1227. item_count = -1
  1228. item_score = -1
  1229. type_score_dict_ocr['volume_N'] = item_N
  1230. type_score_dict_ocr['volume_total_score'] = item_total_score
  1231. type_score_dict_ocr['volume_count'] = item_count
  1232. type_score_dict_ocr['volume_score'] = item_score
  1233. else:
  1234. return all_structure
  1235. elif len(N_s) == 4:
  1236. num_index1 = s.index(N_s[0])
  1237. num_infer0 = s[num_index1 - len(N_s[0])]
  1238. num_back0 = s[num_index1 + len(N_s[0])]
  1239. all_1 = find_repeat(s, N_s[1])
  1240. temp1 = 0
  1241. for ii in range(len(N_s[0])):
  1242. if N_s[0][ii] == N_s[1]:
  1243. temp1 = temp1 + 1
  1244. num_index2 = all_1[temp1]
  1245. num_infer1 = s[num_index2 - len(N_s[1])]
  1246. num_back1 = s[num_index2 + len(N_s[1])]
  1247. all_2 = find_repeat(s, N_s[2])
  1248. temp2 = 0
  1249. for ii in range(len(N_s[0])):
  1250. if N_s[0][ii] == N_s[2]:
  1251. temp2 = temp2 + 1
  1252. for jj in range(len(N_s[1])):
  1253. if N_s[1][jj] == N_s[2]:
  1254. temp2 = temp2 + 1
  1255. num_index3 = all_2[temp2]
  1256. num_infer2 = s[num_index3 - len(N_s[2])]
  1257. num_back2 = s[num_index3 + len(N_s[2])]
  1258. all_3 = find_repeat(s, N_s[3])
  1259. temp3 = 0
  1260. for ii in range(len(N_s[0])):
  1261. if N_s[0][ii] == N_s[3]:
  1262. temp3 = temp3 + 1
  1263. for jj in range(len(N_s[1])):
  1264. if N_s[1][jj] == N_s[3]:
  1265. temp3 = temp3 + 1
  1266. for kk in range(len(N_s[2])):
  1267. if N_s[2][kk] == N_s[3]:
  1268. temp3 = temp3 + 1
  1269. num_index4 = all_3[temp3]
  1270. num_infer3 = s[num_index4 - len(N_s[3])]
  1271. num_back3 = s[num_index4 + len(N_s[3])]
  1272. if isinstance(N_s[0], str):
  1273. N_s[0] = int(N_s[0])
  1274. if isinstance(N_s[1], str):
  1275. N_s[1] = int(N_s[1])
  1276. if isinstance(N_s[2], str):
  1277. N_s[2] = int(N_s[2])
  1278. if isinstance(N_s[3], str):
  1279. N_s[3] = int(N_s[3])
  1280. if keyword_item2.search(C_s):
  1281. if keyword_item3.search(C_s):
  1282. if num_back1 == '分' and num_back3 == '分':
  1283. if N_s[3] > N_s[1]: # *,每题*分,共*题,共*分
  1284. item_N = N_s[0]
  1285. item_total_score = N_s[3]
  1286. item_count = N_s[2]
  1287. item_score = N_s[1]
  1288. else: # *,共*分,共*题,每题*分
  1289. item_N = N_s[0]
  1290. item_total_score = N_s[1]
  1291. item_count = N_s[2]
  1292. item_score = N_s[3]
  1293. type_score_dict_ocr['volume_N'] = item_N
  1294. type_score_dict_ocr[
  1295. 'volume_total_score'] = item_total_score
  1296. type_score_dict_ocr['volume_count'] = item_count
  1297. type_score_dict_ocr['volume_score'] = item_score
  1298. elif num_back1 == '分' and num_back2 == '分':
  1299. if N_s[2] > N_s[1]: # *,每题*分,共*分,共*题
  1300. item_N = N_s[0]
  1301. item_total_score = N_s[2]
  1302. item_count = N_s[3]
  1303. item_score = N_s[1]
  1304. else: # *,共*分,每题*分,共*题
  1305. item_N = N_s[0]
  1306. item_total_score = N_s[1]
  1307. item_count = N_s[3]
  1308. item_score = N_s[2]
  1309. type_score_dict_ocr['volume_N'] = item_N
  1310. if item_total_score < item_count * item_score:
  1311. item_total_score = item_count * item_score
  1312. type_score_dict_ocr[
  1313. 'volume_total_score'] = item_total_score
  1314. type_score_dict_ocr['volume_count'] = item_count
  1315. type_score_dict_ocr['volume_score'] = item_score
  1316. elif num_back2 == '分' and num_back3 == '分':
  1317. if N_s[3] > N_s[2]: # *,共*题,每题*分,共*分
  1318. item_N = N_s[0]
  1319. item_total_score = N_s[3]
  1320. item_count = N_s[1]
  1321. item_score = N_s[2]
  1322. else:
  1323. item_N = N_s[0]
  1324. item_total_score = N_s[2]
  1325. item_count = N_s[1]
  1326. item_score = N_s[3]
  1327. if item_total_score < item_count * item_score:
  1328. item_total_score = item_count * item_score
  1329. type_score_dict_ocr['volume_N'] = item_N
  1330. type_score_dict_ocr[
  1331. 'volume_total_score'] = item_total_score
  1332. type_score_dict_ocr['volume_count'] = item_count
  1333. type_score_dict_ocr['volume_score'] = item_score
  1334. elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分'and num_back3 == '分' : # 共*.*分,共*题, 每题*分/每题*.*分,共*题,共*分
  1335. item_N = -1
  1336. if N_s[0] > N_s[3]:
  1337. item_total_score = N_s[0]
  1338. item_score = N_s[3]
  1339. item_count = int(item_total_score / item_score)
  1340. else:
  1341. item_total_score = N_s[3]
  1342. item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
  1343. item_count = int(item_total_score / item_score)
  1344. type_score_dict_ocr['volume_N'] = item_N
  1345. type_score_dict_ocr[ 'item_total_score'] = item_total_score
  1346. type_score_dict_ocr['item_count'] = item_count
  1347. type_score_dict_ocr['item_score'] = item_score
  1348. elif num_back2 == '.' and num_infer3 == '.' and num_back0 == '分'and num_back3 == '分': # 共*分,共*题, 每题*.*分/每题*分,共*题,共*.*分
  1349. item_N = -1
  1350. if N_s[0] > N_s[2]:
  1351. item_total_score = N_s[0]
  1352. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  1353. item_count = int(item_total_score / item_score)
  1354. else:
  1355. item_total_score = N_s[2]
  1356. item_score = N_s[0]
  1357. item_count = int(item_total_score / item_score)
  1358. type_score_dict_ocr['volume_N'] = item_N
  1359. type_score_dict_ocr[ 'item_total_score'] = item_total_score
  1360. type_score_dict_ocr['item_count'] = item_count
  1361. type_score_dict_ocr['item_score'] = item_score
  1362. elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back3 == '分': # 共*题,共*.*分,每题*分/共*题,每题*.*分,共*分
  1363. item_N = -1
  1364. if N_s[1] > N_s[3]:
  1365. item_total_score = N_s[1]
  1366. item_score = N_s[3]
  1367. item_count = int(item_total_score / item_score)
  1368. else:
  1369. item_total_score = N_s[3]
  1370. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  1371. item_count = int(item_total_score / item_score)
  1372. type_score_dict_ocr['volume_N'] = item_N
  1373. type_score_dict_ocr['volume_total_score'] = item_total_score
  1374. type_score_dict_ocr['volume_count'] = item_count
  1375. type_score_dict_ocr['volume_score'] = item_score
  1376. elif num_back2 == '.' and num_infer3 == '.' and num_back3 == '分'and num_back1 == '分' : # 共*题,共*分,每题*.*分/共*题,每题*分,共*.*分
  1377. item_N = -1
  1378. if N_s[1] > N_s[2]:
  1379. item_total_score = N_s[1]
  1380. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  1381. item_count = int(item_total_score / item_score)
  1382. else:
  1383. item_total_score = N_s[2]
  1384. item_score = N_s[1]
  1385. item_count = int(item_total_score / item_score)
  1386. type_score_dict_ocr['volume_N'] = item_N
  1387. type_score_dict_ocr['volume_total_score'] = item_total_score
  1388. type_score_dict_ocr['volume_count'] = item_count
  1389. type_score_dict_ocr['volume_score'] = item_score
  1390. elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分'and num_back2 == '分' : # 每题*.*分,共*分,共*题/共*.*分,每题*分,共*题
  1391. item_N = -1
  1392. if N_s[0] > N_s[2]:
  1393. item_total_score = N_s[0]
  1394. item_score = N_s[2]
  1395. item_count = int(item_total_score / item_score)
  1396. else:
  1397. item_total_score = N_s[2]
  1398. item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
  1399. item_count = int(item_total_score / item_score)
  1400. type_score_dict_ocr['volume_N'] = item_N
  1401. type_score_dict_ocr['volume_total_score'] = item_total_score
  1402. type_score_dict_ocr['volume_count'] = item_count
  1403. type_score_dict_ocr['volume_score'] = item_score
  1404. elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back0 == '分' : # 每题*分,共*.*分,共*题/共*分,每题*.*分,共*题
  1405. item_N = -1
  1406. if N_s[0] > N_s[1]:
  1407. item_total_score = N_s[0]
  1408. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  1409. item_count = int(item_total_score / item_score)
  1410. else:
  1411. item_total_score = N_s[1]
  1412. item_score = N_s[0]
  1413. item_count = int(item_total_score / item_score)
  1414. type_score_dict_ocr['volume_N'] = item_N
  1415. type_score_dict_ocr['volume_total_score'] = item_total_score
  1416. type_score_dict_ocr['volume_count'] = item_count
  1417. type_score_dict_ocr['volume_score'] = item_score
  1418. else:
  1419. return all_structure
  1420. else:
  1421. if num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back3 == '分' : # *,共*.*分, 每题*分/每题*.*分,共*分
  1422. item_N = N_s[0]
  1423. if N_s[1] > N_s[3]:
  1424. item_total_score = N_s[1]
  1425. item_score = N_s[3]
  1426. item_count = int(item_total_score / item_score)
  1427. else:
  1428. item_total_score = N_s[3]
  1429. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  1430. item_count = int(item_total_score / item_score)
  1431. type_score_dict_ocr['volume_N'] = item_N
  1432. type_score_dict_ocr[ 'item_total_score'] = item_total_score
  1433. type_score_dict_ocr['item_count'] = item_count
  1434. type_score_dict_ocr['item_score'] = item_score
  1435. elif num_back2== '.' and num_infer3== '.' and num_back1 == '分'and num_back3 == '分' : # *,共*分, 每题*.*分/*,每题*分,共*.*分
  1436. item_N = int(N_s[0])
  1437. if N_s[1] > N_s[2]:
  1438. item_total_score = N_s[1]
  1439. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  1440. item_count = int(item_total_score / item_score)
  1441. else:
  1442. item_total_score = N_s[2]
  1443. item_score = N_s[1]
  1444. item_count = int(item_total_score / item_score)
  1445. type_score_dict_ocr['volume_N'] = item_N
  1446. type_score_dict_ocr[ 'item_total_score'] = item_total_score
  1447. type_score_dict_ocr['item_count'] = item_count
  1448. type_score_dict_ocr['item_score'] = item_score
  1449. else:
  1450. return all_structure
  1451. else:
  1452. if keyword_item3.search(C_s):
  1453. if num_back3 == '分': # *,*小题,共*.*分
  1454. item_total_score = N_s[2]
  1455. item_N = N_s[0]
  1456. item_count = N_s[1]
  1457. item_score = item_total_score / item_count
  1458. type_score_dict_ocr['volume_N'] = item_N
  1459. type_score_dict_ocr['volume_total_score'] = item_total_score
  1460. type_score_dict_ocr['volume_count'] = item_count
  1461. type_score_dict_ocr['volume_score'] = item_score
  1462. elif num_back2 == '分': # *,共*.*分,*小题
  1463. item_total_score = N_s[1]
  1464. item_N = N_s[0]
  1465. item_count = N_s[3]
  1466. item_score = item_total_score / item_count
  1467. type_score_dict_ocr['volume_N'] = item_N
  1468. type_score_dict_ocr['volume_total_score'] = item_total_score
  1469. type_score_dict_ocr['volume_count'] = item_count
  1470. type_score_dict_ocr['volume_score'] = item_score
  1471. else:
  1472. return all_structure
  1473. else:
  1474. return all_structure
  1475. elif len(N_s) == 5:
  1476. num_index1 = s.index(N_s[0])
  1477. num_infer0 = s[num_index1 - len(N_s[0])]
  1478. num_back0 = s[num_index1 + len(N_s[0])]
  1479. all_1 = find_repeat(s, N_s[1])
  1480. temp1 = 0
  1481. for ii in range(len(N_s[0])):
  1482. if N_s[0][ii] == N_s[1]:
  1483. temp1 = temp1 + 1
  1484. num_index2 = all_1[temp1]
  1485. num_infer1 = s[num_index2 - len(N_s[1])]
  1486. num_back1 = s[num_index2 + len(N_s[1])]
  1487. all_2 = find_repeat(s, N_s[2])
  1488. temp2 = 0
  1489. for ii in range(len(N_s[0])):
  1490. if N_s[0][ii] == N_s[2]:
  1491. temp2 = temp2 + 1
  1492. for jj in range(len(N_s[1])):
  1493. if N_s[1][jj] == N_s[2]:
  1494. temp2 = temp2 + 1
  1495. num_index3 = all_2[temp2]
  1496. num_infer2 = s[num_index3 - len(N_s[2])]
  1497. num_back2 = s[num_index3 + len(N_s[2])]
  1498. all_3 = find_repeat(s, N_s[3])
  1499. temp3 = 0
  1500. for ii in range(len(N_s[0])):
  1501. if N_s[0][ii] == N_s[3]:
  1502. temp3 = temp3 + 1
  1503. for jj in range(len(N_s[1])):
  1504. if N_s[1][jj] == N_s[3]:
  1505. temp3 = temp3 + 1
  1506. for kk in range(len(N_s[2])):
  1507. if N_s[2][kk] == N_s[3]:
  1508. temp3 = temp3 + 1
  1509. num_index4 = all_3[temp3]
  1510. num_infer3 = s[num_index4 - len(N_s[3])]
  1511. num_back3 = s[num_index4 + len(N_s[3])]
  1512. all_4 = find_repeat(s, N_s[4])
  1513. temp4 = 0
  1514. for ii in range(len(N_s[0])):
  1515. if N_s[0][ii] == N_s[4]:
  1516. temp4 = temp4 + 1
  1517. for jj in range(len(N_s[1])):
  1518. if N_s[1][jj] == N_s[4]:
  1519. temp4 = temp4 + 1
  1520. for kk in range(len(N_s[2])):
  1521. if N_s[2][kk] == N_s[4]:
  1522. temp4 = temp4 + 1
  1523. for ll in range(len(N_s[3])):
  1524. if N_s[3][ll] == N_s[4]:
  1525. temp4 = temp4 + 1
  1526. num_index5 = all_4[temp4]
  1527. num_infer4 = s[num_index5 - len(N_s[4])]
  1528. num_back4 = s[num_index5 + len(N_s[4])]
  1529. if isinstance(N_s[0], str):
  1530. N_s[0] = int(N_s[0])
  1531. if isinstance(N_s[1], str):
  1532. N_s[1] = int(N_s[1])
  1533. if isinstance(N_s[2], str):
  1534. N_s[2] = int(N_s[2])
  1535. if isinstance(N_s[3], str):
  1536. N_s[3] = int(N_s[3])
  1537. if isinstance(N_s[4], str):
  1538. N_s[4] = int(N_s[4])
  1539. if keyword_item2.search(C_s):
  1540. if keyword_item3.search(C_s):
  1541. if num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back3 == '分' : # *,每题*.*分,共*分,*小题/*,共*.*分,每题*分,共*小题
  1542. item_N = N_s[0]
  1543. if N_s[1] > N_s[3]:
  1544. item_total_score = N_s[1]
  1545. item_score = N_s[3]
  1546. item_count = N_s[4]
  1547. else:
  1548. item_total_score = N_s[3]
  1549. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  1550. item_count = N_s[4]
  1551. type_score_dict_ocr['volume_N'] = item_N
  1552. type_score_dict_ocr['volume_total_score'] = item_total_score
  1553. type_score_dict_ocr['volume_count'] = item_count
  1554. type_score_dict_ocr['volume_score'] = item_score
  1555. elif num_back2== '.' and num_infer3== '.' and num_back1 == '分'and num_back3 == '分' : # *,每题*分,共*.*分,*小题/*,共*分,每题*.*分,共*小题
  1556. item_N = N_s[0]
  1557. if N_s[1] > N_s[2]:
  1558. item_total_score = N_s[1]
  1559. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  1560. item_count = N_s[4]
  1561. else:
  1562. item_total_score = N_s[2]
  1563. item_score = N_s[1]
  1564. item_count = N_s[4]
  1565. type_score_dict_ocr['volume_N'] = item_N
  1566. type_score_dict_ocr['volume_total_score'] = item_total_score
  1567. type_score_dict_ocr['volume_count'] = item_count
  1568. type_score_dict_ocr['volume_score'] = item_score
  1569. elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back4 == '分' : # *,*小题,每题*.*分,共*分/*,*小题,共*.*分,每题*分
  1570. item_N = N_s[0]
  1571. if N_s[2] > N_s[4]:
  1572. item_total_score = N_s[2]
  1573. item_score = N_s[4]
  1574. item_count = N_s[1]
  1575. else:
  1576. item_total_score = N_s[4]
  1577. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  1578. item_count = N_s[1]
  1579. type_score_dict_ocr['volume_N'] = item_N
  1580. type_score_dict_ocr['volume_total_score'] = item_total_score
  1581. type_score_dict_ocr['volume_count'] = item_count
  1582. type_score_dict_ocr['volume_score'] = item_score
  1583. elif num_back3== '.' and num_infer4== '.' and num_back2 == '分'and num_back4 == '分' : # *,*小题,每题*分,共*.*分/*,*小题,共*分,每题*.*分
  1584. item_N = N_s[0]
  1585. if N_s[2] > N_s[3]:
  1586. item_total_score = N_s[2]
  1587. item_score = float(str(N_s[3]) + '.' + str(N_s[3]))
  1588. item_count = N_s[1]
  1589. else:
  1590. item_total_score = N_s[3]
  1591. item_score = N_s[2]
  1592. item_count = N_s[1]
  1593. type_score_dict_ocr['volume_N'] = item_N
  1594. type_score_dict_ocr['volume_total_score'] = item_total_score
  1595. type_score_dict_ocr['volume_count'] = item_count
  1596. type_score_dict_ocr['volume_score'] = item_score
  1597. elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back4 == '分' : # *,每题*.*分,*小题,共*分/*,共*.*分,*小题,每题*分
  1598. item_N = N_s[0]
  1599. if N_s[1] > N_s[4]:
  1600. item_total_score = N_s[1]
  1601. item_score = N_s[4]
  1602. item_count = N_s[3]
  1603. else:
  1604. item_total_score = N_s[4]
  1605. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  1606. item_count = N_s[3]
  1607. type_score_dict_ocr['volume_N'] = item_N
  1608. type_score_dict_ocr['volume_total_score'] = item_total_score
  1609. type_score_dict_ocr['volume_count'] = item_count
  1610. type_score_dict_ocr['volume_score'] = item_score
  1611. elif num_back3== '.' and num_infer4== '.' and num_back1 == '分'and num_back4 == '分' : # *,每题*分,*小题,共*.*分/*,共*分,*小题,每题*.*分
  1612. item_N = N_s[0]
  1613. if N_s[1] > N_s[3]:
  1614. item_total_score = N_s[1]
  1615. item_score = float(str(N_s[3]) + '.' + str(N_s[4]))
  1616. item_count = N_s[2]
  1617. else:
  1618. item_total_score = N_s[3]
  1619. item_score = N_s[1]
  1620. item_count = N_s[2]
  1621. type_score_dict_ocr['volume_N'] = item_N
  1622. type_score_dict_ocr['volume_total_score'] = item_total_score
  1623. type_score_dict_ocr['volume_count'] = item_count
  1624. type_score_dict_ocr['volume_score'] = item_score
  1625. else:
  1626. # 暂定len=5时不判断不存在题号的情况
  1627. return all_structure
  1628. else:
  1629. # 暂定len=5时不判断不存在题目个数的情况
  1630. return all_structure
  1631. else:
  1632. # 暂定len=5时不判断不存在小项分数的情况
  1633. return all_structure
  1634. else:
  1635. return all_structure
  1636. else:
  1637. if keyword_item2.search(C_s):
  1638. if len(N_s) == 1:
  1639. num_index1 = s.index(N_s[0])
  1640. num_infer0 = s[num_index1 - len(N_s[0])]
  1641. num_back0 = s[num_index1 + len(N_s[0])]
  1642. if isinstance(N_s[0], str):
  1643. N_s[0] = int(N_s[0])
  1644. if num_back0 == '分': # 每题*分
  1645. item_score = N_s[0]
  1646. type_score_dict_ocr['volume_N'] = -1
  1647. type_score_dict_ocr['volume_total_score'] = -1
  1648. type_score_dict_ocr['volume_count'] = -1
  1649. type_score_dict_ocr['volume_score'] = item_score
  1650. else:
  1651. return all_structure
  1652. elif len(N_s) == 2:
  1653. num_index1 = s.index(N_s[0])
  1654. num_infer0 = s[num_index1 - len(N_s[0])]
  1655. num_back0 = s[num_index1 + len(N_s[0])]
  1656. all_1 = find_repeat(s, N_s[1])
  1657. temp1 = 0
  1658. for ii in range(len(N_s[0])):
  1659. if N_s[0][ii] == N_s[1]:
  1660. temp1 = temp1 + 1
  1661. num_index2 = all_1[temp1]
  1662. num_infer1 = s[num_index2 - len(N_s[1])]
  1663. num_back1 = s[num_index2 + len(N_s[1])]
  1664. if isinstance(N_s[0], str):
  1665. N_s[0] = int(N_s[0])
  1666. if isinstance(N_s[1], str):
  1667. N_s[1] = int(N_s[1])
  1668. if keyword_item3.search(C_s):
  1669. if num_back1 == '分': # 共*题,每题*分
  1670. item_total_score = N_s[0] * N_s[1]
  1671. item_count = N_s[0]
  1672. item_score = N_s[1]
  1673. type_score_dict_ocr['volume_N'] = -1
  1674. type_score_dict_ocr['volume_total_score'] = item_total_score
  1675. type_score_dict_ocr['volume_count'] = item_count
  1676. type_score_dict_ocr['volume_score'] = item_score
  1677. elif num_back0 == '分': # 每题*分,共*题
  1678. item_total_score = int(N_s[0]) * int(N_s[1])
  1679. item_count = int(N_s[1])
  1680. item_score = int(N_s[0])
  1681. type_score_dict_ocr['volume_N'] = -1
  1682. type_score_dict_ocr['volume_total_score'] = item_total_score
  1683. type_score_dict_ocr['volume_count'] = item_count
  1684. type_score_dict_ocr['volume_score'] = item_score
  1685. else:
  1686. return all_structure
  1687. else:
  1688. if num_back1 == '分' and num_back0 == '.' and num_infer1 == '.': # *.*分
  1689. item_N = -1
  1690. item_score = float(str(N_s[0])+'.'+str(N_s[1]))
  1691. type_score_dict_ocr['volume_N'] = item_N
  1692. type_score_dict_ocr['volume_total_score'] = -1
  1693. type_score_dict_ocr['volume_count'] = -1
  1694. type_score_dict_ocr['volume_score'] = item_score
  1695. elif num_back1 == '分': # *,*分
  1696. item_N = int(N_s[0])
  1697. item_score = int(N_s[1])
  1698. type_score_dict_ocr['volume_N'] = item_N
  1699. type_score_dict_ocr['volume_total_score'] = -1
  1700. type_score_dict_ocr['volume_count'] = -1
  1701. type_score_dict_ocr['volume_score'] = item_score
  1702. else:
  1703. return all_structure
  1704. elif len(N_s) == 3:
  1705. num_index1 = s.index(N_s[0])
  1706. num_infer0 = s[num_index1 - len(N_s[0])]
  1707. num_back0 = s[num_index1 + len(N_s[0])]
  1708. all_1 = find_repeat(s, N_s[1])
  1709. temp1 = 0
  1710. for ii in range(len(N_s[0])):
  1711. if N_s[0][ii] == N_s[1]:
  1712. temp1 = temp1 + 1
  1713. num_index2 = all_1[temp1]
  1714. num_infer1 = s[num_index2 - len(N_s[1])]
  1715. num_back1 = s[num_index2 + len(N_s[1])]
  1716. all_2 = find_repeat(s, N_s[2])
  1717. temp2 = 0
  1718. for ii in range(len(N_s[0])):
  1719. if N_s[0][ii] == N_s[2]:
  1720. temp2 = temp2 + 1
  1721. for jj in range(len(N_s[1])):
  1722. if N_s[1][jj] == N_s[2]:
  1723. temp2 = temp2 + 1
  1724. num_index3 = all_2[temp2]
  1725. num_infer2 = s[num_index3 - len(N_s[2])]
  1726. num_back2 = s[num_index3 + len(N_s[2])]
  1727. if isinstance(N_s[0], str):
  1728. N_s[0] = int(N_s[0])
  1729. if isinstance(N_s[1], str):
  1730. N_s[1] = int(N_s[1])
  1731. if isinstance(N_s[2], str):
  1732. N_s[2] = int(N_s[2])
  1733. if num_back2 == '分' and (num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*分
  1734. item_N = int(N_s[0])
  1735. item_total_score = int(N_s[1]) * int(N_s[2])
  1736. item_count = int(N_s[1])
  1737. item_score = int(N_s[2])
  1738. type_score_dict_ocr['volume_N'] = item_N
  1739. type_score_dict_ocr['volume_total_score'] = item_total_score
  1740. type_score_dict_ocr['volume_count'] = item_count
  1741. type_score_dict_ocr['volume_score'] = item_score
  1742. elif num_back1 == '分' and (num_back2 == '题' or num_back2 == '小' or num_back2 == '空') and num_back0 != '分': # *,每题*分,共*题
  1743. item_N = int(N_s[0])
  1744. item_total_score = int(N_s[1]) * int(N_s[2])
  1745. item_count = int(N_s[2])
  1746. item_score = int(N_s[1])
  1747. type_score_dict_ocr['volume_N'] = item_N
  1748. type_score_dict_ocr['volume_total_score'] = item_total_score
  1749. type_score_dict_ocr['volume_count'] = item_count
  1750. type_score_dict_ocr['volume_score'] = item_score
  1751. elif num_infer2 == '.' and num_back2 == '分' and num_back1 == '.': # 共*题,每题*.*分
  1752. item_N = -1
  1753. item_count = int(N_s[0])
  1754. item_score = float(str(N_s[1])+'.'+str(N_s[2]))
  1755. item_total_score = int(item_count * item_score)
  1756. type_score_dict_ocr['volume_N'] = item_N
  1757. type_score_dict_ocr['volume_total_score'] = item_total_score
  1758. type_score_dict_ocr['volume_count'] = item_count
  1759. type_score_dict_ocr['volume_score'] = item_score
  1760. elif num_infer1 == '.' and num_back1 == '分' and num_back0 == '.' : # 每题*.*分,共*题
  1761. item_N = -1
  1762. item_count = int(N_s[2])
  1763. item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
  1764. item_total_score = int(item_count * item_score)
  1765. type_score_dict_ocr['volume_N'] = item_N
  1766. type_score_dict_ocr['volume_total_score'] = item_total_score
  1767. type_score_dict_ocr['volume_count'] = item_count
  1768. type_score_dict_ocr['volume_score'] = item_score
  1769. elif num_back2 == '分': # * * ,每题*分
  1770. item_N = -1
  1771. item_count = -1
  1772. item_score = -1
  1773. item_total_score = int(N_s[2])
  1774. type_score_dict_ocr['volume_N'] = item_N
  1775. type_score_dict_ocr['volume_total_score'] = item_total_score
  1776. type_score_dict_ocr['volume_count'] = item_count
  1777. type_score_dict_ocr['volume_score'] = item_score
  1778. else:
  1779. return all_structure
  1780. elif len(N_s) == 4:
  1781. num_index1 = s.index(N_s[0])
  1782. num_infer0 = s[num_index1 - len(N_s[0])]
  1783. num_back0 = s[num_index1 + len(N_s[0])]
  1784. all_1 = find_repeat(s, N_s[1])
  1785. temp1 = 0
  1786. for ii in range(len(N_s[0])):
  1787. if N_s[0][ii] == N_s[1]:
  1788. temp1 = temp1 + 1
  1789. num_index2 = all_1[temp1]
  1790. num_infer1 = s[num_index2 - len(N_s[1])]
  1791. num_back1 = s[num_index2 + len(N_s[1])]
  1792. all_2 = find_repeat(s, N_s[2])
  1793. temp2 = 0
  1794. for ii in range(len(N_s[0])):
  1795. if N_s[0][ii] == N_s[2]:
  1796. temp2 = temp2 + 1
  1797. for jj in range(len(N_s[1])):
  1798. if N_s[1][jj] == N_s[2]:
  1799. temp2 = temp2 + 1
  1800. num_index3 = all_2[temp2]
  1801. num_infer2 = s[num_index3 - len(N_s[2])]
  1802. num_back2 = s[num_index3 + len(N_s[2])]
  1803. all_3 = find_repeat(s, N_s[3])
  1804. temp3 = 0
  1805. for ii in range(len(N_s[0])):
  1806. if N_s[0][ii] == N_s[3]:
  1807. temp3 = temp3 + 1
  1808. for jj in range(len(N_s[1])):
  1809. if N_s[1][jj] == N_s[3]:
  1810. temp3 = temp3 + 1
  1811. num_index4 = all_3[temp3]
  1812. num_infer3 = s[num_index4 - len(N_s[3])]
  1813. num_back3 = s[num_index4 + len(N_s[3])]
  1814. if isinstance(N_s[0], str):
  1815. N_s[0] = int(N_s[0])
  1816. if isinstance(N_s[1], str):
  1817. N_s[1] = int(N_s[1])
  1818. if isinstance(N_s[2], str):
  1819. N_s[2] = int(N_s[2])
  1820. if isinstance(N_s[3], str):
  1821. N_s[3] = int(N_s[3])
  1822. if num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and (num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*.*分
  1823. item_N = int(N_s[0])
  1824. item_count = int(N_s[1])
  1825. item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
  1826. item_total_score = int(item_count * item_score)
  1827. type_score_dict_ocr['volume_N'] = item_N
  1828. type_score_dict_ocr['volume_total_score'] = item_total_score
  1829. type_score_dict_ocr['volume_count'] = item_count
  1830. type_score_dict_ocr['volume_score'] = item_score
  1831. elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and (num_back3 == '题' or num_back3 == '小' or num_back3 == '空') and num_back0 != '分': # *,每题*.*分,共*题
  1832. item_N = int(N_s[0])
  1833. item_count = int(N_s[3])
  1834. item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
  1835. item_total_score = int(item_count * item_score)
  1836. type_score_dict_ocr['volume_N'] = item_N
  1837. type_score_dict_ocr['volume_total_score'] = item_total_score
  1838. type_score_dict_ocr['volume_count'] = item_count
  1839. type_score_dict_ocr['volume_score'] = item_score
  1840. else:
  1841. return all_structure
  1842. else:
  1843. return all_structure
  1844. else:
  1845. if C_s.find(keyword_item4[0]) != -1:
  1846. if len(N_s) == 2: # *,*分
  1847. num_index1 = s.index(N_s[0])
  1848. num_infer0 = s[num_index1 - len(N_s[0])]
  1849. num_back0 = s[num_index1 + len(N_s[0])]
  1850. if num_infer0 == '( ' or num_back0 == ')':
  1851. return all_structure
  1852. else:
  1853. all_1 = find_repeat(s, N_s[1])
  1854. temp1 = 0
  1855. for ii in range(len(N_s[0])):
  1856. if N_s[0][ii] == N_s[1]:
  1857. temp1 = temp1 + 1
  1858. num_index2 = all_1[temp1]
  1859. num_infer1 = s[num_index2 - len(N_s[1])]
  1860. num_back1 = s[num_index2 + len(N_s[1])]
  1861. if isinstance(N_s[0], str):
  1862. N_s[0] = int(N_s[0])
  1863. if isinstance(N_s[1], str):
  1864. N_s[1] = int(N_s[1])
  1865. if int(N_s[0]) > 1000:
  1866. item_N =0
  1867. item_N1 = int(N_s[0][-4] + N_s[0][-3])
  1868. item_N2 = int(N_s[0][-2] + N_s[0][-1])
  1869. if item_N2 - item_N1 == 1:
  1870. item_N = [0, 0]
  1871. item_N = [item_N1, item_N2]
  1872. elif item_N2 - item_N1 == 2:
  1873. item_N = [0, 0, 0]
  1874. item_N = [item_N1, item_N1 + 1, item_N2]
  1875. elif item_N2 - item_N1 == 3:
  1876. item_N = [0, 0, 0, 0]
  1877. item_N = [item_N1, item_N1 + 1, item_N1 + 2, item_N2]
  1878. type_score_dict_ocr['item_N'] = item_N
  1879. item_total_score = int(N_s[1])
  1880. type_score_dict_ocr['item_total_score'] = item_total_score
  1881. type_score_dict_ocr['item_count'] = -1
  1882. type_score_dict_ocr['item_score'] = -1
  1883. Score_structure_item = type_score_dict_ocr
  1884. Score_structure.append(Score_structure_item)
  1885. all_structure = {'volume_structure': -1,
  1886. 'Score_structure': Score_structure}
  1887. return all_structure
  1888. else:
  1889. item_N = int(N_s[0])
  1890. item_total_score = int(N_s[1])
  1891. type_score_dict_ocr['item_N'] = item_N
  1892. type_score_dict_ocr['item_total_score'] = item_total_score
  1893. type_score_dict_ocr['item_count'] = -1
  1894. type_score_dict_ocr['item_score'] = -1
  1895. Score_structure_item = type_score_dict_ocr
  1896. Score_structure.append(Score_structure_item)
  1897. all_structure = {'volume_structure': -1,
  1898. 'Score_structure': Score_structure}
  1899. return all_structure
  1900. elif len(N_s) == 3: # *,*分
  1901. num_index1 = s.index(N_s[0])
  1902. num_infer1 = s[num_index1 - len(N_s[0])]
  1903. num_back1 = s[num_index1 + len(N_s[0])]
  1904. all_1 = find_repeat(s, N_s[1])
  1905. temp1 = 0
  1906. for ii in range(len(N_s[0])):
  1907. if N_s[0][ii] == N_s[1]:
  1908. temp1 = temp1 + 1
  1909. num_index2 = all_1[temp1]
  1910. num_infer2 = s[num_index2 - len(N_s[1])]
  1911. num_back2 = s[num_index2 + len(N_s[1])]
  1912. all_2 = find_repeat(s, N_s[2])
  1913. temp2 = 0
  1914. for ii in range(len(N_s[0])):
  1915. if N_s[0][ii] == N_s[2]:
  1916. temp2 = temp2 + 1
  1917. for jj in range(len(N_s[1])):
  1918. if N_s[1][jj] == N_s[2]:
  1919. temp2 = temp2 + 1
  1920. num_index3 = all_2[temp2]
  1921. num_infer3 = s[num_index3 - len(N_s[2])]
  1922. if num_index3 + len(N_s[2]) < len(s):
  1923. num_back3 = s[num_index3 + len(N_s[2])]
  1924. else:
  1925. num_back3 = []
  1926. if isinstance(N_s[0], str):
  1927. N_s[0] = int(N_s[0])
  1928. if isinstance(N_s[1], str):
  1929. N_s[1] = int(N_s[1])
  1930. if isinstance(N_s[2], str):
  1931. N_s[2] = int(N_s[2])
  1932. if num_back3 == '分' and num_infer3 == '.' and num_back2 == '分': # *,*.*分
  1933. item_N = N_s[0]
  1934. item_total_score = N_s[1]
  1935. type_score_dict_ocr['item_total_score'] = item_total_score
  1936. type_score_dict_ocr['item_N'] = item_N
  1937. type_score_dict_ocr['item_count'] = -1
  1938. type_score_dict_ocr['item_score'] = -1
  1939. Score_structure_item = type_score_dict_ocr
  1940. Score_structure.append(Score_structure_item)
  1941. all_structure = {'volume_structure': -1,
  1942. 'Score_structure': Score_structure}
  1943. return all_structure
  1944. elif num_back3 == '分':
  1945. if int(N_s[1]) - int(N_s[0]) == 1:
  1946. item_N = [0, 0]
  1947. item_N = [int(N_s[0]), int(N_s[1])]
  1948. elif int(N_s[1]) - int(N_s[0]) == 2:
  1949. item_N = [0, 0, 0]
  1950. item_N = [int(N_s[0]), int(N_s[0]) + 1, int(N_s[1])]
  1951. elif int(N_s[1]) - int(N_s[0]) == 3:
  1952. item_N = [0, 0, 0, 0]
  1953. item_N = [int(N_s[0]), int(N_s[0]) + 1, int(N_s[0]) + 2,
  1954. int(N_s[1])]
  1955. else:
  1956. return all_structure
  1957. item_total_score = int(N_s[2])
  1958. type_score_dict_ocr['item_total_score'] = item_total_score
  1959. type_score_dict_ocr['item_N'] = item_N
  1960. type_score_dict_ocr['item_count'] = -1
  1961. type_score_dict_ocr['item_score'] = -1
  1962. Score_structure_item = type_score_dict_ocr
  1963. Score_structure.append(Score_structure_item)
  1964. all_structure = {'volume_structure': -1,
  1965. 'Score_structure': Score_structure}
  1966. return all_structure
  1967. elif len(N_s) == 1:
  1968. num_index1 = s.index(N_s[0])
  1969. num_infer1 = s[num_index1 - len(N_s[0])]
  1970. if num_index1 + len(N_s[0]) < len(s):
  1971. num_back1 = s[num_index1 + len(N_s[0])]
  1972. item_total_score = int(N_s[0])
  1973. type_score_dict_ocr['item_N'] = -1
  1974. type_score_dict_ocr['item_total_score'] = item_total_score
  1975. type_score_dict_ocr['item_count'] = -1
  1976. type_score_dict_ocr['item_score'] = -1
  1977. if num_back1 == '分': # *分
  1978. Score_structure_item = type_score_dict_ocr
  1979. Score_structure.append(Score_structure_item)
  1980. all_structure = {'volume_structure': -1,
  1981. 'Score_structure': Score_structure}
  1982. return all_structure
  1983. else:
  1984. return all_structure
  1985. if 'volume_N' not in type_score_dict_ocr.keys():
  1986. all_structure = {'volume_structure': -1,
  1987. 'Score_structure': -1}
  1988. return all_structure
  1989. else:
  1990. for xxx in range(len_keyword_type1):
  1991. if C_s.find(keyword_type1[1]) != -1:
  1992. type_score_dict_ocr['keyword_type'] = keyword_type1[1]
  1993. break
  1994. elif C_s.find(keyword_type1[0]) != -1:
  1995. type_score_dict_ocr['keyword_type'] = keyword_type1[0]
  1996. Score_structure_item = type_score_dict_ocr
  1997. Score_structure.append(Score_structure_item)
  1998. break
  1999. elif C_s.find(keyword_type1[xxx]) != -1:
  2000. type_score_dict_ocr['keyword_type'] = keyword_type1[xxx]
  2001. break
  2002. elif xxx == len_keyword_type1 - 1:
  2003. type_score_dict_ocr['keyword_type'] = -2
  2004. type_score_dict_ocr['item_N'] = type_score_dict_ocr.pop('volume_N')
  2005. type_score_dict_ocr['item_total_score'] = type_score_dict_ocr.pop('volume_total_score')
  2006. type_score_dict_ocr['item_count'] = type_score_dict_ocr.pop('volume_count')
  2007. type_score_dict_ocr['item_score'] = type_score_dict_ocr.pop('volume_score')
  2008. Score_structure_item = type_score_dict_ocr
  2009. Score_structure.append(Score_structure_item)
  2010. break
  2011. volume_structure_item = type_score_dict_ocr
  2012. volume_structure.append(volume_structure_item)
  2013. if Score_structure == []:
  2014. all_structure = {'volume_structure': volume_structure,
  2015. 'Score_structure': -1}
  2016. elif Score_structure[0]['keyword_type'] != -2:
  2017. all_structure = {'volume_structure': volume_structure,
  2018. 'Score_structure': Score_structure}
  2019. else:
  2020. all_structure = {'volume_structure': -1,
  2021. 'Score_structure': Score_structure}
  2022. return all_structure
  2023. except Exception as e:
  2024. print('Skip ocr_key_words')
  2025. return all_structure