12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037 |
- # -*- coding: utf-8 -*-
- # @Time : 2020/5/28 0022 17:04
- # @Author : LF
- # @FileName: ocr_key_words.py
- # @Software: PyCharm
- import re
def find_repeat(source, elmt):
    """Return every index at which ``elmt`` occurs in ``source``.

    The search restarts one position after each hit, so overlapping
    occurrences are reported as well (``find_repeat("aaa", "aa")`` gives
    ``[0, 1]``).

    :param source: object exposing ``index(value, start, stop)`` and
        ``len()`` -- e.g. a ``str`` or a ``list``.
    :param elmt: the value (or substring) to look for.
    :return: list of start indices in ascending order; empty when there
        is no occurrence.
    """
    positions = []
    limit = len(source)
    cursor = 0
    while cursor < limit:
        try:
            hit = source.index(elmt, cursor, limit)
        except ValueError:
            # No further occurrence in the remaining range.
            return positions
        positions.append(hit)
        cursor = hit + 1  # step by one so overlapping matches are kept
    return positions
def ocr_key_words(rect, type_score_dict, margin=30):
    """Attach the OCR characters lying inside a detected box to its record.

    A character is kept when its box lies within ``type_box`` enlarged by
    ``margin`` on every side. All four comparisons are strict, so a
    coordinate exactly ``margin`` outside the box is rejected.

    :param rect: OCR result, a dict with parallel sequences ``'chars'``
        (recognised characters) and ``'coordinates'`` (one
        ``(xmin, ymin, xmax, ymax)`` entry per character).
    :param type_score_dict: model output for one detected region; must
        contain ``'type_box'`` as ``(xmin, ymin, xmax, ymax)``.
    :param margin: tolerance, in coordinate units, by which a character
        box may stick out of ``type_box`` and still be accepted.
        Defaults to 30, the value that was previously hard-coded.
    :return: the same ``type_score_dict`` object, mutated in place with a
        new ``'words'`` key holding the accepted characters in OCR order.
    :raises IndexError: if ``rect['coordinates']`` has fewer entries than
        ``rect['chars']``.
    """
    box = type_score_dict['type_box']
    xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]

    coordinates = rect['coordinates']
    words = []
    for j, char in enumerate(rect['chars']):
        c = coordinates[j]
        if (c[0] - xmin > -margin and c[1] - ymin > -margin
                and c[2] - xmax < margin and c[3] - ymax < margin):
            words.append(char)

    # The caller's dict is updated in place and handed back, so existing
    # callers that rely on either the mutation or the return value work.
    type_score_dict['words'] = words
    return type_score_dict
- def key_words(type_score_dict_ocr): # 根据OCR结果结合关键字解析
- total_score = 0
- volume_score = 0
- volume_structure_item = 0
- volume_structure = []
- Score_structure_item = 0
- Score_structure = []
- all_structure = {}
- keyword_volume = re.compile(r'第卷|第部')
- keyword_type = re.compile(r'选择|非选择题|综合题|问答题|主观题|客观题|解答题|计算题')
- keyword_type1 = ['选择', '非选择题', '综合题', '问答题', '主观题', '客观题', '解答题','计算题']
- len_keyword_type1 = len(keyword_type1)
- keyword_item1 = re.compile(r'共分|合计分|总共分|总计分|小题满分|本小题|满分|共计|共.分|合计.分|总共.分|总计.分|小题满分.|本小题.|满分.|共计.')
- keyword_item2 = re.compile(r'每题分|每小题分|空分|每小题.分|每题.分|空.分|个分') # '分/题'暂未考虑
- keyword_item3 = re.compile(r'共题|共小题|分小题|本题小题|共个小题|分为小题|分个小题|本大题共小题')
- keyword_item4 = ['分']
- keyword_item5 = re.compile(r'题|.|、')
- keyword_item6 = re.compile(r'分/题|分')
- if 'words' in type_score_dict_ocr.keys():
- ocr_1 = type_score_dict_ocr['words']
- else:
- return all_structure
- s = ''.join((str(x) for x in ocr_1)) # 合并为一个字符串
- if s.find('IV') != -1 or s.find('Ⅳ') != -1:
- s = s.replace('Ⅳ', '4')
- s = s.replace('IV', '4')
- elif s.find('III') != -1 or s.find('Ⅲ') != -1:
- s = s.replace('Ⅲ', '3')
- s = s.replace('III', '3')
- elif s.find('II') != -1 or s.find('Ⅱ') != -1:
- s = s.replace('Ⅱ', '2')
- s = s.replace('II', '2')
- elif s.find('VI') != -1 or s.find('Ⅵ') != -1:
- s = s.replace('Ⅵ', '6')
- s = s.replace('VI', '6')
- elif s.find('VII') != -1 or s.find('Ⅶ') != -1:
- s = s.replace('Ⅶ', '7')
- s = s.replace('VII', '7')
- elif s.find('VIII') != -1 or s.find('Ⅷ') != -1:
- s = s.replace('Ⅷ', '8')
- s = s.replace('VIII', '8')
- elif s.find('IX') != -1 or s.find('Ⅸ') != -1:
- s = s.replace('Ⅸ', '9')
- s = s.replace('IX', '9')
- elif s.find('X') != -1 or s.find('Ⅹ') != -1:
- s = s.replace('Ⅹ', '10')
- s = s.replace('X', '10')
- elif s.find('I') != -1 or s.find('Ⅰ') != -1:
- s = s.replace('Ⅰ', '1')
- s = s.replace('I', '1')
- elif s.find('V') != -1 or s.find('Ⅴ') != -1:
- s = s.replace('Ⅴ', '5')
- s = s.replace('V', '5')
- C_s = re.sub("[A-Za-z0-9\!\%\[\]\,\。]", "", s) # 提取汉字
- E_s = ''.join(re.findall(r'[A-Za-z]', s)) # 提取英文字符
- N_s = re.findall('\d+', s) # 提取阿拉伯数字
- try:
- if len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (keyword_item6.search(C_s)):
- type_score_dict_ocr['item_N'] = -1
- type_score_dict_ocr['item_total_score'] = int(N_s[0])
- type_score_dict_ocr['item_count'] = -1
- type_score_dict_ocr['item_score'] = -1
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- all_structure = {'volume_structure': -1,
- 'Score_structure': Score_structure}
- elif len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (keyword_item5.search(C_s) or len(C_s) == 0):
- type_score_dict_ocr['item_N'] = int(N_s[0])
- type_score_dict_ocr['item_total_score'] = -1
- type_score_dict_ocr['item_count'] = -1
- type_score_dict_ocr['item_score'] = -1
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- all_structure = {'volume_structure': -1,
- 'Score_structure': Score_structure}
- elif N_s != []:
- if keyword_volume.search(C_s):
- '''
- 对应试卷中存在分卷信息的情况,根据包含数字的个数分为5类,暂定包含信息的有效数字个数小于5,并处理小题分数和总分可能包含小数点的情况
- 暂定小题个数不包含小数
- 暂定总分数中不存在有意义的小数位
- '''
- if len(N_s) == 1:
- num_index = s.index(N_s[0])
- num_infer = s[num_index - len(N_s[0])]
- num_back = s[num_index + len(N_s[0])]
- if num_back == '分': # 第卷/部*分
- volume_score = int(N_s[0])
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = -1
- elif num_back == '卷' or num_back == '部': # 第*卷
- volume_N = int(N_s[0])
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = -1
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = -1
- else:
- return all_structure
- elif len(N_s) == 2:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if keyword_item1.search(C_s):
- if keyword_item2.search(C_s):
- if num_back0 == '分' and num_back1 == '分':
- if N_s[0] < N_s[1]: # 第卷,每小题*分,共*分
- volume_score = N_s[1]
- item_score = N_s[0]
- item_count = int(volume_score / item_score)
- else: # 第卷,共*分 ,每小题*分
- volume_score = N_s[0]
- item_score = N_s[1]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- elif keyword_item3.search(C_s):
- if num_back1 == '分': # 第卷,共*小题,共*分
- volume_score = N_s[1]
- item_count = N_s[0]
- item_score = volume_score / item_count
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '分': # 第卷,共*分 ,共*小题
- volume_score = N_s[0]
- item_count = N_s[1]
- item_score = volume_score / item_count
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if (num_back1 == '卷' or num_back1 == '部') and num_back1 == '分': # 第*卷*分
- volume_N = N_s[0]
- volume_score = N_s[1]
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = -1
- elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分': # 第卷,共*.*分
- volume_N = -1
- volume_score = N_s[0]
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = -1
- else:
- return all_structure
- else:
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if num_back1 == '分': # 第卷,共*小题,每小题*分
- item_count = N_s[0]
- item_score = N_s[1]
- volume_score = item_score * item_count
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '分': # 第卷,每小题*分 ,共*小题
- item_count = N_s[1]
- item_score = N_s[0]
- volume_score = item_count * item_score
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back1 == '分': # 第卷,每小题*.*分
- volume_score = -1
- item_count = -1
- item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- return all_structure
- elif len(N_s) == 3:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer2 = s[num_index3 - len(N_s[2])]
- num_back2 = s[num_index3 + len(N_s[2])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if keyword_item1.search(C_s):
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if num_back0 == '分' and num_back1 == '分':
- if N_s[0] > N_s[1]: # 第卷,共*分,每题*分,共*题
- volume_score = N_s[0]
- item_count = N_s[2]
- item_score = N_s[1]
- else: # 第卷,每题*分,共*分,共*题
- volume_score = N_s[1]
- item_count = N_s[2]
- item_score = N_s[0]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '分' and num_back2 == '分':
- if N_s[0] > N_s[2]: # 第卷,共*分,共*题,每题*分
- volume_score = N_s[0]
- item_count = N_s[1]
- item_score = N_s[2]
- else: # 第卷,每题*分,共*题,共*分
- volume_score = N_s[2]
- item_count = N_s[1]
- item_score = N_s[0]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '分' and num_back2 == '分': # 第卷,共*题,共*分,每题*分
- if N_s[1] > N_s[2]:
- volume_score = N_s[1]
- item_count = N_s[0]
- item_score = N_s[2]
- else:
- volume_score = N_s[2]
- item_count = N_s[0]
- item_score = N_s[1]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back1 == '分' and num_back2 == '分': # 第*卷,共*分,每题*分 / 第*卷,每题*分,共*分
- volume_N = int(N_s[0])
- if N_s[1] > N_s[2]:
- volume_score = N_s[1]
- item_score = N_s[2]
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[2]
- item_score = N_s[1]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分' and num_back2 == '分': # 第卷,共*.*分,每题*分 / 第卷,每题*.*分,共*分
- volume_N = -1
- if N_s[0] > N_s[2]:
- volume_score = N_s[0]
- item_score = N_s[2]
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[2]
- item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '.' and num_infer2 == '.' and num_back0 == '分' and num_back2 == '分': # 第卷,共*分,每题*.*分 / 第卷,每题*分,共*.*分
- volume_N = -1
- if N_s[0] > N_s[1]:
- volume_score = N_s[0]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[1]
- item_score = N_s[0]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if keyword_item3.search(C_s):
- if (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '题' or num_back1 == '小') and num_back2 == '分': # 第*卷,共*题,共*分
- volume_N = N_s[0]
- volume_score = N_s[2]
- item_count = N_s[1]
- item_score = volume_score / item_count
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第*卷,共*分,共*题
- volume_N = N_s[0]
- volume_score = N_s[1]
- item_count = N_s[2]
- item_score = volume_score / item_count
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '.' and num_infer1 == '.' and (num_back2 == '题' or num_back2 == '小') and num_back1 == '分': # 第卷,共*.*分,共*题
- volume_N = -1
- volume_score = N_s[0]
- item_count = N_s[2]
- item_score = volume_score / item_count
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '.' and num_infer2 == '.' and (num_back0 == '题' or num_back0 == '小') and num_back2 == '分': # 第卷,共*题,共*.*分
- volume_N = -1
- volume_score = N_s[1]
- item_count = N_s[0]
- item_score = volume_score / item_count
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back2 == '分': # 第*卷,共*.*分
- volume_N = int(N_s[0])
- volume_score = N_s[1]
- item_score = -1
- item_count = -1
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,共*题,每题*分
- volume_N = N_s[0]
- item_count = N_s[1]
- item_score = N_s[2]
- volume_score = item_count * item_score
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分': # 第*卷,每题*分,共*题
- volume_N = N_s[0]
- item_count = N_s[2]
- item_score = N_s[1]
- volume_score = item_count * item_score
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分': # 第卷,每题*.*分,共*题
- volume_N = -1
- item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
- item_count = N_s[2]
- volume_score = item_score * item_count
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第卷,共*题,每题*.*分
- volume_N = -1
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = N_s[0]
- volume_score = item_score * item_count
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back2 == '分': # 第*卷,小题*.*分
- volume_N = int(N_s[0])
- volume_score = -1
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = -1
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- elif len(N_s) == 4:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer2 = s[num_index3 - len(N_s[2])]
- num_back2 = s[num_index3 + len(N_s[2])]
- all_3 = find_repeat(s, N_s[3])
- temp3 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[3]:
- temp3 = temp3 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[3]:
- temp3 = temp3 + 1
- for kk in range(len(N_s[2])):
- if N_s[2][kk] == N_s[3]:
- temp3 = temp3 + 1
- num_index4 = all_3[temp3]
- num_infer3 = s[num_index4 - len(N_s[3])]
- num_back3 = s[num_index4 + len(N_s[3])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if isinstance(N_s[3], str):
- N_s[3] = int(N_s[3])
- if keyword_item1.search(C_s):
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分' and num_back3 == '分':
- if N_s[3] > N_s[1]: # 第*卷,每题*分,共*题,共*分
- volume_N = N_s[0]
- volume_score = N_s[3]
- item_count = N_s[2]
- item_score = N_s[1]
- else: # 第*卷,共*分,共*题,每题*分
- volume_N = N_s[0]
- volume_score = N_s[1]
- item_count = N_s[2]
- item_score = N_s[3]
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分' and num_back3 == '分':
- if N_s[3] > N_s[2]: # 第*卷,共*题,每题*分,共*分
- volume_N = N_s[0]
- volume_score = N_s[3]
- item_count = N_s[1]
- item_score = N_s[2]
- else: # 第*卷,共*题,共*分 , 每题*分
- volume_N = N_s[0]
- volume_score = N_s[2]
- item_count = N_s[1]
- item_score = N_s[3]
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分' and num_back1 == '分':
- if N_s[1] > N_s[2]: # 第*卷,共*分,每题*分,共*题
- volume_N = N_s[0]
- volume_score = N_s[1]
- item_count = N_s[3]
- item_score = N_s[2]
- else: # 第*卷,每题*分,共*分,共*题
- volume_N = N_s[0]
- volume_score = N_s[2]
- item_count = N_s[3]
- item_score = N_s[1]
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back2 == '分': # 第卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题
- volume_N = -1
- if int(N_s[0]) > int(N_s[2]):
- volume_score = N_s[0]
- item_score = N_s[2]
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[2]
- item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back1 == '.' and num_infer2 == '.') and num_back0 == '分' and num_back2 == '分': # 第卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题
- volume_N = -1
- if int(N_s[0]) > int(N_s[1]):
- volume_score = N_s[0]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[1]
- item_score = N_s[0]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分
- volume_N = -1
- if N_s[1] > N_s[3]:
- volume_score = N_s[1]
- item_score = N_s[3]
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[4]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分
- volume_N = -1
- if int(N_s[1]) > int(N_s[2]):
- volume_score = N_s[1]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[2]
- item_score = N_s[1]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '.' and num_infer1 == '.') and num_back1 == '分' and num_back3 == '分': # 第卷,共*.*分,共*题,每题*分/第卷,每题*.*分,共*题,共*分
- volume_N = -1
- if int(N_s[0]) > int(N_s[3]):
- volume_score = N_s[0]
- item_score = N_s[3]
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[3]
- item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back2 == '.' and num_infer3 == '.') and num_back0 == '分' and num_back3 == '分': # 第卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分
- volume_N = -1
- if int(N_s[0]) > int(N_s[2]):
- volume_score = N_s[0]
- item_score = N_s[2] + '.' + N_s[3]
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[2]
- item_score = N_s[0]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- if (num_back0 == '卷' or num_back0 == '部') and num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分/第*卷,共*.*分,每题*分
- volume_N = int(N_s[0])
- if N_s[1] > N_s[3]:
- volume_score = N_s[1]
- item_score = N_s[3]
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[3]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分/第*卷,共*分,每题*.*分
- volume_N = int(N_s[0])
- if int(N_s[1]) > int(N_s[2]):
- volume_score = N_s[1]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[2]
- item_score = N_s[1]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分' and num_back2 == '.' and num_infer3 == '.' and num_back3 == '分': # 第卷,每题*.*分,共*.*分/第卷,共*.*分,每题*.*分
- volume_N = -1
- if N_s[0] > N_s[2]:
- volume_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_score = N_s[3]
- item_count = int(volume_score / item_score)
- else:
- volume_score = N_s[3]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back1 == '分' and num_back2 == '分':
- if N_s[3] > N_s[1]: # 第*卷,每题*.*分,共*分
- volume_N = N_s[0]
- volume_score = N_s[3]
- item_count = -1
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- else: # 第*卷,共*.*分,每题*分
- volume_N = N_s[0]
- volume_score = N_s[1]
- item_count = -1
- item_score = N_s[3]
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if keyword_item3.search(C_s):
- if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,共*.*分,*小题
- volume_N = N_s[0]
- volume_score = N_s[1]
- item_count = N_s[3]
- item_score = -1
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back3 == '分': # 第*卷,*小题 ,共*.*分
- volume_N = N_s[0]
- volume_score = N_s[2]
- item_count = N_s[1]
- item_score = -1
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- return all_structure
- else:
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if (num_back0 == '卷' or num_back0 == '部') and num_back2 == '分': # 第*卷,小题*.*分,*小题
- volume_N = N_s[0]
- volume_score = -1
- item_count = N_s[3]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and num_back3 == '分': # 第*卷,*小题 ,小题*.*分
- volume_N = N_s[0]
- volume_score = -1
- item_count = N_s[1]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- return all_structure
- else:
- return all_structure
- elif len(N_s) == 5:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer2 = s[num_index3 - len(N_s[2])]
- num_back2 = s[num_index3 + len(N_s[2])]
- all_3 = find_repeat(s, N_s[3])
- temp3 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[3]:
- temp3 = temp3 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[3]:
- temp3 = temp3 + 1
- for kk in range(len(N_s[2])):
- if N_s[2][kk] == N_s[3]:
- temp3 = temp3 + 1
- num_index4 = all_3[temp3]
- num_infer3 = s[num_index4 - len(N_s[3])]
- num_back3 = s[num_index4 + len(N_s[3])]
- all_4 = find_repeat(s, N_s[4])
- temp4 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[4]:
- temp4 = temp4 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[4]:
- temp4 = temp4 + 1
- for kk in range(len(N_s[2])):
- if N_s[2][kk] == N_s[4]:
- temp4 = temp4 + 1
- for ll in range(len(N_s[3])):
- if N_s[3][ll] == N_s[4]:
- temp4 = temp4 + 1
- num_index5 = all_4[temp4]
- num_infer4 = s[num_index5 - len(N_s[4])]
- num_back4 = s[num_index5 + len(N_s[4])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if isinstance(N_s[3], str):
- N_s[3] = int(N_s[3])
- if isinstance(N_s[4], str):
- N_s[4] = int(N_s[4])
- if keyword_item1.search(C_s):
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分': # 第*卷,每题*.*分,共*分,共*题/第卷,共*.*分,每题*分,共*题
- volume_N = N_s[0]
- if N_s[1] > N_s[3]:
- volume_score = N_s[1]
- item_score = N_s[3]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- volume_score = N_s[3]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分': # 第*卷,每题*分,共*.*分,共*题/第卷,共*分,每题*.*分,共*题
- volume_N = N_s[0]
- if N_s[1] > N_s[2]:
- volume_score = N_s[1]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- volume_score = N_s[2]
- item_score = N_s[1]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back3 == '分' and num_back4 == '分': # 第卷,共*题,共*.*分,每题*分/第卷,共*题,每题*.*分,共*分
- volume_N = N_s[0]
- if N_s[2] > N_s[4]:
- volume_score = N_s[2]
- item_score = N_s[4]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- volume_score = N_s[4]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*题,共*分,每题*.*分/第卷,共*题,每题*分,共*.*分
- volume_N = N_s[0]
- if N_s[2] > N_s[3]:
- volume_score = N_s[2]
- item_score = float(str(N_s[3]) + '.' + str(N_s[4]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- volume_score = N_s[3]
- item_score = N_s[2]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back4 == '分': # 第*卷,共*.*分,共*题,每题*分/第*卷,每题*.*分,共*题,共*分
- volume_N = N_s[0]
- if N_s[1] > N_s[4]:
- volume_score = N_s[1]
- item_score = N_s[4]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- volume_score = N_s[4]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_back0 == '卷' or num_back0 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back1 == '分' and num_back4 == '分': # 第*卷,共*分,共*题,每题*.*分/第卷,每题*分,共*题,共*.*分
- volume_N = N_s[0]
- if N_s[1] > N_s[3]:
- volume_score = N_s[0]
- item_score = float(str(N_s[4]) + '.' + str(N_s[4]))
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- volume_score = N_s[3]
- item_score = N_s[1]
- item_count = int(volume_score / item_score)
- type_score_dict_ocr['volume_N'] = volume_N
- type_score_dict_ocr['volume_total_score'] = volume_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- return all_structure
- else:
- return all_structure
- else:
- return all_structure
- if 'volume_N' not in type_score_dict_ocr.keys():
- all_structure = {'volume_structure': -1,
- 'Score_structure': -1}
- return all_structure
- else:
- for i in range(len_keyword_type1):
- if C_s.find(keyword_type1[i]) != -1 and C_s.find('非') != -1:
- type_score_dict_ocr['keyword_type'] = keyword_type1[1]
- break
- elif C_s.find(keyword_type1[0]) != -1:
- type_score_dict_ocr['keyword_type'] = keyword_type1[0]
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- break
- elif C_s.find(keyword_type1[i]) != -1:
- type_score_dict_ocr['keyword_type'] = keyword_type1[i]
- break
- elif i == len_keyword_type1 - 1:
- type_score_dict_ocr['keyword_type'] = keyword_type1[0]
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- break
- volume_structure_item = type_score_dict_ocr
- volume_structure.append(volume_structure_item)
- if Score_structure == []:
- all_structure = {'volume_structure': volume_structure,
- 'Score_structure': -1}
- else:
- all_structure = {'volume_structure': volume_structure,
- 'Score_structure': Score_structure}
- else:
- '''
- 对应试卷中不存在分卷信息的情况,根据包含数字的个数分为4类,暂定包含信息的有效数字个数小于4,并处理小题分数和总分可能包含小数点的情况
- 暂定小题个数不包含小数
- 暂定总分数中不存在有意义的小数位
- '''
- if keyword_item1.search(C_s):
- if len(N_s) == 1:
- num_index = s.index(N_s[0])
- num_infer = s[num_index - len(N_s[0])]
- num_back = s[num_index + len(N_s[0])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if num_back == '分': # 选择题/主观题,共*分
- item_total_score = N_s[0]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = int(item_total_score)
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = -1
- elif len(N_s) == 2:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if keyword_item2.search(C_s):
- if N_s[0] > N_s[1]: # 选择题/主观题/客观题,共*分,每题*分
- item_total_score = int(N_s[0])
- item_count = int(N_s[0] / N_s[1])
- item_score = N_s[1]
- else: # 选择题/主观题/客观题,每题*分,共*分
- item_total_score = int(N_s[1])
- item_count = int(N_s[1] / N_s[0])
- item_score = N_s[0]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- if keyword_item3.search(C_s):
- if num_back0 == '分': # 选择题/主观题,共*分,共*题
- item_total_score = N_s[0]
- item_count = N_s[1]
- item_score = N_s[0] / N_s[1]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '分': # 选择题/主观题,共*题,共*分
- item_total_score = N_s[1]
- item_count = N_s[0]
- item_score = N_s[1] / N_s[0]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back0 == '.' and num_infer1 == '.' and num_back1 == '分': # *.*分
- item_N = -1
- item_total_score = N_s[0]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = -1
- elif num_back1 == '分': # *,*分
- item_N = N_s[0]
- item_total_score = int(N_s[1])
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = -1
- else:
- return all_structure
- elif len(N_s) == 3:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer2 = s[num_index3 - len(N_s[2])]
- num_back2 = s[num_index3 + len(N_s[2])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if num_back0 == '分' and num_back2 == '分':
- if N_s[2] > N_s[0]: # 每题*分,共*题,共*分
- item_total_score = N_s[2]
- item_count = N_s[1]
- item_score = N_s[0]
- else: # 共*分,共*题,每题*分
- item_total_score = N_s[0]
- item_count = N_s[1]
- item_score = N_s[2]
- if item_total_score < item_count * item_score:
- item_total_score = item_count * item_score
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif (num_infer0 == '题' or num_infer0 == '空') and num_back0 == '分' and num_back1 == '分':
- if N_s[1] > N_s[0]: # 每题*分,共*分 ,共*题
- item_total_score = N_s[1]
- item_count = N_s[2]
- item_score = N_s[0]
- else: # 共*分,每题*分 ,共*题
- item_total_score = N_s[0]
- item_count = N_s[2]
- item_score = N_s[1]
- if item_total_score < item_count * item_score:
- item_total_score = item_count * item_score
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '分' and num_back2 == '分':
- if N_s[2] > N_s[1]: # 共*题,每题*分,共*分
- item_total_score = N_s[2]
- item_count = N_s[0]
- item_score = N_s[1]
- else: # 共*题,共*分,每题*分
- item_total_score = N_s[1]
- item_count = N_s[0]
- item_score = N_s[2]
- if item_total_score < item_count * item_score:
- item_total_score = item_count * item_score
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back0 != '.' and num_back1 == '分' and num_back2 == '分':
- if N_s[1] > N_s[2]: # *,共*分,每题*分
- item_N = N_s[0]
- item_total_score = N_s[1]
- item_count = int(N_s[1] / N_s[2])
- item_score = N_s[2]
- else: # *,每题*分 ,共*分
- item_N = N_s[0]
- item_total_score = N_s[2]
- item_count = int(N_s[2] / N_s[1])
- item_score = N_s[1]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '.' and num_infer1 == '.' and num_back2 == '分' and num_back1 == '分': # 每题*.*分,共*分/共*.*分,每题*分
- item_N = -1
- if int(N_s[0]) > int(N_s[2]):
- item_total_score = N_s[0]
- item_score = N_s[2]
- item_count = int(item_total_score/item_score)
- else:
- item_total_score = N_s[2]
- item_score = float(str(N_s[0])+'.'+str(N_s[1]))
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '.' and num_infer2 == '.' and num_back0 == '分' and num_back2 == '分': # 每题*分,共*.*分/共*分,每题*.*分
- item_N = -1
- if int(N_s[0]) > int(N_s[1]):
- item_total_score = N_s[0]
- item_score = float(str(N_s[1])+'.'+str(N_s[2]))
- item_count = int(item_total_score/item_score)
- else:
- item_total_score = N_s[1]
- item_score = N_s[0]
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if keyword_item3.search(C_s):
- if num_back2 == '分' and num_infer2 =='.' and num_back1 =='.': # *小题,共*.*分,
- item_N = -1
- item_total_score = N_s[1]
- item_count = N_s[0]
- item_score = N_s[1]/N_s[0]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '分' and num_infer1 =='.'and num_back0 =='.': # 共*.*分,*小题
- item_N = -1
- item_total_score = N_s[0]
- item_count = N_s[2]
- item_score = N_s[0]/N_s[2]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back2 == '分' and num_infer2 !='.': # *,*小题,共*分,
- item_N = N_s[0]
- item_total_score = N_s[2]
- item_count = N_s[1]
- item_score = N_s[2]/N_s[1]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '分' and num_infer1 !='.': # *,共*分,共*小题
- item_N = N_s[0]
- item_total_score = N_s[1]
- item_count = N_s[2]
- item_score = N_s[1] / N_s[2]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back2 == '分' and num_infer2 =='.' and num_back1 =='.': # *,共*.*分,
- item_N = N_s[0]
- item_total_score = N_s[1]
- item_count = -1
- item_score = -1
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back2 == '分':
- item_total_score = N_s[1]
- item_N = -1
- item_count = -1
- item_score = -1
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- elif len(N_s) == 4:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer2 = s[num_index3 - len(N_s[2])]
- num_back2 = s[num_index3 + len(N_s[2])]
- all_3 = find_repeat(s, N_s[3])
- temp3 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[3]:
- temp3 = temp3 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[3]:
- temp3 = temp3 + 1
- for kk in range(len(N_s[2])):
- if N_s[2][kk] == N_s[3]:
- temp3 = temp3 + 1
- num_index4 = all_3[temp3]
- num_infer3 = s[num_index4 - len(N_s[3])]
- num_back3 = s[num_index4 + len(N_s[3])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if isinstance(N_s[3], str):
- N_s[3] = int(N_s[3])
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if num_back1 == '分' and num_back3 == '分':
- if N_s[3] > N_s[1]: # *,每题*分,共*题,共*分
- item_N = N_s[0]
- item_total_score = N_s[3]
- item_count = N_s[2]
- item_score = N_s[1]
- else: # *,共*分,共*题,每题*分
- item_N = N_s[0]
- item_total_score = N_s[1]
- item_count = N_s[2]
- item_score = N_s[3]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr[
- 'volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '分' and num_back2 == '分':
- if N_s[2] > N_s[1]: # *,每题*分,共*分,共*题
- item_N = N_s[0]
- item_total_score = N_s[2]
- item_count = N_s[3]
- item_score = N_s[1]
- else: # *,共*分,每题*分,共*题
- item_N = N_s[0]
- item_total_score = N_s[1]
- item_count = N_s[3]
- item_score = N_s[2]
- type_score_dict_ocr['volume_N'] = item_N
- if item_total_score < item_count * item_score:
- item_total_score = item_count * item_score
- type_score_dict_ocr[
- 'volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back2 == '分' and num_back3 == '分':
- if N_s[3] > N_s[2]: # *,共*题,每题*分,共*分
- item_N = N_s[0]
- item_total_score = N_s[3]
- item_count = N_s[1]
- item_score = N_s[2]
- else:
- item_N = N_s[0]
- item_total_score = N_s[2]
- item_count = N_s[1]
- item_score = N_s[3]
- if item_total_score < item_count * item_score:
- item_total_score = item_count * item_score
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr[
- 'volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分'and num_back3 == '分' : # 共*.*分,共*题, 每题*分/每题*.*分,共*题,共*分
- item_N = -1
- if N_s[0] > N_s[3]:
- item_total_score = N_s[0]
- item_score = N_s[3]
- item_count = int(item_total_score / item_score)
- else:
- item_total_score = N_s[3]
- item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr[ 'item_total_score'] = item_total_score
- type_score_dict_ocr['item_count'] = item_count
- type_score_dict_ocr['item_score'] = item_score
- elif num_back2 == '.' and num_infer3 == '.' and num_back0 == '分'and num_back3 == '分': # 共*分,共*题, 每题*.*分/每题*分,共*题,共*.*分
- item_N = -1
- if N_s[0] > N_s[2]:
- item_total_score = N_s[0]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = int(item_total_score / item_score)
- else:
- item_total_score = N_s[2]
- item_score = N_s[0]
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr[ 'item_total_score'] = item_total_score
- type_score_dict_ocr['item_count'] = item_count
- type_score_dict_ocr['item_score'] = item_score
- elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back3 == '分': # 共*题,共*.*分,每题*分/共*题,每题*.*分,共*分
- item_N = -1
- if N_s[1] > N_s[3]:
- item_total_score = N_s[1]
- item_score = N_s[3]
- item_count = int(item_total_score / item_score)
- else:
- item_total_score = N_s[3]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back2 == '.' and num_infer3 == '.' and num_back3 == '分'and num_back1 == '分' : # 共*题,共*分,每题*.*分/共*题,每题*分,共*.*分
- item_N = -1
- if N_s[1] > N_s[2]:
- item_total_score = N_s[1]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = int(item_total_score / item_score)
- else:
- item_total_score = N_s[2]
- item_score = N_s[1]
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '.' and num_infer1 == '.' and num_back1 == '分'and num_back2 == '分' : # 每题*.*分,共*分,共*题/共*.*分,每题*分,共*题
- item_N = -1
- if N_s[0] > N_s[2]:
- item_total_score = N_s[0]
- item_score = N_s[2]
- item_count = int(item_total_score / item_score)
- else:
- item_total_score = N_s[2]
- item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back0 == '分' : # 每题*分,共*.*分,共*题/共*分,每题*.*分,共*题
- item_N = -1
- if N_s[0] > N_s[1]:
- item_total_score = N_s[0]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(item_total_score / item_score)
- else:
- item_total_score = N_s[1]
- item_score = N_s[0]
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back1 == '.' and num_infer2 == '.' and num_back2 == '分'and num_back3 == '分' : # *,共*.*分, 每题*分/每题*.*分,共*分
- item_N = N_s[0]
- if N_s[1] > N_s[3]:
- item_total_score = N_s[1]
- item_score = N_s[3]
- item_count = int(item_total_score / item_score)
- else:
- item_total_score = N_s[3]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr[ 'item_total_score'] = item_total_score
- type_score_dict_ocr['item_count'] = item_count
- type_score_dict_ocr['item_score'] = item_score
- elif num_back2== '.' and num_infer3== '.' and num_back1 == '分'and num_back3 == '分' : # *,共*分, 每题*.*分/*,每题*分,共*.*分
- item_N = int(N_s[0])
- if N_s[1] > N_s[2]:
- item_total_score = N_s[1]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = int(item_total_score / item_score)
- else:
- item_total_score = N_s[2]
- item_score = N_s[1]
- item_count = int(item_total_score / item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr[ 'item_total_score'] = item_total_score
- type_score_dict_ocr['item_count'] = item_count
- type_score_dict_ocr['item_score'] = item_score
- else:
- return all_structure
- else:
- if keyword_item3.search(C_s):
- if num_back3 == '分': # *,*小题,共*.*分
- item_total_score = N_s[2]
- item_N = N_s[0]
- item_count = N_s[1]
- item_score = item_total_score / item_count
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back2 == '分': # *,共*.*分,*小题
- item_total_score = N_s[1]
- item_N = N_s[0]
- item_count = N_s[3]
- item_score = item_total_score / item_count
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- return all_structure
- elif len(N_s) == 5:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer2 = s[num_index3 - len(N_s[2])]
- num_back2 = s[num_index3 + len(N_s[2])]
- all_3 = find_repeat(s, N_s[3])
- temp3 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[3]:
- temp3 = temp3 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[3]:
- temp3 = temp3 + 1
- for kk in range(len(N_s[2])):
- if N_s[2][kk] == N_s[3]:
- temp3 = temp3 + 1
- num_index4 = all_3[temp3]
- num_infer3 = s[num_index4 - len(N_s[3])]
- num_back3 = s[num_index4 + len(N_s[3])]
- all_4 = find_repeat(s, N_s[4])
- temp4 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[4]:
- temp4 = temp4 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[4]:
- temp4 = temp4 + 1
- for kk in range(len(N_s[2])):
- if N_s[2][kk] == N_s[4]:
- temp4 = temp4 + 1
- for ll in range(len(N_s[3])):
- if N_s[3][ll] == N_s[4]:
- temp4 = temp4 + 1
- num_index5 = all_4[temp4]
- num_infer4 = s[num_index5 - len(N_s[4])]
- num_back4 = s[num_index5 + len(N_s[4])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if isinstance(N_s[3], str):
- N_s[3] = int(N_s[3])
- if isinstance(N_s[4], str):
- N_s[4] = int(N_s[4])
- if keyword_item2.search(C_s):
- if keyword_item3.search(C_s):
- if num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back3 == '分' : # *,每题*.*分,共*分,*小题/*,共*.*分,每题*分,共*小题
- item_N = N_s[0]
- if N_s[1] > N_s[3]:
- item_total_score = N_s[1]
- item_score = N_s[3]
- item_count = N_s[4]
- else:
- item_total_score = N_s[3]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = N_s[4]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back2== '.' and num_infer3== '.' and num_back1 == '分'and num_back3 == '分' : # *,每题*分,共*.*分,*小题/*,共*分,每题*.*分,共*小题
- item_N = N_s[0]
- if N_s[1] > N_s[2]:
- item_total_score = N_s[1]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = N_s[4]
- else:
- item_total_score = N_s[2]
- item_score = N_s[1]
- item_count = N_s[4]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back4 == '分' : # *,*小题,每题*.*分,共*分/*,*小题,共*.*分,每题*分
- item_N = N_s[0]
- if N_s[2] > N_s[4]:
- item_total_score = N_s[2]
- item_score = N_s[4]
- item_count = N_s[1]
- else:
- item_total_score = N_s[4]
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_count = N_s[1]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back3== '.' and num_infer4== '.' and num_back2 == '分'and num_back4 == '分' : # *,*小题,每题*分,共*.*分/*,*小题,共*分,每题*.*分
- item_N = N_s[0]
- if N_s[2] > N_s[3]:
- item_total_score = N_s[2]
- item_score = float(str(N_s[3]) + '.' + str(N_s[3]))
- item_count = N_s[1]
- else:
- item_total_score = N_s[3]
- item_score = N_s[2]
- item_count = N_s[1]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back4 == '分' : # *,每题*.*分,*小题,共*分/*,共*.*分,*小题,每题*分
- item_N = N_s[0]
- if N_s[1] > N_s[4]:
- item_total_score = N_s[1]
- item_score = N_s[4]
- item_count = N_s[3]
- else:
- item_total_score = N_s[4]
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_count = N_s[3]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back3== '.' and num_infer4== '.' and num_back1 == '分'and num_back4 == '分' : # *,每题*分,*小题,共*.*分/*,共*分,*小题,每题*.*分
- item_N = N_s[0]
- if N_s[1] > N_s[3]:
- item_total_score = N_s[1]
- item_score = float(str(N_s[3]) + '.' + str(N_s[4]))
- item_count = N_s[2]
- else:
- item_total_score = N_s[3]
- item_score = N_s[1]
- item_count = N_s[2]
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- # 暂定len=5时不判断不存在题号的情况
- return all_structure
- else:
- # 暂定len=5时不判断不存在题目个数的情况
- return all_structure
- else:
- # 暂定len=5时不判断不存在小项分数的情况
- return all_structure
- else:
- return all_structure
- else:
- if keyword_item2.search(C_s):
- if len(N_s) == 1:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if num_back0 == '分': # 每题*分
- item_score = N_s[0]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = -1
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- elif len(N_s) == 2:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if keyword_item3.search(C_s):
- if num_back1 == '分': # 共*题,每题*分
- item_total_score = N_s[0] * N_s[1]
- item_count = N_s[0]
- item_score = N_s[1]
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back0 == '分': # 每题*分,共*题
- item_total_score = int(N_s[0]) * int(N_s[1])
- item_count = int(N_s[1])
- item_score = int(N_s[0])
- type_score_dict_ocr['volume_N'] = -1
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- if num_back1 == '分' and num_back0 == '.' and num_infer1 == '.': # *.*分
- item_N = -1
- item_score = float(str(N_s[0])+'.'+str(N_s[1]))
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = -1
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '分': # *,*分
- item_N = int(N_s[0])
- item_score = int(N_s[1])
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = -1
- type_score_dict_ocr['volume_count'] = -1
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- elif len(N_s) == 3:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer2 = s[num_index3 - len(N_s[2])]
- num_back2 = s[num_index3 + len(N_s[2])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if num_back2 == '分' and (num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*分
- item_N = int(N_s[0])
- item_total_score = int(N_s[1]) * int(N_s[2])
- item_count = int(N_s[1])
- item_score = int(N_s[2])
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '分' and (num_back2 == '题' or num_back2 == '小' or num_back2 == '空') and num_back0 != '分': # *,每题*分,共*题
- item_N = int(N_s[0])
- item_total_score = int(N_s[1]) * int(N_s[2])
- item_count = int(N_s[2])
- item_score = int(N_s[1])
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_infer2 == '.' and num_back2 == '分' and num_back1 == '.': # 共*题,每题*.*分
- item_N = -1
- item_count = int(N_s[0])
- item_score = float(str(N_s[1])+'.'+str(N_s[2]))
- item_total_score = int(item_count * item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_infer1 == '.' and num_back1 == '分' and num_back0 == '.' : # 每题*.*分,共*题
- item_N = -1
- item_count = int(N_s[2])
- item_score = float(str(N_s[0]) + '.' + str(N_s[1]))
- item_total_score = int(item_count * item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back2 == '分': # * * ,每题*分
- item_N = -1
- item_count = -1
- item_score = -1
- item_total_score = int(N_s[2])
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- elif len(N_s) == 4:
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer2 = s[num_index3 - len(N_s[2])]
- num_back2 = s[num_index3 + len(N_s[2])]
- all_3 = find_repeat(s, N_s[3])
- temp3 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[3]:
- temp3 = temp3 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[3]:
- temp3 = temp3 + 1
- num_index4 = all_3[temp3]
- num_infer3 = s[num_index4 - len(N_s[3])]
- num_back3 = s[num_index4 + len(N_s[3])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if isinstance(N_s[3], str):
- N_s[3] = int(N_s[3])
- if num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and (num_back1 == '题' or num_back1 == '小' or num_back1 == '空') and num_back0 != '分': # *,共*题,每题*.*分
- item_N = int(N_s[0])
- item_count = int(N_s[1])
- item_score = float(str(N_s[2]) + '.' + str(N_s[3]))
- item_total_score = int(item_count * item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and (num_back3 == '题' or num_back3 == '小' or num_back3 == '空') and num_back0 != '分': # *,每题*.*分,共*题
- item_N = int(N_s[0])
- item_count = int(N_s[3])
- item_score = float(str(N_s[1]) + '.' + str(N_s[2]))
- item_total_score = int(item_count * item_score)
- type_score_dict_ocr['volume_N'] = item_N
- type_score_dict_ocr['volume_total_score'] = item_total_score
- type_score_dict_ocr['volume_count'] = item_count
- type_score_dict_ocr['volume_score'] = item_score
- else:
- return all_structure
- else:
- return all_structure
- else:
- if C_s.find(keyword_item4[0]) != -1:
- if len(N_s) == 2: # *,*分
- num_index1 = s.index(N_s[0])
- num_infer0 = s[num_index1 - len(N_s[0])]
- num_back0 = s[num_index1 + len(N_s[0])]
- if num_infer0 == '( ' or num_back0 == ')':
- return all_structure
- else:
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer1 = s[num_index2 - len(N_s[1])]
- num_back1 = s[num_index2 + len(N_s[1])]
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if int(N_s[0]) > 1000:
- item_N =0
- item_N1 = int(N_s[0][-4] + N_s[0][-3])
- item_N2 = int(N_s[0][-2] + N_s[0][-1])
- if item_N2 - item_N1 == 1:
- item_N = [0, 0]
- item_N = [item_N1, item_N2]
- elif item_N2 - item_N1 == 2:
- item_N = [0, 0, 0]
- item_N = [item_N1, item_N1 + 1, item_N2]
- elif item_N2 - item_N1 == 3:
- item_N = [0, 0, 0, 0]
- item_N = [item_N1, item_N1 + 1, item_N1 + 2, item_N2]
- type_score_dict_ocr['item_N'] = item_N
- item_total_score = int(N_s[1])
- type_score_dict_ocr['item_total_score'] = item_total_score
- type_score_dict_ocr['item_count'] = -1
- type_score_dict_ocr['item_score'] = -1
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- all_structure = {'volume_structure': -1,
- 'Score_structure': Score_structure}
- return all_structure
- else:
- item_N = int(N_s[0])
- item_total_score = int(N_s[1])
- type_score_dict_ocr['item_N'] = item_N
- type_score_dict_ocr['item_total_score'] = item_total_score
- type_score_dict_ocr['item_count'] = -1
- type_score_dict_ocr['item_score'] = -1
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- all_structure = {'volume_structure': -1,
- 'Score_structure': Score_structure}
- return all_structure
- elif len(N_s) == 3: # *,*分
- num_index1 = s.index(N_s[0])
- num_infer1 = s[num_index1 - len(N_s[0])]
- num_back1 = s[num_index1 + len(N_s[0])]
- all_1 = find_repeat(s, N_s[1])
- temp1 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[1]:
- temp1 = temp1 + 1
- num_index2 = all_1[temp1]
- num_infer2 = s[num_index2 - len(N_s[1])]
- num_back2 = s[num_index2 + len(N_s[1])]
- all_2 = find_repeat(s, N_s[2])
- temp2 = 0
- for ii in range(len(N_s[0])):
- if N_s[0][ii] == N_s[2]:
- temp2 = temp2 + 1
- for jj in range(len(N_s[1])):
- if N_s[1][jj] == N_s[2]:
- temp2 = temp2 + 1
- num_index3 = all_2[temp2]
- num_infer3 = s[num_index3 - len(N_s[2])]
- if num_index3 + len(N_s[2]) < len(s):
- num_back3 = s[num_index3 + len(N_s[2])]
- else:
- num_back3 = []
- if isinstance(N_s[0], str):
- N_s[0] = int(N_s[0])
- if isinstance(N_s[1], str):
- N_s[1] = int(N_s[1])
- if isinstance(N_s[2], str):
- N_s[2] = int(N_s[2])
- if num_back3 == '分' and num_infer3 == '.' and num_back2 == '分': # *,*.*分
- item_N = N_s[0]
- item_total_score = N_s[1]
- type_score_dict_ocr['item_total_score'] = item_total_score
- type_score_dict_ocr['item_N'] = item_N
- type_score_dict_ocr['item_count'] = -1
- type_score_dict_ocr['item_score'] = -1
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- all_structure = {'volume_structure': -1,
- 'Score_structure': Score_structure}
- return all_structure
- elif num_back3 == '分':
- if int(N_s[1]) - int(N_s[0]) == 1:
- item_N = [0, 0]
- item_N = [int(N_s[0]), int(N_s[1])]
- elif int(N_s[1]) - int(N_s[0]) == 2:
- item_N = [0, 0, 0]
- item_N = [int(N_s[0]), int(N_s[0]) + 1, int(N_s[1])]
- elif int(N_s[1]) - int(N_s[0]) == 3:
- item_N = [0, 0, 0, 0]
- item_N = [int(N_s[0]), int(N_s[0]) + 1, int(N_s[0]) + 2,
- int(N_s[1])]
- else:
- return all_structure
- item_total_score = int(N_s[2])
- type_score_dict_ocr['item_total_score'] = item_total_score
- type_score_dict_ocr['item_N'] = item_N
- type_score_dict_ocr['item_count'] = -1
- type_score_dict_ocr['item_score'] = -1
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- all_structure = {'volume_structure': -1,
- 'Score_structure': Score_structure}
- return all_structure
- elif len(N_s) == 1:
- num_index1 = s.index(N_s[0])
- num_infer1 = s[num_index1 - len(N_s[0])]
- if num_index1 + len(N_s[0]) < len(s):
- num_back1 = s[num_index1 + len(N_s[0])]
- item_total_score = int(N_s[0])
- type_score_dict_ocr['item_N'] = -1
- type_score_dict_ocr['item_total_score'] = item_total_score
- type_score_dict_ocr['item_count'] = -1
- type_score_dict_ocr['item_score'] = -1
- if num_back1 == '分': # *分
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- all_structure = {'volume_structure': -1,
- 'Score_structure': Score_structure}
- return all_structure
- else:
- return all_structure
- if 'volume_N' not in type_score_dict_ocr.keys():
- all_structure = {'volume_structure': -1,
- 'Score_structure': -1}
- return all_structure
- else:
- for xxx in range(len_keyword_type1):
- if C_s.find(keyword_type1[1]) != -1:
- type_score_dict_ocr['keyword_type'] = keyword_type1[1]
- break
- elif C_s.find(keyword_type1[0]) != -1:
- type_score_dict_ocr['keyword_type'] = keyword_type1[0]
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- break
- elif C_s.find(keyword_type1[xxx]) != -1:
- type_score_dict_ocr['keyword_type'] = keyword_type1[xxx]
- break
- elif xxx == len_keyword_type1 - 1:
- type_score_dict_ocr['keyword_type'] = -2
- type_score_dict_ocr['item_N'] = type_score_dict_ocr.pop('volume_N')
- type_score_dict_ocr['item_total_score'] = type_score_dict_ocr.pop('volume_total_score')
- type_score_dict_ocr['item_count'] = type_score_dict_ocr.pop('volume_count')
- type_score_dict_ocr['item_score'] = type_score_dict_ocr.pop('volume_score')
- Score_structure_item = type_score_dict_ocr
- Score_structure.append(Score_structure_item)
- break
- volume_structure_item = type_score_dict_ocr
- volume_structure.append(volume_structure_item)
- if Score_structure == []:
- all_structure = {'volume_structure': volume_structure,
- 'Score_structure': -1}
- elif Score_structure[0]['keyword_type'] != -2:
- all_structure = {'volume_structure': volume_structure,
- 'Score_structure': Score_structure}
- else:
- all_structure = {'volume_structure': -1,
- 'Score_structure': Score_structure}
- return all_structure
- except Exception as e:
- print('Skip ocr_key_words')
- return all_structure
|