123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189 |
- #!/usr/bin/env python
- # -*- coding:utf-8 -*-
- """
- 单题再解析、结构化
- """
- import re
- from structure.option import option_structure
- from utils.equation_extract import get_simpstr2eqn, get_equation_instr
- from utils.html_again_parse import css_label_wash
- from utils.washutil import base642img, css_conflict_deal, convert_huanhang, wash_after
- from utils.field_eq2latex import latex_wash
- from structure.dati2slave import get_slave
# Leading question number such as "12." or "3、"; group 1 is the number itself.
_ITEM_NUMBER_RE = re.compile(r"^([1-9][0-9]|[1-9])\s*[..、、]")

# User-facing message returned when an answer/analysis marker occurs more than once.
_DUPLICATE_MARKER_MSG = "编辑后的文本出现多个【答案】或【解析】字段,请删除,每个字段只保留唯一且单独成行"


def _split_sections(text):
    """Split washed text into ``(stem, key, parse)``; return an error string on bad markers."""
    # Prefer markers that start on their own line; fall back to markers anywhere.
    parts = re.split(r"(\n+【答案】|\n+【解析】)", text)
    if len(parts) > 5:
        return _DUPLICATE_MARKER_MSG
    if len(parts) < 5:
        parts = re.split(r"(【答案】|【解析】)", text)
        if len(parts) > 5:
            return _DUPLICATE_MARKER_MSG

    stem, key, parse = parts[0], "", ""
    if len(parts) == 5:
        # Both markers present: accept either order, reject the same marker twice.
        first, second = parts[1].strip(), parts[3].strip()
        if first == "【答案】" and second == "【解析】":
            key, parse = parts[2], parts[4]
        elif first == "【解析】" and second == "【答案】":
            key, parse = parts[4], parts[2]
        else:
            return "编辑后的文本出现多个【答案】或【解析】字段,请修改,每个字段只保留唯一且单独成行"
    elif len(parts) > 2:
        # Only one marker present: fill whichever field it introduces.
        marker = parts[1].strip()
        if marker == "【答案】":
            key = parts[2]
        elif marker == "【解析】":
            parse = parts[2]
    return stem, key, parse


def single_parse(one_item, item_type, wordid, source="zxhx", subject="数学"):
    """Re-parse one edited question and return it in structured form.

    The text is washed (CSS/labels, ``<p>`` tags, inline images, line breaks
    around table tags, LaTeX inline-math delimiters), split into stem, answer
    and analysis on the ``【答案】`` / ``【解析】`` markers, and then passed to
    the option parser or the sub-question splitter.

    Args:
        one_item: Raw HTML/text of a single question.
        item_type: Question type label; stored under ``"type"``. When it
            contains one of the choice-question keywords, options are parsed.
        wordid: Identifier forwarded to ``base642img`` and ``wash_after``.
        source: Request origin. For ``"school"``, ``"teacher"`` and
            ``"xue_guan"``, maths/physics questions skip ``get_slave``.
        subject: Subject name; also forwarded to ``wash_after``.

    Returns:
        The structured question dict on success. It starts with the keys
        ``type``, ``key``, ``parse``, ``stem``, ``item_id`` and ``errmsgs``
        (the latter joined into one ``";"``-separated string before return);
        downstream helpers may add more. On a validation failure a ``str``
        with the user-facing error message is returned instead.
    """
    print("原始:")
    text = css_label_wash(one_item)
    # NOTE(review): "<h[12345]" removes only the "<hN" prefix of a heading tag;
    # the rest of the tag and any closing tag are left in place - confirm intended.
    text = re.sub("</?p>|<h[12345]", "", text)
    # NOTE(review): the search strings below were presumably special whitespace
    # (non-breaking / full-width space or an HTML entity) - verify the literals.
    text = text.replace(" ", " ").replace(" ", " ")
    text = base642img(text, wordid)

    # Drop line breaks (<br> or newlines) that surround table tags ...
    text = re.sub(r"(<br\s*/?>\s*|\n\s*)+<(/?table( .*?)?|/?tbody( .*?)?|/?t[rhd]( .*?)?)>\s*(<br\s*/?>\s*|\n\s*)+",
                  r"<\2>", text)
    # ... and any that still directly follow a table tag.
    text = re.sub(r'(</?t[drh]( .*?")?>|</?table>|</?tbody>)(<br\s*/?>|\n)+', r"\1", text, flags=re.S)

    # Remove empty inline formulas, then turn the remaining \( ... \) delimiters into $.
    text = re.sub(r"\\\(\s*{\s*\}\s*\\\)", "", text)
    text = re.sub(r"\\\(\s*\\\)", "", text)
    text = re.sub(r"\\\(\s*|\s*\\\)", "$", text)
    text = latex_wash(text, 1)

    sections = _split_sections(text)
    if isinstance(sections, str):
        return sections
    stem, key, parse = sections

    # Pull the leading question number off the stem (0 when there is none).
    stem = stem.strip()
    number = _ITEM_NUMBER_RE.match(stem)
    new_item_struct = {
        "type": item_type,
        "key": key,
        "parse": parse,
        "stem": stem[number.end():] if number else stem,
        "item_id": int(number.group(1)) if number else 0,
    }
    if len(new_item_struct["stem"].strip()) < 3:
        return "题干为空,请补充完整"

    new_item_struct["errmsgs"] = []
    if re.search("选择|不定选择|多选|单选", item_type):
        # Choice question: re-parse the options out of the stem.
        new_item_struct = option_structure(new_item_struct, new_item_struct["stem"], new_item_struct["key"], 1, 1)
        if new_item_struct["errmsgs"]:
            return ";;".join(new_item_struct["errmsgs"])
        # An option holding nothing but a colon counts as empty.
        if "options" in new_item_struct and any(
                not option.replace(":", "").strip() for option in new_item_struct["options"]):
            return "存在选项为空,请补充完整"
        new_item_struct["answer_type"] = "选择题"
    elif ("数学" in subject or "物理" in subject) and source in ["school", "teacher", "xue_guan"]:
        # Maths/physics questions from these sources are not split into sub-questions.
        pass
    else:
        new_item_struct = get_slave(new_item_struct, new_item_struct["stem"], new_item_struct["parse"],
                                    new_item_struct["key"])

    # NOTE(review): assumed to run for every question type, not only the
    # sub-question branch above - confirm against the original indentation.
    new_item_struct = wash_after([new_item_struct], wordid, subject)[0]
    # Line-break conversion; the return value is not used here.
    convert_huanhang(new_item_struct)
    new_item_struct["errmsgs"] = ";".join(new_item_struct["errmsgs"])
    return new_item_struct
if __name__ == '__main__':
    # Manual smoke test: parse one of the sample questions below and pretty-print the result.
    from pprint import pprint

    # Sample 1: question 11, options A-D written inline in the stem, answer and analysis blocks.
    html = r'''
<div class="stem-wraper" data-v-f9692732=""><span class="topic-number" data-v-f9692732="">11.</span><span class="stem" data-v-f9692732="">在2A(g)+B(g)<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zyk/uploadfiles/wording/6232c845a7d375f4518b9b22/image8.png" width="36px" height="10px">3C(g)+4D(g)反应中,表示该反应速率最快的是<br><br>A、v(A)=8mol·L<sup>-1</sup>·min<sup>-1<br></sup>B、v(B)=0.3mol·L<sup>-1</sup>·s<sup>-1<br></sup>C、v(C)=0.6mol·L<sup>-1</sup>·s<sup>-1<br></sup>D、v(D)=1mol·L<sup>-1</sup>·s<sup>-1</sup></span></div>
<ul class="stem-options" data-v-f9692732="">
<li data-v-f9692732=""> </li>
</ul>
<div class="topic-analysis" data-v-f9692732="">
<div class="topic-analysis-content" data-v-f9692732=""><span class="analysis-prefix" data-v-f9692732="">【答案】</span><span data-v-f9692732="">B</span></div>
<div class="topic-analysis-content" data-v-f9692732=""><span class="analysis-prefix" data-v-f9692732="">【解析】</span><span data-v-f9692732="">都换算成用B表示的速率,分别为:<br>A.v(B)=<span class="math-tex">\(\frac{\text{8mol}\cdot {\text{L}}^{\text{-1}}·{\text{min}}^{\text{-1}}}{\text{2}×\text{60s}\cdot {\text{min}}^{\text{-1}}}\)</span>=0.067 mol·L<sup>-1</sup>·s<sup>-1</sup>;<br>B.v(B)=0.3mol·L<sup>-1</sup>·s<sup>-1</sup>;<br>C.v(B)=<span class="math-tex">\(\frac{\text{0}\text{.6mol}·{\text{L}}^{\text{-1}}·{\text{s}}^{\text{-1}}}{\text{3}}\)</span>=0.2mol·L<sup>-1</sup>·s<sup>-1</sup>;<br>D.v(B)=<span class="math-tex">\(\frac{\text{1mol}·{\text{L}}^{\text{-1}}·{\text{s}}^{\text{-1}}}{\text{4}}\)</span>=0.25mol·L<sup>-1</sup>·s<sup>-1</sup>;所以B选项最快,故选B。<br>【点睛】<br>比较用不同物质表示的反应速率快慢时,根据速率之比等于方程式的化学计量数之比,换算成用同一种物质表示的速率,同时还要注意单位的统一。<br></span></div>
</div>
'''
    # Sample 2: question 7, a free-response question with a numbered sub-part in the stem.
    hml2 = """
<div class="stem-wraper" data-v-35454a0f=""><span class="topic-number" data-v-35454a0f="">7.</span><span class="stem" data-v-35454a0f="">农业生产中的一些栽培措施可以影响作物的生理活动,促进作物的生长发育,达到增加产量等目的。回答下列问题:<br>(1)中耕松土、科学施肥、合理灌溉等措施都能有效提高农作物的产量。中耕是指作物生长期中,在植株之间去除杂草并进行松土的一项栽培措施,该栽培措施对作物的作用有____________(答出1点即可)。农田施肥的同时,往往需要适当浇水,此时浇水的原因是____________________(答出1点即可)。</span></div>
<div class="topic-analysis" data-v-35454a0f="">
<div class="topic-analysis-content" data-v-35454a0f=""><span class="analysis-prefix" data-v-35454a0f="">【答案】</span><span data-v-35454a0f="">(1)减少杂草对水分、矿质元素和光的竞争增加土壤氧气含量,促进根系的呼吸作用 肥料中的矿质元素只有溶解在水中才能被作物根系吸收 (2)叶绿体类囊体薄膜 C<sub>5</sub> 叶绿体基质 (3)A和D 作物A光饱和点高且长得高,可利用上层光照进行光合作用;作物D光饱和点低且长得矮,与作物A间作后,能利用下层弱光进行光合作用</span></div>
<div class="topic-analysis-content" data-v-35454a0f=""><span class="analysis-prefix" data-v-35454a0f="">【解析】</span><span data-v-35454a0f="">(1)去除杂草减小种间竞争,使作物有更多的营养物质并得到更多的光照,进行松土可以减少杂草对水分、矿质元素和光的竞争增加土壤氧气含量,促进根系的呼吸作用。农田施肥浇水可以降低肥料浓度,避免肥料浓度高导致植物失水过多,影响正常生长,另外肥料中的矿质元素只有溶解在水中才能被作物根系吸收。(2)在农作物叶肉细胞中,光合色素分布在叶绿体类囊体薄膜上;参与CO<sub>2</sub>固定的化学物质是C<sub>5</sub>,C<sub>5</sub>和CO<sub>2</sub>生成C<sub>3</sub>,为暗反应过程的酶在叶绿体基质中。(3)从提高光能利用率的角度考虑,种间套作一般高矮植株套作,高的植物对光能要求高,矮的植株对光能要求低。表中分析可得,作物A光饱和点高且长得高,可利用上层光照进行光合作用;作物D光饱和点低且长得矮,与作物A间作后,能利用下层弱光进行光合作用,故最适合进行间作的两种作物是A和D。</span></div>
</div>
"""
    # Sample 3: question 20, stem and analysis full of math-tex spans.
    # Raw string: the LaTeX backslashes (\(, \quad, \circ, \prime ...) are not valid escapes.
    hml3 = r"""
<div class="stem-wraper" data-v-35454a0f=""><span class="topic-number" data-v-35454a0f="">20.</span><span class="stem" data-v-35454a0f="">如图所示,竖直放置、导热良好的<span class="math-tex">\(U\)</span>形管内装有水银,右管封闭了一段长L=20cn<br>的空气柱,此时左、右两侧的水银面高度差为<span class="math-tex">\({h_{1}=13cm}\)</span>现从管的开口端慢慢倒入水银,最<br>终左管水银面比右管水银面高<span class="math-tex">\({h_{2}=8\quad cm}\)</span>,环境温度始终不变,外界大气压po=<br><span class="math-tex">\({76cmHg_{\circ }}\)</span>求:<br>(1)右管封闭空气柱的最终长度,<br>(2)加入的水银柱长度,<br>衡中]<br>同卷<br><img src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/imgpaper/lqy_upload/628d938481b582c0470d02eb/img_70.png" width="187" height="211"></span></div>
<div class="topic-analysis" data-v-35454a0f="">
<div class="topic-analysis-content" data-v-35454a0f=""><span class="analysis-prefix" data-v-35454a0f="">【答案】</span><span data-v-35454a0f="">B</span></div>
<div class="topic-analysis-content" data-v-35454a0f=""><span class="analysis-prefix" data-v-35454a0f="">【解析】</span><span data-v-35454a0f="">15.解:(1)以封闭气体为研究对象,初态<br><span class="math-tex">\({j_{1}=b_{0}-ogh_{1}=63\quad cmHg,V_{1}=L}\)</span><br>末态<br><span class="math-tex">\({f_{2}=p_{0}+ogh_{2}=84cmg,V_{2}=L^{\prime },}\)</span><br>根据玻意耳定律<br><span class="math-tex">\({p_{1}V_{1}=p_{2}V_{2}}\)</span><br>解得<span class="math-tex">\({L^{\prime }=15cm}\)</span><br>(2)右管中水银柱增加的长度<br><span class="math-tex">\({x_{1}=L-L^{\prime }=5\cot1}\)</span><br>左管中水银柱增加的长度<br><span class="math-tex">\({r_{2}=h_{1}+h_{2}+(L-L^{\prime })=26c}\)</span><br>加入的水银柱长度<br><span class="math-tex">\({x=x_{1}+x_{2}=31cm}\)</span></span></div>
</div>
"""
    # Sample 4: question 19, stem followed by a table plus a separate A-D options list.
    hml4 = r"""
<div class="stem-wraper" data-v-3d09d6a3=""><span class="topic-number" data-v-3d09d6a3="">19.</span><span class="stem" data-v-3d09d6a3=""><span class="stem" data-v-3d09d6a3="">下列离子方程式的书写及评价均合理的是</span></span>
<table>
<tbody>
<tr>
<td>选项</td>
<td>离子方程式</td>
<td>评价</td>
</tr>
<tr>
<td>A</td>
<td>将<span class="math-tex">\( \text{1 mol }{\text{Cl}}_{\text{2}} \)</span>通入含<span class="math-tex">\( \text{1 mol }{\text{FeI}}_{\text{2}} \)</span>溶液中:<span class="math-tex">\({\text{2Fe}}^{\text{2+}}{\text{+2I}}^{\text{-}}{\text{+2Cl}}_{\text{2}}{\text{=2Fe}}^{\text{3+}}{\text{+4Cl}}^{\text{-}}{\text{+I}}_{\text{2}}\)</span></td>
<td>正确;<span class="math-tex">\({\text{Cl}}_{\text{2}}\)</span>过量,可将<span class="math-tex">\({\text{Fe}}^{\text{2+}}\)</span>、<span class="math-tex">\({\text{I}}^{\text{-}}\)</span>均氧化</td>
</tr>
<tr>
<td>B</td>
<td><span class="math-tex">\( \text{1 mol}·{\text{L}}^{\text{-1}} \)</span>的<span class="math-tex">\({\text{NaA1O}}_{\text{2}}\)</span>溶液和<span class="math-tex">\( \text{2.5 mol}·{\text{L}}^{\text{-1}} \)</span>的<span class="math-tex">\(\text{HCl}\)</span>溶液等体积均匀混合:<span class="math-tex">\({\text{2AlO}}_{\text{2}}^{\text{-}}{\text{+5H}}^{\text{+}}{\text{=A1}}^{\text{3+}}{\text{+Al(OH)}}_{\text{3}}↓{\text{+H}}_{\text{2}}\text{O}\)</span></td>
<td>正确;<span class="math-tex">\({\text{AlO}}_{\text{2}}^{\text{-}}\)</span>与<span class="math-tex">\({\text{Al(OH)}}_{\text{3}}\)</span>消耗的<span class="math-tex">\({\text{H}}^{\text{+}}\)</span>的物质的量之比为<span class="math-tex">\(\text{2:3}\)</span></td>
</tr>
<tr>
<td>C</td>
<td>过量<span class="math-tex">\({\text{SO}}_{\text{2}}\)</span>通入<span class="math-tex">\(\text{NaClO}\)</span>溶液中:<span class="math-tex">\({\text{SO}}_{\text{2}}{\text{+H}}_{\text{2}}{\text{O+ClO}}^{\text{-}}{\text{=HClO+HSO}}_{\text{3}}^{\text{-}}\)</span></td>
<td>正确;说明酸性:<span class="math-tex">\({\text{H}}_{\text{2}}{\text{SO}}_{\text{3}}\)</span>强于<span class="math-tex">\(\text{HClO}\)</span></td>
</tr>
<tr>
<td>D</td>
<td><span class="math-tex">\({\text{Mg(HCO}}_{\text{3}}{\text{)}}_{\text{2}}\)</span>溶液与足量的<span class="math-tex">\(\text{NaOH}\)</span>溶液反应:<span class="math-tex">\({\text{Mg}}^{\text{2+}}{\text{+HCO}}_{\text{3}}^{\text{-}}{\text{+OH}}^{\text{-}}{\text{=MgCO}}_{\text{3}}↓{\text{+H}}_{\text{2}}\text{O}\)</span></td>
<td>正确;酸式盐与碱反应生成正盐和水</td>
</tr>
</tbody>
</table>
</div>
<ul class="stem-options" data-v-3d09d6a3="">
<li data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">A:</span><span data-v-3d09d6a3=""><span class="math-tex">\(A\)</span></span></li>
<li data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">B:</span><span data-v-3d09d6a3=""><span class="math-tex">\(B\)</span></span></li>
<li data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">C:</span><span data-v-3d09d6a3=""><span class="math-tex">\(C\)</span></span></li>
<li data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">D:</span><span data-v-3d09d6a3=""><span class="math-tex">\(D\)</span></span></li>
</ul>
<div class="topic-analysis" data-v-3d09d6a3="">
<div class="topic-analysis-content" data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">【答案】</span><span data-v-3d09d6a3="">B</span></div>
<div class="topic-analysis-content" data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">【解析】</span><span data-v-3d09d6a3="">A.将1molCl<sub>2</sub>通入到含1molFeI<sub>2</sub>溶液中,由于I<sup>-</sup>的还原性强于Fe<sup>2+</sup>,所以<span class="math-tex">\( \text{1 mol }{\text{Cl}}_{\text{2}} \)</span>只能将2molI<sup>-</sup>氧化,其正确的离子方程式为2I<sup>-</sup>+Cl<sub>2</sub>═2Cl<sup>-</sup>+I<sub>2</sub>,故A错误;<br>B.1mol/L的NaAlO<sub>2</sub>溶液和2.5mol/L的HCl溶液等体积互相均匀混合,设溶液体积为1L,偏铝酸钠和HCl的物质的量分别为1mol、2.5mol,1mol偏铝酸钠消耗1molHCl生成1mol氢氧化铝,剩余的1.5molHCl能够溶解0.5mol氢氧化铝,反应的离子方程式为<span class="math-tex">\({\text{2AlO}}_{\text{2}}^{\text{-}}{\text{+5H}}^{\text{+}}{\text{=A1}}^{\text{3+}}{\text{+Al(OH)}}_{\text{3}}↓{\text{+H}}_{\text{2}}\text{O}\)</span>,<span class="math-tex">\({\text{AlO}}_{\text{2}}^{\text{-}}\)</span>与<span class="math-tex">\({\text{Al(OH)}}_{\text{3}}\)</span>消耗的<span class="math-tex">\({\text{H}}^{\text{+}}\)</span>的物质的量之比为1:1.5=2:3,故B正确;<br>C.HClO有强氧化性,能够氧化SO<sub>2</sub>,正确的离子方程式为SO<sub>2</sub>+H<sub>2</sub>O+ClO<sup>-</sup>=Cl<sup>-</sup>+SO<sub>4</sub><sup>2-</sup>+2H<sup>+</sup>,故C错误;<br>D.Mg(HCO<sub>3</sub>)<sub>2</sub>溶液与足量的NaOH溶液反应,由于氢氧化镁比碳酸镁更难溶,所以反应生成氢氧化镁沉淀,其离子方程式为Mg<sup>2+</sup>+2<span class="math-tex">\({\text{HCO}}_{3}^{-}\)</span>+4OH<sup>-</sup>=Mg(OH)<sub>2</sub>↓+2H<sub>2</sub>O+2<span class="math-tex">\({\text{CO}}_{3}^{2-}\)</span>,故D错误;<br>答案为B。<br></span></div>
</div>
"""
    # Only sample 4 is parsed; swap the first argument to try another sample.
    aa = single_parse(hml4, "选择题", "456")
    pprint(aa)
    # Disabled scratch check: it ran base642img and css_conflict_deal on a short
    # text containing $...$ LaTeX and printed the result.
|