# replace_pic_new.py
  1. import re
  2. from pprint import pprint
  3. from util import del_outlier
# Regular expressions shared by replace_pics1 / replace_pics2 below.
#
# NOTE(review): several character classes list the same character twice
# ("[((]", "[))]", "[.、.]"). That is harmless, but it may mean a full-width
# variant of the bracket / full stop was lost in an encoding round-trip --
# confirm against the canonical copy of this file before relying on them.

# Numbered blank markers; group 1 captures the blank's number.
# pat1: number in mandatory parentheses, underscores optional on both sides.
pat1 = re.compile(r"_*\s*[((]\s*(\d+)\s*[))]\s*_*")
# pat11: number (parentheses optional, optional trailing "." / "、") that is
# followed by at least one underscore; leading underscores are optional.
pat11 = re.compile(r"_*\s*[((]?\s*(\d+)\s*[))]?\s*[.、.]?\s*_+")
# pat111: at least one leading underscore, then the number (parentheses
# optional); trailing underscores are optional.
pat111 = re.compile(r"_+\s*[((]?\s*(\d+)\s*[))]?\s*_*")
# pat1111: number with underscores required on both sides.
pat1111 = re.compile(r"_+\s*[((]?\s*(\d+)\s*[))]?\s*[.、.]?\s*_+")

# Same four shapes without a capture group, so findall() returns the whole
# matched marker text instead of just the number.
pat3 = re.compile(r"_*\s*[((]\s*\d+\s*[))]\s*_*")
pat33 = re.compile(r"_*\s*[((]?\s*\d+\s*[))]?\s*[.、.]?\s*_+")
pat333 = re.compile(r"_+\s*[((]?\s*\d+\s*[))]?\s*_*")
pat3333 = re.compile(r"_+\s*[((]?\s*\d+\s*[))]?\s*[.、.]?\s*_+")

# Opening <span> tag whose style attribute starts with "text-decoration" or
# "border-bottom:", optionally followed by a number (captured per alternative).
pat2 = re.compile(r'<span style="text-decoration.*?>(\s*\d+)?|<span style="border-bottom:.*?>(\s*\d+)?')
# A whole blank rendered as markup: an <img> tag, or a complete <span> element
# styled with text-decoration / border-bottom, including any underscores and
# whitespace directly around it.
pat4 = re.compile(
    r'(_*\s*<img.*?>\s*_*)|(_*\s*<span style="text-decoration.*?>.*?</span>\s*_*)|(_*\s*<span style="border-bottom:.*?>.*?</span>\s*_*)')

# Item markers used by replace_pics2: a number (parentheses optional) followed
# by "." or "、".
# pat5: an <img> tag (captured) or an item marker.
pat5 = re.compile(r"(<img.*?>)|[((]?\s*\d+\s*[))]?\s*[.、.]")
# pat6: item marker, capturing the number.
pat6 = re.compile(r"[((]?\s*(\d+)\s*[))]?\s*[.、.]")
# pat7: item marker without a capture group (findall() returns the marker).
pat7 = re.compile(r"[((]?\s*\d+\s*[))]?\s*[.、.]")
  18. def pics(a, b, n=False):
  19. if n:
  20. t = "\n"
  21. else:
  22. t = ""
  23. replace = t + '''<img src="http://zsytk2.zhixinhuixue.com/static/images/{}.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/{}.gif" class="tiankong" data-num="{}">'''.format(
  24. a, a, b)
  25. return replace
  26. def replace_pics1(content):
  27. if not content:
  28. return content
  29. old_content = content
  30. if "http://zsytk2.zhixinhuixue.com/static/images" in str(content):
  31. content = pat4.sub("#####", str(content))
  32. content = re.sub(r"_+\s*[((]?\s*(\d+)\s*[))]?\s*[.、.]?\s*_+", "#####", content)
  33. res = []
  34. for sss in range(0, 20):
  35. new_content1 = re.sub(r"#####", pics(sss + 1, sss), content, count=1, flags=re.S)
  36. content = new_content1
  37. res.append(new_content1)
  38. if "#####" not in new_content1:
  39. break
  40. if res:
  41. if "#####" in str(res[-1]):
  42. res_final = str(res[-1]).replace("#####", " ")
  43. return res_final
  44. else:
  45. return res[-1]
  46. else:
  47. return old_content
  48. else:
  49. if len(pat2.findall(content)):
  50. content = pat4.sub("#####", content)
  51. content = re.sub(r"_+\s*[((]?\s*(\d+)\s*[))]?\s*[.、.]?\s*_+", "#####", content)
  52. res = []
  53. for i in range(0, 20):
  54. new_content2 = re.sub(r"#####", pics(i + 1, i), content, count=1, flags=re.S)
  55. content = new_content2
  56. res.append(new_content2)
  57. if "#####" not in new_content2:
  58. break
  59. if res:
  60. if "#####" in str(res[-1]):
  61. res_final = str(res[-1]).replace("#####", " ")
  62. return res_final
  63. else:
  64. return res[-1]
  65. else:
  66. return old_content
  67. else:
  68. if "<br>" in old_content:
  69. old_content = str(old_content).split("<br>")
  70. elif "<br/>" in old_content:
  71. old_content = str(old_content).split("<br/>")
  72. elif "<br />" in old_content:
  73. old_content = str(old_content).split("<br />")
  74. else:
  75. old_content = str(old_content).split("\n")
  76. try:
  77. # start_index = [1 if len(re.findall(r'[\u4e00-\u9fa5]', con.strip())) > 5 else 0 for con in old_content[:6]].index(0)
  78. start_index = [
  79. 1 if len(re.findall(r'[\u4e00-\u9fa5]', con.strip())) / (len(con.strip()) + 0.0001) >= 0.5 else 0
  80. for con in old_content[:3]].index(0)
  81. except:
  82. start_index = 0
  83. old_content = "\n".join(old_content[start_index:])
  84. replace_con = pat3.findall(old_content) + pat33.findall(old_content) + pat333.findall(
  85. old_content) + pat3333.findall(old_content)
  86. replace_con = list(set(replace_con))
  87. if replace_con:
  88. num_list = pat1.findall(old_content) + pat11.findall(old_content) + pat111.findall(
  89. old_content) + pat1111.findall(old_content)
  90. new_num_list = del_outlier(num_list)
  91. new_replace_con_list = []
  92. for num in replace_con:
  93. if re.search(pat1, str(num).strip()):
  94. if re.search(pat1, str(num).strip()).group(1) in new_num_list:
  95. new_replace_con_list.append(str(num).strip())
  96. content = content.replace(str(num).strip(), "#####")
  97. if re.search(pat11, str(num).strip()):
  98. if re.search(pat11, str(num).strip()).group(1) in new_num_list:
  99. new_replace_con_list.append(str(num).strip())
  100. content = content.replace(str(num).strip(), "#####")
  101. if re.search(pat111, str(num).strip()):
  102. if re.search(pat111, str(num).strip()).group(1) in new_num_list:
  103. new_replace_con_list.append(str(num).strip())
  104. content = content.replace(str(num).strip(), "#####")
  105. if re.search(pat1111, str(num).strip()):
  106. if re.search(pat1111, str(num).strip()).group(1) in new_num_list:
  107. new_replace_con_list.append(str(num).strip())
  108. content = content.replace(str(num).strip(), "#####")
  109. res = []
  110. for i, new_replace_con in enumerate(new_replace_con_list):
  111. new_content3 = re.sub(r"#####", pics(i + 1, i), content, count=1, flags=re.S)
  112. content = new_content3
  113. res.append(new_content3)
  114. if res:
  115. return res[-1]
  116. else:
  117. return old_content
  118. else:
  119. return old_content
  120. def replace_pics2(content):
  121. if not content:
  122. return content
  123. old_content = content
  124. if "http://zsytk2.zhixinhuixue.com/static/images" in content:
  125. content = pat5.sub("#####", content)
  126. res = []
  127. for ii, sss in enumerate(range(len(pat5.findall(old_content)))):
  128. new_content1 = re.sub(r"#####", pics(sss + 1, sss, n=True), content, count=1, flags=re.S)
  129. content = new_content1
  130. res.append(new_content1)
  131. if res:
  132. return res[-1]
  133. else:
  134. return old_content
  135. else:
  136. replace_con = pat7.findall(old_content)
  137. num_list = pat6.findall(old_content)
  138. new_num_list = del_outlier(num_list)
  139. new_replace_con_list = []
  140. for num in replace_con:
  141. if pat6.search(str(num).strip()).group(1) in new_num_list:
  142. new_replace_con_list.append(str(num).strip())
  143. content = content.replace(str(num).strip(), "#####")
  144. res = []
  145. for i, new_replace_con in enumerate(new_replace_con_list):
  146. new_content2 = re.sub(r"#####", pics(i + 1, i), content, count=1, flags=re.S)
  147. content = new_content2
  148. res.append(new_content2)
  149. if res:
  150. return res[-1]
  151. else:
  152. return old_content
  153. def content_replace_result(types, contents):
  154. content = contents.get("content", None)
  155. if types in ["完形填空", "任务型阅读", '七选五', "选词填空", '语法填空', '课文填空']:
  156. # if types in ["完形填空", "任务型阅读", '七选五', "选词填空", '语法填空']:
  157. return replace_pics1(content)
  158. elif types in ["句子翻译", "完成句子", "阅读表达"]:
  159. return replace_pics2(content)
  160. else:
  161. return content
  162. if __name__ == '__main__':
  163. # a = ['阅读下面短文,\xa0从短文后各题所给的四个选项(A、B、C\xa0和\xa0D)中,\xa0选出可以填人\xa0空白处的最佳选项,并在答题卡上将该项涂黑。\nThe new science of spending comes to a surprising conclusion. How we use\xa0our money may \n<img src="http://zsytk2.zhixinhuixue.com/static/images/1.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/1.gif" class="tiankong" data-num="0"/>21 as much or more than how much we’ve got it. Money spent\xa0on experiences, \xa0\xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/2.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/2.gif" class="tiankong" data-num="1"/>22 \xa0\xa0\xa0material goods, brings about more\xa0happiness.\nImagine that you wake up tomorrow morning to \xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/3.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/3.gif" class="tiankong" data-num="2"/>23 \xa0$1 million under\xa0your\nmattress. What would you do \n<img src="http://zsytk2.zhixinhuixue.com/static/images/4.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/4.gif" class="tiankong" data-num="3"/>24 that cash? The new-found wealth will probably\xa0make\xa0you think about one thing \n<img src="http://zsytk2.zhixinhuixue.com/static/images/5.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/5.gif" class="tiankong" data-num="4"/>25 all else—yourself. A growing body of research\xa0reveals\xa0that money \n<img src="http://zsytk2.zhixinhuixue.com/static/images/6.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/6.gif" class="tiankong" data-num="5"/>26 our selfish sides, focusing us on \n<img src="http://zsytk2.zhixinhuixue.com/static/images/7.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/7.gif" class="tiankong" data-num="6"/>27 that money can do for us, and\xa0us\xa0alone. 
Perhaps you are imagining buying new \xa0\xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/8.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/8.gif" class="tiankong" data-num="7"/>28 \xa0\xa0: a faster car, or even a\xa0larger\xa0house.\nHowever, studies show that material goods often \xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/9.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/9.gif" class="tiankong" data-num="8"/>29 \xa0deliver lasting\xa0happiness.\nFortunately, our ongoing research \n<img src="http://zsytk2.zhixinhuixue.com/static/images/10.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/10.gif" class="tiankong" data-num="9"/>30 various ways to get more happiness from\xa0every dollar you spend. Changing how you spend money can lead to measurable increases\xa0in your happiness. But making these \n<img src="http://zsytk2.zhixinhuixue.com/static/images/11.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/11.gif" class="tiankong" data-num="10"/>31 requires challenging some of our\xa0fundamental\xa0assumptions about spending, since it’s\xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/12.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/12.gif" class="tiankong" data-num="11"/>32 not to view buying a house as a\xa0wise\xa0investment. 
But new research shows it has little impact on your overall happiness.\xa0A\xa0study in the United States found that homeowners, on average, were no happier\xa0than\n\n<img src="http://zsytk2.zhixinhuixue.com/static/images/13.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/13.gif" class="tiankong" data-num="12"/>\xa0\xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/14.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/14.gif" class="tiankong" data-num="13"/>33 \xa0\xa0.\nThus, diligently saving up for a down payment might not be such a good idea\xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/15.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/15.gif" class="tiankong" data-num="14"/>34 \xa0it means skipping after-work beers with friends or your annual celebration at a\xa0favorite\xa0restaurant. Dozens of studies show that people get more happiness from buying\xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/16.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/16.gif" class="tiankong" data-num="15"/>35 \xa0than material things. 
Experiential purchases—such as trips, concerts and special\xa0meals\n—are specially \xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/17.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/17.gif" class="tiankong" data-num="16"/>36 \xa0to our sense of self, making us who we\xa0are.\nExperiences come with one more \n<img src="http://zsytk2.zhixinhuixue.com/static/images/18.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/18.gif" class="tiankong" data-num="17"/>37 : They are likely to make us \n<img src="http://zsytk2.zhixinhuixue.com/static/images/19.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/19.gif" class="tiankong" data-num="18"/>38 to\xa0other\xa0people, while more often material things are enjoyed \n<img src="http://zsytk2.zhixinhuixue.com/static/images/20.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/20.gif" class="tiankong" data-num="19"/>39 . The importance of\xa0social\xa0contact for \xa0\xa0\n<img src="http://zsytk2.zhixinhuixue.com/static/images/21.gif" _src="http://zsytk2.zhixinhuixue.com/static/images/21.gif" class="tiankong" data-num="20"/>40 \xa0\xa0mental and physical health has been stressed for\xa0decades.\n', '1.\nA.present\nB.matter\nC.appear\nD.equal\n', '2.\nA.better\xa0than\nB.other\xa0than\nC.rather\xa0than\nD.more\xa0than\n', '3.\nA.keep\nB.own\nC.discover\nD.accept\n', '4.\nA.to\nB.with\nC.about\nD.for\n', '5.\nA.above\nB.beyond\nC.except\nD.after\n', '6.\nA.draws\xa0out\nB.takes\xa0in\nC.stands\xa0for\nD.makes\xa0up\n', '7.\nA.who\nB.why\nC.what\nD.how\n', '8.\nA.life\nB.equipment\nC.vehicles\nD.possessions\n', '9.\nA.manage\xa0to\nB.fail\xa0to\nC.tend\xa0to\nD.expect\xa0to\n', '10.\nA.invites\nB.offers\nC.prefers\nD.follows\n', '11.\nA.changes\nB.plans\nC.decisions\nD.mistakes\n', '12.\nA.easy\nB.hard\nC.foolish\nD.sensible\n', '13.\nA.buyers\nB.sellers\nC.builders\nD.renters\n', '14.\nA.if\nB.until\nC.unless\nD.though\n', '15.\nA.experiences\nB.goods\nC.cars\nD.houses\n', 
'16.\nA.exposed\nB.added\nC.applied\nD.related\n', '17.\nA.disadvantage\nB.conclusion\nC.purpose\nD.benefit\n', '18.\nA.grateful\nB.close\nC.similar\nD.equal\n', '19.\nA.together\nB.worldwide\nC.long\nD.alone\n', '20.\nA.increasing\nB.damaging\nC.improving\nD.recovering\n']
  164. a = ['''It is always interesting and revealing (发人深省的) to observe human behavior. In the past years, we have always held a party for Christmas gift __16___ at our house with at least ten members of our __17__ . Each year, we all buy gifts rapped (包裹) in Christmas paper. When everyone is sitting around a circle at home, all the gifts are __18__ in the middle of the floor.
  165. People choose a __19__ from a bowl that tells him or her when it is their __20__ to choose a gift. The person with the number one can choose any gift and then, each person __21__ his or her turn. Before they choose a gift, they have the___22___to take the gift from the former person. __23__ when all people have chosen a gift, the person __24__ the first number can then take gift away from any other person.
  166. What is always interesting is __25__ each person as they choose a gift. What happens next usually happens more often than not. The biggest gift __26__ gets chosen first. It is never the __27__ gift, nor is it the most expensive one. What does this tell us about most human __28__ ?
  167. Is the __29__ always the best or worth the most? Do we __30__ that something small is not worth as much or cannot live up to what we expect?
  168. Human beings, in fact, __31__ all sorts of __32__ and packagings. Some are bigger and some are smaller; some are dressed __33__ and some are dressed poorly. Behind these lie __34__ characteristics, talents, desires, hurts and pains and dreams.
  169. Terry Josephson, a motivational (励志的) speaker says, “ __35__ thinking in terms of limitations and start thinking in terms of possibilities.”''']
  170. # pprint(a)
  171. # b = replace_pics1(",".join(a))
  172. b = replace_pics1(a[0])
  173. print(b)