.*?
里面的内容可能会被过滤掉 # test = str(line) # line.text() # 保留下划线及着重符标签 # 波浪线: # pq会将多个空格换成一个 # print(str(line)) if '.+?)', r"【1#\1##】", str(line)) line = re.sub(r'(.+?)', r"【2#\1##】", str(line)) line = re.sub(r'<(p style="text-(indent|align):.*?">.+?)# -*- coding: utf-8 -*- import re from pprint import pprint from pyquery import PyQuery as pq # # pattern = re.compile(r"\[来源.*?\]|www\..*?com") # # filter_words = ["学科网", "高考资源网", "Ziyuanku.com", "WWW.ziyuanku.com", # "高☆考♂资♀源€网", "w.w.w.k.s.5.u.c.o.m", "本资料由《七彩教育网》www.7caiedu.cn 提供!", # "本资料来源于《七彩教育网》http://www.7caiedu.cn", "本资料由《七彩教育网》www.7caiedu.cn 提", # "高考试题来源:"] # # # def filter_word(txt_list): # new_txt_list = [] # for word in txt_list: # if not word.strip(): # continue # new_word = re.sub(pattern, "", word) # for keys in filter_words: # if keys in new_word: # new_word = new_word.replace(keys, "") # new_txt_list.append(new_word) # return new_txt_list def filter_data(x): if not str(x).replace(" ", "").strip(): pass else: return str(x) def replace_k(con): # con = str(con).replace(" ", "+") # con = str(con).replace(" ", "+") con = re.sub(r'\s(?!(src="http|_src="http|class="tiankong"|data-num=))', "#+#", str(con)) return pq(con, parser="html") def css_label_wash(content): # todo add 9-4 """ 清洗文本中带有的css标签 :param content: :return: """ # temp_con = re.sub('?p(\s*|\s+style=.*?")?>', "", str(content)) if re.search('?(span|font|article|ul|ol|div)(\s*|\s+style=.*?")>|text\s*-\s*decoration: underline|border\s*-\s*bottom:', str(content)) is None: # content = re.sub('
', "\n", content).strip().replace("\n\n", "\n").replace("\n", "" content = re.sub('(
)', subp, content) content = re.sub('
', "
\n", content) return content else: content = re.sub(r'', "", str(content)) content = re.sub(r'', "", str(content)) content = re.sub(r'', "", str(content)) content = re.sub(r'', "", str(content)) content = content.replace("", "").replace("", "") # content = content.replace("", "").replace("", "") content = content.replace('
', "\n").replace('
', "", content)
# content = re.sub(' .*?
', "\n", content)
# parm = False
# if "
").replace("【strong##", "").replace("【em##", "") .replace("##strong】", "").replace("##em】", "").replace("【+】", " "), line.text().split("\n"))) a.extend(new_line) elif str(line).startswith("
", "\n").replace("', "\n"). \ replace('
', "\n") b_list = b.split("\n") # b_list = list(filter(lambda x: str(x), b_list)) b_list = list(filter(filter_data, b_list)) b_list = list(map(lambda x: str(x), b_list)) a.extend(b_list) elif str(line).startswith("
" + new_a.replace("\n\n", "\n").replace("\n", "
\n") + "
" new_a = "" + new_a.replace("\n\n", "\n") + "
" new_a = re.sub(r'(
$', r"\1", new_a, flags=re.S) # for sb, img in subs2img.items(): # 2021 # new_a = new_a.replace(sb, img) # if parm: # new_a[0] = "\xa0" * 4 + new_a[0] new_a = re.sub(r"【(/?su[bp])】", r"<\1>", new_a) return new_a def again_parse(content): # todo add 9-4 content = re.sub(r'', "", str(content)) content = re.sub(r'', "", str(content)) content = re.sub(r'', "", str(content)) content = re.sub(r'', "", str(content)) content = str(content).replace("", "").replace("", "") content = str(content).replace("
', "\n").replace('
', "\n"). \ replace('
', "\n") b_list = b.split("\n") # b_list = list(filter(lambda x: str(x), b_list)) b_list = list(filter(filter_data, b_list)) b_list = list(map(lambda x: str(x) + "\n", b_list)) a.extend(b_list) elif str(line).startswith("
Unbelievable!Oh..., _____ you don't mind, I'll stop and take a deep breath.
【答案】
1.if。考查if引导的条件状语从句。根据句意可知。
【解析】
''' # pprint(cons) # print(again_parse(cons)) # print(again_parse(cons)) # print(list(map(lambda x: str(x).replace(" ", " "), again_parse(cons)))) # con1 = r'解:A.研究跨栏动作时,刘翔的大小和形状不能忽略,不能看作质点,故A错误;
B.选取不同的参考系,物体的运动状态是不相同的,故B错误;
C.出租车收费是按路程收费的,故C错误;
D.第是指的时间,是指从末到末这一段时间,故D正确;
故选:D.
阅读下面这首宋诗,完成下列小题。(本题共2小题,9分)
除夜野宿常州城外二首(其二)
苏轼
南来三见岁云徂①,直恐终身走道途。
老去怕看新历日,退归拟学旧桃符。
烟花已作青春②意,霜雪偏寻病客须。
但把穷愁博长健,不辞最后饮屠苏③。
【注】①苏轼于熙宁四年(1071)冬到杭州任通判,至作此诗,已度过三个除夕。岁云徂,谓年岁辞去。徂,往。②青春:春季。③古俗,正月初一家人先幼后长依次饮屠苏酒。《时镜新书》晋董勋云:“正旦饮酒先从小者,何也?勋曰:‘俗以小者得岁,故先酒贺之,老者失时,故后饮酒。’”