123456789101112131415161718192021222324252627282930313233343536 |
def _load_type_config(path):
    """Parse a type-keyword config file into a lookup table.

    Each non-comment line has the form ``type_name: word1, word2, ...``.
    Full-width colons and commas are accepted as well as ASCII ones.
    Blank lines and lines whose first non-blank character is ``#`` are
    ignored.

    Args:
        path: Location of the UTF-8 encoded config file.

    Returns:
        A dict mapping each tuple of stripped keywords to its stripped
        type name.

    Raises:
        ValueError: If a non-comment, non-blank line contains no colon.
        OSError: If the file cannot be opened.
    """
    # Map full-width colon / comma to their ASCII forms so either
    # punctuation style works in the config file.
    to_ascii = str.maketrans({"\uff1a": ":", "\uff0c": ","})
    table = {}
    with open(path, "r", encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.strip().translate(to_ascii)
            if not line or line.startswith("#"):
                continue
            # Split on the first colon only, so a stray colon among the
            # keywords does not abort loading.
            key, sep, val = line.partition(":")
            if not sep:
                raise ValueError(
                    "{}:{}: expected 'type: word, word, ...', got {!r}".format(
                        path, lineno, line
                    )
                )
            # Drop empty keywords: "" is a substring of every string, so an
            # entry consisting only of empties would match every line.
            words = tuple(w for w in (v.strip() for v in val.split(",")) if w)
            if not words:
                continue
            table[words] = key.strip()
    return table


# NOTE: the path is relative to the current working directory and the file
# is read once, at import time.
inf_words_dict = _load_type_config("./segment/ocr/type_config.txt")
# Line prefixes marking answer and explanation lines: "答案:" and "解析:".
def could_skip_line(line: str) -> bool:
    """Return True for answer / explanation lines, which skip type inference.

    A line is skipped when it starts with "答案" or "解析" immediately
    followed by a colon. Both the ASCII colon and the full-width colon
    (U+FF1A) are recognized.

    Args:
        line: The text line to inspect; no stripping is applied.

    Returns:
        True if the line starts with one of the skip prefixes, else False.
    """
    # Full-width colons are written as escapes so they stay distinguishable
    # from the ASCII ones.
    return line.startswith(
        ("答案:", "答案\uff1a", "解析:", "解析\uff1a")
    )
def contains_all(s, words) -> bool:
    """Return True if every item of ``words`` is contained in ``s``.

    Args:
        s: The container to search; each word is checked with ``in``.
        words: Iterable of items to look for.

    Returns:
        True when all words are found, which includes the case where
        ``words`` is empty. False as soon as one word is missing.
    """
    # A generator lets all() stop at the first missing word instead of
    # building the full list of results first.
    return all(w in s for w in words)
def topic_type_line(line):
    """Tell whether ``line`` matches any keyword set in ``inf_words_dict``.

    Lines accepted by ``could_skip_line`` never match. Otherwise the line
    matches when it contains every keyword of at least one key tuple in
    ``inf_words_dict``.

    Args:
        line: The text line to test.

    Returns:
        True if some keyword tuple is fully contained in the line,
        otherwise False.
    """
    if could_skip_line(line):
        return False
    # Only the keyword tuples (the dict keys) matter for the match.
    return any(contains_all(line, keywords) for keywords in inf_words_dict)
|