123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666 |
- # -*- coding: utf-8 -*-
- # import re
- # import time
- import json
- import my_config
- import func_timeout
- # import numpy as np
- from sklearn.metrics.pairwise import cosine_similarity
- from Final_word_Similarity.Hybrid_Sim import HybridSim
- from sentence_transformers import util
- # from item_embedding.chinese_emb import item2emb_cn
- from item_embedding.all_lang_emb import item2emb_all
- from Words.Dicts import errmean_words
- from full_wash import ChWash
- from Words.Phrase_dict import errmean_en_dict, phrases_dict_tk
- from basic_logic import ItemInit, get_mean_in_dict
- from Utils.wrong_written_judgement import err_mean_judge, KM_cidian_check, youdao_cidian_check
- from Utils.han_semantic_similarity import han_similarity, pos_tag_han
- from Utils.util import *
- from Words.Dicts import single_mean_words, fixed_mean_words_dict
- from Words.Words_classify import mean_fixed_words
- from Words.ch2en_dict import ch_to_en_online
- from Words.word_dict_from_textbook import word2mean_high, word2mean_junior
- from Words.syn_antonyms import syn_km, syn_ft, syn_bing
- from Words.phrases_syn_antonyms import phrase_syn_bing, phrase_syn_km
- from Utils.translator import KM_ch2en, haici_zh2en, ch2en_baidu
- from concurrent.futures import ThreadPoolExecutor
- from func_timeout import func_set_timeout
- # import gc
- # from func_timeout import func_set_timeout
- # logger = my_config.myLog(__name__, log_cate="e2cc_log").getlog()
- ch2en_logger = my_config.simpLog(__name__, log_cate="ch2en_online_log").getlog()
- """
- 存在错误类型:有多个手写意思,其中一个ocr错误,这算错还是对呢---62df443d87b408c08c51752c
- 规范作答测试:写错要全部涂黑
- """
def bert_similarity(vc_model):
    """Cosine similarity between the two halves of a stacked vector batch.

    Splits ``vc_model`` into two equal row-blocks along axis 0 and returns
    the pairwise cosine-similarity matrix between them. Intended for
    sentence vectors; per the original note, word vectors work poorly here.

    :param vc_model: array of stacked vectors, row count must be even.
    :return: cosine-similarity matrix between the first and second half.
    """
    first_half, second_half = np.split(vc_model, 2, axis=0)
    return cosine_similarity(first_half, second_half)
- class MainJudge(ItemInit):
- # @staticmethod
def hw_ch2en(self, mean_hw):
    """Translate one handwritten Chinese answer to English via online services.

    Queries up to three translation backends (Haici, Baidu, KM) in parallel
    threads; the backend count is throttled down as the number of answers
    pending translation (``self.need_ch2en_hw_num``) grows, to limit fan-out.

    :param mean_hw: one handwritten Chinese meaning (str).
    :return: ``(mean_hw, washed)`` where ``washed`` is the de-duplicated list
        of English candidates; entries like ``"word(s)"`` are expanded into
        ``["word", "words"]`` when the bracketed part is a plural/-ing suffix.
    """
    ch2en_online = []

    # Each backend wrapper is best-effort: a failed service must not abort
    # the others, so only Exception (not SystemExit/KeyboardInterrupt) is
    # swallowed — the original bare `except:` blocked interpreter shutdown.
    def km_ch2en(s):
        try:
            ch2en_online.extend(KM_ch2en(s))
        except Exception:
            pass

    def hc_ch2en(s):
        try:
            ch2en_online.extend(haici_zh2en(s))
        except Exception:
            pass

    def bd_ch2en(s):
        try:
            ch2en_online.extend(ch2en_baidu(s))
        except Exception:
            pass

    # Throttle the number of backends as the pending-translation count grows.
    all_func = [hc_ch2en, bd_ch2en, km_ch2en]
    if self.need_ch2en_hw_num >= 3:
        all_func = [hc_ch2en]
    elif self.need_ch2en_hw_num >= 2:
        all_func = [hc_ch2en, bd_ch2en]
    if len(all_func) == 1:
        all_func[0](mean_hw)
    else:
        # IO-bound requests, so threads are the right tool here.
        with ThreadPoolExecutor(max_workers=len(all_func)) as executor:
            for func in all_func:
                executor.submit(func, mean_hw)

    ch2en_online_washed = []
    if ch2en_online:
        for c in set(ch2en_online):
            c = c.replace("'", "'")  # NOTE(review): reads as a no-op; presumably normalizes a full-width apostrophe — confirm original bytes
            new_c = [c]
            if len(re.findall("[((].+?[))]", c)) == 1:
                c_1 = re.split("[((](.+?)[))]", c)
                if c_1[1] in ["s", "es", "ing"]:
                    # "word(s)" -> ["word", "words"]
                    new_c = [c_1[0], c_1[0] + c_1[1]]
            ch2en_online_washed.extend(new_c)
    return mean_hw, ch2en_online_washed
def synonyms_cluster_judge(self, en_ptag):
    """Judge the handwritten meanings via synonym/antonym clustering.

    Strategy: (1) map each handwritten Chinese meaning to English candidates,
    then look those up in the English synonym/antonym tables; (2) map the
    English synonyms back to Chinese definitions and compare again.

    :param en_ptag: part-of-speech / phrase tag of ``self.word``
        (e.g. "n-phrase", "JJ", "RB", "ADV").
    :return: 1 correct, 0 wrong, -1 undecided (caller continues with
        similarity-based judging).
    """
    # Fixed-meaning (textbook-sense) words: every handwritten answer must
    # appear in the collected fixed-meaning list, nothing else counts.
    if self.word in single_mean_words or self.word in fixed_mean_words_dict \
            or self.word in sum(list(mean_fixed_words.values()), []):
        self.is_word_with_fixed_mean = 1
        fixed_mean = []
        if self.word in word2mean_high:
            fixed_mean.append(list(word2mean_high[self.word].values()))
        if self.word in word2mean_junior:
            fixed_mean.append([word2mean_junior[self.word]])
        if self.word in fixed_mean_words_dict:
            fixed_mean.append([fixed_mean_words_dict[self.word].replace("\n", ";")])
        fixed_mean.append([self.fuller_means_simpwashed])
        fixed_mean_list = ChWash(self.word, ";".join(sum(fixed_mean, []))).zh_full_wash(source="hyk")
        fixed_mean_list = means_split(";".join(fixed_mean_list))  # split into individual meanings
        if all([True if mean_hw in fixed_mean_list else False for mean_hw in self.mean_hw_list]):
            return 1
        else:
            return 0
    else:  # synonym / antonym based judging
        # Gather synonyms/antonyms from every available table (word + phrase).
        synonyms, antonyms = [], []
        if self.word in syn_km:
            synonyms.extend(sum(list(syn_km[self.word]["synonyms"].values()), []))
            antonyms.extend(sum(list(syn_km[self.word]["antonyms"].values()), []))
        if self.word in syn_ft:
            synonyms.extend(syn_ft[self.word]["synonyms"])
            antonyms.extend(syn_ft[self.word]["antonyms"])
        if self.word in syn_bing:
            synonyms.extend(syn_bing[self.word]["synonyms"])
            antonyms.extend(syn_bing[self.word]["antonyms"])
        if self.word in phrase_syn_km:
            synonyms.extend(sum(list(phrase_syn_km[self.word]["synonyms"].values()), []))
            antonyms.extend(sum(list(phrase_syn_km[self.word]["antonyms"].values()), []))
        if self.word in phrase_syn_bing:
            synonyms.extend(phrase_syn_bing[self.word]["synonyms"])
            antonyms.extend(phrase_syn_bing[self.word]["antonyms"])
        # NOTE(review): the replace below reads as a no-op; presumably it
        # normalizes a full-width apostrophe — confirm the original bytes.
        synonyms = list([s.replace("'", "'") for s in set(synonyms)])
        antonyms = list([a.replace("'", "'") for a in set(antonyms)])
        # A word's synonyms differ per sense: an English candidate can be in
        # the synonym table without sharing the sense that was written.
        is_all_synonyms = 1           # stays 1 while every checked hw looks synonymous
        hw2en_equal_word = []         # hw whose English equals the prompt word itself
        no_synonyms = 0               # set when no synonym data exists at all
        is_all_hw_en_exist = []       # collects -1 for each hw lacking English candidates
        need_ch2en_online = []        # hw that must be translated online
        # ---- collect the handwritten answers that need online translation ----
        time1 = time.time()  # left over for a removed timing print
        for mean_hw in self.mean_hw_list:
            if "(" not in mean_hw and "(" not in mean_hw:  # bracketed answers are skipped
                need_ch2en_online.append(mean_hw)
        # ---- fetch English online ----
        need_ch2en_online = list(set(need_ch2en_online))
        need_ch2en_online = [ch for ch in need_ch2en_online if ch not in ch_to_en_online]
        # NOTE(review): after the filter above no remaining entry can be in
        # ch_to_en_online, so this dict always starts empty — the cache lookup
        # probably should precede the filter; confirm intent.
        ch2en_dict_online = {ch: ch_to_en_online[ch] for ch in need_ch2en_online if ch in ch_to_en_online}
        if need_ch2en_online:
            max_workers = 1
            if len(need_ch2en_online) >= 6:
                max_workers = 6
            elif len(need_ch2en_online) > 1:
                max_workers = len(need_ch2en_online)
            self.need_ch2en_hw_num = len(need_ch2en_online)  # lets hw_ch2en throttle its backends
            if max_workers > 1:
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    for future in executor.map(self.hw_ch2en, need_ch2en_online):
                        ch2en_dict_online[future[0]] = future[1]
                        if future[1]:  # persist freshly fetched ch2en pairs (log merged back daily)
                            ch2en_logger.info(json.dumps({"{}".format(future[0]): future[1]}, ensure_ascii=False))
            else:  # only a single answer needs online translation
                hw, hw2en = self.hw_ch2en(need_ch2en_online[0])
                if hw2en:
                    ch2en_dict_online[hw] = hw2en
                    ch2en_logger.info(json.dumps({"{}".format(hw): hw2en}, ensure_ascii=False))
        # ---- judge each handwritten meaning via its English candidates ----
        for mean_hw in self.mean_hw_list:
            dict_ch2en = my_config.dict_ch2en
            hw_en = []
            if mean_hw in dict_ch2en:  # curated offline ch->en dictionary first
                hw_en = dict_ch2en[mean_hw]
            if mean_hw in ch2en_dict_online and ch2en_dict_online[mean_hw]:
                hw_en.extend(ch2en_dict_online[mean_hw])
            if hw_en:
                hw_en = list(set(map(lambda x: x.lower(), hw_en)))  # lowercase + dedupe
                if any([True for en in hw_en if en in antonyms and en not in synonyms]):  # antonym -> wrong
                    return 0
                if any([True for en in hw_en if en == self.word]):  # translation equals the prompt word
                    # Wrong if the reference meanings only differ by a 的/地
                    # suffix, though a non-adjective synonym may still save it.
                    if any([re.search(mean_hw+"[的地]$", u) for u in self.union_means]):
                        return -1
                    if not self.mean_hw_cutted:
                        continue
                    else:  # several meanings, truncated: one match suffices
                        return 1
                if en_ptag == "n-phrase" and mean_hw not in self.mean_hw_cutted and \
                        any([True for en in hw_en if en != self.word and re.search("^"+en + "[a-z']{,4} [a-z]{3,}$", self.word)]):
                    return 0
                if any([True for en in hw_en if en != self.word and re.search("^"+en + "[a-z']{,4}$", self.word)]):
                    # e.g. "steady progress" vs "steady progression"; "utter" vs "utterly"
                    equal_en = [en for en in hw_en if en != self.word and re.search("^"+en + "[a-z']{,4}$", self.word)
                                and en + 'ly' == self.word and en_ptag in ["RB", "ADV"]]
                    if equal_en:
                        return 0
                    continue
                if any([True for en in hw_en if en == self.word.replace(" ", "") and
                        self.word.replace(" ", "") not in ["breakdown"]]):  # prompt minus spaces equals en
                    hw2en_equal_word.append(mean_hw)
                elif not self.mean_hw_cutted and any([True for en in hw_en if en != self.word
                                                      and re.search("\s"+en+"|"+en + "\s", self.word)
                                                      and en_ptag == "n-phrase"]):
                    # noun phrase where only half of it was translated
                    return 0
                elif not synonyms:
                    no_synonyms = 1
                    break
                elif all([True if en not in synonyms else False for en in hw_en]):  # none of the en are synonyms
                    # TODO(review): should adjectives/adverbs that fail here still
                    # proceed to similarity matching? (original open question)
                    if not self.is_phrase:  # single word
                        if re.search("[\u4e00-\u9fa5]+的[\u4e00-\u9fa5]", mean_hw):  # e.g. virtue(高尚的品德)
                            return -1
                        if len(mean_hw) <= 2:
                            is_all_synonyms = 0  # some candidate is not a synonym
                        else:
                            # fall back to sentence-bert similarity (computed lazily once)
                            if not self.sbert_row_maxsimi_res:
                                embs = item2emb_all(self.mean_hw_list + list(self.union_means))
                                self.sbert_simi_res = util.cos_sim(embs[0:len(self.mean_hw_list)],
                                                                   embs[len(self.mean_hw_list):])
                                self.sbert_row_maxsimi_res = [max(s) for s in self.sbert_simi_res.tolist()]
                            if max(self.sbert_row_maxsimi_res) >= 0.9:
                                return -1
                            else:
                                is_all_synonyms = 0  # some candidate is not a synonym
                    else:  # phrases continue to the later similarity stage
                        return -1
                else:
                    # Some en is in the synonym table, but possibly not in the
                    # sense that was written down.
                    syns = [en for en in hw_en if en in synonyms]
                    for syn in syns:
                        _, syn_mean = get_mean_in_dict(syn.strip(), mod="all")
                        # mean_hw appearing inside syn's Chinese definition may
                        # or may not be the matching sense — leave undecided.
                        if re.search("(?<![\u4e00-\u9fa5])"+mean_hw+"(?![\u4e00-\u9fa5])", syn_mean):
                            return -1
                        # Filter by part of speech (some listed synonyms are wrong-POS).
                        pos_mean = re.findall("(?<!=[a-z])([a-z]+)\.", syn_mean)
                        pos_fuller_mean = re.findall("(?<!=[a-z])([a-z]+)\.", self.fuller_means_nowashed)
                        if re.search("n\.\s*"+mean_hw, syn_mean) and pos_fuller_mean and \
                                all([True if a[0] in ["v", "V"] else False for a in pos_fuller_mean]):
                            return 0
                        if (re.search("的$", mean_hw) is None and pos_tag_han(mean_hw, flag="by_list") != 'a') and \
                                (phrase_classify(syn) == "JJ" or (pos_mean and
                                 all([True if a[0] == "adj" else False for a in pos_mean]))):
                            return 0
                    if not self.sbert_row_maxsimi_res:
                        embs = item2emb_all(self.mean_hw_list + list(self.union_means))
                        self.sbert_simi_res = util.cos_sim(embs[0:len(self.mean_hw_list)], embs[len(self.mean_hw_list):])
                        self.sbert_row_maxsimi_res = [max(s) for s in self.sbert_simi_res.tolist()]
                    if max(self.sbert_row_maxsimi_res) < 0.6:
                        return 0
                    else:
                        return -1
            else:
                is_all_hw_en_exist.append(-1)  # no English candidates for this hw
        if hw2en_equal_word:
            # Drop answers already proven equivalent to the prompt word itself.
            self.mean_hw_list = [j for j in self.mean_hw_list if j not in hw2en_equal_word]
        if not is_all_synonyms:
            return 0
        elif no_synonyms:
            return -1
        elif -1 in is_all_hw_en_exist:
            return -1
        else:
            return 1
def cilin_similarity_judge(self):
    """Judge via Cilin (同义词词林) hybrid word similarity.

    Scores every (handwritten meaning, reference answer) pair with
    ``HybridSim`` and cross-checks high or unknown Cilin scores against the
    hanlp similarity matrix. Pairs containing brackets/commas are skipped.

    :return: 1 when every scored row is >= 0.9; 0 when a row is plainly
        wrong; otherwise the per-row score list (rows already >= 0.9 are
        removed from ``self.mean_hw_list`` so later stages skip them).
    """
    scores = []
    t3 = time.time()  # left over for the removed timing print
    hb = HybridSim()
    for idi, mean_hw in enumerate(self.mean_hw_list):
        if re.search("[(()),,、]", mean_hw):
            continue
        one_scores = []
        for idj, ans in enumerate(self.ans_list):
            if re.search("[(()),,、]", ans):
                continue
            cilin_score = hb.get_Final_sim(mean_hw, ans)
            # -2 means the word is absent from Cilin; -1 handling is still an
            # open question per the original note.
            if cilin_score >= 0.9 or cilin_score == -2:
                # double-check with the hanlp similarity (computed lazily once)
                if not self.simires_han_rawshape:
                    self.han_simi_res, self.simires_han_rawshape = han_similarity(self.word, self.mean_hw_list,
                                                                                 self.ans_list, self.cutted_words)
                han_score = self.han_simi_res[idi][idj]
                if han_score > 0.4:
                    if cilin_score == -2:  # not in Cilin: trust the hanlp score
                        one_scores.append(han_score)
                    else:
                        one_scores.append(cilin_score)
                elif han_score == 0.0:
                    one_scores.append(0.0)
                else:
                    one_scores.append(han_score)
            else:
                one_scores.append(cilin_score)
        if one_scores:
            if max(one_scores) == 0.0 and min(one_scores) != -1:
                return 0
            scores.append(max(one_scores))
    # Vacuously 1 when every row was skipped (scores empty).
    if all([True if s >= 0.9 else False for s in scores]):
        return 1
    else:
        # Rows already >= 0.9 need no further comparison downstream.
        # NOTE(review): when a mean_hw was skipped by the bracket filter the
        # indices of `scores` shift relative to self.mean_hw_list, so this
        # removal may drop the wrong row — confirm.
        idx_09 = [i for i, s in enumerate(scores) if s >= 0.9]
        if idx_09:
            self.mean_hw_list = [mean for i, mean in enumerate(self.mean_hw_list) if i not in idx_09]
        return scores
def score_res(self, general_score):
    """Shape the final result according to ``self.mod``.

    Seeds ``self.score_onbook`` with ``general_score`` when it is still at
    its -1 sentinel and a textbook-mode result is requested.

    :param general_score: score from the general grading pass.
    :return: mod="all" -> (general_score, score_onbook);
             mod="book" -> score_onbook; otherwise general_score.
    """
    if self.mod not in ["book", "all"]:
        return general_score
    if self.score_onbook == -1:
        self.score_onbook = general_score
    if self.mod == "all":
        return general_score, self.score_onbook
    return self.score_onbook
# @func_set_timeout(1.2)
def judge_err_wrriten(self):
    """Score the wrongly-written-character (typo) check.

    Returns 1 when the handwriting is typo-free, or when the flagged word
    turns out to exist in the KM / Youdao dictionaries (i.e. a false alarm).
    Otherwise records the failure in ``self.score_on_err_wrriten`` and
    returns the sub-1 result from ``err_mean_judge``.

    :return: 1 for "no error", else the raw ``err_mean_judge`` value (< 1).
    """
    noerr_res, _char_sim, zhword_judged = err_mean_judge(self.mean_hw_list, self.ans_list)
    if noerr_res >= 1:
        return 1
    # The corrector flagged a word: trust the flag only when neither
    # dictionary knows the word (short-circuits exactly like the original
    # nested checks: Youdao is queried only if the KM lookup fails).
    if KM_cidian_check(zhword_judged) or youdao_cidian_check(zhword_judged):
        return 1
    self.score_on_err_wrriten = 0
    return noerr_res
def emb_similar(self, en, hw_list, ans_given, is_token=0, noerr=1, flag=0):
    """Model-based semantic similarity between handwriting and answers.

    Runs the hanlp short-text similarity model and, for near-threshold
    results, cross-checks with the sentence-bert model.

    :param en: the English prompt word/phrase.
    :param hw_list: handwritten Chinese meanings (possibly tokenized).
    :param ans_given: reference meanings to compare against.
    :param is_token: 1 when ``hw_list`` holds tokenized answers.
    :param noerr: scale factor from the typo check (< 1 when a typo was
        found); multiplied into every similarity before thresholding.
    :param flag: 1 passes ``self.cutted_words`` through to hanlp.
    :return: 1 / 0, or a float score for the caller to threshold.
    """
    score = 0
    # hanlp semantic-similarity model: suited to short text.
    st7 = time.time()
    if flag == 1:
        self.han_simi_res, _ = han_similarity(en, hw_list, ans_given, self.cutted_words, is_token)
    else:
        self.han_simi_res, _ = han_similarity(en, hw_list, ans_given, [], is_token)
    print("hanlp模型所花时间:", time.time() - st7)
    # best match per handwritten row (empty rows skipped)
    row_maxsimi_res = [max(s) for s in self.han_simi_res if s]
    if is_token:
        if all([True if i * noerr > 0.9 else False for i in row_maxsimi_res]):
            score = 1
    # non-token case: a zero-similarity row that was not truncated -> wrong
    elif min(row_maxsimi_res) * noerr == 0 and hw_list[row_maxsimi_res.index(min(row_maxsimi_res))] \
            not in self.mean_hw_cutted:
        score = 0
    elif max(row_maxsimi_res) * noerr >= 0.895:
        # near/above threshold: confirm with sentence-bert
        st8 = time.time()
        embs = item2emb_all(hw_list + ans_given)  # recomputed here — inputs differ from earlier stages
        self.sbert_simi_res = util.cos_sim(embs[0:len(hw_list)], embs[len(hw_list):]).tolist()
        self.sbert_row_maxsimi_res = [max(s) for s in self.sbert_simi_res]
        print("sentences-bert模型所花时间:", time.time() - st8)
        if max(self.sbert_row_maxsimi_res) < 0.45:
            score = max(self.sbert_row_maxsimi_res)
        else:
            score = 1
            # The two models' row indices are not kept in sync yet (original
            # TODO): veto a high hanlp cell whose same-index sbert cell is low.
            for n, one_res in enumerate(self.han_simi_res):
                if max(one_res) > 0.9:
                    maxres_idx = one_res.index(max(one_res))
                    if maxres_idx < len(self.sbert_simi_res[n]) and self.sbert_simi_res[n][maxres_idx] < 0.45:
                        score = 0
                        break
    else:
        score = max(row_maxsimi_res) * noerr
    return score
# @func_set_timeout(6)
def main_judge(self, again=0):
    """Full semantic-equivalence judgement pipeline for one item.

    Stages: (1) direct answer comparison against the question bank;
    (2) special-case filters (known wrong meanings, 使/令 causative prefixes,
    的/得/地 confusions, phrase-POS checks); (3) synonym/antonym clustering;
    (4) typo check; (5) model similarity (hanlp + sentence-bert).
    In textbook mode a zero comparison still falls through to similarity;
    words absent from the textbook are graded in the general mode.

    :param again: 1 on the second pass of mod="all" — reuses the scores
        cached on ``self`` by the first pass instead of recomputing.
    :return: 0/1 in most paths; some early returns pass the raw score through.
    """
    # ---- stage 1: plain answer comparison ----
    st1 = time.time()
    score = self.simp_judge()
    print("简单判断时间:", time.time()-st1)
    if score == "0" or score == 1:
        return int(score)
    else:
        if not self.mean_hw_list:
            return 0
    # ---- words with known-wrong meanings get dedicated handling ----
    washed_word = re.sub("…+\s*$", "", self.word).strip()
    if washed_word in errmean_words and any([True for hw in self.mean_hw_list
                                             if re.sub("…+\s*$", "", hw).strip() in errmean_words[washed_word]]):
        return 0
    if washed_word in errmean_en_dict or self.err_mean_list:
        temp_mean_hw_list = [re.sub("(?<!=[….])(…+|\.{3,})", "…", m) for m in self.mean_hw_list]
        errmean_list = self.err_mean_list
        if washed_word in errmean_en_dict:
            errmean_list.extend(re.split("[;;,,]", errmean_en_dict[washed_word]))
        if any([True for hw in temp_mean_hw_list if re.sub("…+\s*$", "", hw).strip() in errmean_list]):
            return 0
    # ---- special cases ----
    # e.g. 生效 ("take effect") differs from 使生效 ("make sth. take effect")
    try:
        if any([True for m in self.mean_hw_list if re.search(r"[使令对让].{,2}" + m, ";".join(self.union_means))]):
            return 0
        if any([True for u in self.union_means if re.search(r"[使令对让].{,2}" + u, ";".join(self.mean_hw_list))
                and re.search(r"(?<=[);])[使令对让].+",
                              ";" + ";".join(self.union_means)) is None]):
            return 0
    except:  # best-effort: patterns are built from answer text and may be invalid regex
        pass
    # 的 vs 得 vs 地 confusion
    if any([True for hw in self.mean_hw_list if re.search("[\u4e00-\u9fa5]{2,}的[\u4e00-\u9fa5]+", hw) and \
            re.sub(r"([\u4e00-\u9fa5]{2,})的([\u4e00-\u9fa5]+)", r"\1得\2", hw) in ";".join(self.union_means)
            and all([True if re.search("的(?!$)", a) is None else False for a in self.union_means])]):
        return 0
    # "xxx for/of": answering only the noun part's meaning is wrong
    w1 = re.search("^(.+?) (for|of)$", self.word.strip())
    if w1 and phrase_classify(w1.group(1)) in ["n-phrase", "NN"]:
        _, part_mean = get_mean_in_dict(w1.group(1).strip(), mod="all")
        part_mean_list = means_split(part_mean)
        if any([True for hw in self.mean_hw_list if hw in part_mean_list]):
            return 0
    # For phrases, determine the phrase type first (verb vs adj/noun phrase).
    ptag = phrase_classify(self.word)
    if ptag == "v-phrase" and any([True for h in self.mean_hw_list if re.search(r"(.+)的$", h)]):
        return 0
    if ptag == 'prep-phrase' and self.word.split(" ")[0] in ["in", "with", "on", "at"] \
            and any([True for h in self.mean_hw_list if re.search("的[\u4e00-\u9fa5]+[^中上下]$", h)
                     and re.search("^在.+?的[\u4e00-\u9fa5]+$", h) is None]):  # e.g. "with reason" vs …的理由
        return 0
    # 忽略的 ("ignored") differs from 可忽略的 ("negligible")
    if re.search("ble$", self.word) and any([True for hw in self.mean_hw_list
                                             if re.search("可以?" + hw + "的?", self.fuller_means_simpwashed)]):
        return 0
    # Adjective prompt answered with the noun form
    if any([True for hw in self.mean_hw_list if hw + "的" in self.union_means and ptag == "JJ"]):
        return 0
    union_mean_txt = self.ans_given + ";" + self.fuller_means_simpwashed
    if any([True for hw in self.mean_hw_list if (hw[-1] == "地" and hw[:-1] + "的" in union_mean_txt and
            re.search("的(?![\u4e00-\u9fa5])", union_mean_txt) is None)
            or (hw[-1] == "的" and hw[:-1] + "地" in union_mean_txt and
                re.search("地(?![\u4e00-\u9fa5])", union_mean_txt) is None)]):
        return 0
    # ---- stage 3: synonym clustering ----
    # (original TODO: verify the impact of removing this stage; in textbook
    # mode a synonym sense is not necessarily a textbook sense)
    # -2 marks the untouched initial value (mod="all" runs this method twice).
    if not again or self.syn_judge_score == -2:
        st1 = time.time()
        self.syn_judge_score = self.synonyms_cluster_judge(ptag)
        print("聚类判断时间:", time.time() - st1)
    score = self.syn_judge_score  # on again=1 this reuses the previous pass's value
    print("同义判断得分:", score)
    if score != -1:
        return score
    else:
        if self.is_word_with_fixed_mean:  # fixed-meaning word that failed the fixed list
            return 0
    # ---- stage 4: typo check — a single typo fails the item ----
    st1 = time.time()
    if not again or self.noerr_res == -1:  # -1 marks the untouched initial value
        self.noerr_res = self.judge_err_wrriten()
    print("判错时间:", time.time() - st1)
    print(self.score_on_err_wrriten)
    if self.score_on_err_wrriten == 0:  # on again=1 this reuses the previous pass's value
        return 0
    noerr_res = self.noerr_res
    # ---- Cilin similarity stage is currently disabled (see git history) ----
    # ---- stage 5: model similarity ----
    if score < 0.9:  # score is -1 here (only -1 reaches this point), so this always runs
        score = self.emb_similar(self.word, self.mean_hw_list, self.ans_list, noerr=noerr_res, flag=1)  # 1st call
    if score == 1:
        self.final_score = score
    else:
        score_first = score
        if self.hw_with_brace:
            self.mean_hw_list.extend(sum(self.hw_with_brace.values(), []))
        if self.more_mean:  # retry against the extended meaning set
            score = self.emb_similar(self.word, self.mean_hw_list, self.more_mean, noerr=noerr_res)  # 2nd call
            if not score or score == 1:
                return score
            if score_first < 0.1 and score < 0.1:
                return 0
        if time.time() - self.stime < 3:  # token-level retries only inside the time budget
            if self.cutted_words:
                final_score = self.emb_similar(self.word, self.cutted_words, self.ans_list, 1, noerr=noerr_res)  # 3rd call
                if final_score and final_score != 1:
                    more_means = list(set(self.more_mean) - set(self.ans_list))
                    final_score = self.emb_similar(self.word, self.cutted_words, more_means, 1, noerr=noerr_res)  # 4th call
                if final_score < 1:
                    self.final_score = 0
                # NOTE(review): when final_score reaches 1 here, self.final_score
                # keeps its prior value — confirm its initialisation in ItemInit.
            else:
                self.final_score = 0
        else:
            self.final_score = 1 if score > 0.8 else 0
    return self.final_score
def __call__(self):
    """Return the judgement score(s) according to the grading mode.

    Modes: "book" (textbook meanings), "general", or "all" (both). When no
    textbook meaning exists, grading falls back to the general mode.

    :return: mod="all" -> (general_score, book_score, mean_hw_nindict);
             otherwise -> (score, mean_hw_nindict).
    """
    self.simp_wash()
    if not self.mean_hw_list:  # nothing legible was written
        if self.mod == "all":
            return 0, 0, []
        return 0, []
    self.getmean_inbook()
    if self.mod == "all":
        # First pass in general mode; a second, book-mode pass (again=1)
        # reuses the scores the first pass cached on self.
        self.mod = "general"
        general_score = self.main_judge()
        mean_hw_nindict = self.mean_hw_nindict
        if self.syn_judge_score in [0, 1]:  # clustering already decided both modes
            book_score = self.syn_judge_score
        elif not self.score_on_err_wrriten:  # a detected typo fails both modes
            book_score = 0
        else:
            self.mod = "book"
            book_score = self.main_judge(again=1)
        return general_score, book_score, mean_hw_nindict
    else:  # "general" / "book"
        try:
            score = self.main_judge()
        # NOTE(review): main_judge's @func_set_timeout decorator is commented
        # out, so this timeout path appears unreachable at present — confirm.
        except func_timeout.exceptions.FunctionTimedOut:
            print("批改超时")
            score = self.final_score  # fall back to whatever was computed so far
        return score, self.mean_hw_nindict
if __name__ == '__main__':
    import os  # NOTE(review): unused here
    # Smoke test: judge one handwritten answer against a reference meaning.
    st = time.time()
    s, score1 = MainJudge("differ", "不同;区别于",
                          "vi.相异,不同,不一样 ",
                          mod="general")()  # (word, handwritten meanings, reference answer)
    # NOTE(review): for mod="general" __call__ returns (score, mean_hw_nindict),
    # so `s` holds the score and `score1` the not-in-dict list — the variable
    # names (and this print's order) look swapped; confirm.
    print("最后得分:", score1, s)
    print("时间花费:", time.time() - st)
|