import torch
import torch.nn as nn
from transformers import AutoConfig, BertTokenizer, AutoModel

import config
class Solution_Model(nn.Module):
    def __init__(self):
        super(Solution_Model, self).__init__()
        self.bert_config = AutoConfig.from_pretrained(config.bert_path)
        self.bert = AutoModel.from_pretrained(config.bert_path)
        self.fc = nn.Linear(in_features=self.bert_config.hidden_size, out_features=8)

    def forward(self, input_ids, attention_mask):
        # take the [CLS] token representation from the last hidden state
        x = self.bert(input_ids, attention_mask=attention_mask)[0][:, 0, :]
        x = self.fc(x)
        return x
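# Solution_Model is a multi-label classifier: its 8 outputs map one-to-one onto the
# solving_type_dict labels defined in Dimension_Classification below, and each logit
# is passed through a sigmoid and thresholded at 0.5 downstream.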
class Difficulty_Model(nn.Module):
    def __init__(self):
        super(Difficulty_Model, self).__init__()
        self.bert_config = AutoConfig.from_pretrained(config.bert_path)
        self.bert = AutoModel.from_pretrained(config.bert_path)
        self.fc = nn.Linear(in_features=self.bert_config.hidden_size, out_features=1)

    def forward(self, input_ids, attention_mask):
        # take the [CLS] token representation from the last hidden state
        x = self.bert(input_ids, attention_mask=attention_mask)[0][:, 0, :]
        x = self.fc(x)
        return x
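# Difficulty_Model shares the same encoder layout but emits a single score, which
# difficulty_classify below buckets into the coarse values 0.4 / 0.6 / 0.8.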
class Dimension_Classification:
    def __init__(self, dim_mode=2, logger=None):
        # dim_mode: 0 = solving-type classification only, 1 = difficulty only, 2 = both
        self.dim_mode = dim_mode
        self.tokenizer = BertTokenizer.from_pretrained(config.bert_path)
        self.solution_model, self.difficulty_model = None, None
        if self.dim_mode in {0, 2}:
            self.solution_model = torch.load(config.solution_model_path)
        if self.dim_mode in {1, 2}:
            self.difficulty_model = torch.load(config.difficulty_model_path)
        self.max_sequence_length = 500
        self.solving_type_dict = {
            0: "实验操作",  # experimental procedure
            1: "计算分析",  # calculation and analysis
            2: "连线作图",  # wiring / diagram drawing
            3: "实验读数",  # reading instruments
            4: "现象解释",  # explaining phenomena
            5: "概念辨析",  # concept discrimination
            6: "规律理解",  # understanding physical laws
            7: "物理学史"   # history of physics
        }
        # logger used for log collection
        self.logger = logger
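    # Note: config.solution_model_path / config.difficulty_model_path are expected to
    # hold whole pickled nn.Module objects, so torch.load only works if Solution_Model
    # and Difficulty_Model are importable under the same module path used when saving.
    # On CPU-only machines a map_location="cpu" argument may be needed, and recent
    # PyTorch releases may additionally require weights_only=False for such pickles.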
    def __call__(self, sentence, quesType):
        # difficulty defaults to 0.6 when the difficulty model is not enabled
        solution_list, difficulty_value = [], 0.6
        if self.dim_mode in {0, 2}:
            solution_list = self.solution_classify(sentence, quesType)
        if self.dim_mode in {1, 2}:
            difficulty_value = self.difficulty_classify(sentence)
        res_dict = {
            "solving_type": solution_list,
            "difficulty": difficulty_value,
        }
        return res_dict
    def solution_classify(self, sentence, quesType):
        solution_tensor = self.model_calculate(self.solution_model, sentence)
        # binarize each label probability at 0.5
        solution_tensor[solution_tensor >= 0.5] = 1
        solution_tensor[solution_tensor < 0.5] = 0
        solution_list = solution_tensor[0].int().tolist()
        solution_result = [self.solving_type_dict[i] for i, flag in enumerate(solution_list) if flag == 1]
        # force the labels implied by the declared question type
        if quesType == "计算题":    # calculation question
            solution_result.append("计算分析")
        elif quesType == "作图题":  # drawing question
            solution_result.append("连线作图")
        if len(solution_result) == 0:
            # fall back to a default label when nothing is predicted
            solution_result.append("规律理解")

        return list(set(solution_result))
    def difficulty_classify(self, sentence):
        difficulty_tensor = self.model_calculate(self.difficulty_model, sentence).item()
        # bucket the sigmoid score into three coarse difficulty levels
        if difficulty_tensor >= 0.8:
            difficulty_value = 0.8
        elif difficulty_tensor <= 0.2:
            difficulty_value = 0.4
        else:
            difficulty_value = 0.6

        return difficulty_value
    def model_calculate(self, model, sentence):
        model.eval()
        with torch.no_grad():
            token_tensor = self.sentence_tokenize(sentence)
            # single unpadded sequence, so the attention mask is all ones
            mask_tensor = torch.ones_like(token_tensor, dtype=torch.float)
            output_tensor = model(token_tensor, attention_mask=mask_tensor)
            output_tensor = torch.sigmoid(output_tensor)
        return output_tensor
    def sentence_tokenize(self, sentence):
        # hard-truncate long inputs to max_sequence_length characters
        # encode() prepends [CLS] (id 101), appends [SEP] (id 102), and maps unknown
        # characters/words to [UNK] (id 100)
        token_list = self.tokenizer.encode(sentence[:self.max_sequence_length])

        return torch.tensor([token_list])
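    # For illustration: encode() returns a plain Python list of ids starting with 101
    # and ending with 102, and torch.tensor([token_list]) wraps it into the
    # (1, seq_len) batch shape that the BERT forward pass expects.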
if __name__ == "__main__":
    dc = Dimension_Classification(dim_mode=0)
    # "Draw, in the dashed box of figure B, the circuit diagram corresponding to the
    # physical wiring shown in figure A."
    sentence = "请在图乙中的虚线框内画出与图甲中实物图对应的电路图。"
    res = dc(sentence, "")
    print(res)
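    # With dim_mode=0 only the solving-type model runs, so "difficulty" stays at the
    # default 0.6. A sketch of the two-dimension call (requires both checkpoints on
    # disk, so it is left commented out here):
    # dc_full = Dimension_Classification(dim_mode=2)
    # print(dc_full(sentence, "作图题"))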