"""Build the keyword -> id lookup tables and write them to a JSON file.

Running this module reads three Excel sheets under ``data/`` and writes
``data/keyword_mapping.json`` containing five tables:

* ``solving_type2id``  - solving type name -> 1-based position in the list
* ``quantity2id``      - physical quantity -> id
* ``scene2id``         - physical scenario -> id
* ``knowledge2id``     - level-4 knowledge point -> id
* ``init_id2max_id``   - level-3 base id -> last id handed out under it
"""
import json

import pandas as pd


def _number_grouped_items(df, group_col, item_col, *, offset, step):
    """Assign ids to the items of a sheet laid out as header rows + item rows.

    A non-empty cell in ``group_col`` starts a new group; the n-th group
    (1-based) gets the base id ``offset + n * step``.  Every non-empty cell in
    ``item_col`` receives the id following the previous one in the current
    group, so the first item of a group is ``base + 1``.

    Args:
        df: DataFrame holding both columns.
        group_col: Name of the column whose non-empty cells open a group.
        item_col: Name of the column holding the items to number.
        offset: Constant added to every group base id.
        step: Distance between the base ids of consecutive groups.

    Returns:
        dict mapping each item to its id, in sheet order.  A repeated item
        keeps the id of its last occurrence.

    Raises:
        KeyError: If either column is missing from ``df``.
        ValueError: If an item appears before the first group header.

    NOTE: ids are not range-checked; a group with more than ``step`` items
    runs into the id range of the next group.
    """
    item2id = {}
    group_count = 0
    current_id = None  # None until the first group header has been seen
    for row, (group, item) in enumerate(zip(df[group_col], df[item_col])):
        if not pd.isna(group):
            group_count += 1
            current_id = offset + group_count * step
        if pd.isna(item):
            continue
        if current_id is None:
            # Without this check the running id would be undefined (or, worse,
            # left over from a previously processed sheet).
            raise ValueError(
                f"row {row}: {item_col!r} value {item!r} appears before any "
                f"{group_col!r} header"
            )
        current_id += 1
        item2id[item] = current_id
    return item2id


def _number_knowledge_points(df):
    """Assign ids to level-4 knowledge points grouped by level-2/level-3 rows.

    A non-empty '2级知识点' cell bumps the chapter counter.  A non-empty
    '3级知识点' cell must start with a dotted number (``"<a>.<b> ..."``); it
    sets the base id ``10000 + chapter * 100 + b * 10``.  Each non-empty
    '4级知识点' cell gets the next id after the previous one.

    Returns:
        ``(knowledge2id, init_id2max_id)`` where ``init_id2max_id`` maps every
        level-3 base id to the last id assigned under it (the base id itself
        when the section has no level-4 rows).

    Raises:
        KeyError: If one of the three columns is missing from ``df``.
        ValueError: If a level-4 row appears before the first level-3 row.

    NOTE: ids are not range-checked; a section number >= 10 or more than 10
    level-4 rows under one section runs into a neighbouring id range.
    """
    knowledge2id = {}
    init_id2max_id = {}
    chapter_count = 0
    init_id = None
    current_id = None
    columns = zip(df['2级知识点'], df['3级知识点'], df['4级知识点'])
    for row, (level2, level3, level4) in enumerate(columns):
        if not pd.isna(level2):
            chapter_count += 1
        if not pd.isna(level3):
            section = level3.split(' ')[0].split('.')
            # The chapter part comes from the running level-2 counter rather
            # than from the number written in the label (section[0]).
            current_id = 10000 + chapter_count * 100 + int(section[1]) * 10
            init_id = current_id
            init_id2max_id[init_id] = current_id
        if pd.isna(level4):
            continue
        if current_id is None:
            raise ValueError(
                f"row {row}: '4级知识点' value {level4!r} appears before any "
                f"'3级知识点' header"
            )
        current_id += 1
        knowledge2id[level4] = current_id
        init_id2max_id[init_id] = current_id
    return knowledge2id, init_id2max_id


keyword2id_dict = dict()

# Solving types: the id is the 1-based position in this list.
solution_type_list = ["概念辨析", "规律理解", "现象解释", "物理学史", "计算分析", "实验操作", "连线作图", "实验读数"]
solving_type2id = {
    name: position for position, name in enumerate(solution_type_list, start=1)
}
keyword2id_dict["solving_type2id"] = solving_type2id

# Physical quantities: category n starts at n * 100.
excel_path = r"data/物理量.xlsx"
df = pd.read_excel(excel_path)
quantity2id = _number_grouped_items(df, '类别', '物理量', offset=0, step=100)
keyword2id_dict["quantity2id"] = quantity2id

# Physical scenarios: knowledge point n starts at 10000 + n * 10.
excel_path = r"data/物理情景.xlsx"
df = pd.read_excel(excel_path)
scene2id = _number_grouped_items(df, '知识点', '情景', offset=10000, step=10)
keyword2id_dict["scene2id"] = scene2id

# Knowledge points.
excel_path = r"data/物理知识点.xlsx"
df = pd.read_excel(excel_path)
knowledge2id, init_id2max_id = _number_knowledge_points(df)
keyword2id_dict["knowledge2id"] = knowledge2id
keyword2id_dict["init_id2max_id"] = init_id2max_id

# Write the combined mapping (the integer keys of init_id2max_id are emitted
# as JSON strings by json.dump).
with open("data/keyword_mapping.json", 'w', encoding="utf8") as f:
    json.dump(keyword2id_dict, f, ensure_ascii=False, indent=2)