"""Build keyword -> id lookup tables from Excel sheets and dump them as JSON.

Running (or importing) this module reads ``data/物理量.xlsx`` and
``data/物理知识点.xlsx``, derives integer ids for every physical quantity and
every level-4 knowledge point, and writes the combined mapping to
``model_data/keyword_mapping.json``.
"""
import json

import numpy as np
import pandas as pd


def _build_quantity2id(frame: pd.DataFrame) -> dict:
    """Return ``{quantity name: id}`` for the physical-quantity sheet.

    Reads the columns ``类别`` and ``物理量``.  A non-empty ``类别`` cell starts
    the next category, whose base id is ``category_number * 100``; every
    non-empty ``物理量`` cell that follows gets the next id after that base
    (base + 1, base + 2, ...).

    Raises:
        ValueError: if a quantity row appears before any category row, so
            there is no base id to count from.
    """
    mapping = {}
    category_count = 0
    current_id = None
    for category, quantity in zip(frame['类别'], frame['物理量']):
        if not pd.isna(category):
            # A new category resets the running id to its own base.
            category_count += 1
            current_id = category_count * 100
        if pd.isna(quantity):
            continue
        if current_id is None:
            raise ValueError(
                "quantity row found before the first non-empty '类别' cell"
            )
        current_id += 1
        mapping[quantity] = current_id
    return mapping


def _build_knowledge_maps(frame: pd.DataFrame) -> tuple:
    """Return ``(knowledge2id, init_id2max_id)`` for the knowledge-point sheet.

    Reads the columns ``2级知识点``, ``3级知识点`` and ``4级知识点``.

    * Each non-empty level-2 cell increments a running level-2 counter.
    * Each non-empty level-3 cell is expected to start with a dotted number
      followed by a space (e.g. ``"1.2 ..."``); its second component ``n``
      yields the base id ``10000 + level2_count * 100 + n * 10``.
    * Each non-empty level-4 cell gets the next id after the current base and
      is recorded both in ``knowledge2id`` (name -> id) and in the list
      ``init_id2max_id[base id]``.

    NOTE(review): base ids are spaced 10 apart, so a level-3 item with ten or
    more level-4 rows produces ids that run into the next base's range.

    Raises:
        ValueError: if a level-4 row appears before any level-3 row.
    """
    knowledge2id = {}
    init_id2max_id = {}
    level2_count = 0
    current_id = None
    init_id = None
    columns = zip(frame['2级知识点'], frame['3级知识点'], frame['4级知识点'])
    for level2, level3, level4 in columns:
        if not pd.isna(level2):
            level2_count += 1
        if not pd.isna(level3):
            numbering = level3.split(' ')[0].split('.')
            # Earlier scheme, kept for reference:
            # current_id = 10000 + int(numbering[0]) * 100 + int(numbering[1]) * 10
            current_id = 10000 + level2_count * 100 + int(numbering[1]) * 10
            init_id = current_id
            if init_id not in init_id2max_id:
                init_id2max_id[init_id] = []
            else:
                # Same base id seen again: the base id itself is appended,
                # exactly as the original script did.
                init_id2max_id[init_id].append(current_id)
        if pd.isna(level4):
            continue
        if init_id is None:
            raise ValueError(
                "level-4 row found before the first non-empty '3级知识点' cell"
            )
        current_id += 1
        knowledge2id[level4] = current_id
        # The key always exists here: the level-3 branch creates it whenever
        # it assigns init_id.
        init_id2max_id[init_id].append(current_id)
    return knowledge2id, init_id2max_id


keyword2id_dict = dict()

# Physical quantities.
excel_path = r"data/物理量.xlsx"
df = pd.read_excel(excel_path)
quantity2id = _build_quantity2id(df)
keyword2id_dict["quantity2id"] = quantity2id

# Disabled: physical scenarios.  Kept for reference; re-check indentation and
# column names before re-enabling.
# excel_path = r"data/物理情景.xlsx"
# df = pd.read_excel(excel_path)
# scene2id = dict()
# count_index = 0
# for i in range(len(df)):
#     if not pd.isna(df['知识点'][i]):
#         count_index += 1
#         sign_index = 10000 + count_index * 10
#     knowledge = df['情景'][i]
#     if not pd.isna(knowledge):
#         sign_index += 1
#         scene2id[knowledge] = sign_index
# keyword2id_dict["scene2id"] = scene2id

# Knowledge points from the "风向标" sheet.
excel_path = r"data/物理知识点.xlsx"
df = pd.read_excel(excel_path)
knowledge2id, init_id2max_id = _build_knowledge_maps(df)
keyword2id_dict["knowledge2id"] = knowledge2id
keyword2id_dict["init_id2max_id"] = init_id2max_id

# Disabled: knowledge points from the "考试院" sheet (an alternative source for
# the same two keys).  Kept for reference; re-check indentation and the
# positional column indices before re-enabling.
# excel_path = r"data/初中物理知识对应关系.xlsx"
# df = pd.read_excel(excel_path)
# knowledge2id = dict()
# init_id2max_id = dict()
# count_index = 0
# for i in range(len(df)):
#     if not pd.isna(df.iloc[i][2]):
#         count_index += 1
#         sign_index = 100000000 + count_index * 1000000
#         if pd.isna(df.iloc[i+1][3]):
#             knowledge = df.iloc[i][2].split(' ')[1]
#             knowledge2id[knowledge] = sign_index
#             continue
#     if not pd.isna(df.iloc[i][3]):
#         sign_index = int(str(sign_index)[:-4]) * 10000
#         sign_index += 10000
#         relate_index = sign_index
#         init_id2max_id[relate_index] = []
#         if pd.isna(df.iloc[i+1][4]):
#             knowledge = df.iloc[i][3].split(' ')[1]
#             knowledge2id[knowledge] = sign_index
#             continue
#     if not pd.isna(df.iloc[i][4]):
#         sign_index = int(str(sign_index)[:-2]) * 100
#         sign_index += 100
#         if pd.isna(df.iloc[i+1][5]):
#             knowledge = df.iloc[i][4].split(' ')[1]
#             knowledge2id[knowledge] = sign_index
#             init_id2max_id[relate_index].append(sign_index)
#             continue
#     if not pd.isna(df.iloc[i][5]):
#         sign_index += 1
#         knowledge = df.iloc[i][5].split(' ')[1]
#         knowledge2id[knowledge] = sign_index
#         init_id2max_id[relate_index].append(sign_index)
# keyword2id_dict["knowledge2id"] = knowledge2id
# keyword2id_dict["init_id2max_id"] = init_id2max_id

# Write the combined mapping.  json.dump turns the integer keys of
# init_id2max_id into strings; ensure_ascii=False keeps the Chinese names
# readable in the output file.
with open("model_data/keyword_mapping.json", 'w', encoding="utf8") as f:
    json.dump(keyword2id_dict, f, ensure_ascii=False, indent=2)