1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- import json
- import pandas as pd
# Top-level container: mapping name -> {keyword: id}; later sections add
# their own entries to it.
keyword2id_dict = dict()

# Solving types: ids are 1..N, following the order of the list below.
solution_type_list = [
    "概念辨析",
    "规律理解",
    "现象解释",
    "物理学史",
    "计算分析",
    "实验操作",
    "连线作图",
    "实验读数",
]
solving_type2id = {
    ele: type_id for type_id, ele in enumerate(solution_type_list, start=1)
}
keyword2id_dict["solving_type2id"] = solving_type2id
# Physical quantities.
# Every non-empty cell in the '类别' (category) column opens a new group,
# numbered from 1. The non-empty '物理量' (quantity) cells that follow get
# consecutive ids group * 100 + 1, group * 100 + 2, ...
# NOTE(review): a group with more than 100 quantities would produce ids that
# the next group also assigns.
excel_path = r"data/物理量.xlsx"
df = pd.read_excel(excel_path)
quantity2id = dict()
count_index = 0
# Defined before the loop so that quantities listed ahead of the first
# category (group 0) are numbered 1, 2, ... instead of hitting an unbound name.
sign_index = count_index * 100
# Walk both columns in row order; this does not depend on the index labels.
for category, knowledge in zip(df['类别'], df['物理量']):
    if not pd.isna(category):
        count_index += 1
        sign_index = count_index * 100
    if not pd.isna(knowledge):
        sign_index += 1
        quantity2id[knowledge] = sign_index
keyword2id_dict["quantity2id"] = quantity2id
# Physical scenes.
# Every non-empty cell in the '知识点' (knowledge point) column opens a new
# group, numbered from 1. The non-empty '情景' (scene) cells that follow get
# consecutive ids 10000 + group * 10 + 1, 10000 + group * 10 + 2, ...
# NOTE(review): a group with more than 10 scenes would produce ids that the
# next group also assigns.
excel_path = r"data/物理情景.xlsx"
df = pd.read_excel(excel_path)
scene2id = dict()
count_index = 0
# Defined before the loop so that scenes listed ahead of the first knowledge
# point (group 0) are numbered from 10001 instead of hitting an unbound name.
sign_index = 10000 + count_index * 10
# Walk both columns in row order; this does not depend on the index labels.
for group_label, knowledge in zip(df['知识点'], df['情景']):
    if not pd.isna(group_label):
        count_index += 1
        sign_index = 10000 + count_index * 10
    if not pd.isna(knowledge):
        sign_index += 1
        scene2id[knowledge] = sign_index
keyword2id_dict["scene2id"] = scene2id
# Knowledge points.
# - A non-empty '2级知识点' (level-2) cell advances the running level-2 counter.
# - A non-empty '3级知识点' (level-3) cell is parsed as "<a>.<b> <title>" and
#   sets the base id 10000 + counter * 100 + <b> * 10.
# - Each non-empty '4级知识点' (level-4) cell gets the next id after that base.
# init_id2max_id maps every base id to the largest id handed out under it
# (the base itself when the level-3 entry has no level-4 children).
excel_path = r"data/物理知识点.xlsx"
df = pd.read_excel(excel_path)
knowledge2id = dict()
init_id2max_id = dict()
count_index = 0
# Defined before the loop so that level-4 entries appearing ahead of the first
# level-3 heading are numbered from 10001 instead of hitting an unbound name.
sign_index = init_id = 10000
# Walk the three columns in row order; this does not depend on the index labels.
for level2, level3, knowledge in zip(
    df['2级知识点'], df['3级知识点'], df['4级知识点']
):
    if not pd.isna(level2):
        count_index += 1
    if not pd.isna(level3):
        # Text before the first space is the "<a>.<b>" number of the heading.
        sign = level3.split(' ')[0].split('.')
        # The hundreds part comes from the running level-2 counter, not from
        # the <a> component (sign[0]) of the label; only <b> is read from it.
        sign_index = 10000 + count_index * 100 + int(sign[1]) * 10
        init_id = sign_index
        init_id2max_id[init_id] = sign_index
    if not pd.isna(knowledge):
        sign_index += 1
        knowledge2id[knowledge] = sign_index
        init_id2max_id[init_id] = sign_index
keyword2id_dict["knowledge2id"] = knowledge2id
keyword2id_dict["init_id2max_id"] = init_id2max_id
# Write all collected mappings to a single UTF-8 JSON file.
# ensure_ascii=False keeps the Chinese keywords as-is rather than \u escapes.
# Integer dictionary keys (e.g. in "init_id2max_id") are emitted as strings,
# since JSON object keys are always strings.
with open("data/keyword_mapping.json", 'w', encoding="utf8") as f:
    json.dump(keyword2id_dict, f, ensure_ascii=False, indent=2)
|