tujintao
/
physics_repeat_check


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
							import json
import pandas as pd

keyword2id_dict = dict()

# Physical quantities
# A row whose '类别' cell is filled opens a new group, and the id counter is
# reset to group_number * 100. Every row whose '物理量' cell is filled then
# receives the next id after the current counter value.
excel_path = r"data/物理量.xlsx"
df = pd.read_excel(excel_path)
quantity2id = dict()
count_index = 0
for row in range(len(df)):
    if pd.notna(df.at[row, '类别']):
        # New group: restart numbering at the group's base id
        count_index += 1
        sign_index = 100 * count_index
    quantity_name = df.at[row, '物理量']
    if pd.isna(quantity_name):
        # Nothing to register on this row
        continue
    sign_index += 1
    quantity2id[quantity_name] = sign_index
keyword2id_dict["quantity2id"] = quantity2id

# # 风向标-知识点
# excel_path = r"data/物理知识点.xlsx"
# df = pd.read_excel(excel_path)
# knowledge2id = dict()
# init_id2max_id = dict()
# count_index = 0
# for i in range(len(df)):
#     if not pd.isna(df['2级知识点'][i]):
#         count_index += 1
#     if not pd.isna(df['3级知识点'][i]):
#         sign = df['3级知识点'][i].split(' ')[0].split('.')
#         # sign_index = 10000 + int(sign[0]) * 100 + int(sign[1]) * 10
#         sign_index = 10000 + count_index * 100 + int(sign[1]) * 10
#         init_id = sign_index
#         if init_id not in init_id2max_id:
#             init_id2max_id[init_id] = []
#         else:
#             init_id2max_id[init_id].append(sign_index)
#     knowledge = df['4级知识点'][i]
#     if not pd.isna(knowledge):
#         sign_index += 1
#         knowledge2id[knowledge] = sign_index
#         if init_id not in init_id2max_id:
#             init_id2max_id[init_id] = []
#         else:
#             init_id2max_id[init_id].append(sign_index)
# keyword2id_dict["knowledge2id"] = knowledge2id
# keyword2id_dict["init_id2max_id"] = init_id2max_id

# Examination-authority knowledge points
# Reads the knowledge-point correspondence sheet and builds two mappings:
#   knowledge2id   - knowledge-point name -> numeric id
#   init_id2max_id - id of each column-3 entry -> list of ids registered below it
# Columns 2..5 (0-based positions) are read as four nesting levels; the id
# encodes them as 100000000 + L2 * 1000000 + L3 * 10000 + L4 * 100 + L5.
# A cell's name is the second space-separated token of its text (the first
# token is presumably a section number - confirm against the sheet).
# An entry is registered as a leaf when the next row has nothing in the
# column one level deeper.
# NOTE(review): sign_index / relate_index are only assigned inside the
# branches below, so the sheet must open with a column-2 (and, before any
# column-4/5 entry, a column-3) row.
excel_path = r"data/初中物理知识对应关系.xlsx"
df = pd.read_excel(excel_path)
knowledge2id = dict()
init_id2max_id = dict()
count_index = 0
row_total = len(df)
for i in range(row_total):
    # The last row has no row beneath it; treat the "next row" as blank there
    # instead of indexing past the end of the frame.
    is_last_row = i + 1 >= row_total

    # df.iat[row, col] is purely positional; it avoids the deprecated
    # integer-key lookup on a label-indexed row Series (df.iloc[i][k]).
    if not pd.isna(df.iat[i, 2]):
        count_index += 1
        sign_index = 100000000 + count_index * 1000000
        if is_last_row or pd.isna(df.iat[i + 1, 3]):
            knowledge = df.iat[i, 2].split(' ')[1]
            knowledge2id[knowledge] = sign_index
            continue
    if not pd.isna(df.iat[i, 3]):
        # Clear the two lower levels, then advance the column-3 counter
        sign_index = sign_index // 10000 * 10000
        sign_index += 10000
        relate_index = sign_index
        init_id2max_id[relate_index] = []
        if is_last_row or pd.isna(df.iat[i + 1, 4]):
            knowledge = df.iat[i, 3].split(' ')[1]
            knowledge2id[knowledge] = sign_index
            continue
    if not pd.isna(df.iat[i, 4]):
        # Clear the lowest level, then advance the column-4 counter
        sign_index = sign_index // 100 * 100
        sign_index += 100
        if is_last_row or pd.isna(df.iat[i + 1, 5]):
            knowledge = df.iat[i, 4].split(' ')[1]
            knowledge2id[knowledge] = sign_index
            init_id2max_id[relate_index].append(sign_index)
            continue
    if not pd.isna(df.iat[i, 5]):
        sign_index += 1
        knowledge = df.iat[i, 5].split(' ')[1]
        knowledge2id[knowledge] = sign_index
        init_id2max_id[relate_index].append(sign_index)

keyword2id_dict["knowledge2id"] = knowledge2id
keyword2id_dict["init_id2max_id"] = init_id2max_id

# Persist the collected mappings as pretty-printed JSON; non-ASCII text is
# written as-is rather than escaped.
mapping_path = "model_data/keyword_mapping.json"
with open(mapping_path, 'w', encoding="utf8") as mapping_file:
    json.dump(
        keyword2id_dict,
        mapping_file,
        ensure_ascii=False,
        indent=2,
    )