# comparison.py
  1. import json
  2. import pandas as pd
  3. keyword2id_dict = dict()
  4. # 物理量
  5. excel_path = r"data/物理量.xlsx"
  6. df = pd.read_excel(excel_path)
  7. quantity2id = dict()
  8. count_index = 0
  9. for i in range(len(df)):
  10. if not pd.isna(df['类别'][i]):
  11. count_index += 1
  12. sign_index = count_index * 100
  13. knowledge = df['物理量'][i]
  14. if not pd.isna(knowledge):
  15. sign_index += 1
  16. quantity2id[knowledge] = sign_index
  17. keyword2id_dict["quantity2id"] = quantity2id
# # "风向标" (Fengxiangbiao) knowledge points -- this whole section is commented out.
# NOTE(review): the indentation below was reconstructed from a flattened
# listing; confirm it before re-enabling this code.
# excel_path = r"data/物理知识点.xlsx"
# df = pd.read_excel(excel_path)
# knowledge2id = dict()
# init_id2max_id = dict()
# count_index = 0
# for i in range(len(df)):
#     if not pd.isna(df['2级知识点'][i]):
#         count_index += 1
#     if not pd.isna(df['3级知识点'][i]):
#         sign = df['3级知识点'][i].split(' ')[0].split('.')
#         # sign_index = 10000 + int(sign[0]) * 100 + int(sign[1]) * 10
#         sign_index = 10000 + count_index * 100 + int(sign[1]) * 10
#         init_id = sign_index
#         if init_id not in init_id2max_id:
#             init_id2max_id[init_id] = []
#         else:
#             init_id2max_id[init_id].append(sign_index)
#     knowledge = df['4级知识点'][i]
#     if not pd.isna(knowledge):
#         sign_index += 1
#         knowledge2id[knowledge] = sign_index
#         if init_id not in init_id2max_id:
#             init_id2max_id[init_id] = []
#         else:
#             init_id2max_id[init_id].append(sign_index)
# keyword2id_dict["knowledge2id"] = knowledge2id
# keyword2id_dict["init_id2max_id"] = init_id2max_id
  46. # 考试院-知识点
  47. excel_path = r"data/初中物理知识对应关系.xlsx"
  48. df = pd.read_excel(excel_path)
  49. knowledge2id = dict()
  50. init_id2max_id = dict()
  51. count_index = 0
  52. for i in range(len(df)):
  53. if not pd.isna(df.iloc[i][2]):
  54. count_index += 1
  55. sign_index = 100000000 + count_index * 1000000
  56. if pd.isna(df.iloc[i+1][3]):
  57. knowledge = df.iloc[i][2].split(' ')[1]
  58. knowledge2id[knowledge] = sign_index
  59. continue
  60. if not pd.isna(df.iloc[i][3]):
  61. sign_index = int(str(sign_index)[:-4]) * 10000
  62. sign_index += 10000
  63. relate_index = sign_index
  64. init_id2max_id[relate_index] = []
  65. if pd.isna(df.iloc[i+1][4]):
  66. knowledge = df.iloc[i][3].split(' ')[1]
  67. knowledge2id[knowledge] = sign_index
  68. continue
  69. if not pd.isna(df.iloc[i][4]):
  70. sign_index = int(str(sign_index)[:-2]) * 100
  71. sign_index += 100
  72. if pd.isna(df.iloc[i+1][5]):
  73. knowledge = df.iloc[i][4].split(' ')[1]
  74. knowledge2id[knowledge] = sign_index
  75. init_id2max_id[relate_index].append(sign_index)
  76. continue
  77. if not pd.isna(df.iloc[i][5]):
  78. sign_index += 1
  79. knowledge = df.iloc[i][5].split(' ')[1]
  80. knowledge2id[knowledge] = sign_index
  81. init_id2max_id[relate_index].append(sign_index)
  82. keyword2id_dict["knowledge2id"] = knowledge2id
  83. keyword2id_dict["init_id2max_id"] = init_id2max_id
  84. # 映射转换
  85. with open("model_data/keyword_mapping.json", 'w', encoding="utf8") as f:
  86. json.dump(keyword2id_dict, f, ensure_ascii=False, indent=2)