comparison.py 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. import json
  2. import pandas as pd
  3. keyword2id_dict = dict()
  4. # 物理量
  5. excel_path = r"data/物理量.xlsx"
  6. df = pd.read_excel(excel_path)
  7. quantity2id = dict()
  8. count_index = 0
  9. for i in range(len(df)):
  10. if not pd.isna(df['类别'][i]):
  11. count_index += 1
  12. sign_index = count_index * 100
  13. knowledge = df['物理量'][i]
  14. if not pd.isna(knowledge):
  15. sign_index += 1
  16. quantity2id[knowledge] = sign_index
  17. keyword2id_dict["quantity2id"] = quantity2id
# # Disabled alternative: Fengxiangbiao (风向标) knowledge points.
# # NOTE(review): indentation below was reconstructed from the statement
# # structure -- verify before re-enabling.
# excel_path = r"data/物理知识点.xlsx"
# df = pd.read_excel(excel_path)
# knowledge2id = dict()
# init_id2max_id = dict()
# count_index = 0
# for i in range(len(df)):
#     if not pd.isna(df['2级知识点'][i]):
#         count_index += 1
#     if not pd.isna(df['3级知识点'][i]):
#         sign = df['3级知识点'][i].split(' ')[0].split('.')
#         # sign_index = 10000 + int(sign[0]) * 100 + int(sign[1]) * 10
#         sign_index = 10000 + count_index * 100 + int(sign[1]) * 10
#         init_id = sign_index
#         if init_id not in init_id2max_id:
#             init_id2max_id[init_id] = []
#         else:
#             init_id2max_id[init_id].append(sign_index)
#     knowledge = df['4级知识点'][i]
#     if not pd.isna(knowledge):
#         sign_index += 1
#         knowledge2id[knowledge] = sign_index
#         if init_id not in init_id2max_id:
#             init_id2max_id[init_id] = []
#         else:
#             init_id2max_id[init_id].append(sign_index)
# keyword2id_dict["knowledge2id"] = knowledge2id
# keyword2id_dict["init_id2max_id"] = init_id2max_id
# # Write the mapping to disk.
# with open("model_data/fxb_keyword_mapping.json", 'w', encoding="utf8") as f:
#     json.dump(keyword2id_dict, f, ensure_ascii=False, indent=2)
  49. # 考试院-知识点
  50. excel_path = r"data/初中物理知识对应关系.xlsx"
  51. df = pd.read_excel(excel_path)
  52. knowledge2id = dict()
  53. init_id2max_id = dict()
  54. count_index = 0
  55. for i in range(len(df)):
  56. if not pd.isna(df.iloc[i][2]):
  57. count_index += 1
  58. sign_index = 100000000 + count_index * 1000000
  59. if pd.isna(df.iloc[i+1][3]):
  60. knowledge = df.iloc[i][2].split(' ')[1]
  61. knowledge2id[knowledge] = sign_index
  62. continue
  63. if not pd.isna(df.iloc[i][3]):
  64. sign_index = int(str(sign_index)[:-4]) * 10000
  65. sign_index += 10000
  66. relate_index = sign_index
  67. init_id2max_id[relate_index] = []
  68. if pd.isna(df.iloc[i+1][4]):
  69. knowledge = df.iloc[i][3].split(' ')[1]
  70. knowledge2id[knowledge] = sign_index
  71. continue
  72. if not pd.isna(df.iloc[i][4]):
  73. sign_index = int(str(sign_index)[:-2]) * 100
  74. sign_index += 100
  75. if pd.isna(df.iloc[i+1][5]):
  76. knowledge = df.iloc[i][4].split(' ')[1]
  77. knowledge2id[knowledge] = sign_index
  78. init_id2max_id[relate_index].append(sign_index)
  79. continue
  80. if not pd.isna(df.iloc[i][5]):
  81. sign_index += 1
  82. knowledge = df.iloc[i][5].split(' ')[1]
  83. knowledge2id[knowledge] = sign_index
  84. init_id2max_id[relate_index].append(sign_index)
  85. keyword2id_dict["knowledge2id"] = knowledge2id
  86. keyword2id_dict["init_id2max_id"] = init_id2max_id
  87. # 映射转换
  88. with open("model_data/ksy_keyword_mapping.json", 'w', encoding="utf8") as f:
  89. json.dump(keyword2id_dict, f, ensure_ascii=False, indent=2)