# comparison.py - builds keyword -> id tables from Excel sheets and writes them to a JSON file.

  1. import json
  2. import numpy as np
  3. import pandas as pd
  4. keyword2id_dict = dict()
  5. # 物理量
  6. excel_path = r"data/物理量.xlsx"
  7. df = pd.read_excel(excel_path)
  8. quantity2id = dict()
  9. count_index = 0
  10. for i in range(len(df)):
  11. if not pd.isna(df['类别'][i]):
  12. count_index += 1
  13. sign_index = count_index * 100
  14. knowledge = df['物理量'][i]
  15. if not pd.isna(knowledge):
  16. sign_index += 1
  17. quantity2id[knowledge] = sign_index
  18. keyword2id_dict["quantity2id"] = quantity2id
# Physics scenarios (disabled; kept for reference).
# NOTE(review): the nesting of the commented-out loop below is inferred -
# confirm before re-enabling.
# excel_path = r"data/物理情景.xlsx"
# df = pd.read_excel(excel_path)
# scene2id = dict()
# count_index = 0
# for i in range(len(df)):
#     if not pd.isna(df['知识点'][i]):
#         count_index += 1
#         sign_index = 10000 + count_index * 10
#     knowledge = df['情景'][i]
#     if not pd.isna(knowledge):
#         sign_index += 1
#         scene2id[knowledge] = sign_index
# keyword2id_dict["scene2id"] = scene2id
  33. # 风向标-知识点
  34. excel_path = r"data/物理知识点.xlsx"
  35. df = pd.read_excel(excel_path)
  36. knowledge2id = dict()
  37. init_id2max_id = dict()
  38. count_index = 0
  39. for i in range(len(df)):
  40. if not pd.isna(df['2级知识点'][i]):
  41. count_index += 1
  42. if not pd.isna(df['3级知识点'][i]):
  43. sign = df['3级知识点'][i].split(' ')[0].split('.')
  44. # sign_index = 10000 + int(sign[0]) * 100 + int(sign[1]) * 10
  45. sign_index = 10000 + count_index * 100 + int(sign[1]) * 10
  46. init_id = sign_index
  47. if init_id not in init_id2max_id:
  48. init_id2max_id[init_id] = []
  49. else:
  50. init_id2max_id[init_id].append(sign_index)
  51. knowledge = df['4级知识点'][i]
  52. if not pd.isna(knowledge):
  53. sign_index += 1
  54. knowledge2id[knowledge] = sign_index
  55. if init_id not in init_id2max_id:
  56. init_id2max_id[init_id] = []
  57. else:
  58. init_id2max_id[init_id].append(sign_index)
  59. keyword2id_dict["knowledge2id"] = knowledge2id
  60. keyword2id_dict["init_id2max_id"] = init_id2max_id
# Examination-authority knowledge points (disabled; kept for reference).
# NOTE(review): the nesting of the commented-out loop below is inferred -
# confirm before re-enabling.
# excel_path = r"data/初中物理知识对应关系.xlsx"
# df = pd.read_excel(excel_path)
# knowledge2id = dict()
# init_id2max_id = dict()
# count_index = 0
# for i in range(len(df)):
#     if not pd.isna(df.iloc[i][2]):
#         count_index += 1
#         sign_index = 100000000 + count_index * 1000000
#         if pd.isna(df.iloc[i+1][3]):
#             knowledge = df.iloc[i][2].split(' ')[1]
#             knowledge2id[knowledge] = sign_index
#             continue
#     if not pd.isna(df.iloc[i][3]):
#         sign_index = int(str(sign_index)[:-4]) * 10000
#         sign_index += 10000
#         relate_index = sign_index
#         init_id2max_id[relate_index] = []
#         if pd.isna(df.iloc[i+1][4]):
#             knowledge = df.iloc[i][3].split(' ')[1]
#             knowledge2id[knowledge] = sign_index
#             continue
#     if not pd.isna(df.iloc[i][4]):
#         sign_index = int(str(sign_index)[:-2]) * 100
#         sign_index += 100
#         if pd.isna(df.iloc[i+1][5]):
#             knowledge = df.iloc[i][4].split(' ')[1]
#             knowledge2id[knowledge] = sign_index
#             init_id2max_id[relate_index].append(sign_index)
#             continue
#     if not pd.isna(df.iloc[i][5]):
#         sign_index += 1
#         knowledge = df.iloc[i][5].split(' ')[1]
#         knowledge2id[knowledge] = sign_index
#         init_id2max_id[relate_index].append(sign_index)
# keyword2id_dict["knowledge2id"] = knowledge2id
# keyword2id_dict["init_id2max_id"] = init_id2max_id
  99. # 映射转换
  100. with open("model_data/keyword_mapping.json", 'w', encoding="utf8") as f:
  101. json.dump(keyword2id_dict, f, ensure_ascii=False, indent=2)