config.py

import os
import pymongo
# Development mode: 0-fxb, 1-ksy
dev_mode_list = ["fxb", "ksy"]
dev_mode = dev_mode_list[0]
# Set up the MongoDB connection
client_url = dict(fxb="mongodb://192.168.1.140:27017/", ksy="mongodb://127.0.0.1:27017/")[dev_mode]
myclient = pymongo.MongoClient(client_url)
mongo_info_db = myclient["ksy"]
mongo_coll = mongo_info_db["test_topic"]
# Flag: whether to train sentence vectors from the MongoDB data
sent_train_flag = 1
# Dictionary layout used for batch data processing
batch_processing_dict = {
    "id_list": [],
    "cont_clear_list": [],
    "cont_cut_list": [],
    "cut_idx_list": [0]
}
# Embedding vector dimension
vector_dim = 384
# HNSW distance metric ('l2', 'cosine', 'ip')
hnsw_metric = ("l2", "cosine", "ip")[0]
# Maximum number of elements in the HNSW index
num_elements = 1000000
# HNSW query-time ef parameter (controls recall)
hnsw_set_ef = 150
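# Illustrative note (assumption): the three parameters above follow hnswlib's
# standard index API; a typical sketch, if hnswlib is the backend used elsewhere:
#   index = hnswlib.Index(space=hnsw_metric, dim=vector_dim)
#   index.init_index(max_elements=num_elements)
#   index.set_ef(hnsw_set_ef)  # higher ef -> better recall, slower queries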
# API endpoints
# Illustration duplicate-check
illustration_url = dict(fxb="http://192.168.1.204:8068/topic_retrieval_http", ksy="http://127.0.0.1:8068/topic_retrieval_http")[dev_mode]
# Image duplicate-check
image_url = dict(fxb="http://192.168.1.204:8068/img_retrieval_http", ksy="http://127.0.0.1:8068/img_retrieval_http")[dev_mode]
# HNSW model retrieval endpoint
hnsw_retrieve_url = r"http://127.0.0.1:8836/retrieve"
# Multi-dimension (solution type / difficulty) classification endpoint
dim_classify_url = r"http://127.0.0.1:8837/dim_classify"
# Knowledge-point tagging endpoint
knowledge_tagging_url = r"http://127.0.0.1:8840/generate"
# Root paths
root_path = os.getcwd()
data_root_path = os.path.join(root_path, "model_data")
# Sentence-BERT model path
sbert_path = os.path.join(data_root_path, "all-MiniLM-L6-v2")
# # bert-whitening parameter path
# whitening_path = os.path.join(data_root_path, "whitening_param.pkl")
# Knowledge-point keyword mapping
keyword_mapping_path = os.path.join(data_root_path, dict(fxb="fxb_keyword_mapping.json", ksy="ksy_keyword_mapping.json")[dev_mode])
# Stop-words file path
stop_words_path = os.path.join(data_root_path, "stop_words.txt")
# SQLite database paths
sqlite_path = os.path.join(data_root_path, "info_retrieval.db")
sqlite_copy_path = os.path.join(data_root_path, "info_retrieval_copy.db")
# HNSW model path
hnsw_path = "hnsw_model.bin"
# Formula-processing data paths
bow_model_path = os.path.join(data_root_path, "bow_model.pkl")
bow_vector_path = os.path.join(data_root_path, "bow_vector.npy")
formula_data_path = os.path.join(data_root_path, "formula_data.json")
# Tokenizer path
bert_path = "bert-base-chinese"
# Multi-dimension classification model paths
solution_model_path = os.path.join(data_root_path, "solution_classify.pt")
difficulty_model_path = os.path.join(data_root_path, "difficulty_classify.pt")
# Log directory
log_root_path = os.path.join(root_path, "logs")
# Duplicate-check (retrieval) log path
retrieval_path = os.path.join(log_root_path, "retrieval_app.log")
# Log message format
log_msg = "id : {id} -> {type} -> {message}"
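
# Minimal sanity check when this module is run directly (illustrative only;
# nothing below is required by the services that import this config).
if __name__ == "__main__":
    print("dev_mode:", dev_mode)
    print("mongo url:", client_url)
    print("sbert_path:", sbert_path)
    print("hnsw params:", hnsw_metric, vector_dim, num_elements, hnsw_set_ef)
    # Example of the shared log-message format:
    print(log_msg.format(id=0, type="retrieval", message="config loaded"))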