# config.py - project configuration (MongoDB, HNSW, service URLs, file paths).
import os

import pymongo
  3. # 开发模式: 0-fxb, 1-ksy
  4. dev_mode_list = ["fxb", "ksy"]
  5. dev_mode = dev_mode_list[0]
  6. # 建立mongodb连接
  7. client_url = dict(fxb="mongodb://192.168.1.140:27017/", ksy="mongodb://127.0.0.1:27017/")[dev_mode]
  8. myclient = pymongo.MongoClient(client_url)
  9. mongo_info_db = myclient["ksy"]
  10. mongo_coll = mongo_info_db['test_topic']
  11. # mongodb句向量训练标志
  12. sent_train_flag = 1
  13. # 批量处理数据字典格式
  14. batch_processing_dict = {
  15. "id_list": [],
  16. "cont_clear_list": [],
  17. "cont_cut_list": [],
  18. "cut_idx_list": [0]
  19. }
  20. # 词向量维度
  21. vector_dim = 384
  22. # hnsw评价指标('l2','cosine','ip')
  23. hnsw_metric = ('l2','cosine','ip')[0]
  24. # hnsw最大索引数量
  25. num_elements = 1000000
  26. # hnsw召回数量参数
  27. hnsw_set_ef = 150
  28. # 调用api链接
  29. # 配图查重
  30. illustration_url = dict(fxb="http://192.168.1.204:8068/topic_retrieval_http", ksy="http://127.0.0.1:8068/topic_retrieval_http")[dev_mode]
  31. # 图片查重
  32. image_url = dict(fxb="http://192.168.1.204:8068/img_retrieval_http", ksy="http://127.0.0.1:8068/img_retrieval_http")[dev_mode]
  33. # hnsw模型检索链接
  34. hnsw_retrieve_url = r"http://127.0.0.1:8836/retrieve"
  35. # 多维度分类链接
  36. dim_classify_url = r"http://127.0.0.1:8837/dim_classify"
  37. # 根地址
  38. root_path = os.getcwd()
  39. data_root_path = os.path.join(root_path, "model_data")
  40. # Sentence_BERT模型地址
  41. sbert_path = os.path.join(data_root_path, "all-MiniLM-L6-v2")
  42. # bert-whitening参数地址
  43. whitening_path = os.path.join(data_root_path, "whitening_param.pkl")
  44. # 停用词地址
  45. stop_words_path = os.path.join(data_root_path, "stop_words.txt")
  46. # sqlite数据库地址
  47. sqlite_path = os.path.join(data_root_path, "info_retrieval.db")
  48. sqlite_copy_path = os.path.join(data_root_path, "info_retrieval_copy.db")
  49. # hnsw模型地址
  50. hnsw_path = "hnsw_model.bin"
  51. # 公式处理数据地址
  52. bow_model_path = os.path.join(data_root_path, "bow_model.pkl")
  53. bow_vector_path = os.path.join(data_root_path, "bow_vector.npy")
  54. formula_data_path = os.path.join(data_root_path, "formula_data.json")
  55. # 分词器地址
  56. bert_path = "bert-base-chinese"
  57. # 多维度分类模型地址
  58. solution_model_path = os.path.join(data_root_path, "solution_classify.pt")
  59. difficulty_model_path = os.path.join(data_root_path, "difficulty_classify.pt")
  60. # 日志地址
  61. log_root_path = os.path.join(root_path, "logs")
  62. # 查重日志地址
  63. retrieval_path = os.path.join(log_root_path, "retrieval_app.log")
  64. # 日志信息(message)格式
  65. log_msg = "id : {id} -> {type} -> {message}"