# config.py - shared configuration constants (MongoDB, HNSW index, data/model paths, logging).
  1. import os
  2. import pymongo
  3. # 建立mongodb连接
  4. myclient = pymongo.MongoClient("mongodb://192.168.1.140:27017/")
  5. mongo_info_db = myclient["ksy"]
  6. mongo_coll = mongo_info_db['test_topic']
  7. # mongodb句向量训练标志
  8. sent_train_flag = 1
  9. # 批量处理数据字典格式
  10. batch_processing_dict = {
  11. "id_list": [],
  12. "cont_clear_list": [],
  13. "cont_cut_list": [],
  14. "cut_idx_list": [0]
  15. }
  16. # 词向量维度
  17. vector_dim = 384
  18. # hnsw评价指标('l2','cosine','ip')
  19. hnsw_metric = ('l2','cosine','ip')[0]
  20. # hnsw最大索引数量
  21. num_elements = 1000000
  22. # hnsw召回数量参数
  23. hnsw_set_ef = 150
  24. # hnsw模型检索链接
  25. hnsw_retrieve_url = r"http://localhost:8836/retrieve"
  26. # 根地址
  27. root_path = os.getcwd()
  28. data_root_path = os.path.join(root_path, "model_data")
  29. # Sentence_BERT模型地址
  30. sbert_path = os.path.join(data_root_path, "all-MiniLM-L6-v2")
  31. # bert-whitening参数地址
  32. whitening_path = os.path.join(data_root_path, "whitening_param.pkl")
  33. # 停用词地址
  34. stop_words_path = os.path.join(data_root_path, "stop_words.txt")
  35. # sqlite数据库地址
  36. sqlite_path = os.path.join(data_root_path, "info_retrieval.db")
  37. sqlite_copy_path = os.path.join(data_root_path, "info_retrieval_copy.db")
  38. # hnsw模型地址
  39. hnsw_path = "hnsw_model.bin"
  40. # 公式处理数据地址
  41. bow_model_path = os.path.join(data_root_path, "bow_model.pkl")
  42. bow_vector_path = os.path.join(data_root_path, "bow_vector.npy")
  43. formula_data_path = os.path.join(data_root_path, "formula_data.json")
  44. # 日志地址
  45. log_root_path = os.path.join(root_path, "logs")
  46. # 查重日志地址
  47. retrieval_path = os.path.join(log_root_path, "retrieval_app.log")
  48. # 日志信息(message)格式
  49. log_msg = "id : {id} -> {type} -> {message}"