"""Project-wide configuration: MongoDB handles, HNSW settings, URLs and paths.

Everything here is evaluated at import time, including the construction of
the MongoDB client object and the lookup of the current working directory.
"""
import os

import pymongo

# ---------------------------------------------------------------------------
# Development mode
# ---------------------------------------------------------------------------
# Index into dev_mode_list selects the environment: 0 - "fxb", 1 - "ksy".
dev_mode_list = ["fxb", "ksy"]
dev_mode = dev_mode_list[0]

# ---------------------------------------------------------------------------
# MongoDB connection
# ---------------------------------------------------------------------------
# The connection URL is picked by dev_mode.
client_url = {
    "fxb": "mongodb://192.168.1.140:27017/",
    "ksy": "mongodb://127.0.0.1:27017/",
}[dev_mode]
myclient = pymongo.MongoClient(client_url)
mongo_info_db = myclient["ksy"]
mongo_coll = mongo_info_db["test_topic"]

# Flag for sentence-vector training on MongoDB data.
sent_train_flag = 1

# Template dictionary describing the layout of batch-processed data.
batch_processing_dict = {
    "id_list": [],
    "cont_clear_list": [],
    "cont_cut_list": [],
    "cut_idx_list": [0],
}

# ---------------------------------------------------------------------------
# Vector / HNSW parameters
# ---------------------------------------------------------------------------
# Word-vector dimension.
vector_dim = 384
# HNSW metric; change the index to choose among 'l2', 'cosine', 'ip'.
hnsw_metric = ("l2", "cosine", "ip")[0]
# Maximum number of elements in the HNSW index.
num_elements = 1000000
# HNSW recall-count ("ef") parameter.
hnsw_set_ef = 150

# ---------------------------------------------------------------------------
# API endpoints
# ---------------------------------------------------------------------------
# Illustration duplicate check.
illustration_url = {
    "fxb": "http://192.168.1.204:8068/topic_retrieval_http",
    "ksy": "http://127.0.0.1:8068/topic_retrieval_http",
}[dev_mode]
# Image duplicate check.
image_url = {
    "fxb": "http://192.168.1.204:8068/img_retrieval_http",
    "ksy": "http://127.0.0.1:8068/img_retrieval_http",
}[dev_mode]
# HNSW model retrieval endpoint.
hnsw_retrieve_url = "http://127.0.0.1:8836/retrieve"
# Multi-dimensional classification endpoint.
dim_classify_url = "http://127.0.0.1:8837/dim_classify"

# ---------------------------------------------------------------------------
# File-system locations
# ---------------------------------------------------------------------------
# Root directory: the working directory of the process at import time.
root_path = os.getcwd()
data_root_path = os.path.join(root_path, "model_data")

# Sentence-BERT model directory.
sbert_path = os.path.join(data_root_path, "all-MiniLM-L6-v2")
# BERT-whitening parameter file.
whitening_path = os.path.join(data_root_path, "whitening_param.pkl")
# Stop-word list.
stop_words_path = os.path.join(data_root_path, "stop_words.txt")
# SQLite database and its copy.
sqlite_path = os.path.join(data_root_path, "info_retrieval.db")
sqlite_copy_path = os.path.join(data_root_path, "info_retrieval_copy.db")
# HNSW model file.
# NOTE(review): bare file name, not joined with data_root_path - confirm
# this is intentional.
hnsw_path = "hnsw_model.bin"
# Formula-processing data files.
bow_model_path = os.path.join(data_root_path, "bow_model.pkl")
bow_vector_path = os.path.join(data_root_path, "bow_vector.npy")
formula_data_path = os.path.join(data_root_path, "formula_data.json")
# Tokenizer location.
# NOTE(review): a bare name rather than a path under data_root_path -
# presumably resolved by the tokenizer loader; verify against the caller.
bert_path = "bert-base-chinese"
# Multi-dimensional classification model files.
solution_model_path = os.path.join(data_root_path, "solution_classify.pt")
difficulty_model_path = os.path.join(data_root_path, "difficulty_classify.pt")

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Log directory.
log_root_path = os.path.join(root_path, "logs")
# Duplicate-check (retrieval) log file.
retrieval_path = os.path.join(log_root_path, "retrieval_app.log")
# Format template for log messages.
log_msg = "id : {id} -> {type} -> {message}"