"""Central configuration for the retrieval / duplicate-check services.

This module only defines constants: the MongoDB connection, HNSW index
parameters, HTTP endpoints of companion services, and file-system paths
for models, data and logs. Importing it creates a ``pymongo.MongoClient``
as a module-level side effect.
"""
import os

import pymongo

# Development mode: index 0 -> "fxb", index 1 -> "ksy".
# Change the index below to switch every mode-dependent setting at once.
dev_mode_list = ["fxb", "ksy"]
dev_mode = dev_mode_list[0]

# Establish the MongoDB connection for the selected development mode.
client_url = {
    "fxb": "mongodb://192.168.1.140:27017/",
    "ksy": "mongodb://127.0.0.1:27017/",
}[dev_mode]
myclient = pymongo.MongoClient(client_url)
mongo_info_db = myclient["ksy"]
mongo_coll = mongo_info_db['test_topic']

# MongoDB sentence-vector training flag.
sent_train_flag = 1

# Dictionary layout used for batch-processing data.
# NOTE(review): this is a module-level mutable; presumably callers should
# copy it before filling it in -- confirm against the call sites.
batch_processing_dict = {
    "id_list": [],
    "cont_clear_list": [],
    "cont_cut_list": [],
    "cut_idx_list": [0],
}

# Word/sentence vector dimension.
# NOTE(review): presumably matches the output size of the Sentence-BERT
# model referenced by ``sbert_path`` -- confirm.
vector_dim = 384

# HNSW distance metric; one of ('l2', 'cosine', 'ip'). Index 0 selects 'l2'.
hnsw_metric = ('l2', 'cosine', 'ip')[0]

# Maximum number of elements the HNSW index may hold.
num_elements = 1000000

# HNSW recall-size parameter (ef).
hnsw_set_ef = 150

# --- API endpoints -------------------------------------------------------

# Illustration duplicate check.
illustration_url = {
    "fxb": "http://192.168.1.204:8068/topic_retrieval_http",
    "ksy": "http://127.0.0.1:8068/topic_retrieval_http",
}[dev_mode]

# Image duplicate check.
image_url = {
    "fxb": "http://192.168.1.204:8068/img_retrieval_http",
    "ksy": "http://127.0.0.1:8068/img_retrieval_http",
}[dev_mode]

# HNSW model retrieval endpoint (same for every development mode).
hnsw_retrieve_url = r"http://127.0.0.1:8836/retrieve"

# Multi-dimensional classification endpoint (same for every development mode).
dim_classify_url = r"http://127.0.0.1:8837/dim_classify"

# --- File-system paths ---------------------------------------------------

# Root path.
# NOTE(review): this is the process working directory, not this file's
# directory, so every path below depends on where the service is launched.
root_path = os.getcwd()
data_root_path = os.path.join(root_path, "model_data")

# Sentence-BERT model path.
sbert_path = os.path.join(data_root_path, "all-MiniLM-L6-v2")

# BERT-whitening parameter path.
whitening_path = os.path.join(data_root_path, "whitening_param.pkl")

# Stop-words path.
stop_words_path = os.path.join(data_root_path, "stop_words.txt")

# SQLite database paths.
sqlite_path = os.path.join(data_root_path, "info_retrieval.db")
sqlite_copy_path = os.path.join(data_root_path, "info_retrieval_copy.db")

# HNSW model path.
# NOTE(review): unlike its neighbours this is a bare relative file name,
# not joined under ``data_root_path`` -- confirm this is intentional.
hnsw_path = "hnsw_model.bin"

# Formula-processing data paths.
bow_model_path = os.path.join(data_root_path, "bow_model.pkl")
bow_vector_path = os.path.join(data_root_path, "bow_vector.npy")
formula_data_path = os.path.join(data_root_path, "formula_data.json")

# Tokenizer path (a bare name rather than a path under ``data_root_path``).
bert_path = "bert-base-chinese"

# Multi-dimensional classification model paths.
solution_model_path = os.path.join(data_root_path, "solution_classify.pt")
difficulty_model_path = os.path.join(data_root_path, "difficulty_classify.pt")

# Log directory.
log_root_path = os.path.join(root_path, "logs")

# Duplicate-check (retrieval) log file.
retrieval_path = os.path.join(log_root_path, "retrieval_app.log")

# Log message format; filled in with ``id``, ``type`` and ``message`` fields.
log_msg = "id : {id} -> {type} -> {message}"