config.py

import os
import pymongo
# Development mode: 0-fxb, 1-ksy
dev_mode_list = ["fxb", "ksy"]
dev_mode = dev_mode_list[0]
# Set up the MongoDB connection
client_url = dict(fxb="mongodb://192.168.1.140:27017/", ksy="mongodb://127.0.0.1:27017/")[dev_mode]
myclient = pymongo.MongoClient(client_url)
mongo_info_db = myclient["ksy"]
mongo_coll = mongo_info_db["test_topic"]
# Flag: whether to train sentence vectors from the MongoDB data
sent_train_flag = 1
# Dictionary layout used for batch data processing
batch_processing_dict = {
    "id_list": [],
    "cont_clear_list": [],
    "cont_cut_list": [],
    "cut_idx_list": [0]
}
# Embedding vector dimension
vector_dim = 384
# HNSW distance metric ('l2', 'cosine', 'ip')
hnsw_metric = ("l2", "cosine", "ip")[0]
# Maximum number of elements in the HNSW index
num_elements = 1000000
# HNSW query-time ef parameter (controls recall)
hnsw_set_ef = 150
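# Illustrative note (assumption): the three parameters above follow hnswlib's
# standard index API; a typical sketch, if hnswlib is the backend used elsewhere:
#   index = hnswlib.Index(space=hnsw_metric, dim=vector_dim)
#   index.init_index(max_elements=num_elements)
#   index.set_ef(hnsw_set_ef)  # higher ef -> better recall, slower queries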
# API endpoints
# Illustration duplicate-check
illustration_url = dict(fxb="http://192.168.1.204:8068/topic_retrieval_http", ksy="http://127.0.0.1:8068/topic_retrieval_http")[dev_mode]
# Image duplicate-check
image_url = dict(fxb="http://192.168.1.204:8068/img_retrieval_http", ksy="http://127.0.0.1:8068/img_retrieval_http")[dev_mode]
# HNSW model retrieval endpoint
hnsw_retrieve_url = r"http://127.0.0.1:8836/retrieve"
# Multi-dimension (solution type / difficulty) classification endpoint
dim_classify_url = r"http://127.0.0.1:8837/dim_classify"
# Knowledge-point tagging endpoint
knowledge_tagging_url = r"http://127.0.0.1:8840/generate"
# Root paths
root_path = os.getcwd()
data_root_path = os.path.join(root_path, "model_data")
# Sentence-BERT model path
sbert_path = os.path.join(data_root_path, "all-MiniLM-L6-v2")
# # bert-whitening parameter path
# whitening_path = os.path.join(data_root_path, "whitening_param.pkl")
# Knowledge-point keyword mapping
keyword_mapping_path = os.path.join(data_root_path, dict(fxb="fxb_keyword_mapping.json", ksy="ksy_keyword_mapping.json")[dev_mode])
# Stop-words file path
stop_words_path = os.path.join(data_root_path, "stop_words.txt")
# SQLite database paths
sqlite_path = os.path.join(data_root_path, "info_retrieval.db")
sqlite_copy_path = os.path.join(data_root_path, "info_retrieval_copy.db")
# HNSW model path
hnsw_path = "hnsw_model.bin"
# Formula-processing data paths
bow_model_path = os.path.join(data_root_path, "bow_model.pkl")
bow_vector_path = os.path.join(data_root_path, "bow_vector.npy")
formula_data_path = os.path.join(data_root_path, "formula_data.json")
# Tokenizer path
bert_path = "bert-base-chinese"
# Multi-dimension classification model paths
solution_model_path = os.path.join(data_root_path, "solution_classify.pt")
difficulty_model_path = os.path.join(data_root_path, "difficulty_classify.pt")
# Log directory
log_root_path = os.path.join(root_path, "logs")
# Duplicate-check (retrieval) log path
retrieval_path = os.path.join(log_root_path, "retrieval_app.log")
# Log message format
log_msg = "id : {id} -> {type} -> {message}"
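
# Minimal sanity check when this module is run directly (illustrative only;
# nothing below is required by the services that import this config).
if __name__ == "__main__":
    print("dev_mode:", dev_mode)
    print("mongo url:", client_url)
    print("sbert_path:", sbert_path)
    print("hnsw params:", hnsw_metric, vector_dim, num_elements, hnsw_set_ef)
    # Example of the shared log-message format:
    print(log_msg.format(id=0, type="retrieval", message="config loaded"))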