import os
import time
import json
import logging
import pymongo

# NOTE(review): database hosts and credentials are hard-coded below; consider
# loading them from the environment or a secret store instead of source code.

# Establish the MongoDB connection.
# myclient = pymongo.MongoClient("mongodb://192.168.1.140:27017/")
myclient = pymongo.MongoClient(
    # host="49.232.97.180",
    host="10.19.1.2",
    port=8888,
    username="root",
    password="oyiqd!oy@wxc=ykw@2*jei!")
mongo_info_db = myclient["math_tk"]
mongo_coll_cloud = mongo_info_db['cloud_topic']
mongo_coll_school = mongo_info_db['school_topic']
# MongoDB question-bank collections (cloud first, school second).
mongo_coll_list = [mongo_coll_cloud, mongo_coll_school]

# MySQL connection settings.
# mysqldb = {
#     "host": '192.168.1.232',
#     "user": 'mysql_dev',
#     "password": 'mydb_3307',
#     "db": 'zsy_tk',
#     "charset": 'utf8mb4'
# }
mysqldb = {
    # "host": '10.19.63.195',
    "host": '172.16.5.14',
    "user": 'zsy',
    "password": 'Hbt3sZNxepnZQNPU',
    "db": 'zsy_tk',
    "charset": 'utf8mb4'
}

# URL of the image formula-recognition service.
# formula_url = r"http://192.168.1.208:8000/segment/formula/"
formula_url = r"http://10.19.1.11:7080/segment/formula/"

# Flag for MongoDB sentence-vector training.
sent_train_flag = 1

# Template dictionary used for batch processing.
batch_processing_dict = {
    "topic_id_list": [],
    "cont_clear_list": [],
    "cont_cut_list": [],
    "cut_idx_list": [0]
}

# Thresholds for the cloud question bank and the school question bank.
database_threshold = [[0.94, 0.8], [0.92, 0.72]]
# Number of hnsw models.
hnsw_num = 2
# Word-vector dimension.
vector_dim = 384
# hnsw metric, one of ('l2', 'cosine', 'ip').
hnsw_metric = ('l2', 'cosine', 'ip')[0]
# Maximum number of elements in an hnsw index.
num_elements = 5000000
# hnsw recall-size parameter (ef).
hnsw_set_ef = 150
# URLs for updating and querying the hnsw model.
hnsw_update_url = r"http://localhost:8858/update"
hnsw_retrieve_url = r"http://localhost:8858/retrieve"

# Root paths (relative to the current working directory).
root_path = os.getcwd()
data_root_path = os.path.join(root_path, "model_data")
# Sentence-BERT model path.
sbert_path = os.path.join(data_root_path, "all-MiniLM-L6-v2")
# hnsw model file names.
hnsw_path_list = ["hnsw_cloud.bin", "hnsw_school.bin"]
# File that stores pending hnsw update data.
hnsw_update_save_path = os.path.join(root_path, "hnsw_update_data.txt")
# Log directory.
log_root_path = os.path.join(root_path, "logs")
# Log file for duplicate checking.
math_dup_path = os.path.join(log_root_path, "math_dup_app.log")
# Log file for association and self-duplicate checking.
rlt_ddc_path = os.path.join(log_root_path, "rlt_ddc_app.log")
# Format of the log message body.
log_msg = "id : {id} -> {type} -> {message}"


class LogConfig():
    """Thin wrapper around ``logging`` that writes INFO records to a file.

    Each instance binds a named logger to one log file and offers helpers to
    rotate the module's log files and to prune old rotated files.
    """

    def __init__(self, log_path: str, logger_name: str) -> None:
        """Configure the named logger to append to ``log_path``.

        Args:
            log_path: Path of the log file to write to.
            logger_name: Name passed to ``logging.getLogger``.
        """
        self.logger = logging.getLogger(logger_name)
        self.log_path = log_path
        self.logger.setLevel(logging.INFO)  # DEBUG

        # The handler opens the file lazily (delay=True); make sure its
        # directory exists so the first emit does not fail.
        target_path = os.path.abspath(log_path)
        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        # logging.getLogger returns a shared object per name, so attaching a
        # handler on every instantiation would duplicate each log record.
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == target_path
            for handler in self.logger.handlers
        )
        if already_attached:
            return

        # Append mode; utf8 keeps non-ASCII text readable in the file.
        fh = logging.FileHandler(self.log_path, mode='a', encoding='utf8',
                                 delay=True)
        fh.setLevel(logging.INFO)
        # Each record is rendered through a JSON-shaped format string.
        formatter_dict = {
            "sys-msg": "%(asctime)s-%(filename)s-%(lineno)s-%(levelname)s",
            "log-msg": "%(message)s",
        }
        fh.setFormatter(logging.Formatter(json.dumps(formatter_dict)))
        self.logger.addHandler(fh)

    def log_reset(self) -> None:
        """Rotate the module's log files by renaming them with a timestamp.

        The first file (``math_dup_path``) is rotated whenever it exists; the
        remaining ones are rotated only once they exceed 100 MiB.
        """
        size_limit = 100 * 1024 * 1024  # 100M
        for log_idx, lr_path in enumerate((math_dup_path, rlt_ddc_path)):
            if not os.path.exists(lr_path):
                continue
            if log_idx > 0 and os.path.getsize(lr_path) <= size_limit:
                continue
            # splitext only strips the final extension, so dots elsewhere in
            # the path (e.g. in a directory name) do not truncate the stem.
            stem = os.path.splitext(lr_path)[0]
            stamp = time.strftime('%Y_%m%d_%H%M', time.localtime())
            os.rename(lr_path, stem + '_' + stamp + '.log')

    def get_log(self) -> logging.Logger:
        """Prune old rotated logs, then return the configured logger."""
        self.del_log()
        return self.logger

    def del_log(self) -> None:
        """Delete all but the three most recent rotated files for this log.

        Candidates are files in ``log_root_path`` whose extension-less path
        contains this log's extension-less path, excluding the active log
        file itself. Recency is judged by ``os.path.getctime``.
        """
        if not os.path.isdir(log_root_path):
            # Nothing has been logged yet, so there is nothing to prune.
            return

        own_stem = os.path.splitext(self.log_path)[0]
        candidates = []
        for file_name in os.listdir(log_root_path):
            file_path = os.path.join(log_root_path, file_name)
            if file_path == self.log_path:
                continue
            if own_stem in os.path.splitext(file_path)[0]:
                candidates.append((file_path, os.path.getctime(file_path)))

        # Newest first; everything after the first three is removed.
        candidates.sort(key=lambda item: item[1], reverse=True)
        for stale_path, _ in candidates[3:]:
            os.remove(stale_path)