config.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. import os
  2. import time
  3. import json
  4. import logging
  5. import pymongo
  6. # 建立mongodb连接
  7. # myclient = pymongo.MongoClient("mongodb://192.168.1.140:27017/")
  8. myclient = pymongo.MongoClient(
  9. # host="49.232.97.180",
  10. host="10.19.1.2",
  11. port=8888,
  12. username="root",
  13. password="oyiqd!oy@wxc=ykw@2*jei!")
  14. mongo_info_db = myclient["math_tk"]
  15. mongo_coll_cloud = mongo_info_db['cloud_topic']
  16. mongo_coll_school = mongo_info_db['school_topic']
  17. # mongodb题库名称
  18. mongo_coll_list = [mongo_coll_cloud, mongo_coll_school]
  19. # 建立mysql连接
  20. # mysqldb = {
  21. # "host": '192.168.1.232',
  22. # "user": 'mysql_dev',
  23. # "password": 'mydb_3307',
  24. # "db": 'zsy_tk',
  25. # "charset": 'utf8mb4'
  26. # }
  27. mysqldb = {
  28. # "host": '10.19.63.195',
  29. "host": '172.16.5.14',
  30. "user": 'zsy',
  31. "password": 'Hbt3sZNxepnZQNPU',
  32. "db": 'zsy_tk',
  33. "charset": 'utf8mb4'
  34. }
  35. # 图片公式识别链接
  36. # formula_url = r"http://192.168.1.208:8000/segment/formula/"
  37. formula_url = r"http://10.19.1.11:7080/segment/formula/"
  38. # mongodb句向量训练标志
  39. sent_train_flag = 1
  40. # 批量处理数据字典格式
  41. batch_processing_dict = {
  42. "topic_id_list": [],
  43. "cont_clear_list": [],
  44. "cont_cut_list": [],
  45. "cut_idx_list": [0]
  46. }
  47. # 云题库和校本题库阈值
  48. database_threshold = [[0.94, 0.8], [0.92, 0.72]]
  49. # hnsw模型数量
  50. hnsw_num = 2
  51. # 词向量维度
  52. vector_dim = 384
  53. # hnsw评价指标('l2','cosine','ip')
  54. hnsw_metric = ('l2','cosine','ip')[0]
  55. # hnsw最大索引数量
  56. num_elements = 5000000
  57. # hnsw召回数量参数
  58. hnsw_set_ef = 150
  59. # hnsw模型更新和检索链接
  60. hnsw_update_url = r"http://localhost:8858/update"
  61. hnsw_retrieve_url = r"http://localhost:8858/retrieve"
  62. # 根地址
  63. root_path = os.getcwd()
  64. data_root_path = os.path.join(root_path, "model_data")
  65. # Sentence_BERT模型地址
  66. sbert_path = os.path.join(data_root_path, "all-MiniLM-L6-v2")
  67. # 获取hnsw模型地址
  68. hnsw_path_list = ["hnsw_cloud.bin", "hnsw_school.bin"]
  69. # hnsw待更新保存数据文件地址
  70. hnsw_update_save_path = os.path.join(root_path, "hnsw_update_data.txt")
  71. # 日志地址
  72. log_root_path = os.path.join(root_path, "logs")
  73. # 查重日志地址
  74. math_dup_path = os.path.join(log_root_path, "math_dup_app.log")
  75. # 关联和自查重日志地址
  76. rlt_ddc_path = os.path.join(log_root_path, "rlt_ddc_app.log")
  77. # 日志信息(message)格式
  78. log_msg = "id : {id} -> {type} -> {message}"
  79. # 封装logging
  80. class LogConfig():
  81. def __init__(self, log_path, logger_name):
  82. '''
  83. 指定保存日志的文件路径,日志级别,以及调用文件
  84. 将日志存入到指定的文件中
  85. '''
  86. # 创建logger对象
  87. self.logger = logging.getLogger(logger_name)
  88. self.log_path = log_path
  89. # 设置日志等级
  90. self.logger.setLevel(logging.INFO) # DEBUG
  91. # 追加写入文件a ,设置utf-8编码防止中文写入乱码
  92. fh = logging.FileHandler(self.log_path, mode='a', encoding='utf8', delay=True)
  93. # 向文件输出的日志级别
  94. fh.setLevel(logging.INFO)
  95. # 向文件输出的日志信息格式
  96. formatter_dict = {
  97. "sys-msg": "%(asctime)s-%(filename)s-%(lineno)s-%(levelname)s",
  98. "log-msg": "%(message)s",
  99. }
  100. formatter = logging.Formatter(json.dumps(formatter_dict))
  101. # 将日志信息格式加载到日志文件中
  102. fh.setFormatter(formatter)
  103. # 加载文件到logger对象中
  104. self.logger.addHandler(fh)
  105. # 重置刷新日志
  106. def log_reset(self):
  107. log_reset_path = (math_dup_path, rlt_ddc_path)
  108. for log_idx, lr_path in enumerate(log_reset_path):
  109. if os.path.exists(lr_path) is False:
  110. continue
  111. # 设置日志定长自动新建
  112. logsize = os.path.getsize(lr_path)
  113. if log_idx > 0 and logsize <= 100*1024*1024: # 100M
  114. continue
  115. os.rename(lr_path, lr_path.split('.')[0]+'_'+\
  116. str(time.strftime('%Y_%m%d_%H%M', time.localtime()))+'.log')
  117. # 获取生成日志读写操作并删除多余日志
  118. def get_log(self):
  119. self.del_log()
  120. return self.logger
  121. # 删除长期未处理日志
  122. def del_log(self):
  123. file_list = os.listdir(log_root_path)
  124. file_path_list = [os.path.join(log_root_path, file) for file in file_list]
  125. file_path_list = [file_path for file_path in file_path_list
  126. if self.log_path.split('.')[0] in file_path.split('.')[0]
  127. and self.log_path != file_path]
  128. file_path_list = [(file_path, os.path.getctime(file_path))
  129. for file_path in file_path_list]
  130. # 根据创建时间排序获取需要删除文件路径名称
  131. file_sort_list = sorted(file_path_list, key=lambda x: x[1], reverse=True)[3:]
  132. if len(file_sort_list) > 0:
  133. for ele in file_sort_list:
  134. os.remove(ele[0])