#!/usr/bin/env python
# -*- coding:utf-8 -*-
# my_config.py
import datetime
import json
import logging
import logging.handlers
import multiprocessing
import os
import re
import sys
import threading
import time

import hanlp
import jieba
import requests
  13. jieba.lcut("load_jieba")
  14. class myLog(object):
  15. """
  16. 封装后的logging
  17. """
  18. def __init__(self, logger=None, log_cate='my_log'):
  19. """
  20. 指定保存日志的文件路径,日志级别,以及调用文件
  21. 将日志存入到指定的文件中
  22. :param logger:
  23. :param log_cate: 日志名
  24. """
  25. # 创建一个logger
  26. self.logger = logging.getLogger(logger)
  27. self.logger.setLevel(logging.INFO) # DEBUG
  28. # 创建一个handler,用于写入日志文件
  29. self.log_name = os.path.join(parse_log_dir, '{}.log'.format(log_cate)) # 日志地址
  30. # if os.path.exists(self.log_name): # 设置日志定长自动新建
  31. # logsize = os.path.getsize(self.log_name)
  32. # if logsize > 110000000: # 110M
  33. # os.rename(self.log_name, os.path.join(parse_log_dir, '{}_{}.log'.format(log_cate, int(time.time()))))
  34. # # 有6个日志时,开始删除最早的一份
  35. # logfiles = [int(re.search("e2cc_log_(\d+)\.log", i).group(1)) for i in os.listdir(parse_log_dir)
  36. # if re.search("e2cc_log_\d+\.log", i)]
  37. # if len(logfiles) > 5:
  38. # os.remove(os.path.join(parse_log_dir, "e2cc_log_{}.log".format(min(logfiles))))
  39. fh = logging.handlers.RotatingFileHandler(self.log_name, maxBytes=120000000, backupCount=5,
  40. mode='a', encoding='utf-8', delay=True)
  41. # temp.log设置删除条件
  42. temp_log = os.path.join(parse_log_dir, 'temp.log')
  43. if os.path.exists(temp_log):
  44. logsize = os.path.getsize(temp_log)
  45. if logsize > 110000000: # 110M
  46. os.remove(temp_log)
  47. # fh = logging.FileHandler(self.log_name, mode='a', encoding='utf-8', delay=True)
  48. fh.setLevel(logging.INFO)
  49. # 定义handler的输出格式pip
  50. formatter_dict = {
  51. "host-ip": "{}".format(external_ip),
  52. "log-msg": "%(message)s",
  53. "other-msg": "%(filename)s-%(lineno)s-%(levelname)s-%(asctime)s"
  54. }
  55. formatter = logging.Formatter(json.dumps(formatter_dict, ensure_ascii=False))
  56. # 第二种格式使用方法
  57. # formatter_dict = '{"host-ip": ' + '"{}"'.format(external_ip) \
  58. # + ', "log-msg": "%(message)s", '\
  59. # '"other-msg": "%(filename)s-%(lineno)s-%(asctime)s"}'
  60. # formatter = logging.Formatter(formatter_dict)
  61. fh.setFormatter(formatter)
  62. self.logger.addHandler(fh) # 给logger添加handler
  63. # 添加下面一句,在记录日志之后移除句柄
  64. # self.logger.removeHandler(ch)
  65. # self.logger.removeHandler(fh)
  66. # 关闭打开的文件
  67. fh.close()
  68. def getlog(self):
  69. return self.logger
  70. class simpLog(object):
  71. def __init__(self, logger=None, log_cate='simp_log'):
  72. """
  73. 指定保存日志的文件路径,日志级别,以及调用文件
  74. 将日志存入到指定的文件中
  75. :param logger:
  76. :param log_cate: 日志名
  77. """
  78. # 创建一个logger
  79. self.logger = logging.getLogger(logger)
  80. self.logger.setLevel(logging.INFO) # DEBUG
  81. # 创建一个handler,用于写入日志文件
  82. self.log_name = os.path.join(parse_log_dir, '{}.log'.format(log_cate)) # 日志地址
  83. if os.path.exists(self.log_name): # 设置日志定长自动新建
  84. logsize = os.path.getsize(self.log_name)
  85. if logsize > 20000000: # 20M
  86. os.rename(self.log_name, os.path.join(parse_log_dir, '{}_{}.log'.format(log_cate,
  87. datetime.datetime.now().strftime('%m_%d'))))
  88. fh = logging.FileHandler(self.log_name, mode='a', encoding='utf-8', delay=True)
  89. fh.setLevel(logging.INFO)
  90. # 定义handler的输出格式
  91. # formatter_dict = {
  92. # "ch2en": "%(message)s",
  93. # }
  94. # formatter = logging.Formatter(json.dumps(formatter_dict, ensure_ascii=False))
  95. formatter = logging.Formatter("%(message)s")
  96. fh.setFormatter(formatter)
  97. self.logger.addHandler(fh) # 给logger添加handler
  98. # 添加下面一句,在记录日志之后移除句柄
  99. # self.logger.removeHandler(ch)
  100. # self.logger.removeHandler(fh)
  101. # 关闭打开的文件
  102. fh.close()
  103. def getlog(self):
  104. return self.logger
  105. # (重写)MyThread.py线程类,使其能够返回值
  106. class MyProcess(multiprocessing.Process):
  107. def __init__(self, func, args=(), kwargs=None):
  108. super(MyProcess, self).__init__()
  109. self.func = func
  110. self.args = args
  111. self.kwargs = kwargs
  112. # 重写后的run()方法不再执行以前的run()方法了
  113. # 注意:即使加了return也不会返回值,如return self.func(*self.args)
  114. def run(self):
  115. if self.kwargs:
  116. self.result = self.func(self.kwargs["arg1"], self.kwargs["arg2"], self.kwargs["arg3"])
  117. else:
  118. self.result = self.func(*self.args)
  119. def get_result(self):
  120. # return self.result
  121. # 必须等待线程执行完毕,如果线程还未执行完毕就去获取result是没有结果的
  122. multiprocessing.Process.join(self)
  123. try:
  124. return self.result
  125. except Exception:
  126. return None
  127. # 获取百度云access_token
  128. class BaiduApi:
  129. def __init__(self):
  130. self.host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=OuO8mVZNIkDCP9eDQgF8txER&client_secret=2Wv28a9WBDKvkoXvcHMINNWZt8QdOzZg'
  131. self.expires_in = 0
  132. self.access_token = ""
  133. def update_access_token(self):
  134. """
  135. 判断是否需要更新access_token,默认 {expires_in * 0.5} 每15天更新一次
  136. 默认 expires_in = 2592000为30天,一天为86400
  137. :return:
  138. """
  139. try:
  140. print(self.host)
  141. response = requests.get(self.host)
  142. if response.status_code == 200:
  143. result = response.json()
  144. # print(result)
  145. self.access_token = result["access_token"]
  146. self.expires_in = int(result["expires_in"])
  147. print(f"开启access_token: {self.access_token} expires_in: {self.expires_in}")
  148. else:
  149. print("获取Access Token失败")
  150. self.access_token = ""
  151. except Exception as ex:
  152. print("获取Access Token异常", ex)
  153. self.access_token = ""
  154. return self.access_token, self.expires_in
  155. #################################################################
  156. class LocalCfg: # testing
  157. internal_ip = '192.168.1.65' # internal
  158. external_ip = '192.168.1.65' # external
  159. server_port = 10116
  160. public_bucket = 'zxhx-1302712961' # 桶名称
  161. region = "ap-shanghai" # 存储桶地域
  162. public_bucket_addr = 'zxhx-1302712961.cos.ap-shanghai.myqcloud.com'
  163. PyTorch_REST_API_URL = "http://192.168.1.209:7015/{0}/{1}"
  164. # parse_log_dir = "G:/zwj/WL/en2cn/logs"
  165. # client = pymongo.MongoClient(host="192.168.1.140", port=27017)
  166. # mycol = client["hfs_math"]["e2cc"]
  167. correct_mod = "book"
  168. class TestingCfg: # testing
  169. internal_ip = '192.168.1.192' # internal
  170. external_ip = '192.168.1.192' # external
  171. server_port = 10116
  172. public_bucket = 'zxhx-1302712961' # 桶名称
  173. region = "ap-shanghai" # 存储桶地域
  174. public_bucket_addr = 'zxhx-1302712961.cos.ap-shanghai.myqcloud.com'
  175. PyTorch_REST_API_URL = "http://192.168.1.209:7015/{0}/{1}"
  176. # parse_log_dir = "G:/zwj/WL/en2cn/logs"
  177. # client = pymongo.MongoClient(host="192.168.1.140", port=27017)
  178. # mycol = client["hfs_math"]["e2cc"]
  179. correct_mod = "general"
  180. class ProductionCfg: # production
  181. internal_ip = '0.0.0.0' # internal
  182. external_ip = '49.232.72.198' if sys.argv[-1] == '198' else '82.156.64.176' # external
  183. server_port = 10116
  184. public_bucket = 'zxhx-pro-1302712961' # 桶名称
  185. region = "ap-beijing" # 存储桶地域
  186. public_bucket_addr = 'zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com' # 桶地址
  187. PyTorch_REST_API_URL = "http://api.cv.zxhx.com/{0}/{1}"
  188. # parse_log_dir = "E:/en2cn/logs"
  189. # mongo_ip = ""
  190. # mongo_port = ""
  191. correct_mod = "general"
  192. config_class = LocalCfg # 没有参数时,默认按测试环境
  193. if len(sys.argv) > 1:
  194. print(sys.argv, sys.argv[0])
  195. if sys.argv[1] == 'test':
  196. config_class = TestingCfg
  197. elif sys.argv[1] == 'product':
  198. config_class = ProductionCfg
  199. else:
  200. print('cmd should be: python server.py test')
  201. print('or: python server.py product')
  202. raise ValueError("命令不正确")
  203. server_ip = config_class.internal_ip
  204. external_ip = config_class.external_ip
  205. server_port = config_class.server_port
  206. correct_mod = config_class.correct_mod
  207. # mycol = config_class.mycol
  208. region = config_class.region
  209. public_bucket = config_class.public_bucket
  210. PyTorch_REST_API_URL = config_class.PyTorch_REST_API_URL
  211. dirpath = os.getcwd()
  212. print("dirpath:", dirpath)
  213. parse_log_dir = os.path.join(dirpath, "logs")
  214. if not os.path.isdir(parse_log_dir):
  215. os.makedirs(parse_log_dir)
  216. LANG_EMB_MODEL = {
  217. "eng": os.path.join(dirpath, "Bert_Base/bert-base-uncased"),
  218. "cn": os.path.join(dirpath, "Bert_Base/bert-base-chinese"),
  219. "all": os.path.join(dirpath, "Bert_Base/all-MiniLM-L6-v2")
  220. }
  221. if config_class == ProductionCfg:
  222. LANG_EMB_MODEL["all"] = "/home/zhengwenjuan/Models/all-MiniLM-L6-v2"
  223. secret_id = "AKIDC9pETRbZfWBbmhoglkT4PUJGzRjmj3Ia" # "云 API 密钥 SecretId";
  224. secret_key = "C6jlX4LKfleGdmfQvGNgj74lESRpBIEJ" # "云 API 密钥 SecretKey";
  225. # APPID = '1302712961'
  226. # TIMEOUT = 30
  227. token = None # 使用临时密钥需要传入Token,默认为空,可不填
  228. path_phrase_yhk = os.path.join(dirpath, "files/main/en-ch_phrase_dict_yhk.json")
  229. path_txt = os.path.join(dirpath, "files/main/en-ch_dict_from_txt.json")
  230. path_word_yhk = os.path.join(dirpath, "files/main/en-ch_dict_from_3_website.json")
  231. path_word_bd = os.path.join(dirpath, "files/main/en-ch_dict_bd.json")
  232. phrase_dict_yhk = json.loads(open(path_phrase_yhk, encoding="utf8").read())
  233. word_dict_from_txt = json.loads(open(path_txt, encoding="utf8").read())
  234. word_dict_yhk = json.loads(open(path_word_yhk, encoding="utf8").read())
  235. word_dict_bd = json.loads(open(path_word_bd, encoding="utf8").read())
  236. path1 = os.path.join(dirpath, "files/main/dict_ch2en_merge.json")
  237. dict_ch2en = json.loads(open(path1, encoding="utf8").read())
  238. # # hanlp相关模型
  239. sts = hanlp.load(hanlp.pretrained.sts.STS_ELECTRA_BASE_ZH)
  240. pos = hanlp.load(hanlp.pretrained.pos.PKU_POS_ELECTRA_SMALL)
  241. dict_tags = {'明白': ["v", "n"], "意识": "n"}
  242. pos.dict_tags = dict_tags
  243. # 获取百度云access_token
  244. # bce_access_token, bce_expires_in = BaiduApi().update_access_token()
  245. # last_token_update_time = time.time()
  246. # print("开启时间:", last_token_update_time)