translator.py 31 KB


#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Online translation helpers.
"""
import hashlib
import http.client
import json
import random
import re
import urllib
import urllib.parse

import requests
  13. def trans_baidu(str_text, mod="en2cn"):
  14. """
  15. 2022.8月底开始收费了
  16. 输入要翻译的文本(单词或句子或文章,但是<6000bytes)
  17. :param str_text:
  18. :return:
  19. """
  20. appid = '20191115000357320' # 填写你的appid
  21. secretKey = 'ovn8spgUH6GQ_GhF7V6u' # 填写你的密钥
  22. httpClient = None
  23. myurl = '/api/trans/vip/translate'
  24. fromLang = 'en' # 原文语种
  25. toLang = 'zh' # 译文语种
  26. salt = random.randint(32768, 65536)
  27. q = str_text
  28. sign = appid + q + str(salt) + secretKey
  29. sign = hashlib.md5(sign.encode()).hexdigest()
  30. myurl = myurl + '?appid=' + appid + '&q=' + urllib.parse.quote(q) + '&from=' + fromLang + '&to=' + toLang + '&salt=' + str(
  31. salt) + '&sign=' + sign
  32. try:
  33. httpClient = http.client.HTTPConnection('api.fanyi.baidu.com', timeout=5)
  34. httpClient.request('GET', myurl)
  35. # response是HTTPResponse对象
  36. response = httpClient.getresponse()
  37. result_all = response.read().decode("utf-8")
  38. result = json.loads(result_all).get("trans_result")[0]["dst"]
  39. # print(result)
  40. return result
  41. except Exception as e:
  42. print(e)
  43. return ""
  44. finally:
  45. if httpClient:
  46. httpClient.close()
  47. def googleTranslate(text):
  48. headers = {
  49. 'origin': "https://translate.google.cn",
  50. 'referer': "https://translate.google.cn/",
  51. 'sec-fetch-dest': "empty",
  52. 'sec-fetch-mode': "cors",
  53. 'sec-fetch-site': "same-origin",
  54. 'x-same-domain': "1",
  55. # 'cookie': "NID=511=mJLHCmmZj7H4zcU4YUucXYSlaD67X5HEt4VVVm9Q04ZNr82Hoei_fig0IPjcnpmteHRktufKmckQtyTe6w3GdT0Uk8xTG8F3-ymMn1u6xNiQp4EGlTtFrmXYbCqGw-RJWtM3eQHBGJhxSSxGFCMEZwDuhRAM0bUy1uN2EKdftAI; _ga=GA1.3.126656540.1654615557; OTZ=6538526_24_24__24_; _gid=GA1.3.490013375.1656158367",
  56. # 'cookie': "NID=511=mJLHCmmZj7H4zcU4YUucXYSlaD67X5HEt4VVVm9Q04ZNr82Hoei_fig0IPjcnpmteHRktufKmckQtyTe6w3GdT0Uk8xTG8F3-ymMn1u6xNiQp4EGlTtFrmXYbCqGw-RJWtM3eQHBGJhxSSxGFCMEZwDuhRAM0bUy1uN2EKdftAI; _ga=GA1.3.126656540.1654615557; OTZ=6538526_24_24__24_; _gid=GA1.3.490013375.1656158367",
  57. # 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37",
  58. # 'user-agent': "'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36",
  59. # 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37",
  60. 'user-agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
  61. 'accept-encoding': "gzip, deflate, br",
  62. 'content-type': "application/x-www-form-urlencoded;charset=UTF-8",
  63. # 'x-client-data': 'CIW2yQEIpbbJAQjEtskBCKmdygEIrMfKAQj2x8oBCPfHygEItMvKAQihz8oBCNzVygEIi5nLAQjBnMsB',
  64. # 'Decoded': 'message ClientVariations {repeated int32 variation_id = [3300101, 3300133, 3300164, 3313321, 3318700, 3318774, 3318775, 3319220, 3319713, 3320540, 3329163, 3329601];}',
  65. # 'x-goog-batchexecute-bgr':'[";qrS4tPnQAAYvqchvD6lftQckgqu-IikmACkAIwj8Rl2XS1QudgthqdZU0SqH0vq1-Bet3w6a53VSL12iqjefO-AFZh8AAAD0TwAAAAZ1AQcXAM87g0YlzGEpU91FDlXisD9zIJjkeYnnWCmMotddHWrZBkIpzy46jxKdAuo3aVXneE15f0O10ZpS9nTBSJdcScQqzGcI93uNpDbDtSbwr8Xz67U4XIz5FW3C9I5y7bOK5pX3nMy3dad3_gdoicKNjO1U7ey1NqdWArjc5PE6uCn3YnoiN-YUhlwK-j702Vm8nz-6B4oKWr-e3NSr2nkrHLBU4WZ9AmIg4CpGGGfk8Ri2aTLchvslaLcG0gs3p1LqCVGrFMmCDR5oG7KKjZStPtiEAkYJaGGP8_RMLYiPzFieCluXD2UXjFr-l8wWYcE65veCnnk_m7F4LZwnbl9BLPcnaHzkvNYTP9ATsK-N8-_YZQoNRnZ6nAuuO6y5GnHRtdluT_7kcN8VNiARohU-Ml0FPK2KWhO0pwtaaYVlAREQLhliaB9QSRjsZ3px1ehUoYiHhXKb5X70ohOC4Slkt7XcbHziir_qaOZpf73UAucLEE_PEKkTWZ5ZbssiwmPpYsuZI7Blw92neAb2sgbOsltR3iO6We3_vBpZd7hFZMFBrTrD_VZKqAWntKZXCmhG5YIcCR8Td4I35BG_YugQ60Gh_tVzN877HHARJXxEcHM743G7GsqZAxbqqNu_Rj5q9VPbCR_nDD24Me-PMzyTKUUGR3I970nngEu6vLFl-0tEb7wJLzm2K_LwzdkJ2ZHxJlNI-YsNB5IwczOMSJwNwPqjzKy2BxRzSOHuDr2zEnCW-p9G4HLP84fVo9kJQdmTN7mt54oMuqgr7jpaVDNo-8tMFVNnY2Q1HoO2xG8MvKMVrOcqqdnKabsTlcCSHVkL8kOGSQY_m_5MUs2o_XRd5gGhZ57ounYDD6GOWJOH_8QCZMHdwgFt77aD4wN-5YX5WKLTVwBFY3lyr1I2TrlzJxDJJDnypdxsQBc3hheLtQQT4b0ESR7EL_2QxwAcQXSf7ZS93a9MkTaHilyhFu7vZCcAPTLt6EhWauUp-c0jB7RPv197GlKwszoW4Cxc7j6tudGTa5LL8L2zl3loLp5D3c2Oo9xORYYof0A",null,null,45,null,null,null,0,"2"]'
  66. # 'x-goog-batchexecute-bgr': '[";9-m46aTQAAYvqchvD6lfvixoEmPOG-YmACkAIwj8RmVE4Uk-wgpjt980C0vAZ5LBbEfkpJEt4ex2_2NMH89REoPTQx8AAAIqTwAAABh1AQcXANB82_W4qupq8ZfSWi61a126SF6X1py4DkFHHFKqueP-WTlQE6Z0E8C-Uw4o33saxgtvueS4jE2ohXNyZfkhqLj7oNw7BTvQOFyb1z2Wiie6BtVIqgCFjUzAuMM0_mvAJHY6VMVkvFTC8-O9oEzfWh_Axeq8xo-lMVUnjVkgMRBDDBGvrpyY1qbovJNH1NOkvIDy_rljBzS0RPi_37sa8sUFQ2OwFt1kmu6JS1YE-NbTSJpcZKuQKxP9EA4vqXN5l_m4T4xjrrqcrKlvwefRRXAKhAJQRBbT-YgNfCkOzgpGH5qVvEPKk7Ieax5SCs8_bE7ZWgB8vRxMfCQ8a_gd6U4hVQC_z5ZJ7AZOiSHJNAeAvv7RUZnXbILpOUV4YZr3jUt384lXzXuDeYiXR3XZHxoKi7F72FOcVrJsSnNETVS9mEkzSEnC5espGRlBFy0OaCxhjLgXZ07sHvDh4os82DDbDVPKBljrGCES1sYC1YwlHEAK0UJvz1PjM-DYiXxXM4Wnj5t3PL1g3Blgrb2jFHfOhLHq63AIo2HdWqawTggJpkpDH0fFBgQPc5rOPLK-6t7qTfR4v00kae6Qk5LHovqOoDuY87UUGbnUvzL3U1U7E2rhGsaTAt-CxMIx6NN_0LJdfsX_17oj_1d6ImRseI6xrFgwZDtoO5Wn2JsL77yBbZCZP3PNos0FhINR2o8qAVjEKDTb6Ymg7-T-fVB3iBq-AngHpCL2d6KwSgrEzdo4IdBzzWjBqU3JwxF-gzBm7b_FBy_knJbkXsHrdl9aFdMiPqCNBHbMVZnYH0i6ny2cLduqq3Pd-KB7CHwJixyF1vz-583xbpYcs2i5al68ScMhwuLqikoSAjGbGbxyjZEWl0n2FapLDLSs4RtWIYFnlrIUh5PKgLC6qPaa_iW6dWdQJoyyEvpUr90LsduNyzTlW2menACrUW2w6GMBkQ1SVhAOptbxLWyNggCLGT9Kyg5aeXS_NAKn7Y3y10eqILu_aGMOnGBizeiO2_brmmgtYPG6AeeT5X1RF08ECbZhSfAOTPiQvgQ5VaO-ybUMVq8EanCb9Q",null,null,1047,103,null,null,0,"2"]',
  67. 'x-goog-batchexecute-bgr': '[";x9m42ZTQAAYEzRk0JM1f55ylXij7ksAmACkAIwj8RvxaMzZ1iOvebhAxZVDBGfAZwvR3YpljVZMeO6IUNBDwy5wDMh8AAABSTwAAAAJ1AQcXAIlC1pgskV32-yOzV6FmYVKrbi30KaxqJklCs1zPpGHX_hLbfLToBJPYoWuAZ_EDLf25YVbCAKmrwVah78G7syapqN1EYiNKZfShvZp60vbhcqeJWm7zwznMjhKjfdKuTq5Rq41JelMT_EQVGypnsHBqTderknEjPtt6TmIMIJjWRfjZsUVvRnde3oQCTLCuZUBVv3eTx3oT7W-EyIImn9XZEs8AKxQKK2vZRRc6VkveQMHi5DqyNT8YgqSntvAGOLImK402fdeua07F-nXkiiULeaJJ-7vjCMM-3jJNgEO6Vw6UJ6U74c_esPqwbtehb8dAO1uv_Lv9WJDX0KnHNkSRrsdD1z0lqxCQBZOxUpDGLc4Cvz_1FmWBU3Hy9RXuT_NW149LFR5JG6TgRQchengsqAUaC48H1vejyaG9Q6MDJdbWdnQxFtcNjIJDM7fOvVlsHzcNcCW11uP4AA55e28eCrbIi3ZeVRSRpWhWfRvoWg8Yzp1dIwnDS6vlnjr1UwZ6wLTfqF5ho0kSe1qcwza73nSZvi73NeyeaSkbsnO25bi4Jzp5sronwfrOqQALcgYWKX-krf6j-ug17IQm278z-sP4dJy3s4bZyWndiF5wO9tlJ7vZ_GRDiUVAZUNho_9WGi9oB89WvzjzqDMxvlx1WV_SonpzVaX9WZX0oIoDEs0FqRW165sinSZGAk63gejcPbH9ADj1xYQpP6YrS_iYNpabpSR544jwXT0V-f8bGf-bOBMViidcwgW0UloFN8i9v7ppuOxXbrp83H7Q8Lh5Lw0YCWijHaZ_JC3w-FZOa_j66ujYsxCF8t5cVB_O2Ttnks1eRYutJwg7a7nKvhDv24N_CPPvgn4bLG8WzEs_av9QGIfwGt1_ADJ4f8snG9rMsVOXzL7FMtV4b5Q9olCSi-CsXhwqxWIxDs9xdiolrpKKupfK89U6x13vv8kiq0Y5qnU9Un6AuQ",null,null,3,null,null,null,0,"2"]',
  68. }
  69. # url = "https://translate.google.cn/_/TranslateWebserverUi/data/batchexecute?rpcids=MkEWBc&source-path=/&f.sid=-127429216746380775&bl=boq_translate-webserver_20220622.08_p0&hl=zh-CN&soc-app=1&soc-platform=1&soc-device=1&_reqid=882057&rt=c"
  70. # url = "https://translate.google.cn/_/TranslateWebserverUi/data/batchexecute?rpcids=rpcids=MkEWBc&f.sid=-2984828793698248690&bl=boq_translate-webserver_20201221.17_p0&hl=zh-CN&soc-app=1&soc-platform=1&soc-device=1&_reqid=5445720&rt=c"
  71. # url = "https://translate.google.cn/_/TranslateWebserverUi/data/batchexecute?rpcids=MkEWBc&source-path=/&f.sid=5723009767851758792&bl=boq_translate-webserver_20220622.08_p0&hl=zh-CN&soc-app=1&soc-platform=1&soc-device=1&_reqid=66251&rt=c"
  72. url = "https://translate.google.cn/_/TranslateWebserverUi/data/batchexecute?rpcids=MkEWBc&source-path=/&f.sid=424574510049276369&bl=boq_translate-webserver_20220803.08_p0&hl=zh-CN&soc-app=1&soc-platform=1&soc-device=1&_reqid=1235460&rt=c"
  73. # data = {'f.req': f'[[["MkEWBc","[[\\"{text}\\",\\"zh-CN\\",\\"en\\",true],[null]]",null,"generic"]]]'}
  74. data = {'f.req': f'[[["MkEWBc","[[\\"{text}\\",\\"en\\",\\"zh-CN\\",true],[null]]",null,"generic"]]]'}
  75. means = []
  76. try:
  77. res=requests.post(url, headers=headers, data=data, timeout=5).text
  78. print(res)
  79. pattern = '\)\]\}\'\s*\d{3,4}\s*\[(.*)\s*'
  80. part1 = re.findall(pattern, res)
  81. part1_list = json.loads('[' + part1[0])[0]
  82. if part1_list[2] is None:
  83. # print(text)
  84. return text
  85. content1 = part1_list[2].replace('\n', '')
  86. content1 = json.loads(content1)
  87. print(content1)
  88. if len(content1) > 3 and content1[3]:
  89. if len(content1[3]) > 5:
  90. specific_means = content1[3][5:]
  91. # print("specific_means:", specific_means)
  92. if specific_means[0]:
  93. part2_list = specific_means[0][0]
  94. means = [j[0] for i in part2_list for j in i[1]] # i[0]是词性
  95. return ";".join(means)
  96. part2_list = content1[1][0][0][5:][0]
  97. means = [i[0] for i in part2_list]
  98. print(means)
  99. return ";".join(means)
  100. except:
  101. return ""
  102. def translate_youdao(word=None):
  103. """
  104. 结果不全
  105. :param word:
  106. :return:
  107. """
  108. url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
  109. # url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
  110. Form_data = {
  111. 'i': word,
  112. 'from': 'AUTO',
  113. 'to': 'AUTO',
  114. 'smartresult': 'dict',
  115. 'client': 'fanyideskweb',
  116. 'salt': '1528199964615',
  117. 'sign': 'f6cf55466c876c404ff85ea3fc8c453f',
  118. 'doctype': 'json',
  119. 'version': '2.1',
  120. 'keyfrom': 'fanyi.web',
  121. 'action': 'FY_BY_REALTIME',
  122. 'typoResult': 'false'
  123. }
  124. try:
  125. response = requests.post(url=url, data=Form_data, timeout=5)
  126. content = json.loads(response.text)
  127. print(content)
  128. # print(content['translateResult'][0][0]['tgt'])
  129. return content['translateResult'][0][0]['tgt']
  130. except:
  131. return ""
  132. def youdaoTranslate(text):
  133. """
  134. 通过html标签获取
  135. :param text:
  136. :return:
  137. """
  138. r = requests.get("http://dict.youdao.com/w/eng/{}/#keyfrom=dict2.top.suggest".format(text))
  139. all_content = r.content.decode('utf-8')
  140. trans_container = re.findall(r'<div class="trans-container">\s*<ul>(.+?)</ul>',
  141. all_content.replace(" ", " ").replace("\t", ""), flags=re.S)
  142. # print(trans_container)
  143. if trans_container:
  144. trans = [i for i in re.split(r"</?li>", trans_container[0]) if i.strip() and re.search(
  145. "[((]\s*人名\s*[))]", i) is None] # 人名的翻译不需要
  146. return ";#;" .join(trans)
  147. return ""
  148. def haiciTranslate(text):
  149. """
  150. 通过html标签获取
  151. :param text:
  152. :return:
  153. """
  154. r = requests.get("https://dict.cn/search?q={}".format(text))
  155. all_content = r.content.decode('utf-8')
  156. # print(all_content)
  157. dict_basic = re.findall(r'<ul class="dict-basic-ul">(.+?)</ul>',
  158. all_content.replace(" ", " ").replace("\t", ""), flags=re.S)
  159. if not dict_basic:
  160. dict_basic = re.findall(r'<div class="basic clearfix">[\n\s]*<ul\s*>(.+?)</ul>',
  161. all_content.replace(" ", " ").replace("\t", ""), flags=re.S)
  162. if dict_basic:
  163. trans = [re.sub("</?span>|</?strong>", "", i).replace("\n", "") for i in
  164. re.findall(r"<li>(.+?)</li>", dict_basic[0], flags=re.S) if i.strip() and re.search(
  165. "[((]\s*人名\s*[))]", i) is None]
  166. return ";#;".join(trans)
  167. return ""
  168. def haici_zh2en(text):
  169. r = requests.get("https://dict.cn/search?q={}".format(text), timeout=0.5)
  170. all_content = r.content.decode('utf-8')
  171. # print(all_content)
  172. dict_basic = re.findall(r'<div class="layout cn">(.+?)</div>',
  173. all_content.replace(" ", " ").replace("\t", ""), flags=re.S)
  174. if dict_basic:
  175. en_list = re.findall('<a href=[^<]+?>(.+?)</a>', dict_basic[0], flags=re.S)
  176. en_list = [en.strip() for en in en_list]
  177. # print(en_list)
  178. return en_list
  179. def baiduTranslate(text):
  180. """
  181. 通过html标签获取
  182. :param text:
  183. :return:
  184. """
  185. # url = "https://fanyi.baidu.com/translate?aldtype=16047&query=break+down&keyfrom=baidu&smartresult=dict&lang=auto2zh#en/zh/break%20down"
  186. # url = "https://fanyi.baidu.com/#en/zh/break%20down"
  187. url = "https://fanyi.baidu.com/sug"
  188. Form_data = {
  189. 'kw': text,
  190. }
  191. response = requests.post(url=url, data=Form_data, timeout=5)
  192. content = json.loads(response.text)
  193. print(content)
  194. def en2ch_baidu(text):
  195. """
  196. 百度翻译
  197. :return:
  198. """
  199. url = "https://fanyi.baidu.com/transapi"
  200. Form_data = {
  201. 'query': text, # .replace(" ", "+")
  202. 'from': "en",
  203. 'to': "zh",
  204. 'source': "txt"
  205. }
  206. headers = {
  207. # "Acs-Token": "1662274986285_1662362866177_PZ9RGDfoZT611QxcCOPF8OFKwuWbo2SxPixIRvRUghgD6AKWj+XA4XpEy7jAsAxs3jxn/CnuUNVYI3E2GZI/pkKZt9XbuQOuHUVVAf6WjA7RfsNhNa0Va1Rr9au9Fskav7ZLIVmTCJ+cFFnstBktKqO8OFOYHfdVgXFcqjPzmz34yuMe/0xH8FmAibeLtkQE/OHK4rJ18afENGyeGlFpY1R/mQUCfROo1CjZ1wk9yhUFxFZmgToyLDDuIfngU3HPylJZtk6LmV20vbSS8hA+7vJk4XPppOLRI/+Uapoks//SxVm2OtpMlmM4zGuQ82N9YBaQy3nS2NjbYy08hnNI1rsPDikdQI0tBawhm91wsrgMsdXaRSfkEqy7ZpAOMH0ebMYck5r4l+OSXe7lR3XoA4CZU6Rd+eaQ9xHktcdtaKA=",
  208. "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
  209. "Cookie": 'PSTM=1646122173; BIDUPSID=4A46D96DC8423B1990661EC67CB17B97; __yjs_duid=1_5eebbceeddec85cf72e17b95994355071646122377960; REALTIME_TRANS_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; APPGUIDE_10_0_2=1; BAIDUID=68FE11EE3BE9ADA190124B58EB36B628:FG=1; MCITY=-289%3A; H_PS_PSSID=; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1666148184,1666156674,1666168716,1666248882; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1666248882; BAIDUID_BFESS=68FE11EE3BE9ADA190124B58EB36B628:FG=1; delPer=0; PSINO=5; BA_HECTOR=218l21alahak2g00ak0k45e51hl268u1a; ZFY=tzTetLMt:AQKpcqHBdtdKVRVB8GNVB7XLc:BQRFb:BJvjQ:C; BDRCVFR[b3Q6T2w4uHD]=qdOo65HpBYTTh7znWc8mvqV; ab_sr=1.0.1_NWU3MmM0NWQyOWQxZmY5ZTBhZDNhODcwNTBmN2JkMjM3YmVjZWFlNzE2ODJmZWFjMzNhNWQwM2RmZDc4NGQ4ODJiMTVhMGUxZWM4YzkzMmI2ZDY2N2YzMWY0MWFkOWZkMjFkNzQ0YTc1NmFjOWJhNzRmZjVlZjUwZmNmNjc1ZTlhNzYxYTEzODkxY2NiZWMzOGVmMGIwMWFlYmNjNzhlYg==',
  210. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
  211. }
  212. # url = "https://fanyi.baidu.com/translate?aldtype=16047&query=break+down&keyfrom=baidu&smartresult=dict&lang=auto2zh"
  213. response = requests.post(url=url, data=Form_data, headers=headers, timeout=5)
  214. # re = eval("u" + "\'" + response.text + "\'")
  215. # aa = response.content.decode('unicode_escape')
  216. content = json.loads(response.content)
  217. print(content)
  218. # try:
  219. if "result" in content:
  220. res = eval(content["result"].replace("null", "\"\""))["content"]
  221. mean_list = []
  222. for i in res[0]['mean']:
  223. # print(i)
  224. ptag = i["pre"] if "pre" in i else ""
  225. mean = list(i["cont"].keys())
  226. mean_list.append(ptag + ";".join(mean))
  227. return "\n".join(mean_list)
  228. if "data" in content:
  229. mean_list = []
  230. for i in content['data']:
  231. # print(i['dst'])
  232. mean_list.append(i['dst'])
  233. return "\n".join(mean_list)
  234. # except:
  235. # return ""
  236. def ch2en_baidu(text):
  237. url = "https://fanyi.baidu.com/transapi"
  238. Form_data = {
  239. 'query': text.replace(" ", "+"),
  240. 'from': "zh",
  241. 'to': "en",
  242. 'source': "txt"
  243. }
  244. headers = {
  245. # "Acs-Token": "1662274986285_1662362866177_PZ9RGDfoZT611QxcCOPF8OFKwuWbo2SxPixIRvRUghgD6AKWj+XA4XpEy7jAsAxs3jxn/CnuUNVYI3E2GZI/pkKZt9XbuQOuHUVVAf6WjA7RfsNhNa0Va1Rr9au9Fskav7ZLIVmTCJ+cFFnstBktKqO8OFOYHfdVgXFcqjPzmz34yuMe/0xH8FmAibeLtkQE/OHK4rJ18afENGyeGlFpY1R/mQUCfROo1CjZ1wk9yhUFxFZmgToyLDDuIfngU3HPylJZtk6LmV20vbSS8hA+7vJk4XPppOLRI/+Uapoks//SxVm2OtpMlmM4zGuQ82N9YBaQy3nS2NjbYy08hnNI1rsPDikdQI0tBawhm91wsrgMsdXaRSfkEqy7ZpAOMH0ebMYck5r4l+OSXe7lR3XoA4CZU6Rd+eaQ9xHktcdtaKA=",
  246. "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
  247. "Cookie": 'PSTM=1646122173; BIDUPSID=4A46D96DC8423B1990661EC67CB17B97; __yjs_duid=1_5eebbceeddec85cf72e17b95994355071646122377960; REALTIME_TRANS_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; APPGUIDE_10_0_2=1; BAIDUID=68FE11EE3BE9ADA190124B58EB36B628:FG=1; MCITY=-289%3A; H_PS_PSSID=; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1666148184,1666156674,1666168716,1666248882; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1666248882; BAIDUID_BFESS=68FE11EE3BE9ADA190124B58EB36B628:FG=1; delPer=0; PSINO=5; BA_HECTOR=218l21alahak2g00ak0k45e51hl268u1a; ZFY=tzTetLMt:AQKpcqHBdtdKVRVB8GNVB7XLc:BQRFb:BJvjQ:C; BDRCVFR[b3Q6T2w4uHD]=qdOo65HpBYTTh7znWc8mvqV; ab_sr=1.0.1_NWU3MmM0NWQyOWQxZmY5ZTBhZDNhODcwNTBmN2JkMjM3YmVjZWFlNzE2ODJmZWFjMzNhNWQwM2RmZDc4NGQ4ODJiMTVhMGUxZWM4YzkzMmI2ZDY2N2YzMWY0MWFkOWZkMjFkNzQ0YTc1NmFjOWJhNzRmZjVlZjUwZmNmNjc1ZTlhNzYxYTEzODkxY2NiZWMzOGVmMGIwMWFlYmNjNzhlYg==',
  248. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
  249. }
  250. response = requests.post(url=url, data=Form_data, headers=headers, timeout=0.5)
  251. content = json.loads(response.content)
  252. # print(content)
  253. mean_list = []
  254. if "result" in content:
  255. res = eval(content["result"].replace("null", "\"\""))["content"]
  256. for i in res[0]['mean']:
  257. # print(i)
  258. ptag = i["pre"]
  259. mean = list(i["cont"].keys())
  260. # mean_list.append(ptag + ";".join(mean))
  261. mean_list.extend(mean)
  262. # print("\n".join(mean_list))
  263. if "data" in content:
  264. for i in content['data']:
  265. # print(i['dst'])
  266. mean_list.append(i['dst'])
  267. return mean_list
  268. def KM_en2ch(text):
  269. """
  270. KM词典,通过html标签获取 https://kmcha.com
  271. :param text:
  272. :return:
  273. """
  274. word = str(text).strip().replace(" ", "%20").replace("!", " ") # .replace(".", "")
  275. r = requests.get("https://kmcha.com/dict/{}".format(word.strip()))
  276. text = r.content.decode('utf8')
  277. # print(text)
  278. res_means = re.findall("<ul>(.+?)</ul>", text, flags=re.S)
  279. if res_means:
  280. means = re.findall('<li(?=[\s>])[^>]*?>(.+?)</li>', res_means[0])
  281. means = [i.replace("&lt;", "<").replace("&gt;", ">") for i in means if "人名" not in i]
  282. # print(means)
  283. # print(";##;".join(means))
  284. return ";#;".join(means)
  285. return ""
  286. def Bing_en2ch(text):
  287. """
  288. 必应词典,通过html标签获取 https://cn.bing.com/dict/
  289. :param text:
  290. :return:
  291. """
  292. print("raw_word:", text)
  293. word = str(text).replace("!", " ").replace(" ", " ").strip()
  294. while " " in word:
  295. word = word.replace(" ", " ")
  296. if len(re.findall("…+|\.{3,}", word)) > 1:
  297. word = re.sub("(…+|\.{3,})$", "", word.strip())
  298. word = re.sub("(…+|\.{3,})\s*", "…", word.strip())
  299. # word = word.replace(" ", "+").replace(".", "").strip()
  300. print(word)
  301. r = requests.get("https://cn.bing.com/dict/{}".format(word))
  302. text = r.content.decode('utf8')
  303. if '<div class=" contentPadding">' not in text:
  304. r = requests.get("https://cn.bing.com/dict/search?q={}".format(word))
  305. text = r.content.decode('utf8')
  306. # print(text)
  307. a = re.findall('<div class=" contentPadding">(.+?)</ul></div><!--foo--></footer><script type="text/javascript">', text, flags=re.S)
  308. # print(a[0])
  309. all_mean = []
  310. mean_net = []
  311. mean_text = ""
  312. synonyms = []
  313. antonyms = []
  314. if a:
  315. mean1 = []
  316. mean_head = re.findall("<ul>(.+?)</ul>", a[0], flags=re.S) # 开头给出的释义
  317. if mean_head:
  318. mean_1 = re.findall('<span class="[^>]*?">(.*?)</span><span [^>]*?"><span>(.+?)</span></span>', a[0], flags=re.S)
  319. # mean_1 = [i[0]+i[1] for i in mean_1 if i[0] != "网络"]
  320. # mean_net = [i[1] for i in mean_1 if i[0] == "网络"]
  321. new_mean_1 = []
  322. for i in mean_1:
  323. mm = i[1]
  324. if '<a href' in i[1]:
  325. mm = re.sub(r'<a href=[^>]+?">(.+?)</a>', r"\1", mm)
  326. mm = re.sub('</?span>', "", mm)
  327. if i[0] != "网络":
  328. new_mean_1.append(i[0]+mm)
  329. else:
  330. mean_net.append(mm)
  331. mean1.extend(new_mean_1)
  332. # mean_text = ";##;".join(all_mean)
  333. # print(mean_text)
  334. # print("开头的释义", mean1)
  335. # ----------------------不带词性----------------------------
  336. # # b = re.findall("<table>.*?</table>", a[0])
  337. # # b = [re.search('<span class="bil b_primtxt">(.+?)</span>', i).group(1) for i in b if "bil b_primtxt" in i]
  338. # b = re.findall('<span class="bil b_primtxt">(.+?)</span>', a[0])
  339. # print("b权威英汉双解:", b)
  340. # b1 = re.findall('<span class="p1-1 b_regtxt">(.+?)</span>', a[0])
  341. # print("b1英汉:", b1)
  342. # -------------------------------------------------
  343. mean2 = []
  344. pos2 = ""
  345. bi2 = re.split('<div class="li_pos"><div class="pos_lin">', a[0])
  346. for i in bi2[1:]:
  347. pos2 = re.findall('<div class="pos">(.+?)</div>', i)[0]
  348. b = re.findall('<span class="bil b_primtxt">(.+?)</span>', i)
  349. mean2.append(pos2 + "; ".join(list(set(b))))
  350. # print("mean2权威英汉双解:", mean2)
  351. mean3 = []
  352. pos3 = ""
  353. bi = re.findall('<div id="crossid" style="display:none;"><table>(.+?)</table>', a[0])
  354. if bi:
  355. for n, i in enumerate(re.findall('<td>(.+?)</td>', bi[0])):
  356. if n % 2 == 0:
  357. pos3 = re.findall('<div class="pos pos1">(.+?)</div>', i)[0]
  358. else:
  359. b1 = re.findall('<span class="p1-1 b_regtxt">(.+?)</span>', i)
  360. mean3.append(pos3+"; ".join(list(set(b1))))
  361. # print("mean3英汉:", mean3)
  362. b2 = re.findall('<div class="p1-1 b_regtxt">(.+?)</div>', a[0])
  363. print("b2:", b2)
  364. b2 = [k for k in b2 if k not in mean_net and re.search("([;;]\s*|^)"+k+"([;;]\s*|$)",
  365. ";".join(mean_net)) is None]
  366. print("mean_net开始:", mean_net)
  367. if b2:
  368. mean_net.append("【次】->")
  369. mean_net.extend(b2)
  370. if mean1:
  371. all_mean.append("【第一种】"+";<br>".join(mean1))
  372. if mean2:
  373. all_mean.append("【第二种】"+";<br>".join(mean2))
  374. if mean3:
  375. all_mean.append("【第三种】"+";<br>".join(mean3))
  376. if mean_net:
  377. # all_mean.append("<网络>"+";".join([j for j in mean_net if j not in mean_text]))
  378. mean_net_nodupl = list(set(mean_net))
  379. mean_net_nodupl.sort(key=mean_net.index)
  380. all_mean.append("<网络>" + ";".join(mean_net_nodupl))
  381. mean_text = ";<br>".join(all_mean)
  382. antoid = re.search('(<div id="antoid" .*?</div></div></div>)', a[0])
  383. if antoid:
  384. antonyms = re.findall('<span class="p1-4 b_alink">(.+?)</span>', antoid.group(1))
  385. synoid = re.search('(<div id="synoid" .*?</div></div></div>)', a[0])
  386. if synoid:
  387. synonyms = re.findall('<span class="p1-4 b_alink">(.+?)</span>', synoid.group(1))
  388. print("反义词:", antonyms)
  389. print("同义词:", synonyms)
  390. print(mean_text)
  391. return mean_text, synonyms, antonyms
  392. def KM_ch2en(word_zh):
  393. r = requests.get("https://kmcha.com/dict/{}".format(word_zh.strip()), timeout=0.5)
  394. text = r.content.decode('utf8')
  395. if "{}的英文翻译".format(word_zh.strip()) in text:
  396. en_list = re.findall("的英文翻译</strong>[\s\n]*</p>[\s\n]*<p>(.+?)</p>", text, flags=re.S)
  397. en_list = sum([re.findall("<span>(.+?)</span>", i) for i in en_list], [])
  398. # print(en_list)
  399. return en_list
  400. if __name__ == '__main__':
  401. import time
  402. import concurrent
  403. t1 = time.time()
  404. # KM_ch2en("高速公路")
  405. # Bing_en2ch("二百")
  406. # haici_zh2en("高速公路")
  407. print(haici_zh2en("稳"))
  408. # print(youdaoTranslate("at worst"))
  409. # print(haiciTranslate("at worst"))
  410. # print(KM_en2ch("at worst"))
  411. # trans_baidu("break down")
  412. # baiduTranslate("salad")
  413. # baiduTranslate("无害的")
  414. # print(en2ch_baidu("salad"))
  415. # print(en2ch_baidu("the voice stress analyser"))
  416. # print(haici_zh2en("乌七八糟的东西"))
  417. print(ch2en_baidu("稳"))
  418. print(KM_ch2en("稳"))
  419. # print(time.time() - t1)
  420. # st = time.time()
  421. # ch2en_baidu("征求意见")
  422. # print(time.time() - st)
  423. # baiduTranslate2("声音")
  424. # print("::::", time.time() - t1)
  425. # t = requests.get("https://fanyi.baidu.com/translate?aldtype=16047&query=break+down&keyfrom=baidu&smartresult=dict&lang=auto2zh#en/zh/break%20down")
  426. # print(t.content)
  427. Bing_en2ch("fresh water")
  428. # KM_en2ch("Mrs.")
  429. # -----------------------------------------------------------
  430. # bing搜集近义词
  431. # wordnet = json.loads(open(r"G:\zwj\WL\en2cn\files\词汇整理\res_without_baidu.json", encoding='utf8').read())
  432. # from Words.Dicts import phrases_dict
  433. #
  434. # near_synonym = {}
  435. # go = False
  436. # for i in list(phrases_dict.keys()):
  437. # # if str(i).strip() == 'windsurfing' and not go:
  438. # # go = True
  439. # # if not go:
  440. # # continue
  441. #
  442. # if re.search("[()()\d,,]", i):
  443. # pass
  444. # else:
  445. # mean_text, synonyms, antonyms = Bing_en2ch(i)
  446. # if mean_text:
  447. # with open(r"G:\zwj\WL\en2cn\files\Bing_phrases_dict.txt", 'a+', encoding='utf-8') as f1:
  448. # f1.write('"{}": "{}",\n'.format(i.strip(), mean_text.strip()))
  449. # f1.close()
  450. # if synonyms or antonyms:
  451. # with open(r"G:\zwj\WL\en2cn\files\Bing_phrases_synonyms", 'a+', encoding='utf-8') as f2:
  452. # dd = {"synonyms": synonyms,
  453. # "antonyms": antonyms}
  454. # f2.write('"{}": {},\n'.format(i.strip(), dd))
  455. # f2.close()
  456. # ------------------yhk搜集短语释义---------
  457. # from Words.Phrase_dict import phrases_dict_tk
  458. #
  459. # new_en2ch = {}
  460. # for k, v in phrases_dict_tk.items():
  461. # k = k.strip()
  462. # try:
  463. # a = youdaoTranslate(k)
  464. # print(a)
  465. # if a:
  466. # new_en2ch[k] = a
  467. # b = haiciTranslate(k)
  468. # print(b)
  469. # if b:
  470. # new_en2ch[k] += ";##;" + b
  471. # c = KM_en2ch(k)
  472. # print(c)
  473. # if c:
  474. # new_en2ch[k] += ";##;" + c
  475. # except:
  476. # re_f = open(r"G:\zwj\WL\en2cn\files\yhk_phrases_dict.json", 'w', encoding='utf-8')
  477. # json.dump(new_en2ch, re_f, ensure_ascii=False)
  478. # ----------------百度词义搜集--------------
  479. import os
  480. path1 = r"G:\zwj\WL\en2cn\files\main\en-ch_dict_from_3_website.json"
  481. path2 = r"G:\zwj\WL\en2cn\files\main\en-ch_dict_bd3.json"
  482. # ens = json.loads(open(path1, encoding="utf8").read())
  483. # from Words.Dicts import words_dict, more_words_dict
  484. # ens_bd = json.loads(open(path2, encoding="utf8").read())
  485. # print(len(ens_bd))
  486. # new_ens = [
  487. # 'Arab-Israeli',
  488. # 'English-language',
  489. # 'Feb.',
  490. # 'Israeli-Palestinian',
  491. # 'Mar.',
  492. # 'Mexican-American',
  493. # 'O. K.',
  494. # 'Oct.',
  495. # 'Spanish-language',
  496. # 'Spanish-speaking',
  497. # 'US-led',
  498. # 'Washington-based',
  499. # 'ad.',
  500. # 'agro-scientific',
  501. # 'all-white',
  502. # 'antispit',
  503. # 'arm-up',
  504. # 'asteriod',
  505. # 'at-bat',
  506. # 'at-large',
  507. # 'axe ax',
  508. # 'backward-curving',
  509. # 'beddings',
  510. # 'black-owned',
  511. # 'booby-hatch',
  512. # 'bumpkinly',
  513. # 'characteristi',
  514. # 'civil-military',
  515. # 'classificatio',
  516. # 'clean-burning',
  517. # 'correspondenc',
  518. # 'dress-down',
  519. # 'e-mailed',
  520. # 'early-morning',
  521. # 'eight-year',
  522. # 'eight-year-old',
  523. # 'eighteenth-century',
  524. # 'eighth-grade',
  525. # 'ex-gangster',
  526. # 'first-place',
  527. # 'five-day',
  528. # 'five-minute',
  529. # 'five-year-old',
  530. # 'four-day',
  531. # 'four-hour',
  532. # 'fourth-grade',
  533. # 'fourth-quarter',
  534. # 'free-trade',
  535. # 'generaliza-tion',
  536. # 'gloss-over',
  537. # 'gray-haired',
  538. # 'greyhound',
  539. # 'half-mile',
  540. # 'half-million',
  541. # 'hear-sensing',
  542. # 'hour-long',
  543. # 'identificatio',
  544. # 'iluminate',
  545. # 'interteam',
  546. # 'just-me-ism',
  547. # 'long-term-care',
  548. # 'low-wage',
  549. # 'malnutritious',
  550. # 'mileometre',
  551. # 'million-dollar',
  552. # 'mm-hmm',
  553. # 'more',
  554. # 'multimillion-dollar',
  555. # 'nine-year-old',
  556. # 'nineteenth-century',
  557. # 'no-fly',
  558. # 'non-verbal-leakage',
  559. # 'notwithstandi',
  560. # 'null',
  561. # 'nullify',
  562. # 'number-one',
  563. # 'one-party',
  564. # 'one-year',
  565. # 'over-emotional',
  566. # 'oˈclock',
  567. # 'public-relations',
  568. # 'reconciliatio',
  569. # 'saut',
  570. # 'saute',
  571. # 'scaper',
  572. # 'sea-shell',
  573. # 'second-floor',
  574. # 'second-largest',
  575. # 'second-round',
  576. # 'second-year',
  577. # 'self-critical',
  578. # 'seven-year',
  579. # 'seventeenth-century',
  580. # 'sideroad',
  581. # 'six-month',
  582. # 'six-week',
  583. # 'six-year-old',
  584. # 'sixteenth-century',
  585. # 'sixth-grade',
  586. # 'sling-shots',
  587. # 'small-business',
  588. # 'snowrafting',
  589. # 'sonwflake',
  590. # 'straightforwa',
  591. # 'stress-reducing',
  592. # 'sun-cream',
  593. # 'superintenden',
  594. # 'swim-suit',
  595. # 'teacher-librarian',
  596. # 'telecommunica',
  597. # 'ten-year',
  598. # 'the voice stress analyser',
  599. # 'third-largest',
  600. # 'three-day',
  601. # 'three-time',
  602. # 'three-week',
  603. # 'three-year',
  604. # 'two-year-old',
  605. # 'video-game',
  606. # ]
  607. # en2ch_bd = ens_bd.copy()
  608. # for k, v in ens.items():
  609. # for k in new_ens:
  610. # # print(k)
  611. # # if "(" in k or "(" in k:
  612. # # continue
  613. # # if k not in ens_bd:
  614. # # print("'{}', ".format(k))
  615. # if k in ens_bd:
  616. # print("重复:", k)
  617. # continue
  618. # print('++++++++++++++++【{}】++++++++++++++++'.format(k))
  619. # if "(" in k or "(" in k:
  620. # continue
  621. # try:
  622. # ch = en2ch_baidu(k)
  623. # print(2222222222, ch)
  624. # if ch:
  625. # en2ch_bd[k] = ch
  626. # except:
  627. # re_f = open(r"G:\zwj\WL\en2cn\files\main\en-ch_dict_bd3.json", 'w', encoding='utf-8')
  628. # json.dump(en2ch_bd, re_f, ensure_ascii=False)
  629. # re_f = open(r"G:\zwj\WL\en2cn\files\main\en-ch_dict_bd3.json", 'w', encoding='utf-8')
  630. # json.dump(en2ch_bd, re_f, ensure_ascii=False)