123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678 |
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Online translation helpers.
"""
import hashlib
import http.client
import json
import random
import re
import urllib
import urllib.parse

import requests
def trans_baidu(str_text, mod="en2cn"):
    """Translate English text to Chinese through the Baidu Translate HTTP API.

    The original author notes that the service started charging at the end
    of August 2022 and that the input (word, sentence or article) must be
    smaller than 6000 bytes.

    :param str_text: text to translate.
    :param mod: kept for backward compatibility; the direction is fixed to
        English -> Chinese and this value is currently ignored.
    :return: the translated text, one line per segment returned by the API,
        or "" when the request fails or the reply carries no translation.
    """
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration.
    appid = '20191115000357320'  # your appid
    secretKey = 'ovn8spgUH6GQ_GhF7V6u'  # your secret key
    fromLang = 'en'  # source language
    toLang = 'zh'  # target language
    salt = random.randint(32768, 65536)
    q = str_text

    # The request signature is the MD5 of appid + query + salt + secret key.
    sign = hashlib.md5((appid + q + str(salt) + secretKey).encode()).hexdigest()
    myurl = ('/api/trans/vip/translate'
             + '?appid=' + appid
             + '&q=' + urllib.parse.quote(q)
             + '&from=' + fromLang
             + '&to=' + toLang
             + '&salt=' + str(salt)
             + '&sign=' + sign)

    httpClient = None
    try:
        httpClient = http.client.HTTPConnection('api.fanyi.baidu.com', timeout=5)
        httpClient.request('GET', myurl)
        # response is an HTTPResponse object
        response = httpClient.getresponse()
        payload = json.loads(response.read().decode("utf-8"))
        segments = payload.get("trans_result")
        if not segments:
            # No translation in the reply (e.g. an API error): show the
            # reply itself instead of failing on a None subscript.
            print(payload)
            return ""
        # Keep every translated segment, not only the first one.
        return "\n".join(segment["dst"] for segment in segments)
    except (OSError, http.client.HTTPException, ValueError, KeyError,
            TypeError, AttributeError) as e:
        # Best-effort helper: report the problem and return an empty result.
        print(e)
        return ""
    finally:
        if httpClient:
            httpClient.close()
def googleTranslate(text):
    """Translate English text to Chinese via translate.google.cn's batchexecute endpoint.

    :param text: English text to translate.
    :return: candidate meanings joined with ";"; `text` itself when the
        reply carries no payload; "" when the request or parsing fails.
    """
    # NOTE(review): header values, including the 'x-goog-batchexecute-bgr'
    # token, look like they were captured from a browser session and may
    # expire - confirm. Older commented-out captures were removed here.
    headers = {
        'origin': "https://translate.google.cn",
        'referer': "https://translate.google.cn/",
        'sec-fetch-dest': "empty",
        'sec-fetch-mode': "cors",
        'sec-fetch-site': "same-origin",
        'x-same-domain': "1",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
        'accept-encoding': "gzip, deflate, br",
        'content-type': "application/x-www-form-urlencoded;charset=UTF-8",
        'x-goog-batchexecute-bgr': '[";x9m42ZTQAAYEzRk0JM1f55ylXij7ksAmACkAIwj8RvxaMzZ1iOvebhAxZVDBGfAZwvR3YpljVZMeO6IUNBDwy5wDMh8AAABSTwAAAAJ1AQcXAIlC1pgskV32-yOzV6FmYVKrbi30KaxqJklCs1zPpGHX_hLbfLToBJPYoWuAZ_EDLf25YVbCAKmrwVah78G7syapqN1EYiNKZfShvZp60vbhcqeJWm7zwznMjhKjfdKuTq5Rq41JelMT_EQVGypnsHBqTderknEjPtt6TmIMIJjWRfjZsUVvRnde3oQCTLCuZUBVv3eTx3oT7W-EyIImn9XZEs8AKxQKK2vZRRc6VkveQMHi5DqyNT8YgqSntvAGOLImK402fdeua07F-nXkiiULeaJJ-7vjCMM-3jJNgEO6Vw6UJ6U74c_esPqwbtehb8dAO1uv_Lv9WJDX0KnHNkSRrsdD1z0lqxCQBZOxUpDGLc4Cvz_1FmWBU3Hy9RXuT_NW149LFR5JG6TgRQchengsqAUaC48H1vejyaG9Q6MDJdbWdnQxFtcNjIJDM7fOvVlsHzcNcCW11uP4AA55e28eCrbIi3ZeVRSRpWhWfRvoWg8Yzp1dIwnDS6vlnjr1UwZ6wLTfqF5ho0kSe1qcwza73nSZvi73NeyeaSkbsnO25bi4Jzp5sronwfrOqQALcgYWKX-krf6j-ug17IQm278z-sP4dJy3s4bZyWndiF5wO9tlJ7vZ_GRDiUVAZUNho_9WGi9oB89WvzjzqDMxvlx1WV_SonpzVaX9WZX0oIoDEs0FqRW165sinSZGAk63gejcPbH9ADj1xYQpP6YrS_iYNpabpSR544jwXT0V-f8bGf-bOBMViidcwgW0UloFN8i9v7ppuOxXbrp83H7Q8Lh5Lw0YCWijHaZ_JC3w-FZOa_j66ujYsxCF8t5cVB_O2Ttnks1eRYutJwg7a7nKvhDv24N_CPPvgn4bLG8WzEs_av9QGIfwGt1_ADJ4f8snG9rMsVOXzL7FMtV4b5Q9olCSi-CsXhwqxWIxDs9xdiolrpKKupfK89U6x13vv8kiq0Y5qnU9Un6AuQ",null,null,3,null,null,null,0,"2"]',
    }
    url = "https://translate.google.cn/_/TranslateWebserverUi/data/batchexecute?rpcids=MkEWBc&source-path=/&f.sid=424574510049276369&bl=boq_translate-webserver_20220803.08_p0&hl=zh-CN&soc-app=1&soc-platform=1&soc-device=1&_reqid=1235460&rt=c"

    # Build the nested JSON payload with json.dumps so quotes, backslashes
    # and control characters inside `text` are escaped; for plain text the
    # result is identical to the previous hand-written template.
    compact = {"separators": (",", ":"), "ensure_ascii": False}
    inner = json.dumps([[text, "en", "zh-CN", True], [None]], **compact)
    data = {'f.req': json.dumps([[["MkEWBc", inner, None, "generic"]]], **compact)}

    try:
        res = requests.post(url, headers=headers, data=data, timeout=5).text
        print(res)
        # The body starts with ")]}'" and a 3-4 digit number, then a JSON array.
        pattern = r"\)\]\}'\s*\d{3,4}\s*\[(.*)\s*"
        part1 = re.findall(pattern, res)
        part1_list = json.loads('[' + part1[0])[0]
        if part1_list[2] is None:
            # No payload in the reply: hand the input back unchanged.
            return text
        content1 = json.loads(part1_list[2].replace('\n', ''))
        print(content1)
        if len(content1) > 3 and content1[3] and len(content1[3]) > 5:
            specific_means = content1[3][5:]
            if specific_means[0]:
                part2_list = specific_means[0][0]
                # i[0] is the part of speech, i[1] the entries for it.
                means = [j[0] for i in part2_list for j in i[1]]
                return ";".join(means)
        # Fall back to the candidates of the plain translation.
        part2_list = content1[1][0][0][5:][0]
        means = [i[0] for i in part2_list]
        print(means)
        return ";".join(means)
    except Exception:
        # Best-effort: any network or parsing problem yields an empty result,
        # while KeyboardInterrupt/SystemExit still propagate.
        return ""
def translate_youdao(word=None):
    """Translate `word` with Youdao's web translate endpoint.

    The original author notes that the results are incomplete.

    :param word: text to translate.
    :return: the 'tgt' field of the first translation result, or "" when
        the request or the parsing of the reply fails.
    """
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
    # url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    # Fixed form fields, including constant 'salt' and 'sign' values.
    form_data = {
        'i': word,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '1528199964615',
        'sign': 'f6cf55466c876c404ff85ea3fc8c453f',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTIME',
        'typoResult': 'false',
    }
    try:
        response = requests.post(url=url, data=form_data, timeout=5)
        content = json.loads(response.text)
        print(content)
        return content['translateResult'][0][0]['tgt']
    except Exception:
        # Best-effort: failures give "", but KeyboardInterrupt/SystemExit
        # are no longer swallowed as they were by the bare `except:`.
        return ""
def youdaoTranslate(text):
    """Look `text` up on dict.youdao.com and scrape the meanings from the HTML.

    :param text: English word or phrase.
    :return: the list items of the first "trans-container" block joined
        with ";#;", skipping entries marked as a person's name (人名);
        "" when no such block is found.
    :raises requests.RequestException: on network failure or timeout.
    """
    r = requests.get(
        "http://dict.youdao.com/w/eng/{}/#keyfrom=dict2.top.suggest".format(text),
        timeout=5,  # do not hang forever on an unresponsive server
    )
    all_content = r.content.decode('utf-8')
    # Turn non-breaking spaces (entity or character) into plain spaces and
    # drop tabs before matching.
    page = all_content.replace("&nbsp;", " ").replace("\xa0", " ").replace("\t", "")
    trans_container = re.findall(r'<div class="trans-container">\s*<ul>(.+?)</ul>',
                                 page, flags=re.S)
    if trans_container:
        # Person-name translations are not wanted.
        trans = [i for i in re.split(r"</?li>", trans_container[0])
                 if i.strip() and re.search(r"[(（]\s*人名\s*[)）]", i) is None]
        return ";#;".join(trans)
    return ""
def haiciTranslate(text):
    """Look `text` up on dict.cn (Haici) and scrape the meanings from the HTML.

    :param text: English word or phrase.
    :return: the meanings joined with ";#;", with <span>/<strong> tags and
        newlines removed and entries marked as a person's name (人名)
        skipped; "" when no definition list is found.
    :raises requests.RequestException: on network failure or timeout.
    """
    r = requests.get("https://dict.cn/search?q={}".format(text), timeout=5)
    all_content = r.content.decode('utf-8')
    # Turn non-breaking spaces (entity or character) into plain spaces and
    # drop tabs before matching.
    page = all_content.replace("&nbsp;", " ").replace("\xa0", " ").replace("\t", "")
    dict_basic = re.findall(r'<ul class="dict-basic-ul">(.+?)</ul>', page, flags=re.S)
    if not dict_basic:
        # Alternative page layout.
        dict_basic = re.findall(r'<div class="basic clearfix">[\n\s]*<ul\s*>(.+?)</ul>',
                                page, flags=re.S)
    if dict_basic:
        trans = [re.sub("</?span>|</?strong>", "", i).replace("\n", "")
                 for i in re.findall(r"<li>(.+?)</li>", dict_basic[0], flags=re.S)
                 if i.strip() and re.search(r"[(（]\s*人名\s*[)）]", i) is None]
        return ";#;".join(trans)
    return ""
def haici_zh2en(text):
    """Look a Chinese word up on dict.cn and return the linked English candidates.

    :param text: Chinese word or phrase.
    :return: list of stripped link texts found in the first
        '<div class="layout cn">' block, or None when that block is absent.
    :raises requests.RequestException: on network failure or when the
        0.5 s timeout expires.
    """
    r = requests.get("https://dict.cn/search?q={}".format(text), timeout=0.5)
    all_content = r.content.decode('utf-8')
    # Turn non-breaking spaces (entity or character) into plain spaces and
    # drop tabs before matching.
    page = all_content.replace("&nbsp;", " ").replace("\xa0", " ").replace("\t", "")
    dict_basic = re.findall(r'<div class="layout cn">(.+?)</div>', page, flags=re.S)
    if dict_basic:
        en_list = re.findall('<a href=[^<]+?>(.+?)</a>', dict_basic[0], flags=re.S)
        return [en.strip() for en in en_list]
    return None
def baiduTranslate(text):
    """Post `text` to Baidu Translate's "sug" endpoint and print the decoded JSON reply.

    :param text: keyword sent as the 'kw' form field.
    :return: None; the reply is only printed.
    """
    # url = "https://fanyi.baidu.com/translate?aldtype=16047&query=break+down&keyfrom=baidu&smartresult=dict&lang=auto2zh#en/zh/break%20down"
    # url = "https://fanyi.baidu.com/#en/zh/break%20down"
    endpoint = "https://fanyi.baidu.com/sug"
    payload = {'kw': text}
    reply = requests.post(url=endpoint, data=payload, timeout=5)
    print(json.loads(reply.text))
def en2ch_baidu(text):
    """Translate English to Chinese with Baidu Translate's "transapi" endpoint.

    :param text: English word, phrase or sentence.
    :return: when the reply has a "result" entry, one line per sense made
        of the optional "pre" tag followed by its meanings joined with ";";
        when it has a "data" entry, the 'dst' values joined with newlines;
        None when neither is present.
    :raises requests.RequestException: on network failure or timeout.
    """
    url = "https://fanyi.baidu.com/transapi"
    form_data = {
        'query': text,  # .replace(" ", "+")
        'from': "en",
        'to': "zh",
        'source': "txt",
    }
    # NOTE(review): the cookie looks like a captured browser session and may
    # expire - confirm.
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": 'PSTM=1646122173; BIDUPSID=4A46D96DC8423B1990661EC67CB17B97; __yjs_duid=1_5eebbceeddec85cf72e17b95994355071646122377960; REALTIME_TRANS_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; APPGUIDE_10_0_2=1; BAIDUID=68FE11EE3BE9ADA190124B58EB36B628:FG=1; MCITY=-289%3A; H_PS_PSSID=; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1666148184,1666156674,1666168716,1666248882; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1666248882; BAIDUID_BFESS=68FE11EE3BE9ADA190124B58EB36B628:FG=1; delPer=0; PSINO=5; BA_HECTOR=218l21alahak2g00ak0k45e51hl268u1a; ZFY=tzTetLMt:AQKpcqHBdtdKVRVB8GNVB7XLc:BQRFb:BJvjQ:C; BDRCVFR[b3Q6T2w4uHD]=qdOo65HpBYTTh7znWc8mvqV; ab_sr=1.0.1_NWU3MmM0NWQyOWQxZmY5ZTBhZDNhODcwNTBmN2JkMjM3YmVjZWFlNzE2ODJmZWFjMzNhNWQwM2RmZDc4NGQ4ODJiMTVhMGUxZWM4YzkzMmI2ZDY2N2YzMWY0MWFkOWZkMjFkNzQ0YTc1NmFjOWJhNzRmZjVlZjUwZmNmNjc1ZTlhNzYxYTEzODkxY2NiZWMzOGVmMGIwMWFlYmNjNzhlYg==',
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
    }
    response = requests.post(url=url, data=form_data, headers=headers, timeout=5)
    content = json.loads(response.content)
    print(content)
    if "result" in content:
        # Parse the embedded document as JSON instead of eval(): eval ran
        # remote data as code, and the old null -> "" text substitution
        # corrupted any word containing "null".
        raw = content["result"]
        res = (json.loads(raw) if isinstance(raw, str) else raw)["content"]
        mean_list = []
        for sense in res[0]['mean']:
            ptag = sense.get("pre") or ""  # tag may be missing or null
            mean_list.append(ptag + ";".join(sense["cont"]))
        return "\n".join(mean_list)
    if "data" in content:
        return "\n".join([item['dst'] for item in content['data']])
    return None
def ch2en_baidu(text):
    """Translate Chinese to English with Baidu Translate's "transapi" endpoint.

    :param text: Chinese text; spaces are replaced by "+" before sending.
    :return: list of English candidates: the meanings found under the
        reply's "result" entry followed by the 'dst' values under "data"
        (either part may be absent, giving a shorter or empty list).
    :raises requests.RequestException: on network failure or when the
        0.5 s timeout expires.
    """
    url = "https://fanyi.baidu.com/transapi"
    form_data = {
        'query': text.replace(" ", "+"),
        'from': "zh",
        'to': "en",
        'source': "txt",
    }
    # NOTE(review): the cookie looks like a captured browser session and may
    # expire - confirm.
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": 'PSTM=1646122173; BIDUPSID=4A46D96DC8423B1990661EC67CB17B97; __yjs_duid=1_5eebbceeddec85cf72e17b95994355071646122377960; REALTIME_TRANS_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; APPGUIDE_10_0_2=1; BAIDUID=68FE11EE3BE9ADA190124B58EB36B628:FG=1; MCITY=-289%3A; H_PS_PSSID=; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1666148184,1666156674,1666168716,1666248882; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1666248882; BAIDUID_BFESS=68FE11EE3BE9ADA190124B58EB36B628:FG=1; delPer=0; PSINO=5; BA_HECTOR=218l21alahak2g00ak0k45e51hl268u1a; ZFY=tzTetLMt:AQKpcqHBdtdKVRVB8GNVB7XLc:BQRFb:BJvjQ:C; BDRCVFR[b3Q6T2w4uHD]=qdOo65HpBYTTh7znWc8mvqV; ab_sr=1.0.1_NWU3MmM0NWQyOWQxZmY5ZTBhZDNhODcwNTBmN2JkMjM3YmVjZWFlNzE2ODJmZWFjMzNhNWQwM2RmZDc4NGQ4ODJiMTVhMGUxZWM4YzkzMmI2ZDY2N2YzMWY0MWFkOWZkMjFkNzQ0YTc1NmFjOWJhNzRmZjVlZjUwZmNmNjc1ZTlhNzYxYTEzODkxY2NiZWMzOGVmMGIwMWFlYmNjNzhlYg==',
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
    }
    response = requests.post(url=url, data=form_data, headers=headers, timeout=0.5)
    content = json.loads(response.content)
    mean_list = []
    if "result" in content:
        # Parse the embedded document as JSON instead of eval(): eval ran
        # remote data as code, and the old null -> "" text substitution
        # corrupted any text containing "null".
        raw = content["result"]
        res = (json.loads(raw) if isinstance(raw, str) else raw)["content"]
        for sense in res[0]['mean']:
            # Only the meanings are collected; the "pre" tag is not used.
            mean_list.extend(sense["cont"])
    if "data" in content:
        mean_list.extend(item['dst'] for item in content['data'])
    return mean_list
def KM_en2ch(text):
    """Look `text` up on the KM dictionary (https://kmcha.com) and scrape the meanings.

    :param text: English word or phrase.
    :return: the <li> items of the first <ul> on the page joined with
        ";#;", with "&lt;"/"&gt;" decoded and entries mentioning a person's
        name (人名) skipped; "" when the page has no <ul>.
    :raises requests.RequestException: on network failure or timeout.
    """
    word = str(text).strip().replace(" ", "%20").replace("!", " ")  # .replace(".", "")
    r = requests.get("https://kmcha.com/dict/{}".format(word.strip()), timeout=5)
    page = r.content.decode('utf8')
    res_means = re.findall(r"<ul>(.+?)</ul>", page, flags=re.S)
    if res_means:
        means = re.findall(r'<li(?=[\s>])[^>]*?>(.+?)</li>', res_means[0])
        # Decode the escaped angle brackets left in the item text.
        means = [i.replace("&lt;", "<").replace("&gt;", ">")
                 for i in means if "人名" not in i]
        return ";#;".join(means)
    return ""
def Bing_en2ch(text):
    """Look `text` up on the Bing dictionary (https://cn.bing.com/dict/) and scrape it.

    Three groups of meanings are collected from the page (the header list,
    the block with "bil b_primtxt" spans and the hidden "crossid" table),
    plus the meanings tagged "网络" (web), and merged into one string.

    :param text: English word or phrase.
    :return: tuple ``(mean_text, synonyms, antonyms)``: the merged meanings
        as a string with ";<br>" separators ("" when nothing was found) and
        two lists of strings scraped from the "synoid"/"antoid" blocks.
    :raises requests.RequestException: on network failure or timeout.
    """
    print("raw_word:", text)
    # Collapse every run of (non-breaking) spaces into one space. The old
    # `while " " in word` loop never terminated for multi-word input.
    word = re.sub(r"[ \u00a0]+", " ", str(text).replace("!", " ")).strip()
    # NOTE(review): the source's indentation was lost; the trailing-ellipsis
    # strip is assumed to be the only statement guarded by this `if`.
    if len(re.findall(r"…+|\.{3,}", word)) > 1:
        word = re.sub(r"(…+|\.{3,})$", "", word.strip())
    word = re.sub(r"(…+|\.{3,})\s*", "…", word.strip())
    # word = word.replace(" ", "+").replace(".", "").strip()
    print(word)

    r = requests.get("https://cn.bing.com/dict/{}".format(word), timeout=5)
    page = r.content.decode('utf8')
    if '<div class=" contentPadding">' not in page:
        # Fall back to the search URL when the direct URL has no entry body.
        r = requests.get("https://cn.bing.com/dict/search?q={}".format(word), timeout=5)
        page = r.content.decode('utf8')
    a = re.findall('<div class=" contentPadding">(.+?)</ul></div><!--foo--></footer><script type="text/javascript">',
                   page, flags=re.S)

    all_mean = []
    mean_net = []
    mean_text = ""
    synonyms = []
    antonyms = []
    if a:
        body = a[0]

        # ---- group 1: meanings listed at the top of the entry ----
        mean1 = []
        mean_head = re.findall("<ul>(.+?)</ul>", body, flags=re.S)
        if mean_head:
            mean_1 = re.findall('<span class="[^>]*?">(.*?)</span><span [^>]*?"><span>(.+?)</span></span>',
                                body, flags=re.S)
            for pos, mm in mean_1:
                if '<a href' in mm:
                    # Keep the link text, drop the anchor tag.
                    mm = re.sub(r'<a href=[^>]+?">(.+?)</a>', r"\1", mm)
                mm = re.sub('</?span>', "", mm)
                if pos != "网络":
                    mean1.append(pos + mm)
                else:
                    mean_net.append(mm)

        # ---- group 2: "bil b_primtxt" spans, one section per part of speech ----
        mean2 = []
        for section in re.split('<div class="li_pos"><div class="pos_lin">', body)[1:]:
            pos_found = re.findall('<div class="pos">(.+?)</div>', section)
            pos2 = pos_found[0] if pos_found else ""
            b = re.findall('<span class="bil b_primtxt">(.+?)</span>', section)
            # dict.fromkeys removes duplicates but keeps page order.
            mean2.append(pos2 + "; ".join(dict.fromkeys(b)))

        # ---- group 3: hidden "crossid" table, cells alternate pos / meanings ----
        mean3 = []
        pos3 = ""
        bi = re.findall('<div id="crossid" style="display:none;"><table>(.+?)</table>', body)
        if bi:
            for n, cell in enumerate(re.findall('<td>(.+?)</td>', bi[0])):
                if n % 2 == 0:
                    pos_found = re.findall('<div class="pos pos1">(.+?)</div>', cell)
                    pos3 = pos_found[0] if pos_found else ""
                else:
                    b1 = re.findall('<span class="p1-1 b_regtxt">(.+?)</span>', cell)
                    mean3.append(pos3 + "; ".join(dict.fromkeys(b1)))

        # ---- secondary web meanings not already present in mean_net ----
        b2 = re.findall('<div class="p1-1 b_regtxt">(.+?)</div>', body)
        print("b2:", b2)
        joined_net = ";".join(mean_net)
        # re.escape: the scraped text may contain regex metacharacters.
        b2 = [k for k in b2
              if k not in mean_net
              and re.search(r"([;；]\s*|^)" + re.escape(k) + r"([;；]\s*|$)", joined_net) is None]
        print("mean_net开始:", mean_net)
        if b2:
            mean_net.append("【次】->")
            mean_net.extend(b2)

        if mean1:
            all_mean.append("【第一种】" + ";<br>".join(mean1))
        if mean2:
            all_mean.append("【第二种】" + ";<br>".join(mean2))
        if mean3:
            all_mean.append("【第三种】" + ";<br>".join(mean3))
        if mean_net:
            # Drop duplicates while keeping first-seen order.
            all_mean.append("<网络>" + ";".join(dict.fromkeys(mean_net)))
        mean_text = ";<br>".join(all_mean)

        antoid = re.search('(<div id="antoid" .*?</div></div></div>)', body)
        if antoid:
            antonyms = re.findall('<span class="p1-4 b_alink">(.+?)</span>', antoid.group(1))
        synoid = re.search('(<div id="synoid" .*?</div></div></div>)', body)
        if synoid:
            synonyms = re.findall('<span class="p1-4 b_alink">(.+?)</span>', synoid.group(1))

    print("反义词:", antonyms)
    print("同义词:", synonyms)
    print(mean_text)
    return mean_text, synonyms, antonyms
def KM_ch2en(word_zh):
    """Look a Chinese word up on https://kmcha.com and return its English translations.

    :param word_zh: Chinese word or phrase.
    :return: list of the <span> texts found in the paragraph that follows
        the "的英文翻译" heading, or None when the page has no such heading
        for `word_zh`.
    :raises requests.RequestException: on network failure or when the
        0.5 s timeout expires.
    """
    r = requests.get("https://kmcha.com/dict/{}".format(word_zh.strip()), timeout=0.5)
    page = r.content.decode('utf8')
    if "{}的英文翻译".format(word_zh.strip()) in page:
        paragraphs = re.findall(r"的英文翻译</strong>[\s\n]*</p>[\s\n]*<p>(.+?)</p>",
                                page, flags=re.S)
        # Flatten the per-paragraph matches into a single list.
        return [span for paragraph in paragraphs
                for span in re.findall("<span>(.+?)</span>", paragraph)]
    return None
# Manual smoke test: runs a few live look-ups against the remote services.
# The commented-out sections below are earlier one-off batch jobs kept for
# reference; they point at machine-specific paths.
if __name__ == '__main__':
    import time
    import concurrent  # not used by the active code below
    t1 = time.time()  # only referenced by commented-out timing prints
    # KM_ch2en("高速公路")
    # Bing_en2ch("二百")
    # haici_zh2en("高速公路")
    print(haici_zh2en("稳"))
    # print(youdaoTranslate("at worst"))
    # print(haiciTranslate("at worst"))
    # print(KM_en2ch("at worst"))
    # trans_baidu("break down")
    # baiduTranslate("salad")
    # baiduTranslate("无害的")
    # print(en2ch_baidu("salad"))
    # print(en2ch_baidu("the voice stress analyser"))
    # print(haici_zh2en("乌七八糟的东西"))
    print(ch2en_baidu("稳"))
    print(KM_ch2en("稳"))
    # print(time.time() - t1)
    # st = time.time()
    # ch2en_baidu("征求意见")
    # print(time.time() - st)
    # baiduTranslate2("声音")
    # print("::::", time.time() - t1)
    # t = requests.get("https://fanyi.baidu.com/translate?aldtype=16047&query=break+down&keyfrom=baidu&smartresult=dict&lang=auto2zh#en/zh/break%20down")
    # print(t.content)
    Bing_en2ch("fresh water")
    # KM_en2ch("Mrs.")
    # -----------------------------------------------------------
    # Collect synonyms/antonyms from Bing
    # wordnet = json.loads(open(r"G:\zwj\WL\en2cn\files\词汇整理\res_without_baidu.json", encoding='utf8').read())
    # from Words.Dicts import phrases_dict
    #
    # near_synonym = {}
    # go = False
    # for i in list(phrases_dict.keys()):
    # # if str(i).strip() == 'windsurfing' and not go:
    # # go = True
    # # if not go:
    # # continue
    #
    # if re.search("[()()\d,,]", i):
    # pass
    # else:
    # mean_text, synonyms, antonyms = Bing_en2ch(i)
    # if mean_text:
    # with open(r"G:\zwj\WL\en2cn\files\Bing_phrases_dict.txt", 'a+', encoding='utf-8') as f1:
    # f1.write('"{}": "{}",\n'.format(i.strip(), mean_text.strip()))
    # f1.close()
    # if synonyms or antonyms:
    # with open(r"G:\zwj\WL\en2cn\files\Bing_phrases_synonyms", 'a+', encoding='utf-8') as f2:
    # dd = {"synonyms": synonyms,
    # "antonyms": antonyms}
    # f2.write('"{}": {},\n'.format(i.strip(), dd))
    # f2.close()
    # ------------------ Collect phrase meanings from Youdao / Haici / KM ---------
    # from Words.Phrase_dict import phrases_dict_tk
    #
    # new_en2ch = {}
    # for k, v in phrases_dict_tk.items():
    # k = k.strip()
    # try:
    # a = youdaoTranslate(k)
    # print(a)
    # if a:
    # new_en2ch[k] = a
    # b = haiciTranslate(k)
    # print(b)
    # if b:
    # new_en2ch[k] += ";##;" + b
    # c = KM_en2ch(k)
    # print(c)
    # if c:
    # new_en2ch[k] += ";##;" + c
    # except:
    # re_f = open(r"G:\zwj\WL\en2cn\files\yhk_phrases_dict.json", 'w', encoding='utf-8')
    # json.dump(new_en2ch, re_f, ensure_ascii=False)
    # ---------------- Collect word meanings from Baidu --------------
    import os  # not used by the active code
    # Input/output dictionaries of the commented-out Baidu job below; both
    # variables are unused while that job stays commented out.
    path1 = r"G:\zwj\WL\en2cn\files\main\en-ch_dict_from_3_website.json"
    path2 = r"G:\zwj\WL\en2cn\files\main\en-ch_dict_bd3.json"
    # ens = json.loads(open(path1, encoding="utf8").read())
    # from Words.Dicts import words_dict, more_words_dict
    # ens_bd = json.loads(open(path2, encoding="utf8").read())
    # print(len(ens_bd))
    # new_ens = [
    # 'Arab-Israeli',
    # 'English-language',
    # 'Feb.',
    # 'Israeli-Palestinian',
    # 'Mar.',
    # 'Mexican-American',
    # 'O. K.',
    # 'Oct.',
    # 'Spanish-language',
    # 'Spanish-speaking',
    # 'US-led',
    # 'Washington-based',
    # 'ad.',
    # 'agro-scientific',
    # 'all-white',
    # 'antispit',
    # 'arm-up',
    # 'asteriod',
    # 'at-bat',
    # 'at-large',
    # 'axe ax',
    # 'backward-curving',
    # 'beddings',
    # 'black-owned',
    # 'booby-hatch',
    # 'bumpkinly',
    # 'characteristi',
    # 'civil-military',
    # 'classificatio',
    # 'clean-burning',
    # 'correspondenc',
    # 'dress-down',
    # 'e-mailed',
    # 'early-morning',
    # 'eight-year',
    # 'eight-year-old',
    # 'eighteenth-century',
    # 'eighth-grade',
    # 'ex-gangster',
    # 'first-place',
    # 'five-day',
    # 'five-minute',
    # 'five-year-old',
    # 'four-day',
    # 'four-hour',
    # 'fourth-grade',
    # 'fourth-quarter',
    # 'free-trade',
    # 'generaliza-tion',
    # 'gloss-over',
    # 'gray-haired',
    # 'greyhound',
    # 'half-mile',
    # 'half-million',
    # 'hear-sensing',
    # 'hour-long',
    # 'identificatio',
    # 'iluminate',
    # 'interteam',
    # 'just-me-ism',
    # 'long-term-care',
    # 'low-wage',
    # 'malnutritious',
    # 'mileometre',
    # 'million-dollar',
    # 'mm-hmm',
    # 'more',
    # 'multimillion-dollar',
    # 'nine-year-old',
    # 'nineteenth-century',
    # 'no-fly',
    # 'non-verbal-leakage',
    # 'notwithstandi',
    # 'null',
    # 'nullify',
    # 'number-one',
    # 'one-party',
    # 'one-year',
    # 'over-emotional',
    # 'oˈclock',
    # 'public-relations',
    # 'reconciliatio',
    # 'saut',
    # 'saute',
    # 'scaper',
    # 'sea-shell',
    # 'second-floor',
    # 'second-largest',
    # 'second-round',
    # 'second-year',
    # 'self-critical',
    # 'seven-year',
    # 'seventeenth-century',
    # 'sideroad',
    # 'six-month',
    # 'six-week',
    # 'six-year-old',
    # 'sixteenth-century',
    # 'sixth-grade',
    # 'sling-shots',
    # 'small-business',
    # 'snowrafting',
    # 'sonwflake',
    # 'straightforwa',
    # 'stress-reducing',
    # 'sun-cream',
    # 'superintenden',
    # 'swim-suit',
    # 'teacher-librarian',
    # 'telecommunica',
    # 'ten-year',
    # 'the voice stress analyser',
    # 'third-largest',
    # 'three-day',
    # 'three-time',
    # 'three-week',
    # 'three-year',
    # 'two-year-old',
    # 'video-game',
    # ]
    # en2ch_bd = ens_bd.copy()
    # for k, v in ens.items():
    # for k in new_ens:
    # # print(k)
    # # if "(" in k or "(" in k:
    # # continue
    # # if k not in ens_bd:
    # # print("'{}', ".format(k))
    # if k in ens_bd:
    # print("重复:", k)
    # continue
    # print('++++++++++++++++【{}】++++++++++++++++'.format(k))
    # if "(" in k or "(" in k:
    # continue
    # try:
    # ch = en2ch_baidu(k)
    # print(2222222222, ch)
    # if ch:
    # en2ch_bd[k] = ch
    # except:
    # re_f = open(r"G:\zwj\WL\en2cn\files\main\en-ch_dict_bd3.json", 'w', encoding='utf-8')
    # json.dump(en2ch_bd, re_f, ensure_ascii=False)
    # re_f = open(r"G:\zwj\WL\en2cn\files\main\en-ch_dict_bd3.json", 'w', encoding='utf-8')
    # json.dump(en2ch_bd, re_f, ensure_ascii=False)
|