x1.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. # -*- coding:utf-8 -*-
  2. # import requests
  3. # from flask import Flask,request
  4. # from datetime import timedelta
  5. #
  6. # app = Flask(__name__)
  7. #
  8. #
  9. # @app.route('/bridge', methods=['POST', 'GET'])
  10. # def requests_bridge():
  11. # if request.method == 'post':
  12. # req_Info = request.json
  13. # if req_Info['method'] == 'get':
  14. # return requests.get(req_Info['url'])
  15. # elif req_Info['method'] == 'post':
  16. # if 'json' in req_Info:
  17. # return requests.post(req_Info['url'],json=req_Info['json'])
  18. # if 'data' in req_Info:
  19. # return requests.post(req_Info['url'], data=req_Info['data'])
  20. #
  21. #
  22. # if __name__ == '__main__':
  23. # app.run('0.0.0.0',8989)
  24. a = 0
  25. with open('D:\BaiduNetdiskDownload\sgns.literature.bigram-char\sgns.literature.bigram-char',encoding='utf8') as f:
  26. for i in f:
  27. a+=1
  28. if a>5:
  29. break
  30. print(i)
  31. import bcolz
  32. def load_embeddings(folder_path):
  33. """从 bcolz 加载 词/字 向量
  34. Args:
  35. - folder_path (str): 解压后的 bcolz rootdir(如 zh.64),
  36. 里面包含 2 个子目录 embeddings 和 words,
  37. 分别存储 嵌入向量 和 词(字)典
  38. Returns:
  39. - words (bcolz.carray): 词(字)典列表(bcolz carray 具有和 numpy array 类似的接口)
  40. - embeddings (bcolz.carray): 嵌入矩阵,每 1 行为 1 个 词向量/字向量,
  41. 其行号即为该 词(字) 在 words 中的索引编号
  42. """
  43. folder_path = folder_path.rstrip('/')
  44. words = bcolz.carray(rootdir='%s/words'%folder_path, mode='r')
  45. embeddings = bcolz.carray(rootdir='%s/embeddings'%folder_path, mode='r')
  46. return words, embeddings
  47. folder_path = r'C:\Users\mayn\Downloads\zh_char.64'
  48. words, embeddings = load_embeddings(folder_path)
  49. with open('char64d.txt','w',encoding='utf8') as f:
  50. f.write(str(len(words))+' '+'64\n')
  51. for i,j in zip(words, embeddings):
  52. f.write(i+' '+' '.join([str(n) for n in j])+'\n')