x.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. # -*- coding:utf-8 -*-
  2. # import nltk
  3. import re
  4. # from collections import Counter
  5. # def get_key_word(s:str):
  6. # tokens = nltk.word_tokenize(s.lower())
  7. # word_tag = nltk.pos_tag(tokens)
  8. # print(word_tag)
  9. # ns = []
  10. # for i in word_tag:
  11. # if i[1] in ['NN','NNS'] and i[1] not in ['s','i']:
  12. # ns.append(i[0])
  13. # res = Counter(ns).most_common()
  14. # if len(res):
  15. # for i in res:
  16. # if len(i[0])>2 and i[0] not in ['times'] and re.search('[A-Za-z]+$',i[0]):
  17. # return i[0]
  18. # return s.split()[0]
  19. #
  20. #
  21. #
  22. # s = get_key_word("An Italian company has created the world’s first underwater farm.</br>The station named Nemo’s Garden consists of five biospheres(生物圈)</br>fixed to the bottom of the sea off the coast of Savona, Italy. They’re being used to grow strawberries, beans, garlic, and lettuce.</br>“The main target of this project is to create other sources of plant production in areas where environmental conditions make it difficult to grow crops through traditional farming, including lack of fresh water, fertile soil, and extreme temperature changes,’’ said project spokesperson Luca Gamberini.</br>The five biospheres, currently floating between depths of 18 and', 'final_draft': 1, '_id': ObjectId('5dc2724289d8aa1e84586b64'), 'status': 0, 'type': 1, 'all_content': 'An Italian company has created the world’s first underwater farm.</br>The station named Nemo’s Garden consists of five biospheres(生物圈)</br>fixed to the bottom of the sea off the coast of Savona, Italy. They’re being used to grow strawberries, beans, garlic, and lettuce.</br>“The main target of this project is to create other sources of plant production in areas where environmental conditions make it difficult to grow crops through traditional farming, including lack of fresh water, fertile soil, and extreme temperature changes,’’ said project spokesperson Luca Gamberini.</br>The five biospheres, currently floating between depths of 18 and feet, are constantly watched by Ocean Reef Group —— a diving equipment company——from a control center on dry land. According to various news reports,\nthe plants are watered by drips of water on the inner walls of the biospheres.\nWith a constant temperature of 79 degrees day and night, and humidity(湿度)\nat around 83 percent, the conditions are ideal for plants to grow well. The high amount of carbon dioxide also encourages growth.\nOcean Reef president Sergio Gamberini said he came up with the idea of growing plants underwater during a summer vacation in Italy. 
In his own words, he wanted to do something that’s different and to show the beauty of the ocean. After two years of failed attempts, they finally were able to get these five biospheres working. Their success may lay the foundation for a new form of crop production that can be done without harming the environment.\nIn fact, the biosphere seems to be attracting wildlife. Octopuses(章鱼)\nand endangered seahorses are taking shelter under the structure, while crabs(螃蟹)\nare climbing up the anchors and into the greenhouses. None of the creatures have damaged the plants so far. “It’s so kind of science-fiction to see these two different forms of life interact,” Gamberini said. Why was the underwater farm created? Sergio Gamberini got the idea of growing plants underwater _______. The last paragraph is written mainly to tell us that _____. What is the text mainly about")
  23. #
  24. # print(s)
  # Scratch check executed at import time: re.match only matches at the start of
  # the string, so 'a' against 'bbva' finds nothing and None is printed.
  print(re.compile('a').match('bbva'.strip()))
  def db_checker(mode: int):
      """Assign every unchecked record of type *mode* to a similarity group.

      One record matching ``{'checked': 0, 'type': mode, 'status': 1}`` is
      fetched per iteration, ``get_similar`` is asked for its matches, and the
      fields ``group`` / ``final_draft`` / ``checked`` / ``sim_score`` are
      written back through ``ld.update``. The loop stops when no such record
      is left. The current group counter and the elapsed time of each
      iteration are printed.

      Values of *mode* (translated from the original comments):
          0 -> cloud question bank
          1 -> temporary question bank
          2 -> selected essays
      """
      group = get_group()
      while True:
          start = time.time()
          print(group)
          data = ld.Executor().find_one({'checked': 0, 'type': mode, 'status': 1})
          if not data:
              break
          res = get_similar(data)
          if res:
              group = _group_with_matches(data, res, group)
          else:
              # Nothing similar was reported: the record opens a group of its
              # own and is stored as final draft with a score of 1.
              group += 1
              _store_grouping(data, group, 1, 1)
          print(time.time() - start)


  def _group_with_matches(data, res, group):
      """Group *data* together with the topics listed in *res*.

      *res* is consumed as ``(topic_id, score)`` pairs. Returns the group
      counter, which is incremented when a new group had to be opened.
      """
      topic_ids = [pair[0] for pair in res]
      scores = dict(res)
      checked_matches = ld.Executor().find({'topic_id': {'$in': topic_ids}, 'checked': 1})

      anchor = None
      anchor_group = -1
      # NOTE(review): if this is a PyMongo cursor, Cursor.count() no longer
      # exists in PyMongo 4 -- confirm the driver version in use.
      if checked_matches.count() > 0:
          # If several checked matches carry final_draft == 1, the last one
          # iterated determines the group.
          for match in checked_matches:
              if match.get('final_draft') == 1:
                  anchor_group = match['group']
                  anchor = match

      if anchor_group > 0:
          # An already-checked final draft exists: join its group as non-final.
          _store_grouping(data, anchor_group, 0, scores[data['topic_id']])
          pending = ld.Executor().find({'topic_id': {'$in': topic_ids}, 'checked': 0})
          for doc in pending:
              try:
                  score = Radio.fast_ratio(doc['all_content'], anchor['all_content'])
              except Exception as e:
                  # Best effort: report the failure and fall back to a fixed score.
                  print(e)
                  score = 0.8
              _store_grouping(doc, anchor_group, 0, score)
          return group

      # No usable final draft among the checked matches: open a new group with
      # *data* as its final draft.
      group += 1
      _store_grouping(data, group, 1, scores[data['topic_id']])
      # NOTE(review): this query has no 'checked' filter, so it also returns
      # already-checked records and -- since data['topic_id'] is a key of
      # `scores` -- presumably *data* itself, whose final_draft would then be
      # reset to 0. Confirm this is intended.
      for doc in ld.Executor().find({'topic_id': {'$in': topic_ids}}):
          _store_grouping(doc, group, 0, scores[doc['topic_id']])
      return group


  def _store_grouping(doc, group_id, final_draft, sim_score):
      """Set the four grouping fields on *doc* and persist it via ``ld.update``."""
      doc['group'] = group_id
      doc['final_draft'] = final_draft
      doc['checked'] = 1
      doc['sim_score'] = sim_score
      ld.update(doc, 'exis')