data_preproccessing.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Wed Nov 21 10:47:49 2018
  4. @author: Youly
  5. """
  6. def Separate_items_answers(path):
  7. """
  8. test paper have 3 types:1.answer is at the final sign:#########################
  9. 2.answer is in the paper
  10. 3.no answer sign:************************************
  11. :param path:where your txt_file is
  12. :return:list:items;list:answers << one line content is one element
  13. """
  14. items = []
  15. answers = []
  16. try:
  17. with open(path,'r',encoding='utf-8') as f:
  18. # print('txt的所有内容: %s' % f.read())
  19. content = f.readlines()
  20. # print(content)
  21. answers_start = 0
  22. if '####################' in ''.join(content): # if txt has no ##############,has no answers or answers among test_paper,do not deal with this kind of txt
  23. for i,line in enumerate(content):
  24. Nline = line.replace(' ','').replace('#','')
  25. if line == '' or line.rstrip() == '': # \n
  26. # items.append('')
  27. pass
  28. elif Nline.rstrip() != '': # not :################### not \n
  29. items.append(line)
  30. elif '####################' in line and Nline.rstrip() == '': # only get data before ################
  31. answers_start = i+1
  32. break
  33. for line in content[answers_start:]:
  34. Nline = line.replace(' ', '').replace('#', '')
  35. if line == '' or line.rstrip() == '': # \n
  36. answers.append('')
  37. elif Nline.rstrip() != '': # not :################### not \n
  38. answers.append(line)
  39. elif '*******************' in ''.join(content): # no answer
  40. for i,line in enumerate(content):
  41. Nline = line.replace(' ', '').replace('#', '')
  42. if line == '' or line.rstrip() == '': # \n
  43. # items.append('')
  44. pass
  45. elif line.rstrip() != '': # not :################### not \n
  46. items.append(line)
  47. elif '*************************' in line and Nline.rstrip() == '': # only get data before ################
  48. answers_start = i+1
  49. break
  50. return items, answers
  51. except:
  52. print('txt读取失败!')
  53. if __name__ == '__main__':
  54. items, ans = Separate_items_answers(r'./test_paper.txt')