paper_text_structure.py 448 B

1234567891011121314
  1. from utils.washutil import HtmlWash
  2. class WordParseStructure:
  3. def __init__(self, html, wordid, is_reparse=0, subject=""):
  4. self.html = html
  5. self.is_reparse = is_reparse
  6. self.wordid = wordid
  7. self.subject = subject
  8. # 清洗
  9. self.row_list, self.subs2src, self.new_html = HtmlWash(self.html,self.wordid,
  10. self.is_reparse).html_cleal()