# paper_text_structure.py

from utils.washutil import HtmlWash


  2. class WordParseStructure:
  3. def __init__(self, html, wordid, subject="", is_reparse=0):
  4. self.html = html
  5. self.is_reparse = is_reparse
  6. self.wordid = wordid
  7. self.subject = subject
  8. # 清洗
  9. self.row_list, self.subs2src, self.new_html = HtmlWash(self.html, self.wordid,
  10. self.subject, self.is_reparse).html_cleal()