|
@@ -37,7 +37,7 @@ class Inverted_Index_Establish():
|
|
for i,data in enumerate(origin_dataset):
|
|
for i,data in enumerate(origin_dataset):
|
|
if "content_clear" not in data:
|
|
if "content_clear" not in data:
|
|
continue
|
|
continue
|
|
- seg_list = self.word_seg(data["content_clear"])
|
|
|
|
|
|
+ seg_list, _ = self.word_seg(data["content_clear"])
|
|
# 计算每篇文档长度和总文档长度
|
|
# 计算每篇文档长度和总文档长度
|
|
doc_length = len(seg_list)
|
|
doc_length = len(seg_list)
|
|
all_doc_length += doc_length
|
|
all_doc_length += doc_length
|