|
@@ -40,7 +40,7 @@ def hnsw_retrieve():
|
|
type="hnsw_retrieve接收",
|
|
type="hnsw_retrieve接收",
|
|
message=retrieve_dict))
|
|
message=retrieve_dict))
|
|
# hnsw模型查重
|
|
# hnsw模型查重
|
|
- post_url = r"http://127.0.0.1:8068/topic_retrieval_http"
|
|
|
|
|
|
+ post_url = r"http://localhost:8068/topic_retrieval_http"
|
|
res_list = hnsw_model.retrieve(retrieve_list, post_url, similar, doc_flag)
|
|
res_list = hnsw_model.retrieve(retrieve_list, post_url, similar, doc_flag)
|
|
# 返回日志采集
|
|
# 返回日志采集
|
|
retrieval_logger.info(config.log_msg.format(id=id_name,
|
|
retrieval_logger.info(config.log_msg.format(id=id_name,
|
|
@@ -59,7 +59,7 @@ def image_retrieve():
|
|
retrieve_img = retrieve_dict["content"]
|
|
retrieve_img = retrieve_dict["content"]
|
|
similar = retrieve_dict["similar"] / 100
|
|
similar = retrieve_dict["similar"] / 100
|
|
# 图片查重链接
|
|
# 图片查重链接
|
|
- post_url = r"http://127.0.0.1:8068/img_retrieval_http"
|
|
|
|
|
|
+ post_url = r"http://localhost:8068/img_retrieval_http"
|
|
img_dict = dict(img_url=retrieve_img, img_threshold=similar, img_max_num=30)
|
|
img_dict = dict(img_url=retrieve_img, img_threshold=similar, img_max_num=30)
|
|
try:
|
|
try:
|
|
res_list = requests.post(post_url, json=img_dict, timeout=20).json()
|
|
res_list = requests.post(post_url, json=img_dict, timeout=20).json()
|
|
@@ -111,11 +111,12 @@ def info_retrieve():
|
|
id_list, seg_list = ir_model(sentence)
|
|
id_list, seg_list = ir_model(sentence)
|
|
id_list = [int(idx) for idx in id_list]
|
|
id_list = [int(idx) for idx in id_list]
|
|
# 语义相似度查重
|
|
# 语义相似度查重
|
|
- if len(sentence) > 15:
|
|
|
|
|
|
+ retrieve_list = [dict(stem=sentence, topic_num=1)]
|
|
|
|
+ if len(sentence) > 30:
|
|
retrieve_list = [dict(stem=sentence, topic_num=1)]
|
|
retrieve_list = [dict(stem=sentence, topic_num=1)]
|
|
doc_list = hnsw_model.retrieve(retrieve_list, '', similar, False)[0]["semantics"]
|
|
doc_list = hnsw_model.retrieve(retrieve_list, '', similar, False)[0]["semantics"]
|
|
else:
|
|
else:
|
|
- doc_list = []
|
|
|
|
|
|
+ doc_list = hnsw_model.retrieve(retrieve_list, '', similar, False, 0.3)[0]["semantics"]
|
|
res_dict = dict(info=[id_list, seg_list], doc=doc_list)
|
|
res_dict = dict(info=[id_list, seg_list], doc=doc_list)
|
|
# 返回日志采集
|
|
# 返回日志采集
|
|
retrieval_logger.info(config.log_msg.format(id="文本查重",
|
|
retrieval_logger.info(config.log_msg.format(id="文本查重",
|