123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486 |
- # @Author : mbq
- # @File : sheet_adjust.py
- # @Time : 2019/9/26 0026 上午 10:12
- import copy
- import json
- import os
- import cv2
- import numpy as np
- ''' 根据CV检测矩形框 调整模型输出框'''
- ''' LSD直线检测 暂时改用 霍夫曼检测'''
- ADJUST_CLASS = ['solve', 'solve0', 'composition', 'composition0', 'choice', 'cloze', 'correction']
- # 用户自己计算阈值
- def custom_threshold(gray, type_inv=cv2.THRESH_BINARY):
- # gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) #把输入图像灰度化
- h, w = gray.shape[:2]
- m = np.reshape(gray, [1, w * h])
- mean = m.sum() / (w * h)
- ret, binary = cv2.threshold(gray, min(230, mean), 255, type_inv)
- return binary
- # 开运算
- def open_img(image_bin, kera=(5, 5)):
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kera)
- opening = cv2.morphologyEx(image_bin, cv2.MORPH_OPEN, kernel)
- return opening
- # 闭运算
- def close_img(image_bin, kera=(5, 5)):
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kera)
- closing = cv2.morphologyEx(image_bin, cv2.MORPH_CLOSE, kernel)
- return closing
- # 腐蚀
- def erode_img(image, kernel_size):
- kernel = np.ones((kernel_size, kernel_size), np.uint8)
- erosion = cv2.erode(image, kernel)
- return erosion
- # 膨胀
- def dilation_img(image, kernel_size):
- kernel = np.ones((kernel_size, kernel_size), np.uint8)
- dilaion = cv2.dilate(image, kernel)
- return dilaion
- # 图像padding
- def image_padding(image, padding_w, padding_h):
- h, w = image.shape[:2]
- if 3 == len(image.shape):
- image_new = np.zeros((h + padding_h, w + padding_w, 3), np.uint8)
- else:
- image_new = np.zeros((h + padding_h, w + padding_w), np.uint8)
- image_new[int(padding_h / 2):int(padding_h / 2) + h, int(padding_w / 2):int(padding_w / 2) + w] = image
- return image_new
- def horizontal_projection(img_bin, mut=0):
- """水平方向投影"""
- h, w = img_bin.shape[:2]
- hist = [0 for i in range(w)]
- for x in range(w):
- tmp = 0
- for y in range(h):
- if img_bin[y][x]:
- tmp += 1
- if tmp > mut:
- hist[x] = tmp
- return hist
- def vertical_projection(img_bin, mut=0):
- """垂直方向投影"""
- h, w = img_bin.shape[:2]
- hist = [0 for i in range(h)]
- for y in range(h):
- tmp = 0
- for x in range(w):
- if img_bin[y][x]:
- tmp += 1
- if tmp > mut:
- hist[y] = tmp
- return hist
- def get_white_blok_pos(arry, blok_w=0):
- """获取投影结果中的白色块"""
- pos = []
- start = 1
- x0 = 0
- x1 = 0
- for idx, val in enumerate(arry):
- if start:
- if val:
- x0 = idx
- start = 0
- else:
- if 0 == val:
- x1 = idx
- start = 1
- if x1 - x0 > blok_w:
- pos.append((x0, x1))
- if 0 == start:
- x1 = len(arry) - 1
- if x1 - x0 > blok_w:
- pos.append((x0, x1))
- return pos
- def get_decide_boberLpa(itemRe, itemGT):
- """
- IOU 计算
- """
- x1 = int(itemRe[0])
- y1 = int(itemRe[1])
- x1_ = int(itemRe[2])
- y1_ = int(itemRe[3])
- width1 = x1_ - x1
- height1 = y1_ - y1
- x2 = int(float(itemGT[0]))
- y2 = int(float(itemGT[1]))
- x2_ = int(float(itemGT[2]))
- y2_ = int(float(itemGT[3]))
- width2 = x2_ - x2
- height2 = y2_ - y2
- endx = max(x1_, x2_)
- startx = min(x1, x2)
- width = width1 + width2 - (endx - startx)
- endy = max(y1_, y2_)
- starty = min(y1, y2)
- height = height1 + height2 - (endy - starty)
- AreaJc = 0
- ratio = 0.0
- if width <= 0 or height <= 0:
- res = 0
- else:
- AreaJc = width * height
- AreaRe = width1 * height1
- AreaGT = width2 * height2
- ratio = float(AreaJc) / float((AreaGT + AreaRe - AreaJc))
- return ratio
- # 查找连通区域 微调专用 不通用
- def get_contours(image):
- # image = cv2.imread(img_path,0)
- # if debug: plt_imshow(image)
- image_binary = custom_threshold(image)
- # if debug: plt_imshow(image_binary)
- # if debug: cv2.imwrite(os.path.join(file_dir,"bin.jpg"),image_binary)
- image_dilation = open_img(image_binary, kera=(5, 1))
- image_dilation = open_img(image_dilation, kera=(1, 5))
- # if debug: plt_imshow(image_dilation)
- # if debug: cv2.imwrite(os.path.join(file_dir,"dia.jpg"),image_dilation)
- _, labels, stats, centers = cv2.connectedComponentsWithStats(image_dilation)
- rects = []
- img_h, img_w = image.shape[:2]
- for box in stats:
- x0 = int(box[0])
- y0 = int(box[1])
- w = int(box[2])
- h = int(box[3])
- area = int(box[4])
- if w < img_w / 5 or w > img_w - 10 or h < 50 or h > img_h - 10: # 常见框大小限定
- continue
- if img_w > img_h: # 多栏答题卡 w大于宽度的一般肯定是错误的框
- if w > img_w / 2:
- continue
- if area < w * h / 3: # 大框套小框 中空白色区域形成的面积 排除
- continue
- rects.append((x0, y0, x0 + w, y0 + h))
- return rects
- def adjust_alarm_info(image, box):
- """
- 调整上下坐标 排除内部含有了边框线情况
- 左右调整只有100%确认的 从边界开始遇到的第一个非0列就终止 误伤情况太多
- LSD算法转不过来 霍夫曼检测不靠谱 连通区域测试后排除误伤情况太多 改用投影
- image: 灰度 非 二值图
- box : 坐标信息
- """
- # debug
- # debug = 0
- if image is None:
- print("error image")
- return box
- img_box = image[box[1]:box[3], box[0]:box[2]]
- h, w = img_box.shape[:2]
- # debug
- # if debug: ia.imshow(img_box)
- img_bin = custom_threshold(img_box, type_inv=cv2.THRESH_BINARY_INV)
- img_padding = image_padding(img_bin, 100, 100)
- img_close = close_img(img_padding, kera=(30, 3))
- img_back = img_close[50:50 + h, 50:50 + w]
- # debug
- # if debug: ia.imshow(img_back)
- # 垂直投影 找 left top
- hist_vert = vertical_projection(img_back, mut=h / 4)
- # debug
- # if debug:
- # print(hist_vert)
- # black_img_h = np.zeros_like(img_back)
- # for idx, val in enumerate(hist_vert):
- # if (val == 0):
- # continue
- # for x in range(val):
- # black_img_h[idx][x] = 255
- # ia.imshow(black_img_h)
- y_pos = get_white_blok_pos(hist_vert, 2)
- if (len(y_pos) == 0):
- return box
- # 获取最大的作为alarm_info的区域
- max_id = 0
- max_len = 0
- for idx, pos_tmp in enumerate(y_pos):
- pos_len = abs(pos_tmp[1] - pos_tmp[0])
- if (pos_len > max_len):
- max_id = idx
- max_len = pos_len
- # debug to show
- # if debug:
- # img_show = cv2.cvtColor(img_box, cv2.COLOR_GRAY2BGR)
- # cv2.line(img_show, (0, y_pos[max_id][0]), (w - 1, y_pos[max_id][0]), (0, 0, 255), 2)
- # cv2.line(img_show, (0, y_pos[max_id][1]), (w - 1, y_pos[max_id][1]), (0, 0, 255), 2)
- # ia.imshow(img_show)
- # 左右 的微调
- img_next = img_bin[y_pos[max_id][0]:y_pos[max_id][1], 0:w - 1]
- img_lr_close = open_img(img_next, kera=(1, 1))
- img_lr_close = close_img(img_lr_close, kera=(3, 1))
- # debug
- # if debug: ia.imshow(img_lr_close)
- hist_proj = horizontal_projection(img_lr_close, mut=1)
- w_len = len(hist_proj)
- new_left = 0
- new_right = w_len - 1
- b_flag = [0, 0]
- for idx, val in enumerate(hist_proj):
- if 0 == b_flag[0]:
- if val != 0:
- new_left = idx
- b_flag[0] = 1
- if 0 == b_flag[1]:
- if hist_proj[w_len - 1 - idx] != 0:
- new_right = w_len - idx - 1
- b_flag[1] = 1
- if b_flag[0] and b_flag[1]:
- break
- new_top = box[1] + y_pos[max_id][0]
- new_bottom = box[1] + y_pos[max_id][1]
- new_left += box[0]
- new_right += box[0]
- box[1] = new_top
- box[3] = new_bottom
- box[0] = new_left
- box[2] = new_right
- return box
- def adjust_zg_info(image, box, cv_boxes):
- """
- 调整大区域的box
- 1、cvbox要与box纵坐标有交叉
- 2、IOU值大于0。8时 默认相等拷贝区域坐标
- """
- if image is None:
- return box
- min_rotio = 0.5
- img_box = image[box[1]:box[3], box[0]:box[2]]
- h, w = img_box.shape[:2]
- jc_boxes = [] # 记录与box存在交叉的 cv_boxes
- tmp_rotio = 0
- rc_mz = box
- for idx, cv_box in enumerate(cv_boxes):
- if (box[1] - 10) > (cv_box[3]): # 首先要保证纵坐标有交叉
- continue
- if (box[3] + 10) < cv_box[1]:
- continue
- jc_x = max(box[0], cv_box[0])
- jc_y = min(box[2], cv_box[2])
- bj_x = min(box[0], cv_box[0])
- bj_y = max(box[2], cv_box[2])
- rt = abs(jc_y - jc_x) * 1.0 / abs(bj_y - bj_x) * 1.0
- if rt < min_rotio:
- continue
- jc_boxes.append(cv_box)
- if rt > tmp_rotio:
- rc_mz = cv_box
- tmp_rotio = rt
- # 判断 调整
- if len(jc_boxes) != 0:
- box[0] = rc_mz[0]
- box[2] = rc_mz[2]
- b_find = 0
- frotio = 0.0
- rc_biggst = rc_mz
- for mz_box in jc_boxes:
- iou = get_decide_boberLpa(mz_box, box)
- if iou > 0.8:
- b_find = 1
- frotio = iou
- rc_biggst = mz_box
- if b_find:
- box[1] = rc_biggst[1]
- box[3] = rc_biggst[3]
- return box
- def adjust_item_edge(img_path, reback_json):
- """
- 根据图像的CV分析结果和 模型直接输出结果 对模型输出的边框做微调
- 1、外接矩形查找
- 2、LSD直线检测 替换方法 霍夫曼直线检测
- 3、只处理有把握的情况 任何含有不确定因素的一律不作任何处理
- img_path: 待处理图像绝对路径
- re_json : 模型输出结果
- """
- debug = 1
- # 存放新的结果
- re_json = copy.deepcopy(reback_json)
- if not os.path.exists(img_path) or 0 == len(re_json):
- return
- image = cv2.imread(img_path, 0)
- # 获取CV连通区域结果
- cv_boxes = get_contours(image)
- if debug:
- print(len(cv_boxes))
- image_draw = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
- # for item in cv_boxes:
- # cv2.rectangle(image_draw, (item[0], item[1]), (item[2], item[3]), (0, 0, 250), 2)
- # cv2.imwrite(os.path.join(file_dir, "show.jpg"), image_draw)
- # 循环处理指定的box
- for idx, item in enumerate(re_json):
- name = item["class_name"]
- box = [item["bounding_box"]["xmin"], item["bounding_box"]["ymin"], item["bounding_box"]["xmax"],
- item["bounding_box"]["ymax"]]
- # print(name ,box)
- if name == "alarm_info" or name == "page" or name == "type_score":
- if debug:
- cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
- new_box = adjust_alarm_info(image, box)
- if debug:
- cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
- item["bounding_box"]["xmin"] = box[0]
- item["bounding_box"]["xmax"] = box[2]
- item["bounding_box"]["ymin"] = box[1]
- item["bounding_box"]["ymax"] = box[3]
- elif (name == "solve" or name == "solve0"
- or name == "cloze" or name == "choice"
- or name == "composition" or name == "composition0"):
- if debug:
- cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
- new_box = adjust_zg_info(image, box, cv_boxes)
- if debug:
- cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
- item["bounding_box"]["xmin"] = box[0]
- item["bounding_box"]["xmax"] = box[2]
- item["bounding_box"]["ymin"] = box[1]
- item["bounding_box"]["ymax"] = box[3]
- else:
- pass
- if debug:
- cv2.imwrite(os.path.join(r"E:\data\aug_img\adjust", "show.jpg"), image_draw)
- return re_json
- def adjust_item_edge_by_gray_image(image, reback_json):
- '''
- 根据图像的CV分析结果和 模型直接输出结果 对模型输出的边框做微调
- 1、外接矩形查找
- 2、LSD直线检测 替换方法 霍夫曼直线检测
- 3、只处理有把握的情况 任何含有不确定因素的一律不作任何处理
- img_path: 待处理图像绝对路径
- re_json : 模型输出结果
- '''
- debug = 0
- re_json = copy.deepcopy(reback_json)
- # 存放新的结果
- # 获取CV连通区域结果
- if len(image.shape) > 2:
- image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
- cv_boxes = get_contours(image)
- if debug:
- print(len(cv_boxes))
- image_draw = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
- # for item in cv_boxes:
- # cv2.rectangle(image_draw, (item[0], item[1]), (item[2], item[3]), (0, 0, 250), 2)
- # cv2.imwrite(os.path.join(file_dir, "show.jpg"), image_draw)
- # 循环处理指定的box
- for idx, item in enumerate(re_json):
- name = item["class_name"]
- box = [item["bounding_box"]["xmin"], item["bounding_box"]["ymin"], item["bounding_box"]["xmax"],
- item["bounding_box"]["ymax"]]
- # print(name ,box)
- if name == "alarm_info" or name == "page" or name == "type_score":
- if debug:
- cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
- new_box = adjust_alarm_info(image, box)
- if debug:
- cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
- item["bounding_box"]["xmin"] = box[0]
- item["bounding_box"]["xmax"] = box[2]
- item["bounding_box"]["ymin"] = box[1]
- item["bounding_box"]["ymax"] = box[3]
- elif name in ADJUST_CLASS:
- if debug:
- cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
- new_box = adjust_zg_info(image, box, cv_boxes)
- if debug:
- cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
- item["bounding_box"]["xmin"] = box[0]
- item["bounding_box"]["xmax"] = box[2]
- item["bounding_box"]["ymin"] = box[1]
- item["bounding_box"]["ymax"] = box[3]
- else:
- pass
- if debug:
- cv2.imwrite(os.path.join(r"E:\data\aug_img\adjust", "show.jpg"), image_draw)
- return re_json
- # if __name__ == '__main__':
- # '''服务端传入数据为json内数据 和图像
- # 使用方法:
- # new_json = adjust_item_edge(img_path, key_json)
- # key_json : regions 数组
- # new_json : 调整后的结果 size == key_json.size
- # '''
- #
- # print("前置解析")
- # file_dir = r"E:\data\aug_img\adjust"
- # img_path = os.path.join(file_dir, "7642572.jpg")
- # json_path = os.path.join(file_dir, "7642572.json")
- # print(img_path, json_path)
- # # 读取json
- # output_ios = open(json_path).read()
- # output_json = json.loads(output_ios)
- # for item in output_json:
- # # print(item,output_json[item])
- # if (item == "regions"):
- # key_json = output_json[item]
- # # print(len(key_json))
- # for idx, item in enumerate(key_json):
- # # print(key_json[idx])
- # if (item["class_name"] == "alarm_info"):
- # key_json[idx]["bounding_box"]["ymin"] -= 10
- # key_json[idx]["bounding_box"]["ymax"] += 10
- # # print(key_json[idx])
- #
- # new_json = adjust_item_edge(img_path, key_json)
- # for idx, val in enumerate(key_json):
- # print(key_json[idx])
- # print(new_json[idx])
|