123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211 |
- # @Author : liu fan
- import numpy as np
- import tensorflow as tf
- from segment.sheet_resolve.lib.ssd_model.utils import label_map_util, ops as utils_ops
- from segment.sheet_resolve.tools import tf_settings
- from segment.sheet_resolve.tools.tf_sess import SsdSess
- from PIL import Image
# Detector sessions are built once, when this module is imported, and are
# shared by every inference call below.
tf_sess_dict = dict(choice_ssd=SsdSess('choice_ssd'))

# Module-level handles to the choice-question SSD session and its graph.
choice_ssd_sess = tf_sess_dict['choice_ssd']
sess, detection_graph = choice_ssd_sess.sess, choice_ssd_sess.graph
def load_image_into_numpy_array(image):
    """Convert a PIL image into a ``(height, width, 3)`` uint8 RGB array.

    Args:
        image: a PIL image in any mode; it is converted to ``'RGB'`` first.

    Returns:
        A new ``numpy.ndarray`` of dtype ``uint8`` with shape
        ``(height, width, 3)``.
    """
    rgb_image = image.convert('RGB')
    # Reading the pixels through the image's array interface avoids building
    # a Python object per pixel, which the former
    # ``np.array(image.getdata()).reshape(...)`` route did; shape, dtype and
    # values of the result are the same.
    return np.array(rgb_image, dtype=np.uint8)
def run_inference_for_single_image(image):
    """Run the module-level detection graph on a single image.

    Args:
        image: image array fed to ``image_tensor:0`` after a batch axis is
            added in front. ``image.shape[0]`` and ``image.shape[1]`` are
            used as the target size when the graph exposes instance masks.

    Returns:
        dict with the batch axis stripped from every entry:
        ``'num_detections'`` (int), ``'detection_classes'`` (uint8 array),
        ``'detection_boxes'``, ``'detection_scores'`` and, only when the
        graph provides it, ``'detection_masks'``.
    """
    wanted_outputs = (
        'num_detections', 'detection_boxes', 'detection_scores',
        'detection_classes', 'detection_masks',
    )

    # Names of every tensor the graph produces, used to see which of the
    # wanted outputs this particular model actually exposes.
    graph_tensor_names = set()
    for operation in detection_graph.get_operations():
        graph_tensor_names.update(out.name for out in operation.outputs)

    tensor_dict = {}
    for key in wanted_outputs:
        tensor_name = '{}:0'.format(key)
        if tensor_name in graph_tensor_names:
            tensor_dict[key] = detection_graph.get_tensor_by_name(tensor_name)

    if 'detection_masks' in tensor_dict:
        # Single-image processing: drop the batch axis before slicing.
        squeezed_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        squeezed_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Keep only the real detections, then translate the masks from box
        # coordinates to image coordinates at the image's size.
        valid_count = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        kept_boxes = tf.slice(squeezed_boxes, [0, 0], [valid_count, -1])
        kept_masks = tf.slice(squeezed_masks, [0, 0, 0], [valid_count, -1, -1])
        image_masks = utils_ops.reframe_box_masks_to_image_masks(
            kept_masks, kept_boxes, image.shape[0], image.shape[1])
        binary_masks = tf.cast(tf.greater(image_masks, 0.5), tf.uint8)
        # Put the batch axis back so every output follows the same layout.
        tensor_dict['detection_masks'] = tf.expand_dims(binary_masks, 0)

    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    batched_image = np.expand_dims(image, 0)
    output_dict = sess.run(tensor_dict, feed_dict={image_tensor: batched_image})

    # Strip the batch axis and convert the types callers rely on.
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    class_ids = output_dict['detection_classes'][0]
    output_dict['detection_classes'] = class_ids.astype(np.uint8)
    for key in ('detection_boxes', 'detection_scores'):
        output_dict[key] = output_dict[key][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
def image_detect(image_np, category, score_threshold):
    """Detect objects in an image and return them in pixel coordinates.

    Args:
        image_np: despite the name, a PIL image; it is converted to an RGB
            array by ``load_image_into_numpy_array`` before inference.
        category: mapping from class index to a dict holding a ``'name'`` key.
        score_threshold: only detections scoring strictly above this value
            are returned.

    Returns:
        list of ``(x0, y0, x1, y1, label_index, score, label_name)`` tuples,
        with the box corners truncated to integer pixels.
    """
    image_np = load_image_into_numpy_array(image_np)
    height, width = image_np.shape[0], image_np.shape[1]

    with tf.device("/device:GPU:{}".format(0)):
        output_dict = run_inference_for_single_image(image_np)

    scores = output_dict['detection_scores']
    keep = scores > score_threshold
    # One row per kept detection: four box values, then score, then class id.
    rows = np.column_stack([
        output_dict['detection_boxes'][keep],
        scores[keep],
        output_dict['detection_classes'][keep],
    ])

    detections = []
    for row in rows:
        # Boxes are scaled as [ymin, xmin, ymax, xmax] fractions of the image.
        top_px = int(row[0] * height)
        left_px = int(row[1] * width)
        bottom_px = int(row[2] * height)
        right_px = int(row[3] * width)
        class_id = int(row[5])
        detections.append((left_px, top_px, right_px, bottom_px,
                           class_id, row[4], category[class_id]['name']))
    return detections
def get_choice_m_row_and_col(left, top, image):
    """Estimate the row/column layout of a choice block from SSD detections.

    The crop is scaled so its longer side is 300 px, pasted at the top-left
    of a 300x300 canvas filled with 255, and passed to ``image_detect``.
    Detections are mapped back to the crop's own pixel grid and used to
    estimate how many rows and columns of options the block holds.

    Args:
        left: x offset added to every returned x coordinate.
        top: y offset added to every returned y coordinate.
        image: array accepted by ``PIL.Image.fromarray``.

    Returns:
        dict with keys ``'bounding_box'``, ``'single_height'``,
        ``'single_width'``, ``'rows'``, ``'cols'``, ``'class_name'`` and
        ``'all_small_coordinate'``; or ``{}`` when fewer than two objects are
        detected or none of them is usable.
    """
    # NOTE(review): block nesting was reconstructed from a listing whose
    # indentation had been lost - confirm against the original file.
    im_resize = 300  # side length of the square canvas given to the detector

    image_src = Image.fromarray(image)
    if image_src.mode == 'RGB':
        image_src = image_src.convert("L")
    w, h = image_src.size
    # Scale the longer side to im_resize and keep the aspect ratio; the short
    # side is clamped to 1 px so a very elongated crop cannot request a
    # zero-sized resize.
    if h > w:
        image_src = image_src.resize((max(1, int(im_resize / h * w)), im_resize))
    else:
        image_src = image_src.resize((im_resize, max(1, int(im_resize / w * h))))
    w_, h_ = image_src.size

    image_300 = Image.new(image_src.mode, (im_resize, im_resize), (255))
    image_300.paste(image_src, [0, 0, w_, h_])

    category_index = label_map_util.create_category_index_from_labelmap(
        tf_settings.choice_m_ssd_label, use_display_name=True)
    detections = image_detect(image_300, category_index, 0.5)
    if len(detections) <= 1:
        return {}

    # Keep detections that are not labelled 'T' and that end inside the
    # pasted image area, mapped back to the original crop's pixel grid.
    x_scale = w / w_
    y_scale = h / h_
    boxes = []
    for x0, y0, x1, y1, _label_index, _score, label_name in detections:
        if label_name == 'T' or x1 > w_ or y1 > h_:
            continue
        boxes.append((round(x0 * x_scale), round(y0 * y_scale),
                      round(x1 * x_scale), round(y1 * y_scale)))

    # Every detection may have been filtered out; without this guard the
    # statistics below fail on empty lists.
    if not boxes:
        return {}

    all_small_coordinate = [
        {'xmin': x0 + left, 'ymin': y0 + top,
         'xmax': x1 + left, 'ymax': y1 + top}
        for x0, y0, x1, y1 in boxes
    ]
    x_width_all = [x1 - x0 for x0, _, x1, _ in boxes]
    y_height_all = [y1 - y0 for _, y0, _, y1 in boxes]

    sorted_xmin = sorted(box[0] for box in boxes)
    sorted_ymin = sorted(box[1] for box in boxes)
    sorted_xmax = sorted(box[2] for box in boxes)
    sorted_ymax = sorted(box[3] for box in boxes)
    x_width_all_sorted = sorted(x_width_all, reverse=True)
    y_height_all_sorted = sorted(y_height_all, reverse=True)
    x_width_median = np.median(x_width_all)
    y_height_median = np.median(y_height_all)

    ssd_column = 1
    ssd_row = 1
    column_gaps = 0       # neighbouring xmin values more than 20 px apart
    row_gaps = 0          # neighbouring ymin values more than 10 px apart
    wide_column_gaps = 0  # column gaps wider than about two median widths
    wide_row_gaps = 0     # row gaps taller than about two median heights
    # NOTE(review): the index stops at len - 2, so the smallest width/height
    # (last entry of the descending lists) is never outlier-checked - confirm
    # this is intended.
    for i in range(len(sorted_xmin) - 1):
        x_distance = abs(sorted_xmin[i + 1] - sorted_xmin[i])
        y_distance = abs(sorted_ymin[i + 1] - sorted_ymin[i])
        if x_distance > 20:
            ssd_column += 1
            column_gaps += 1
            if x_distance > 2 * x_width_median + 4:
                wide_column_gaps += 1
        if y_distance > 10:
            ssd_row += 1
            row_gaps += 1
            if y_distance > 2 * y_height_median + 3:
                wide_row_gaps += 1
        # A box whose size is far from the median lowers the count by one.
        if abs(x_width_all_sorted[i] - x_width_median) > 40:
            ssd_column -= 1
        if abs(y_height_all_sorted[i] - y_height_median) > 20:
            ssd_row -= 1

    # NOTE(review): because of the elif, the row correction only runs when
    # the column condition is false; this looks like it may have been meant
    # as two independent checks. Behaviour is kept as-is.
    if wide_column_gaps < column_gaps / 2 + 1:
        ssd_column += wide_column_gaps
    elif wide_row_gaps < row_gaps / 2 + 1:
        ssd_row += wide_row_gaps

    average_height = int(np.mean(y_height_all))
    average_width = int(np.mean(x_width_all))

    location_ssd = {'xmin': sorted_xmin[0] + left,
                    'ymin': sorted_ymin[0] + top,
                    'xmax': sorted_xmax[-1] + left,
                    'ymax': sorted_ymax[-1] + top}
    return {'bounding_box': location_ssd,
            "single_height": average_height,
            "single_width": average_width,
            "rows": ssd_row,
            "cols": ssd_column,
            'class_name': 'choice_m',
            'all_small_coordinate': all_small_coordinate,
            }
|