Skip to content

Commit 645b459

Browse files
committed
add a model that matches tflite outputs
add argmax, max, and box encoding transform layers so that we can match TFLite outputs: * detection_boxes * detection_classes * detection_scores * number_of_detections
1 parent 65282da commit 645b459

2 files changed

Lines changed: 234 additions & 0 deletions
Binary file not shown.
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
import tensorflow as tf
5+
import coremltools as ct
6+
import numpy as np
7+
8+
# Spatial size of the image tensor fed to the converted network.
input_height = 320
input_width = 320

# Names of the graph nodes (without the ":0" tensor suffix) where the
# network is cut: the image input, the box output and the class-score output.
input_node = 'Preprocessor/map/TensorArrayStack/TensorArrayGatherV3'
bbox_output_node = 'Squeeze'
class_output_node = 'Postprocessor/convert_scores'

# Load the SavedModel and keep only the subgraph between the chosen tensors.
original_model = tf.saved_model.load('mobiledet')
pruned_model = original_model.prune(
    '{}:0'.format(input_node),
    ['{}:0'.format(node) for node in (bbox_output_node, class_output_node)],
)

# Convert the pruned function to Core ML with a fixed
# (1, height, width, 3) input shape.
inputs = [
    ct.TensorType(
        name=input_node,
        shape=(1, input_height, input_width, 3),
    ),
]
ssd_model = ct.convert([pruned_model], source='tensorflow', inputs=inputs)
20+
21+
spec = ssd_model.get_spec()

# Give the converted features friendly names. The names are looked up with
# '/' replaced by '_', matching the replace() calls below.
ct.utils.rename_feature(spec, input_node.replace('/', '_'), 'image')
ct.utils.rename_feature(spec, class_output_node.replace('/', '_'), 'scores')
ct.utils.rename_feature(spec, bbox_output_node.replace('/', '_'), 'boxes')

# Attach each description by feature name rather than by position: the prune
# call lists the box tensor before the score tensor, so relying on
# output[0] / output[1] can put the text on the wrong output.
output_descriptions = {
    'scores': "Predicted class scores for each bounding box",
    'boxes': "Predicted coordinates for each bounding box",
}
for output_feature in spec.description.output:
    description = output_descriptions.get(output_feature.name)
    if description is not None:
        output_feature.shortDescription = description

num_classes = 90
# Number of anchor boxes; used for the anchor constant shapes further down.
num_anchors = 2034

# Rebuild the model from the edited spec and keep an intermediate copy.
ssd_model = ct.models.MLModel(spec)
ssd_model.save('/tmp/mobiledet.mlmodel')
35+
36+
def get_anchors(start_tensor, end_tensor):
    """
    Extract the anchor boxes by running an all-zero image through the graph.

    The original model is pruned between `start_tensor` and `end_tensor`, and
    the pruned function is evaluated on a zero-filled uint8 image of shape
    (1, input_height, input_width, 3).

    Returns an array of shape (4, num_anchors) whose rows are
    [ycenter, xcenter, height, width] in normalized coordinates.
    """
    fake_image = tf.zeros((1, input_height, input_width, 3), tf.uint8)
    anchors_model = original_model.prune(start_tensor, [end_tensor])
    corners = tf.squeeze(anchors_model(fake_image))

    # The TensorFlow graph reports every anchor as [ymin, xmin, ymax, xmax];
    # transposing yields one row per corner coordinate.
    ymin, xmin, ymax, xmax = np.transpose(corners)

    # Re-express the corners as a center point plus a size.
    height = ymax - ymin
    width = xmax - xmin
    return np.stack([
        ymin + height / 2.,
        xmin + width / 2.,
        height,
        width,
    ])
53+
54+
# Read the anchors into a (4, num_anchors) array.
start_tensor = "image_tensor:0"
anchors_tensor = "Concatenate/concat:0"
anchors = get_anchors(start_tensor, anchors_tensor)

# The anchor constants added to the network below are declared with
# num_anchors in their shape, so a mismatch must stop the script. An explicit
# raise is used because `assert` is stripped when Python runs with -O.
if anchors.shape[1] != num_anchors:
    raise ValueError(
        "expected {} anchors but the graph produced {}".format(
            num_anchors, anchors.shape[1]))
59+
60+
61+
from coremltools.models import datatypes
62+
from coremltools.models import neural_network
63+
from coremltools.proto.FeatureTypes_pb2 import ArrayFeatureType
64+
65+
# Continue from the saved base model and append the box-decoding layers.
spec = ssd_model.get_spec()
builder = neural_network.NeuralNetworkBuilder(
    spec=spec,
    use_float_arraytype=True,
)

# Reorder the axes of the raw box predictions so that the four box values can
# be sliced along the "channel" axis by the layers below.
builder.add_permute(
    name="permute_boxed",
    input_name='boxes',
    output_name="permute_boxes_output",
    dim=(0, 3, 2, 1),
)

# Take the y, x predictions (channels 0-1).
builder.add_slice(
    name="slice_yx",
    input_name="permute_boxes_output",
    output_name="slice_yx_output",
    axis="channel",
    start_index=0,
    end_index=2,
)

# boxes_yx / 10, expressed as a multiplication by 0.1.
builder.add_elementwise(
    name="scale_yx",
    input_names="slice_yx_output",
    output_name="scale_yx_output",
    mode="MULTIPLY",
    alpha=0.1,
)

# Split the anchors into two (2, num_anchors, 1) arrays: rows 0-1 hold the
# centers (y, x) and rows 2-3 hold the sizes (height, width).
anchors_yx = anchors[:2, :, np.newaxis]
anchors_hw = anchors[2:, :, np.newaxis]

# Store both anchor arrays in the network as constants.
for constant_name, constant_value in (
        ("anchors_yx", anchors_yx),
        ("anchors_hw", anchors_hw),
):
    builder.add_load_constant(
        name=constant_name,
        output_name=constant_name,
        constant_value=constant_value,
        shape=[2, num_anchors, 1],
    )

# (boxes_yx / 10) * anchors_hw
builder.add_elementwise(
    name="yw_times_hw",
    input_names=["scale_yx_output", "anchors_hw"],
    output_name="yw_times_hw_output",
    mode="MULTIPLY",
)

# (boxes_yx / 10) * anchors_hw + anchors_yx
builder.add_elementwise(
    name="decoded_yx",
    input_names=["yw_times_hw_output", "anchors_yx"],
    output_name="decoded_yx_output",
    mode="ADD",
)
114+
115+
# Decode the box sizes: exp(boxes_hw / 5) * anchors_hw.
raw_hw = "slice_hw_output"
scaled_hw = "scale_hw_output"
exp_hw = "exp_hw_output"

# Take the height and width predictions (channels 2-3).
builder.add_slice(
    name="slice_hw",
    input_name="permute_boxes_output",
    output_name=raw_hw,
    axis="channel",
    start_index=2,
    end_index=4,
)

# boxes_hw / 5, expressed as a multiplication by 0.2.
builder.add_elementwise(
    name="scale_hw",
    input_names=raw_hw,
    output_name=scaled_hw,
    mode="MULTIPLY",
    alpha=0.2,
)

# exp(boxes_hw / 5)
builder.add_unary(
    name="exp_hw",
    input_name=scaled_hw,
    output_name=exp_hw,
    mode="exp",
)

# exp(boxes_hw / 5) * anchors_hw
builder.add_elementwise(
    name="decoded_hw",
    input_names=[exp_hw, "anchors_hw"],
    output_name="decoded_hw_output",
    mode="MULTIPLY",
)
141+
142+
# The decoded values are stored as (y, x) and (height, width), while
# NonMaximumSuppression wants (x, y, width, height). Cut out the four
# single-channel slices and concatenate them in the required order.
single_channel_slices = (
    # (layer name, source blob, output blob, channel)
    ("slice_y", "decoded_yx_output", "slice_y_output", 0),
    ("slice_x", "decoded_yx_output", "slice_x_output", 1),
    ("slice_h", "decoded_hw_output", "slice_h_output", 0),
    ("slice_w", "decoded_hw_output", "slice_w_output", 1),
)
for layer_name, source_blob, sliced_blob, channel in single_channel_slices:
    builder.add_slice(
        name=layer_name,
        input_name=source_blob,
        output_name=sliced_blob,
        axis="channel",
        start_index=channel,
        end_index=channel + 1,
    )

builder.add_elementwise(
    name="concat2",
    input_names=[
        "slice_x_output",
        "slice_y_output",
        "slice_w_output",
        "slice_h_output",
    ],
    output_name="concat_output",
    mode="CONCAT",
)

# dim=(0, 1, 2, 3) keeps the axis order unchanged; this layer only gives the
# concatenated result the blob name that NMS consumes.
builder.add_permute(
    name="permute_output",
    input_name="concat_output",
    output_name="raw_coordinates",
    dim=(0, 1, 2, 3),
)

# Non-maximum suppression over the decoded boxes and the class scores.
nms_inputs = ['raw_coordinates', 'scores']
nms_outputs = ['coordinates', 'confidence', 'box_index', 'number_of_boxes']
builder.add_nms(
    'nms',
    nms_inputs,
    nms_outputs,
    iou_threshold=0.3,
    score_threshold=0.3,
    max_boxes=10,
    per_class_suppression=True,
)
187+
188+
# TF/TFLite and Core ML encode boxes differently:
#   TF/TFLite: (y_1, x_1, y_2, x_2)
#   Core ML:   (x_center, y_center, width, height)
# A matrix multiplication converts one into the other. Each column of the
# matrix yields one corner coordinate, for example the first column:
#   (x_center, y_center, width, height) . (0, 1, 0, -0.5)^T
#       = y_center - height / 2 = y_1
# and likewise for x_1, y_2 and x_2.
corner_transform = np.array([
    [0.0, 1.0, 0.0, 1.0],
    [1.0, 0.0, 1.0, 0.0],
    [0.0, -0.5, 0.0, 0.5],
    [-0.5, 0.0, 0.5, 0.0],
])

builder.add_batched_mat_mul(
    'boxes_transform',
    ['coordinates'],
    'detection_boxes',
    weight_matrix_rows=4,
    weight_matrix_columns=4,
    W=corner_transform,
)

# Class index of the best score for every kept box (argmax over axis 2).
builder.add_argmax(
    name='confidence_argmax',
    input_name='confidence',
    output_name='detection_classes_1',
    axis=2,
)

# Shift the class index down by one using a broadcast constant of 1.
# NOTE(review): presumably index 0 of the scores is a background class that
# the TFLite outputs do not count -- confirm against the TFLite model.
builder.add_load_constant(
    name='the_one',
    output_name='the_one',
    constant_value=np.ones(1),
    shape=[1, 1, 1],
)
builder.add_subtract_broadcastable(
    name='sub_1',
    input_names=['detection_classes_1', 'the_one'],
    output_name='detection_classes',
)

# Best score for every kept box (max over axis 2).
builder.add_reduce_max(
    name='confidence_max',
    input_name='confidence',
    output_name='detection_scores',
    axes=[2],
)
214+
215+
# Append two more output slots to the model description so that the four
# outputs named below can be declared.
for _ in range(2):
    spec.description.output.add()

# Output names paired with the shapes declared for them.
tflite_outputs = [
    ('detection_boxes', (1, 10, 4)),
    ('detection_classes', (1, 10)),
    ('detection_scores', (1, 10)),
    ('number_of_boxes', (1,)),
]
output_names = [feature_name for feature_name, _ in tflite_outputs]
output_dims = [feature_dims for _, feature_dims in tflite_outputs]
builder.set_output(output_names, output_dims)

# Mark every declared output as a FLOAT32 multi-array.
for index, _ in enumerate(output_names):
    output_feature = spec.description.output[index]
    output_feature.type.multiArrayType.dataType = ArrayFeatureType.FLOAT32

# Model metadata.
metadata = spec.description.metadata
metadata.versionString = "ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19"
metadata.shortDescription = "MobileDet, trained on COCO"
metadata.author = "Converted to Core ML by Koan-Sin Tan. Original Authors: Yunyang Xiong, Hanxiao Liu, Suyog Gupta, Berkin Akin, Gabriel Bender, Yongzhe Wang, Pieter-Jan Kindermans, Mingxing Tan, Vikas Singh, Bo Chen"
metadata.license = "https://github.com/tensorflow/models/blob/master/research/object_detection"

# Build the final model from the extended spec and write it to disk.
final_model = ct.models.MLModel(builder.spec)
final_model.save('MobileDet_4_outputs_in_one_model_matching_tflite_outputs.mlmodel')

print(final_model)
print("Done!")
234+

0 commit comments

Comments
 (0)