# Serving
tf supports serving models with signature functions remotely. To provide model functions as a service:
- save the model as expected
- pass arguments to the model in the http request as expected
- start the tf serving client as expected
To save a model:
# some built-in method or @tf.function as serving_default, errors are consumed silently
model.save(path)
tf.saved_model.save(model, path)
tf.keras.models.save_model(model, path)
# errors are thrown out
# model.func is decorated with @tf.function(input_signature=[])
model.save(path, signatures={'name': model.func})
tf.saved_model.save(model, path, signatures={'name': model.func})
tf.keras.models.save_model(model, path, signatures={'name': model.func})
# errors are thrown out
# model.func is decorated with a bare @tf.function (no input signature)
model.save(path, signatures={'name': model.func.get_concrete_function(some_input_signatures)})
tf.saved_model.save(model, path, signatures={'name': model.func.get_concrete_function(some_input_signatures)})
tf.keras.models.save_model(model, path, signatures={'name': model.func.get_concrete_function(some_input_signatures)})
A saved model contains signatures, e.g.,
Defined by tf.function
tf.function over-simplification
@tf.function
wraps python code into graph by simplifying execution logic
# original
@tf.function(input_signature=[tf.TensorSpec(shape=[None,None], dtype=tf.float32)])
def add(self, x):
if x.shape[1] is None:
print(x)
return tf.constant('666')
else:
return tf.constant(x.shape[1])
# simplified due to an execution with empty tensor
def add(self, x):
return tf.constant('666')
Function becomes part of graph: same shape + same type => reuse the traced graph instead of re-running the python code.
Fix one: remove input_signature
@tf.function()
def add(self, x):
if x.shape[1] is None:
print(x)
return tf.constant('666')
else:
return tf.constant(x.shape[1])
Fix two: use tensors in branch conditional
@tf.function(input_signature=[tf.TensorSpec(shape=[None,None], dtype=tf.float32)])
def add(self, x):
if tf.equal(tf.size(x), 0): # empty tensor size is 0; [[1,2],[3,4]] size is 4
print(x)
return tf.constant('666')
else:
return tf.constant(x.shape[1])
Fix three: run tf functions in eager mode
tf.config.run_functions_eagerly(True) # tf >= 2.3
tf.config.experimental_run_functions_eagerly(True) # tf >= 2.0
@tf.function(input_signature=[tf.TensorSpec(shape=[None, 224, 224, 3], dtype=tf.float32)])
def tf_func(x):
do_sth(x)
return
# auto parsed to test(one_arg=, one_arg_1=)
# if multiple tensors are passed through a single argument, they are differentiated by number suffix
@tf.function(input_signature=[(tf.TensorSpec(shape=None, dtype=tf.int32), tf.TensorSpec(shape=None, dtype=tf.int32))])
def test(one_arg: Tuple[int, int]):
do_sth(one_arg)
return
# later use get_concrete_function(tf.TensorSpec(shape=[None, 224, 224, 3], dtype=tf.float32))
@tf.function
def bald_tf_func(x):
do_sth(x)
return
Raw format:
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['mobilenetv2_1.00_224_input'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 224, 224, 3)
name: serving_default_mobilenetv2_1.00_224_input:0
The given SavedModel SignatureDef contains the following output(s):
outputs['dense'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 5)
name: StatefulPartitionedCall:0
Method name is: tensorflow/serving/predict
some tips about how to pass arguments to a served model:
Number of arguments
"""Only accept sequence of tensors"""
# one argument
model.signatures['name'](one_arg_in_tensor)
# multiple arguments should be passed as **kwargs
model.signatures['name'](**dict_multiple_args)
Steps to serve a model with tf serving.
# TFX
row format: "instances": <value>|<(nested)list>|<list-of-objects>
, "predictions": <value>|<(nested)list>|<list-of-objects>
{
# List of 3 scalar tensors. 1 named input. <list>
"instances": [ "foo", "bar", "baz" ]
}
{
# List of 2 tensors each of [1, 2] shape. 1 named input. <nested list>
"instances": [ [[1, 2]], [[3, 4]] ]
}
{
# 3 named inputs. <list-of-objects>
"instances": [
{
"tag": "foo",
"signal": [1, 2, 3, 4, 5],
"sensor": [[1, 2], [3, 4]]
},
{
"tag": "bar",
"signal": [3, 4, 1, 2, 5]],
"sensor": [[4, 5], [6, 8]]
}
]
}
column format: "inputs": <value>|<(nested)list>|<object>
, "outputs": <value>|<(nested)list>|<object>
{
# 3 named inputs, batch size might be different. <object>
"inputs": {
"tag": ["foo", "bar"],
"signal": [[1, 2, 3, 4, 5], [3, 4, 1, 2, 5]],
"sensor": [[[1, 2], [3, 4]], [[4, 5], [6, 8]]]
}
}
{
# 1 named input. <value>
"inputs": ""
}
{
# 1 named input. <object>
"inputs": {
"meta": ""
}
}
← Keras Determinism →