# Serving
tf supports serving models with signature functions remotely. To provide model functions as a service:
- save the model as expected
- pass arguments to the model in the http request as expected
- start the tf serving client as expected
To save a model:
# some built-in method or @tf.function as serving_default, errors are consumed silently
model.save(path)
tf.saved_model.save(model, path)
tf.keras.models.save_model(model, path)
# errors are thrown out
# model.func is decorated with @tf.function(input_signature=[])
model.save(path, signatures={'name': model.func})
tf.saved_model.save(model, path, signatures={'name': model.func})
tf.keras.models.save_model(model, path, signatures={'name': model.func})
# errors are thrown out
# model.func is decorated with a bare @tf.function (no input signature)
model.save(path, signatures={'name': model.func.get_concrete_function(some_input_signatures)})
tf.saved_model.save(model, path, signatures={'name': model.func.get_concrete_function(some_input_signatures)})
tf.keras.models.save_model(model, path, signatures={'name': model.func.get_concrete_function(some_input_signatures)})
A saved model contains signatures, e.g.,
Defined by tf.function
tf.function over-simplification
@tf.function
wraps python code into graph by simplifying execution logic
# original
@tf.function(input_signature=[tf.TensorSpec(shape=[None,None], dtype=tf.float32)])
def add(self, x):
if x.shape[1] is None:
print(x)
return tf.constant('666')
else:
return tf.constant(x.shape[1])
# simplified due to an execution with empty tensor
def add(self, x):
return tf.constant('666')
Function becomes part of graph: same shape + same type => reuse the traced graph instead of re-running the python code.
Fix one: remove input_signature
@tf.function()
def add(self, x):
if x.shape[1] is None:
print(x)
return tf.constant('666')
else:
return tf.constant(x.shape[1])
Fix two: use tensors in branch conditional
@tf.function(input_signature=[tf.TensorSpec(shape=[None,None], dtype=tf.float32)])
def add(self, x):
if tf.equal(tf.size(x), 0): # empty tensor size is 0; [[1,2],[3,4]] size is 4
print(x)
return tf.constant('666')
else:
return tf.constant(x.shape[1])
Fix three: run tf functions in eager mode
tf.config.run_functions_eagerly(True) # tf >= 2.3
tf.config.experimental_run_functions_eagerly(True) # tf >= 2.0
@tf.function(input_signature=[tf.TensorSpec(shape=[None, 224, 224, 3], dtype=tf.float32)])
def tf_func(x):
do_sth(x)
return
# auto parsed to test(one_arg=, one_arg_1=)
# if multiple tensors are passed through a single argument, they are differentiated by number suffix
@tf.function(input_signature=[(tf.TensorSpec(shape=None, dtype=tf.int32), tf.TensorSpec(shape=None, dtype=tf.int32))])
def test(one_arg: Tuple[int, int]):
do_sth(one_arg)
return
# later use get_concrete_function(tf.TensorSpec(shape=[None, 224, 224, 3], dtype=tf.float32))
@tf.function
def bald_tf_func(x):
do_sth(x)
return
Raw format:
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['mobilenetv2_1.00_224_input'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 224, 224, 3)
name: serving_default_mobilenetv2_1.00_224_input:0
The given SavedModel SignatureDef contains the following output(s):
outputs['dense'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 5)
name: StatefulPartitionedCall:0
Method name is: tensorflow/serving/predict
some tips about how to pass arguments to a served model:
Number of arguments
"""Only accept sequence of tensors"""
# one argument
model.signatures['name'](one_arg_in_tensor)
# multiple arguments should be passed as **kwargs
model.signatures['name'](**dict_multiple_args)
Steps to serve a model with tf serving.
# TFX
row format: "instances": <value>|<(nested)list>|<list-of-objects>
, "predictions": <value>|<(nested)list>|<list-of-objects>
{
# List of 3 scalar tensors. 1 named input. <list>
"instances": [ "foo", "bar", "baz" ]
}
{
# List of 2 tensors each of [1, 2] shape. 1 named input. <nested list>
"instances": [ [[1, 2]], [[3, 4]] ]
}
{
# 3 named inputs. <list-of-objects>
"instances": [
{
"tag": "foo",
"signal": [1, 2, 3, 4, 5],
"sensor": [[1, 2], [3, 4]]
},
{
"tag": "bar",
"signal": [3, 4, 1, 2, 5]],
"sensor": [[4, 5], [6, 8]]
}
]
}
column format: "inputs": <value>|<(nested)list>|<object>
, "outputs": <value>|<(nested)list>|<object>
{
# 3 named inputs, batch size might be different. <object>
"inputs": {
"tag": ["foo", "bar"],
"signal": [[1, 2, 3, 4, 5], [3, 4, 1, 2, 5]],
"sensor": [[[1, 2], [3, 4]], [[4, 5], [6, 8]]]
}
}
{
# 1 named input. <value>
"inputs": ""
}
{
# 1 named input. <object>
"inputs": {
"meta": ""
}
}
← Keras Determinism →