ekalda commented on a change in pull request #8368: URL: https://github.com/apache/tvm/pull/8368#discussion_r675471946
########## File path: python/tvm/relay/testing/tflite.py ########## @@ -0,0 +1,468 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +TensorFlow Lite model generation infrastructure that uses flatbuffers +============================================================ +""" +import json +import subprocess +import tempfile +from enum import Enum +from typing import List, Dict, Optional, Any, Tuple, Union +import numpy as np +from tvm.contrib.download import download + +# We are currently using TensorFlow Lite 2.4.2 schema to write the model buffers +SCHEMA_URL = ( + "https://raw.githubusercontent.com/tensorflow/tensorflow/v2.4.2/" + "tensorflow/lite/schema/schema.fbs" +) + + +class ActivationFunction(Enum): + NONE = "NONE" + RELU = "RELU" + RELU_N1_TO_1 = "RELU_N1_TO_1" + RELU6 = "RELU6" + TANH = "TANH" + SIGN_BIT = "SIGN_BIT" + + +class Quantization: + "A class representing quantization of a tensor" + + def __init__( + self, + scale: List[float], + zero_point: List[int], Review comment: Yeah could do. I was thinking though that since we have multiple scales and zero points only when we have per channel quantization (i.e. only for the convolutions), it would not make much sense for all the other operators. 
I was also thinking to change it to accept both, a scalar and a list and convert the scalar to a list inside the class. ########## File path: python/tvm/relay/testing/tflite.py ########## @@ -0,0 +1,468 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +TensorFlow Lite model generation infrastructure that uses flatbuffers +============================================================ +""" +import json +import subprocess +import tempfile +from enum import Enum +from typing import List, Dict, Optional, Any, Tuple, Union +import numpy as np +from tvm.contrib.download import download + +# We are currently using TensorFlow Lite 2.4.2 schema to write the model buffers +SCHEMA_URL = ( + "https://raw.githubusercontent.com/tensorflow/tensorflow/v2.4.2/" + "tensorflow/lite/schema/schema.fbs" +) + + +class ActivationFunction(Enum): + NONE = "NONE" + RELU = "RELU" + RELU_N1_TO_1 = "RELU_N1_TO_1" + RELU6 = "RELU6" + TANH = "TANH" + SIGN_BIT = "SIGN_BIT" + + +class Quantization: + "A class representing quantization of a tensor" + + def __init__( + self, + scale: List[float], + zero_point: List[int], + quantized_dimension: int = 0, + ): + """ + Parameters + ---------- + scale: List[float] + The scale(s) + zero_point: List[int] + The zero 
point(s) Review comment: In case of per channel quantization where we have several scales, the list of zero points needs to be the same length as the list of scales. But that will just be a list of zeros, so technically we don't have multiple zero points. I'm thinking of changing it such that you can pass just one scalar zero point and if the there are more than one scales, broadcast that zero point to a list of a correct size. ########## File path: python/tvm/relay/testing/tflite.py ########## @@ -0,0 +1,468 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+""" +TensorFlow Lite model generation infrastructure that uses flatbuffers +============================================================ +""" +import json +import subprocess +import tempfile +from enum import Enum +from typing import List, Dict, Optional, Any, Tuple, Union +import numpy as np +from tvm.contrib.download import download + +# We are currently using TensorFlow Lite 2.4.2 schema to write the model buffers +SCHEMA_URL = ( + "https://raw.githubusercontent.com/tensorflow/tensorflow/v2.4.2/" + "tensorflow/lite/schema/schema.fbs" +) + + +class ActivationFunction(Enum): + NONE = "NONE" + RELU = "RELU" + RELU_N1_TO_1 = "RELU_N1_TO_1" + RELU6 = "RELU6" + TANH = "TANH" + SIGN_BIT = "SIGN_BIT" + + +class Quantization: + "A class representing quantization of a tensor" + + def __init__( + self, + scale: List[float], + zero_point: List[int], + quantized_dimension: int = 0, + ): + """ + Parameters + ---------- + scale: List[float] + The scale(s) + zero_point: List[int] + The zero point(s) + quantized_dimension: int + The dimension across which quantization is applied + """ + self.scale = scale + self.zero_point = zero_point + self.quantized_dimension = quantized_dimension + + def to_json(self) -> Dict[str, Any]: + return { + "scale": self.scale, + "zero_point": self.zero_point, + "quantized_dimension": self.quantized_dimension, + } + + +class Tensor: + """A class representing a tensor""" + + def __init__( + self, + data_type: str, + shape: List[int], + quantization: Optional[Quantization] = None, + buffer_data: Optional[List[int]] = None, + ): + """ + Parameters + ---------- + data_type: str + The data type of data in the tensor + shape: List[int] + The shape of the tensor + quantization: Optional[Quantization] + The quantization parameters of the tensor + buffer_data: Optional[List[int]] + The data in the tensor + """ + self.data_type = data_type + self.buffer_idx = None + self.name = None + self.shape = shape + self.quantization = quantization + self.buffer_data = 
buffer_data + + def to_json(self) -> Dict[str, Any]: + tensor_json = { + "type": self.data_type.upper(), + "buffer": self.buffer_idx, + "name": self.name, + "shape": self.shape, + } + if self.quantization is not None: + tensor_json["quantization"] = self.quantization.to_json() + return tensor_json + + +class Operator: + """A class representing an operator""" + + def __init__( + self, + opcode: int, + options_type: str, + options: Dict[str, Any], + ): + """ + Parameters + ---------- + opcode: int + The operator's builtin_code + options_type: str + The operator's builtin_options_type + options: Dict[str, Any] + The operator's builtin_options + """ + self.opcode = opcode + self.options_type = options_type + self.options = options + self.op_inputs_idx = [] + self.op_outputs_idx = [] + + +def generate_tflite_model( + inputs: List[Tensor], + outputs: List[Tensor], + operator: Operator, +) -> bytes: + """Generate a TensorFlow Lite model + + Parameters + ---------- + inputs: List[Tensor], + The list of input tensors + outputs: List[Tensor], + The list of output tensors + operator: Operator, + The operator in the model + + Returns + ------------ + TensorFlow Lite model as bytes + """ + tmp_dir = tempfile.gettempdir() + + schema_path = tmp_dir + "/schema.fbs" + + download(SCHEMA_URL, schema_path) + + json_path = tmp_dir + "/tflite_model.json" + tflite_model_path = tmp_dir + "/tflite_model.tflite" + + # figure out which input tensors are inputs to the model and which are inputs to the op + model_inputs_idx = [] + + for idx, tensor in enumerate(inputs): + # all input tensors are inputs to the operator + operator.op_inputs_idx.append(idx) + if tensor.buffer_data is None: + model_inputs_idx.append(idx) + + tensors = inputs + outputs + # model and operator has the same output tensors + model_outputs_idx = list(range(len(inputs), len(tensors))) + operator.op_outputs_idx = model_outputs_idx + + model_json = _make_json(tensors, operator, model_inputs_idx, model_outputs_idx) + with 
open(json_path, "w") as json_file: + json_file.write(model_json) + + subprocess.run( + ["flatc", "-b", schema_path, json_path], + cwd=tmp_dir, + check=True, + ) + + with open(tflite_model_path, "rb") as file: + model = file.read() + return model + + +def _make_json( + tensors: List[int], + operator: Operator, + model_inputs_idx: List[int], + model_outputs_idx: List[int], +) -> str: + + # first element in list of buffers is always an empty list + buffers = [{"data": []}] + + # turn the Tensor objects into JSONable dicts + tensors_as_json = [] + for idx, tensor in enumerate(tensors, start=1): + tensor.buffer_idx = idx + tensor.name = "x-" + str(idx) + tensors_as_json.append(tensor.to_json()) + + buffers.append({"data": tensor.buffer_data if tensor.buffer_data else []}) + + op = { + "opcode_index": 0, + "inputs": operator.op_inputs_idx, + "outputs": operator.op_outputs_idx, + "mutating_variable_inputs": [], + } + if operator.options_type != "": + op["builtin_options_type"] = operator.options_type + op["builtin_options"] = operator.options + + dictionary = { + "version": 3, + "operator_codes": [{"builtin_code": operator.opcode}], + "subgraphs": [ + { + "tensors": tensors_as_json, + "inputs": model_inputs_idx, + "outputs": model_outputs_idx, + "operators": [op], + } + ], + "buffers": buffers, + } + + return json.dumps(dictionary, indent=True) + + +def make_buffer_data(data_type: str, data_low: int, data_high: int, shape: List[int]) -> List[int]: + """ + Create random data for constant tensors. + + Parameters + ---------- + data_type : str + a type string (e.g., int8) + data_low : int + smallest value in the tensor + data_high : int + highest value in the tensor + shape : List[int] + Shape of the tensor to be filled + + Returns + ------- + data_uint8.tolist() : List[int] + Buffer data in uint8 + """ + shape_multiplier = np.prod(shape) + data = np.random.randint(data_low, high=data_high, size=[shape_multiplier], dtype=data_type) Review comment: Will do! 
########## File path: python/tvm/relay/testing/tflite.py ########## @@ -0,0 +1,468 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +TensorFlow Lite model generation infrastructure that uses flatbuffers +============================================================ +""" +import json +import subprocess +import tempfile +from enum import Enum +from typing import List, Dict, Optional, Any, Tuple, Union +import numpy as np +from tvm.contrib.download import download + +# We are currently using TensorFlow Lite 2.4.2 schema to write the model buffers +SCHEMA_URL = ( + "https://raw.githubusercontent.com/tensorflow/tensorflow/v2.4.2/" + "tensorflow/lite/schema/schema.fbs" +) + + +class ActivationFunction(Enum): + NONE = "NONE" + RELU = "RELU" + RELU_N1_TO_1 = "RELU_N1_TO_1" + RELU6 = "RELU6" + TANH = "TANH" + SIGN_BIT = "SIGN_BIT" + + +class Quantization: + "A class representing quantization of a tensor" + + def __init__( + self, + scale: List[float], + zero_point: List[int], + quantized_dimension: int = 0, + ): + """ + Parameters + ---------- + scale: List[float] + The scale(s) + zero_point: List[int] + The zero point(s) + quantized_dimension: int + The dimension across which quantization is applied + """ + self.scale = scale + 
self.zero_point = zero_point + self.quantized_dimension = quantized_dimension + + def to_json(self) -> Dict[str, Any]: + return { + "scale": self.scale, + "zero_point": self.zero_point, + "quantized_dimension": self.quantized_dimension, + } + + +class Tensor: + """A class representing a tensor""" + + def __init__( + self, + data_type: str, + shape: List[int], + quantization: Optional[Quantization] = None, + buffer_data: Optional[List[int]] = None, + ): + """ + Parameters + ---------- + data_type: str + The data type of data in the tensor + shape: List[int] + The shape of the tensor + quantization: Optional[Quantization] + The quantization parameters of the tensor + buffer_data: Optional[List[int]] + The data in the tensor + """ + self.data_type = data_type + self.buffer_idx = None + self.name = None + self.shape = shape + self.quantization = quantization + self.buffer_data = buffer_data + + def to_json(self) -> Dict[str, Any]: + tensor_json = { + "type": self.data_type.upper(), + "buffer": self.buffer_idx, + "name": self.name, + "shape": self.shape, + } + if self.quantization is not None: + tensor_json["quantization"] = self.quantization.to_json() + return tensor_json + + +class Operator: + """A class representing an operator""" + + def __init__( + self, + opcode: int, + options_type: str, + options: Dict[str, Any], + ): + """ + Parameters + ---------- + opcode: int + The operator's builtin_code + options_type: str + The operator's builtin_options_type + options: Dict[str, Any] + The operator's builtin_options + """ + self.opcode = opcode + self.options_type = options_type + self.options = options + self.op_inputs_idx = [] + self.op_outputs_idx = [] + + +def generate_tflite_model( + inputs: List[Tensor], + outputs: List[Tensor], + operator: Operator, +) -> bytes: + """Generate a TensorFlow Lite model + + Parameters + ---------- + inputs: List[Tensor], + The list of input tensors + outputs: List[Tensor], + The list of output tensors + operator: Operator, + The 
operator in the model + + Returns + ------------ + TensorFlow Lite model as bytes + """ + tmp_dir = tempfile.gettempdir() + + schema_path = tmp_dir + "/schema.fbs" + + download(SCHEMA_URL, schema_path) + + json_path = tmp_dir + "/tflite_model.json" + tflite_model_path = tmp_dir + "/tflite_model.tflite" + + # figure out which input tensors are inputs to the model and which are inputs to the op + model_inputs_idx = [] + + for idx, tensor in enumerate(inputs): + # all input tensors are inputs to the operator + operator.op_inputs_idx.append(idx) + if tensor.buffer_data is None: + model_inputs_idx.append(idx) + + tensors = inputs + outputs + # model and operator has the same output tensors + model_outputs_idx = list(range(len(inputs), len(tensors))) + operator.op_outputs_idx = model_outputs_idx + + model_json = _make_json(tensors, operator, model_inputs_idx, model_outputs_idx) + with open(json_path, "w") as json_file: + json_file.write(model_json) + + subprocess.run( + ["flatc", "-b", schema_path, json_path], + cwd=tmp_dir, + check=True, + ) + + with open(tflite_model_path, "rb") as file: + model = file.read() + return model + + +def _make_json( + tensors: List[int], + operator: Operator, + model_inputs_idx: List[int], + model_outputs_idx: List[int], +) -> str: + + # first element in list of buffers is always an empty list + buffers = [{"data": []}] + + # turn the Tensor objects into JSONable dicts + tensors_as_json = [] + for idx, tensor in enumerate(tensors, start=1): + tensor.buffer_idx = idx + tensor.name = "x-" + str(idx) + tensors_as_json.append(tensor.to_json()) + + buffers.append({"data": tensor.buffer_data if tensor.buffer_data else []}) + + op = { + "opcode_index": 0, + "inputs": operator.op_inputs_idx, + "outputs": operator.op_outputs_idx, + "mutating_variable_inputs": [], + } + if operator.options_type != "": + op["builtin_options_type"] = operator.options_type + op["builtin_options"] = operator.options + + dictionary = { + "version": 3, + "operator_codes": 
[{"builtin_code": operator.opcode}], + "subgraphs": [ + { + "tensors": tensors_as_json, + "inputs": model_inputs_idx, + "outputs": model_outputs_idx, + "operators": [op], + } + ], + "buffers": buffers, + } + + return json.dumps(dictionary, indent=True) + + +def make_buffer_data(data_type: str, data_low: int, data_high: int, shape: List[int]) -> List[int]: + """ + Create random data for constant tensors. + + Parameters + ---------- + data_type : str + a type string (e.g., int8) + data_low : int + smallest value in the tensor + data_high : int + highest value in the tensor + shape : List[int] + Shape of the tensor to be filled + + Returns + ------- + data_uint8.tolist() : List[int] + Buffer data in uint8 + """ + shape_multiplier = np.prod(shape) + data = np.random.randint(data_low, high=data_high, size=[shape_multiplier], dtype=data_type) + # The buffer entries in JSON need to be in uint8, so temporarily converting the data + data_bytes = data.tobytes() + data_uint8 = np.frombuffer(data_bytes, dtype="uint8") + return data_uint8.tolist() + + +def get_range_for_dtype_str(dtype: str) -> Tuple[int, int]: + """ + Produce the min and max for a give data type. + + Parameters + ---------- + dtype : str + a type string (e.g., int8) + + Returns + ------- + type_info.min : int + the minimum of the range + type_info.max : int + the maximum of the range + """ + + try: + type_info = np.iinfo(dtype) + except ValueError: + type_info = np.finfo(dtype) + return type_info.min, type_info.max + + +def get_output_qnn_params( Review comment: Yeah I agree that it should be renamed to reflect its conv-ness. 
It would probably make sense to move it to test_forward indeed, we might also want to think if we can maybe make it shorter, e.g by specializing it to int8 (then we always know the zero point and dtype limits) or if we make the test deterministic, we could get rid of it altogether :) Another option I can think of is that since it is convolution specific, we can attach it as a function to the operator classes and maybe have it return an output tensor with the legit QNN params. Any thoughts? ########## File path: tests/python/frontend/tflite/test_forward.py ########## @@ -868,74 +869,177 @@ def test_forward_l2_pool2d(): # ----------- -def _test_tflite2_quantized_convolution( - input_shape, kernel_shape, dilations, strides, padding, data_format +def _test_tflite2_quantized_conv2d( + input_shape, + weights_shape, + dilations, + strides, + padding, + dtype="int8", + quantize_per_channel=False, ): """One iteration of TFLite2 quantized convolution with given shapes and attributes""" - data_format = "channels_last" if "NHWC" else "channels_first" - data = np.random.uniform(0, 1, input_shape).astype("float32") - kernel = np.random.uniform(0, 1, kernel_shape).astype("float32") - data_in = tf.keras.layers.Input(shape=data.shape[1:]) - conv = tf.keras.layers.Conv2D( - filters=kernel_shape[3], - kernel_size=(kernel_shape[0], kernel_shape[1]), - strides=strides, - padding=padding, - data_format=data_format, - activation="relu", - use_bias=False, - )(data_in) - keras_model = tf.keras.models.Model(data_in, conv) - keras_model.layers[1].set_weights([kernel]) + dtype_min, dtype_max = test_tflite.get_range_for_dtype_str(dtype) + channels = weights_shape[0] - # To create quantized values with dynamic range of activations, needs representative dataset - def representative_data_gen(): - for i in range(1): - yield [data] + input_scale = np.random.random() * 0.1 Review comment: Will do! 
########## File path: python/tvm/relay/testing/tflite.py ########## @@ -0,0 +1,468 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +TensorFlow Lite model generation infrastructure that uses flatbuffers +============================================================ +""" +import json +import subprocess +import tempfile +from enum import Enum +from typing import List, Dict, Optional, Any, Tuple, Union +import numpy as np +from tvm.contrib.download import download + +# We are currently using TensorFlow Lite 2.4.2 schema to write the model buffers +SCHEMA_URL = ( + "https://raw.githubusercontent.com/tensorflow/tensorflow/v2.4.2/" + "tensorflow/lite/schema/schema.fbs" +) + + +class ActivationFunction(Enum): + NONE = "NONE" + RELU = "RELU" + RELU_N1_TO_1 = "RELU_N1_TO_1" + RELU6 = "RELU6" + TANH = "TANH" + SIGN_BIT = "SIGN_BIT" + + +class Quantization: + "A class representing quantization of a tensor" + + def __init__( + self, + scale: List[float], + zero_point: List[int], + quantized_dimension: int = 0, + ): + """ + Parameters + ---------- + scale: List[float] + The scale(s) + zero_point: List[int] + The zero point(s) + quantized_dimension: int + The dimension across which quantization is applied + """ + self.scale = scale + 
self.zero_point = zero_point + self.quantized_dimension = quantized_dimension + + def to_json(self) -> Dict[str, Any]: + return { + "scale": self.scale, + "zero_point": self.zero_point, + "quantized_dimension": self.quantized_dimension, + } + + +class Tensor: + """A class representing a tensor""" + + def __init__( + self, + data_type: str, + shape: List[int], + quantization: Optional[Quantization] = None, + buffer_data: Optional[List[int]] = None, + ): + """ + Parameters + ---------- + data_type: str + The data type of data in the tensor + shape: List[int] + The shape of the tensor + quantization: Optional[Quantization] + The quantization parameters of the tensor + buffer_data: Optional[List[int]] + The data in the tensor + """ + self.data_type = data_type + self.buffer_idx = None + self.name = None + self.shape = shape + self.quantization = quantization + self.buffer_data = buffer_data + + def to_json(self) -> Dict[str, Any]: + tensor_json = { + "type": self.data_type.upper(), + "buffer": self.buffer_idx, + "name": self.name, + "shape": self.shape, + } + if self.quantization is not None: + tensor_json["quantization"] = self.quantization.to_json() + return tensor_json + + +class Operator: + """A class representing an operator""" + + def __init__( + self, + opcode: int, + options_type: str, + options: Dict[str, Any], + ): + """ + Parameters + ---------- + opcode: int + The operator's builtin_code + options_type: str + The operator's builtin_options_type + options: Dict[str, Any] + The operator's builtin_options + """ + self.opcode = opcode + self.options_type = options_type + self.options = options + self.op_inputs_idx = [] + self.op_outputs_idx = [] + + +def generate_tflite_model( + inputs: List[Tensor], + outputs: List[Tensor], + operator: Operator, +) -> bytes: + """Generate a TensorFlow Lite model + + Parameters + ---------- + inputs: List[Tensor], + The list of input tensors + outputs: List[Tensor], + The list of output tensors + operator: Operator, + The 
operator in the model + + Returns + ------------ + TensorFlow Lite model as bytes + """ + tmp_dir = tempfile.gettempdir() + + schema_path = tmp_dir + "/schema.fbs" + + download(SCHEMA_URL, schema_path) + + json_path = tmp_dir + "/tflite_model.json" + tflite_model_path = tmp_dir + "/tflite_model.tflite" + + # figure out which input tensors are inputs to the model and which are inputs to the op + model_inputs_idx = [] + + for idx, tensor in enumerate(inputs): + # all input tensors are inputs to the operator + operator.op_inputs_idx.append(idx) + if tensor.buffer_data is None: + model_inputs_idx.append(idx) + + tensors = inputs + outputs + # model and operator has the same output tensors + model_outputs_idx = list(range(len(inputs), len(tensors))) + operator.op_outputs_idx = model_outputs_idx + + model_json = _make_json(tensors, operator, model_inputs_idx, model_outputs_idx) + with open(json_path, "w") as json_file: + json_file.write(model_json) + + subprocess.run( + ["flatc", "-b", schema_path, json_path], + cwd=tmp_dir, + check=True, + ) + + with open(tflite_model_path, "rb") as file: + model = file.read() + return model + + +def _make_json( + tensors: List[int], + operator: Operator, + model_inputs_idx: List[int], + model_outputs_idx: List[int], +) -> str: + + # first element in list of buffers is always an empty list + buffers = [{"data": []}] + + # turn the Tensor objects into JSONable dicts + tensors_as_json = [] + for idx, tensor in enumerate(tensors, start=1): + tensor.buffer_idx = idx + tensor.name = "x-" + str(idx) + tensors_as_json.append(tensor.to_json()) + + buffers.append({"data": tensor.buffer_data if tensor.buffer_data else []}) + + op = { + "opcode_index": 0, + "inputs": operator.op_inputs_idx, + "outputs": operator.op_outputs_idx, + "mutating_variable_inputs": [], + } + if operator.options_type != "": + op["builtin_options_type"] = operator.options_type + op["builtin_options"] = operator.options + + dictionary = { + "version": 3, Review comment: Yeah 
it's a schema versioning, I'll add it as a global variable to the top. ########## File path: tests/python/frontend/tflite/test_forward.py ########## @@ -868,74 +869,177 @@ def test_forward_l2_pool2d(): # ----------- -def _test_tflite2_quantized_convolution( - input_shape, kernel_shape, dilations, strides, padding, data_format +def _test_tflite2_quantized_conv2d( + input_shape, + weights_shape, + dilations, + strides, + padding, + dtype="int8", + quantize_per_channel=False, ): """One iteration of TFLite2 quantized convolution with given shapes and attributes""" - data_format = "channels_last" if "NHWC" else "channels_first" - data = np.random.uniform(0, 1, input_shape).astype("float32") - kernel = np.random.uniform(0, 1, kernel_shape).astype("float32") - data_in = tf.keras.layers.Input(shape=data.shape[1:]) - conv = tf.keras.layers.Conv2D( - filters=kernel_shape[3], - kernel_size=(kernel_shape[0], kernel_shape[1]), - strides=strides, - padding=padding, - data_format=data_format, - activation="relu", - use_bias=False, - )(data_in) - keras_model = tf.keras.models.Model(data_in, conv) - keras_model.layers[1].set_weights([kernel]) + dtype_min, dtype_max = test_tflite.get_range_for_dtype_str(dtype) + channels = weights_shape[0] - # To create quantized values with dynamic range of activations, needs representative dataset - def representative_data_gen(): - for i in range(1): - yield [data] + input_scale = np.random.random() * 0.1 + input_zp = np.random.randint(dtype_min, dtype_max) + in_tensor = test_tflite.Tensor( + data_type=dtype, + shape=input_shape, + quantization=test_tflite.Quantization(scale=[input_scale], zero_point=[input_zp]), + ) - tflite_model_quant = _quantize_keras_model(keras_model, representative_data_gen) + # Weights in TFLite 2 are symmetric, i.e the zero point is at 0 + if quantize_per_channel: + weights_scale = [np.random.random() * 0.1 for i in range(channels)] + weights_zp = [0 for i in range(channels)] + else: + weights_scale = [np.random.random() 
* 0.1] + weights_zp = [0] + weights_quantization = test_tflite.Quantization( Review comment: Some ways we could reduce the amount of code that I can come up with: (1) Use the same function for both, conv2d and depthwise2d and have a parameter to distinguish between the two cases, to make sure we set the quantized dimension etc correctly. (2) Since TFLite2 is focusing on int8, we could make it a default data type for tensors, in most cases that would mean that there will be one parameter less to set when creating a tensor. (3) We can give the operator classes the power to create tensors themselves that make sense for the specific operator, e.g. conv2d = test_tflite.Conv2DOperator(...) weights_tensor = conv2d.make_weights(shape, <some optional arguments>) We can have default data type, qnn params that make sense and optionally fill it with data. Going further, if we tag a tensor with whether it is an input or output, we could save tensors into the op object itself and extract them during json creation. Then we'd essentially only need to pass conv2d to the generate_tflite_model. We might want to think whether it is a good idea to introduce that kind of coupling though. In general, I'm note sure you can create a tensor directly without specifying its qnn params, shape and dtype, but maybe you have some more ideas? ########## File path: python/tvm/relay/testing/tflite.py ########## @@ -0,0 +1,468 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +TensorFlow Lite model generation infrastructure that uses flatbuffers +============================================================ +""" +import json +import subprocess +import tempfile +from enum import Enum +from typing import List, Dict, Optional, Any, Tuple, Union +import numpy as np +from tvm.contrib.download import download + +# We are currently using TensorFlow Lite 2.4.2 schema to write the model buffers +SCHEMA_URL = ( + "https://raw.githubusercontent.com/tensorflow/tensorflow/v2.4.2/" + "tensorflow/lite/schema/schema.fbs" +) + + +class ActivationFunction(Enum): + NONE = "NONE" + RELU = "RELU" + RELU_N1_TO_1 = "RELU_N1_TO_1" + RELU6 = "RELU6" + TANH = "TANH" + SIGN_BIT = "SIGN_BIT" + + +class Quantization: + "A class representing quantization of a tensor" + + def __init__( + self, + scale: List[float], + zero_point: List[int], + quantized_dimension: int = 0, + ): + """ + Parameters + ---------- + scale: List[float] + The scale(s) + zero_point: List[int] + The zero point(s) + quantized_dimension: int + The dimension across which quantization is applied + """ + self.scale = scale + self.zero_point = zero_point + self.quantized_dimension = quantized_dimension + + def to_json(self) -> Dict[str, Any]: + return { + "scale": self.scale, + "zero_point": self.zero_point, + "quantized_dimension": self.quantized_dimension, + } + + +class Tensor: + """A class representing a tensor""" + + def __init__( + self, + data_type: str, + shape: List[int], + quantization: Optional[Quantization] = None, + buffer_data: Optional[List[int]] = None, + ): + """ + 
Parameters + ---------- + data_type: str + The data type of data in the tensor + shape: List[int] + The shape of the tensor + quantization: Optional[Quantization] + The quantization parameters of the tensor + buffer_data: Optional[List[int]] + The data in the tensor + """ + self.data_type = data_type + self.buffer_idx = None + self.name = None + self.shape = shape + self.quantization = quantization + self.buffer_data = buffer_data + + def to_json(self) -> Dict[str, Any]: + tensor_json = { + "type": self.data_type.upper(), + "buffer": self.buffer_idx, + "name": self.name, + "shape": self.shape, + } + if self.quantization is not None: + tensor_json["quantization"] = self.quantization.to_json() + return tensor_json + + +class Operator: + """A class representing an operator""" + + def __init__( + self, + opcode: int, + options_type: str, + options: Dict[str, Any], + ): + """ + Parameters + ---------- + opcode: int + The operator's builtin_code + options_type: str + The operator's builtin_options_type + options: Dict[str, Any] + The operator's builtin_options + """ + self.opcode = opcode + self.options_type = options_type + self.options = options + self.op_inputs_idx = [] + self.op_outputs_idx = [] + + +def generate_tflite_model( + inputs: List[Tensor], + outputs: List[Tensor], + operator: Operator, +) -> bytes: + """Generate a TensorFlow Lite model + + Parameters + ---------- + inputs: List[Tensor], + The list of input tensors + outputs: List[Tensor], + The list of output tensors + operator: Operator, + The operator in the model + + Returns + ------------ + TensorFlow Lite model as bytes + """ + tmp_dir = tempfile.gettempdir() + + schema_path = tmp_dir + "/schema.fbs" + + download(SCHEMA_URL, schema_path) Review comment: I will cache it! I'll add a comment in the code -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected]. For queries about this service, please contact Infrastructure at: [email protected]
