MichaelJKlaiber commented on code in PR #16359: URL: https://github.com/apache/tvm/pull/16359#discussion_r1477106113
########## apps/uma/qvanilla/passes.py: ########## @@ -0,0 +1,216 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Transform passes for the q_vanilla_accelerator accelerator""" + +import functools +import tvm +from tvm import tir +from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block +from tvm import relay +from tvm.tir import buffer + + [email protected]_func_pass(opt_level=2) +class QVanillaAcceleratorConv2dPass: + _EXTERNAL_FUNCTION_NAME = "q_vanilla_accelerator_conv2dnchw" + # _TVM_BLOCK_MATCH_NAME = "conv2d_nchw" Review Comment: nit: remove ########## apps/uma/qvanilla/passes.py: ########## @@ -0,0 +1,216 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Transform passes for the q_vanilla_accelerator accelerator""" + +import functools +import tvm +from tvm import tir +from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block +from tvm import relay +from tvm.tir import buffer + + [email protected]_func_pass(opt_level=2) +class QVanillaAcceleratorConv2dPass: + _EXTERNAL_FUNCTION_NAME = "q_vanilla_accelerator_conv2dnchw" + # _TVM_BLOCK_MATCH_NAME = "conv2d_nchw" + _TVM_BLOCK_MATCH_NAME = "compute_2" + def transform_function( + self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + + return self._q_vanilla_accelerator_conv2d_pass(func, mod, ctx) + + @classmethod + def _q_vanilla_accelerator_conv2d_pass(cls, func, mod, ctx): + _loops = dict() + _handles = [] + _entry_node = None + zp = [] + block_idx = 0 + def _has_block(name: str, func: tvm.tir.PrimFunc) -> bool: + """ + Determine of a tir.block with `name` exists in `func` + """ + + def _hb(op): + if isinstance(op, tvm.tir.Block): + _found_blocks.append(op.name_hint) + + _found_blocks = [] + tvm.tir.stmt_functor.post_order_visit(func.body, _hb) + return name in _found_blocks + + def _detect_and_replace_conv2d( + func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.tir.PrimFunc: + def _replace_conv2d(op): + if op == _entry_node: + irb = tvm.tir.ir_builder.create() + # Collection of buffer address + buffers = [b[1].data for b in _handles] + # extraction of loop offsets + for k, v in _loops.items(): + assert v.min.value == 0 + offset_order = ["co", "w", 
"h", "ci", "kh", "kw"] + offsets = [_loops[i].extent.value for i in offset_order] + + offsets.append(zp[0]) + offsets.append(zp[1]) + + args = buffers + offsets + + irb.emit(tir_call(irb, True, cls._EXTERNAL_FUNCTION_NAME, *args)) + irb_result = irb.get() + + return irb_result + elif isinstance(op, tvm.tir.SeqStmt): + # Remove that pad block of TOPI's conv2DNCHW by only returning the 2nd statement + + return op.seq[block_idx] # the line that I've changed to replace the compute_2 block + + return op + + sch = tir.Schedule(func) + + if _has_block(cls._TVM_BLOCK_MATCH_NAME, func): + + #find the zp values + + s1 = [] + s2 = [] + + def _visit(s): + if isinstance(s, tvm.tir.BufferStore): + # stores.append(s.value) + s1.append(s.buffer.data) + s2.append(s.value) + + + tvm.tir.stmt_functor.post_order_visit(func.body, _visit) + + for i in range(len(s1)): + + if s1[i].name == "compile_engine_const": + + zp.append(s2[i]) + block_idx = len(s1) - 3 + + + ### + + conv2d_block = sch.get_block(cls._TVM_BLOCK_MATCH_NAME) + rv_loops = sch.get_loops(conv2d_block) + + assert len(rv_loops) == 7 + loops = dict( + n=rv_loops[0], + co=rv_loops[1], + h=rv_loops[2], + w=rv_loops[3], + ci=rv_loops[4], + kh=rv_loops[5], + kw=rv_loops[6], + ) + _entry_node = sch.get(rv_loops[1]) + + _loops = {k: sch.get(v) for k, v in loops.items()} + _handles = func.buffer_map.items() + + x = tvm.tir.stmt_functor.ir_transform( + func.body, None, _replace_conv2d, ["tir.For", "tir.SeqStmt"] + ) + + return func.with_body(x) + else: + + return func + + r = _detect_and_replace_conv2d(func, mod, ctx) + return r + + +def tir_call(ib: tvm.tir.ir_builder, extern: bool, name: str, *args): + """ + ib: ir_builder + extern: bool + True --> tvm.tir.call_extern + False --> tvm.tir.call_packed + name: str + function name + *args: + arguments for function call + """ + + def buf_from_array(ib, arr, dtype): + # Allocate enough memory to store the whole array + var = ib.allocate("int32", (len(arr),), scope="global") + for i, v 
in enumerate(arr): + var[i] = v + # Declare a buffer, which is basically a view on the chunk of memory that we allocated + buf = tvm.tir.decl_buffer((len(arr),), dtype, data=var, scope="global") + return buf + + if extern: + args = [i.data if isinstance(i, tvm.tir.Buffer) else i for i in args] + return tvm.tir.call_extern("int32", name, *args) + else: + args = [ + buf_from_array(ib, i, "int32") + if isinstance(i, (tuple, list, tvm.ir.container.Array)) + else i + for i in args + ] + return tvm.tir.call_packed(name, *args) + + [email protected]_pass(opt_level=0) +class ConvertLayout: + print("relay pass") Review Comment: nit: remove or convert to logging ########## apps/uma/qvanilla/passes.py: ########## @@ -0,0 +1,216 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Transform passes for the q_vanilla_accelerator accelerator""" + +import functools +import tvm +from tvm import tir +from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block +from tvm import relay +from tvm.tir import buffer + + [email protected]_func_pass(opt_level=2) +class QVanillaAcceleratorConv2dPass: Review Comment: Could you add a docstring here? 
########## apps/uma/qvanilla/strategies.py: ########## @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Strategies for the q_vanilla_accelerator accelerator""" + +# Example how to integrate a custom conv1d strategy: + +# @relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") +# def custom_conv1d_strategy(attrs, inputs, out_type, target): +# strategy = _op.OpStrategy() +# strategy.add_implementation( +# wrap_compute_conv1d(custom_conv1d_compute), +# wrap_topi_schedule(custom_conv1d_schedule), +# name="custom_conv1d.generic", +# return strategy +# + +# For further details see: +# - github.com/apache/tvm-rfcs/blob/main/rfcs/0060_UMA_Unified_Modular_Accelerator_Interface.md +# - $TVM_HOME/python/tvm/relay/op/strategy/x86.py Review Comment: nit: Add to docstring above ########## apps/uma/qvanilla/strategies.py: ########## @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Strategies for the q_vanilla_accelerator accelerator""" + +# Example how to integrate a custom conv1d strategy: + +# @relay.op.strategy.override_native_generic_func("custom_conv1d_strategy") +# def custom_conv1d_strategy(attrs, inputs, out_type, target): +# strategy = _op.OpStrategy() +# strategy.add_implementation( +# wrap_compute_conv1d(custom_conv1d_compute), +# wrap_topi_schedule(custom_conv1d_schedule), +# name="custom_conv1d.generic", +# return strategy +# + +# For further details see: +# - github.com/apache/tvm-rfcs/blob/main/rfcs/0060_UMA_Unified_Modular_Accelerator_Interface.md +# - $TVM_HOME/python/tvm/relay/op/strategy/x86.py + +from tvm import relay +from tvm.relay import op as _op +from tvm.relay.qnn.strategy.hexagon import * +from tvm import topi + + + [email protected]_native_generic_func("qnn_conv2d_strategy") +def qnn_conv2d_strategy(attrs, inputs, out_type, target): + print("qnn strategy") Review Comment: nit: logging ########## apps/uma/qvanilla/conv2dnchw.cc: ########## @@ -0,0 +1,80 @@ +#include <stdlib.h> +#include <stdint.h> +#include <math.h> +#include <stdbool.h> + +#include <stdio.h> + +#ifdef __cplusplus +extern "C" +#endif + + +int q_vanilla_accelerator_conv2dnchw(int8_t* q_vanilla_accelerator_0_i0, int8_t* q_vanilla_accelerator_0_i1, int32_t* bias_data, int32_t* compute, + int32_t oc, int32_t iw, int32_t ih, int32_t ic, int32_t kh, int32_t kw, int32_t i_zp, int32_t 
k_zp) { + + + int kw_low = kw / 2; + int kh_low = kh / 2; + int kw_high = iw + kw / 2; + int kh_high = ih + kh / 2; + + int padded_iw = iw + 2 * kw_low; + int padded_ih = ih + 2 * kh_low; + + int32_t* data_pad_let = (int32_t*)malloc( + (((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(int32_t)); + + int32_t* compute_let = (int32_t*)malloc((oc * ic * kh * kw) * sizeof(int32_t)); + + + + for (int32_t i1_1 = 0; i1_1 < ic; ++i1_1) { + for (int32_t i2_1 = 0; i2_1 < padded_ih; ++i2_1) { + for (int32_t i3_1 = 0; i3_1 < padded_iw; ++i3_1) { + data_pad_let[(((i1_1 * padded_iw * padded_ih) + (i2_1 * padded_iw)) + i3_1)] = (((((kh_low <= i2_1) && (i2_1 < kh_high)) && (kw_low <= i3_1)) && (i3_1 < kw_high)) + ? ((int32_t)q_vanilla_accelerator_0_i0[(((i1_1 * iw * ih) + ((i2_1 - kh_low) * iw) + i3_1 - kw_low))] - (i_zp)) + : 0); + + } + } + } + + + + for (int32_t i0 = 0; i0 < oc; ++i0) { + for (int32_t i1_2 = 0; i1_2 < ic; ++i1_2) { + for (int32_t i2_2 = 0; i2_2 < kh; ++i2_2) { + for (int32_t i3_2 = 0; i3_2 < kw; ++i3_2) { + int32_t cse_var_2 = ((((i0 * ic * kh * kw) + (i1_2 * kw * kh)) + (i2_2 * kw)) + i3_2); + compute_let[cse_var_2] = (((int32_t)q_vanilla_accelerator_0_i1[cse_var_2]) - k_zp); + } + } + } + } + + + for (int32_t oc_ = 0; oc_ < oc; ++oc_) { + for (int32_t oh = 0; oh < ih; ++oh) { + for (int32_t ow = 0; ow < iw; ++ow) { + int32_t cse_var_3 = (((oc_ * ih * iw) + (oh * iw)) + ow); + for (int32_t ic_ = 0; ic_ < ic; ++ic_) { + for (int32_t kh_ = 0; kh_ < kh; ++kh_) { + for (int32_t kw_ = 0; kw_ < kh; ++kw_) { + // int32_t cse_var_3 = (((oc_ * ih * iw) + (oh * iw)) + ow); Review Comment: nit: is this commented code required? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
