cbalint13 commented on code in PR #18182:
URL: https://github.com/apache/tvm/pull/18182#discussion_r2278750108


##########
python/tvm/tir/tensor_intrin/riscv_cpu.py:
##########
@@ -0,0 +1,740 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name,missing-function-docstring,unused-import
+"""Intrinsics for RVV tensorization, both for C and LLVM targets.
+=====================
+**Author**: `Federico Peccia <https://fPecc.github.io/>`_
+"""
+import re
+from tvm.script import tir as T
+from tvm.target.datatype import lower_call_pure_extern, register, register_op
+from .. import TensorIntrin
+
+#####################################################
+# LLVM RISC-V Intrinsic usage:
+# https://llvm.org/docs/RISCV/RISCVVectorExtension.html
+#
+# Vector types are represented using scalable vector
+# types, of the form <vscale x n x ty>. n and ty
+# control LMUL and SEW respectively (see table in docs).
+# TVM represents this with dtype = "tyxvscalexn".
+#
+# n is calculated as (64/SEW)*LMUL.
+# VL is passed to each intrinsic.
+#
+# Some examples (see table in docs):
+# int8 vector type with LMUL = 1 => int8xvscalex8
+# int16 vector type with LMUL = 4 => int16xvscalex16
+# int32 vector type with LMUL = 2 => int32xvscalex4
+#
+#####################################################
+
+#####################################################
+# Helper functions
+#####################################################
+
+RISCV_MIN_VL = 4
+
+
+def get_vlmax(vlen: int, lmul: int, max_sew: int) -> int:
+    """Return VLMAX
+
+    Args:
+        vlen (int): Actual VLEN
+        lmul (int): LMUL
+        max_sew (int): SEW
+
+    Returns:
+        int: VLMAX
+    """
+    # Computed as (LMUL * VLEN) / SEW. Floor division keeps the result an
+    # int and truncates any remainder; max_sew == 0 raises ZeroDivisionError.
+    return (lmul * vlen) // max_sew
+
+
+def get_vlen_from_mattrs(mattrs: list) -> int:
+    """Extract VLEN from LLVM mattrs list
+
+    Args:
+        mattrs (list): LLVM list of CPU mattrs
+
+    Returns:
+        int: VLEN
+    """
+    # Matches "zvl<digits>b" and captures the digits as the VLEN value.
+    vlen_regex = r"zvl(\d+)b"
+    # Fallback returned when no attribute matches the pattern.
+    vlen = 0
+    for mattr in mattrs:
+        # re.search is unanchored, so the pattern may appear anywhere in the
+        # attribute string (any leading characters are ignored).
+        match = re.search(vlen_regex, mattr)
+
+        if match:
+            # First matching attribute wins; later ones are not inspected.
+            vlen = int(match.group(1))
+            break
+    return vlen
+
+
+def _dtype_to_bits(dtype: str) -> int:
+    """Get bits from data type
+
+    Args:
+        dtype (str): Data type
+
+    Returns:
+        int: bits
+    """
+    # re.match anchors at the start of the string: the dtype must begin with
+    # "float", "int" or "uint" immediately followed by the decimal bit width.
+    # Anything after the digits is ignored.
+    # NOTE(review): re.match returns None for any other dtype, so the chained
+    # .group("width_bits") raises AttributeError before the assert below runs.
+    bits_per_item = int(
+        re.match(r"((float)|(int)|(uint))(?P<width_bits>[0-9]+)", 
dtype).group("width_bits")
+    )
+    # NOTE(review): int(...) never returns None, so this assert cannot fail and
+    # its message is never shown; the None check belongs on the match object.
+    assert bits_per_item is not None, f"don't know how to compute size of type 
{dtype}"
+    return bits_per_item
+
+
+def _get_dtype_string(dtype: str) -> str:
+    """Get only type of data type, without bits
+
+    Args:
+        dtype (str): Data type
+
+    Returns:
+        str: only string type
+    """
+    # Returns the leading run of lowercase ASCII letters, i.e. everything
+    # before the first character that is not in [a-z].
+    # NOTE(review): re.match returns None when dtype does not start with a
+    # lowercase letter, so .group(0) raises AttributeError in that case.
+    # NOTE(review): the str(...) wrapper looks redundant for a str input.
+    return str(re.match(r"[a-z]+", dtype).group(0))
+
+
+#####################################################
+# Parameterized intrinsics
+#####################################################
+
+
+def rvv_vmacc(J: int, vlmax: int, input_dtype: str, output_dtype: str, lmul: 
int):
+    # pylint: disable=unused-argument
+    input_bits = _dtype_to_bits(input_dtype)
+    output_bits = _dtype_to_bits(output_dtype)
+
+    output_str_type = _get_dtype_string(output_dtype)
+
+    output_dtype_prefix = output_str_type[0]
+
+    input_lmul = lmul if output_dtype_prefix == "f" else lmul // 2
+
+    load_llvm_intrinsic = "llvm.riscv.vle"
+    expand_llvm_intrinsic = "llvm.riscv.vsext"
+    init_llvm_intrinsic = "llvm.riscv.vle"
+    macc_llvm_intrinsic = "llvm.riscv.vmacc" if output_dtype_prefix != "f" 
else "llvm.riscv.vfmacc"
+    store_llvm_intrinsic = "llvm.riscv.vse"
+
+    # Calculated from https://llvm.org/docs//RISCV/RISCVVectorExtension.html
+    n_input_dtype = (64 // input_bits) * input_lmul
+    n_output_dtype = (64 // output_bits) * lmul
+
+    llvm_input_dtype = f"{input_dtype}xvscalex{n_input_dtype}"
+    llvm_macc_dtype = f"{output_str_type}{output_bits}xvscalex{n_output_dtype}"
+
+    broadcast_input = T.int16(0) if input_dtype == "int16" else T.float32(0)
+    broadcast_output = T.int32(0) if output_dtype == "int32" else T.float32(0)
+
+    @T.prim_func
+    def rvv_vmacc_desc(
+        A: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
+        B: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
+        C: T.Buffer((int(vlmax),), output_dtype, align=4, offset_factor=1),
+    ) -> None:
+        with T.block("root"):
+            T.reads(C[0 : int(vlmax)], A[0 : int(vlmax)], B[0 : int(vlmax)])
+            T.writes(C[0 : int(vlmax)])
+            for j in range(0, int(vlmax)):
+                with T.block("update"):
+                    vj = T.axis.remap("S", [j])
+                    C[vj] = C[vj] + T.cast(A[vj], output_dtype) * 
T.cast(B[vj], output_dtype)
+
+    @T.prim_func
+    def rvv_vmacc_llvm_impl(
+        A: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
+        B: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
+        C: T.Buffer((int(vlmax),), output_dtype, align=4, offset_factor=1),
+    ) -> None:
+
+        with T.block("root"):
+
+            T.reads(A[0 : int(vlmax)], B[0 : int(vlmax)])
+            T.writes(C[0 : int(vlmax)])
+
+            vec_A = (
+                T.call_llvm_intrin(
+                    llvm_macc_dtype,
+                    expand_llvm_intrinsic,
+                    T.uint32(3),

Review Comment:
   This ```nargs = T.uint32(3)``` argument is gone as of the recent upstream change
https://github.com/apache/tvm/pull/18206.
   All other occurrences below should be removed as well, to sync up with that
upstream change.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to