trevor-m commented on a change in pull request #4901: [Fix] Fix get_valid_count 
flaky test for cuda
URL: https://github.com/apache/incubator-tvm/pull/4901#discussion_r380828822
 
 

 ##########
 File path: topi/python/topi/cuda/nms.py
 ##########
 @@ -28,27 +28,46 @@
 from .. import tag
 
 
-def get_valid_counts_pre(data, flag, idx, score_threshold, id_index, 
score_index):
-    """Low level IR to Prepare get valid count of bounding boxes
-    given a score threshold. Also moves valid boxes to the
+def cuda_atomicAdd_rule(op):
+    if op.dtype == "float32":
+        return tvm.call_pure_extern("float32", "atomicAdd", op.args[0], 
op.args[1])
+    elif op.dtype == "float64":
+        return tvm.call_pure_extern("float64", "atomicAdd", op.args[0], 
op.args[1])
+    elif op.dtype == "int32":
+        return tvm.call_pure_extern("int32", "atomicAdd", op.args[0], 
op.args[1])
+    else:
+        raise RuntimeError("only support int32, float32 and float64")
+
+
+tvm.target.intrin.register_intrin_rule(
+    "cuda", "atomicAdd", cuda_atomicAdd_rule, override=True)
+
+
+def atomicAdd(x, y):
+    return tvm.call_pure_intrin(y.dtype, "atomicAdd", x, y)
+
+
+def get_valid_counts_ir(data, valid_count, Flag, score_threshold, id_index, 
score_index):
 
 Review comment:
   Please rename variables and functions to follow Python naming
   conventions (snake_case), for example:
   `Flag` -> `flag`
   `atomicAdd` -> `atomic_add`
   etc.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to