yonghong-song created this revision.
yonghong-song added a reviewer: ast.
Herald added projects: clang, LLVM.
Herald added a subscriber: llvm-commits.

In the current Linux BPF UAPI header

  https://github.com/torvalds/linux/blob/master/include/uapi/linux/bpf.h

`struct __sk_buff/xdp_md` have fields

  __u32 data;
  __u32 data_end;
  __u32 data_meta;

which actually represent pointers. Typically, in a BPF program, users
write the following:

  void *data = (void *)(long)__sk_buff->data;

The kernel verifier magically copies the address into the target
64-bit register for __sk_buff->data, and we hope that nothing gets
messed up and that the address survives intact into the variable "data".

In the past, we have seen a few issues with this. For example,
for the above C code, the IR looks like:

  i32_v = load u32
  i64_v = zext i32_v
  ...

The BPF backend has tried, through InstrInfo.td pattern matching or
optimization in MachineInstr SSA analysis and transformation,
to recognize the above pattern and remove the zext so that the real
address is preserved. But this is still fragile, and in the past we have
had to fix multiple bugs caused by other changes in the BPF backend. The
optimization may not cover all possible cases. Some users may even
use inline assembly to work around potentially missed compiler
zext elimination.

This patch introduces the following builtin function for the BPF target:

  void *ptr = __builtin_load_u32_to_ptr(void *base, int offset);

The builtin performs a 32-bit load from the address "base + offset"
and returns the zero-extended result as a pointer. The offset argument
must be an integer constant. This way, the user is guaranteed a correct
address.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81479

Files:
  clang/include/clang/Basic/BuiltinsBPF.def
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtin-bpf-load-u32-to-ptr.c
  clang/test/Sema/builtin-bpf-load-u32-to-ptr.c
  llvm/include/llvm/IR/IntrinsicsBPF.td

Index: llvm/include/llvm/IR/IntrinsicsBPF.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsBPF.td
+++ llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -26,4 +26,6 @@
   def int_bpf_btf_type_id : GCCBuiltin<"__builtin_bpf_btf_type_id">,
               Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_any_ty, llvm_i64_ty],
               [IntrNoMem]>;
+  def int_bpf_load_u32_to_ptr : GCCBuiltin<"__builtin_bpf_load_u32_to_ptr">,
+              Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
 }
Index: clang/test/Sema/builtin-bpf-load-u32-to-ptr.c
===================================================================
--- /dev/null
+++ clang/test/Sema/builtin-bpf-load-u32-to-ptr.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -x c -triple bpf-pc-linux-gnu -dwarf-version=4 -fsyntax-only -verify %s
+
+struct t { int a; int b; };
+
+void *invalid1(struct t *arg) { return __builtin_load_u32_to_ptr(arg, arg->a); } // expected-error {{__builtin_load_u32_to_ptr argument 2 not a constant}}
+void *invalid2(struct t *arg) { return __builtin_load_u32_to_ptr(arg + 4); } // expected-error {{too few arguments to function call, expected 2, have 1}}
+void *invalid3(struct t *arg) { return __builtin_load_u32_to_ptr(arg, 4, 0); } // expected-error {{too many arguments to function call, expected 2, have 3}}
+unsigned invalid4(struct t *arg) { return __builtin_load_u32_to_ptr(arg, 4); } // expected-warning {{incompatible pointer to integer conversion returning 'void *' from a function with result type 'unsigned int'}}
+
+void *valid1(struct t *arg) { return __builtin_load_u32_to_ptr(arg, 4); }
Index: clang/test/CodeGen/builtin-bpf-load-u32-to-ptr.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtin-bpf-load-u32-to-ptr.c
@@ -0,0 +1,8 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang -target bpf -emit-llvm -S %s -o - | FileCheck %s
+
+struct t { int a; int b; };
+void *test(struct t *arg) { return __builtin_load_u32_to_ptr(arg, 4); }
+
+// CHECK: define dso_local i8* @test
+// CHECK: call i8* @llvm.bpf.load.u32.to.ptr(i8* %{{[0-9a-z.]+}}, i64 4)
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -2501,13 +2501,28 @@
 bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID,
                                        CallExpr *TheCall) {
   assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
-          BuiltinID == BPF::BI__builtin_btf_type_id) &&
-         "unexpected ARM builtin");
+          BuiltinID == BPF::BI__builtin_btf_type_id ||
+          BuiltinID == BPF::BI__builtin_load_u32_to_ptr) &&
+         "unexpected BPF builtin");
+
+  // Generic checking has done basic checking against the
+  // signature, here only to ensure the second argument
+  // be a constant.
+  Expr *Arg;
+  if (BuiltinID == BPF::BI__builtin_load_u32_to_ptr) {
+    llvm::APSInt Value;
+    Arg = TheCall->getArg(1);
+    if (!Arg->isIntegerConstantExpr(Value, Context)) {
+      Diag(Arg->getBeginLoc(), diag::err_load_u32_to_ptr_not_const)
+          << 2 << Arg->getSourceRange();
+      return true;
+    }
+    return false;
+  }
 
   if (checkArgCount(*this, TheCall, 2))
     return true;
 
-  Expr *Arg;
   if (BuiltinID == BPF::BI__builtin_btf_type_id) {
     // The second argument needs to be a constant int
     llvm::APSInt Value;
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -10685,7 +10685,8 @@
 Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
   assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
-          BuiltinID == BPF::BI__builtin_btf_type_id) &&
+          BuiltinID == BPF::BI__builtin_btf_type_id ||
+          BuiltinID == BPF::BI__builtin_load_u32_to_ptr) &&
          "unexpected BPF builtin");
 
   switch (BuiltinID) {
@@ -10780,6 +10781,16 @@
     Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
     return Fn;
   }
+  case BPF::BI__builtin_load_u32_to_ptr: {
+    Value *BaseV = EmitScalarExpr(E->getArg(0));
+    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+    Value *OffsetV = ConstantInt::get(Int64Ty, C->getSExtValue());
+
+    // Built the IR for the bpf_load_u32_to_ptr intrinsic.
+    llvm::Function *FnLoadU32ToPtr = llvm::Intrinsic::getDeclaration(
+        &CGM.getModule(), llvm::Intrinsic::bpf_load_u32_to_ptr, {});
+    return Builder.CreateCall(FnLoadU32ToPtr, {BaseV, OffsetV});
+  }
   }
 }
 
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10788,6 +10788,8 @@
   "__builtin_preserve_field_info argument %0 not a constant">;
 def err_btf_type_id_not_const: Error<
   "__builtin_btf_type_id argument %0 not a constant">;
+def err_load_u32_to_ptr_not_const: Error<
+  "__builtin_load_u32_to_ptr argument %0 not a constant">;
 
 def err_bit_cast_non_trivially_copyable : Error<
   "__builtin_bit_cast %select{source|destination}0 type must be trivially copyable">;
Index: clang/include/clang/Basic/BuiltinsBPF.def
===================================================================
--- clang/include/clang/Basic/BuiltinsBPF.def
+++ clang/include/clang/Basic/BuiltinsBPF.def
@@ -23,5 +23,8 @@
 // Get BTF type id.
 TARGET_BUILTIN(__builtin_btf_type_id, "Ui.", "t", "")
 
+// Load a unsigned value and convert it to a pointer.
+TARGET_BUILTIN(__builtin_load_u32_to_ptr, "v*v*Ui", "n", "")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to