- fix alignment of pointer in inalloca case
- make the existing tests pass again: update the CHECK lines affected by the
new alloca/store, and in the indirect-return path only create
ReturnValuePointer when the return info's getIndirectByVal() is false

Index: llvm/test/DebugInfo/COFF/nrvo.ll
--- /dev/null
+++ llvm/test/DebugInfo/COFF/nrvo.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s | FileCheck %s --check-prefix=ASM
+; RUN: llc < %s -filetype=obj | llvm-readobj - --codeview | FileCheck %s --check-prefix=OBJ
+; C++ source to regenerate:
+; struct Foo {
+;   Foo() = default;
+;   Foo(Foo &&other) { x = other.x; }
+;   int x;
+; };
+; void some_function(int);
+; Foo GetFoo() {
+;   Foo foo;
+;   foo.x = 41;
+;   some_function(foo.x);
+;   return foo;
+; }
+; int main() {
+;   Foo bar = GetFoo();
+;   return bar.x;
+; }
+; $ clang t.cpp -S -emit-llvm -g -o t.ll
+; ASM-LABEL:  .long  241                      # Symbol subsection for GetFoo 
+; ASM:        .short 4414                     # Record kind: S_LOCAL
+; ASM-NEXT:   .long 4113                      # TypeIndex
+; ASM-NEXT:   .short 0                        # Flags
+; ASM-NEXT:   .asciz "foo"
+; ASM-NEXT:   .p2align 2
+; ASM-NEXT: .Ltmp
+; ASM:        .cv_def_range  .Ltmp{{.*}} .Ltmp{{.*}}, "B\021(\000\000\000"
+; OBJ: Subsection [
+; OBJ:   SubSectionType: Symbols (0xF1)
+; OBJ:   LocalSym {
+; OBJ:     Kind: S_LOCAL (0x113E)
+; OBJ:     Type: Foo& (0x1011)
+; OBJ:     Flags [ (0x0)
+; OBJ:     ]
+; OBJ:     VarName: foo
+; OBJ:   }
+; OBJ:   DefRangeFramePointerRelSym {
+; OBJ:     Offset: 40
+; OBJ:     LocalVariableAddrRange {
+; OBJ:       OffsetStart: .text+0x1D
+; OBJ:       ISectStart: 0x0
+; OBJ:       Range: 0x16
+; OBJ:   }
+; ModuleID = 't.cpp'
+source_filename = "t.cpp"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.16.27030"
+%struct.Foo = type { i32 }
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @"?some_function@@YAXH@Z"(i32) #0 !dbg !8 {
+  %.addr = alloca i32, align 4
+  store i32 %0, i32* %.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %.addr, metadata !12, metadata !DIExpression()), !dbg !13
+  ret void, !dbg !13
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @"?GetFoo@@YA?AUFoo@@XZ"(%struct.Foo* noalias sret %agg.result) #0 !dbg !14 {
+  %result.ptr = alloca i8*, align 8
+  %0 = bitcast %struct.Foo* %agg.result to i8*
+  store i8* %0, i8** %result.ptr, align 8
+  call void @llvm.dbg.declare(metadata i8** %result.ptr, metadata !28, metadata !DIExpression(DW_OP_deref)), !dbg !29
+  %x = getelementptr inbounds %struct.Foo, %struct.Foo* %agg.result, i32 0, i32 0, !dbg !30
+  store i32 41, i32* %x, align 4, !dbg !30
+  %x1 = getelementptr inbounds %struct.Foo, %struct.Foo* %agg.result, i32 0, i32 0, !dbg !31
+  %1 = load i32, i32* %x1, align 4, !dbg !31
+  call void @"?some_function@@YAXH@Z"(i32 %1), !dbg !31
+  ret void, !dbg !32
+; Function Attrs: noinline norecurse nounwind optnone uwtable
+define dso_local i32 @main() #2 !dbg !33 {
+  %retval = alloca i32, align 4
+  %bar = alloca %struct.Foo, align 4
+  store i32 0, i32* %retval, align 4
+  call void @llvm.dbg.declare(metadata %struct.Foo* %bar, metadata !36, metadata !DIExpression()), !dbg !37
+  call void @"?GetFoo@@YA?AUFoo@@XZ"(%struct.Foo* sret %bar), !dbg !37
+  %x = getelementptr inbounds %struct.Foo, %struct.Foo* %bar, i32 0, i32 0, !dbg !38
+  %0 = load i32, i32* %x, align 4, !dbg !38
+  ret i32 %0, !dbg !38
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git c19ebebac4bf853e77a69c74abe9f7fce98c1d17)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "t.cpp", directory: "C:\5Csrc\5Ctesting\5Cnrvo", checksumkind: CSK_MD5, checksum: "52a5a20c02c102dfd255d5615680a8bd")
+!2 = !{}
+!3 = !{i32 2, !"CodeView", i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git c19ebebac4bf853e77a69c74abe9f7fce98c1d17)"}
+!8 = distinct !DISubprogram(name: "some_function", linkageName: "?some_function@@YAXH@Z", scope: !1, file: !1, line: 13, type: !9, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null, !11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DILocalVariable(arg: 1, scope: !8, file: !1, line: 13, type: !11)
+!13 = !DILocation(line: 13, scope: !8)
+!14 = distinct !DISubprogram(name: "GetFoo", linkageName: "?GetFoo@@YA?AUFoo@@XZ", scope: !1, file: !1, line: 15, type: !15, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!17}
+!17 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !1, line: 1, size: 32, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !18, identifier: ".?AUFoo@@")
+!18 = !{!19, !20, !24}
+!19 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !17, file: !1, line: 4, baseType: !11, size: 32)
+!20 = !DISubprogram(name: "Foo", scope: !17, file: !1, line: 2, type: !21, scopeLine: 2, flags: DIFlagPrototyped, spFlags: 0)
+!21 = !DISubroutineType(types: !22)
+!22 = !{null, !23}
+!23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!24 = !DISubprogram(name: "Foo", scope: !17, file: !1, line: 3, type: !25, scopeLine: 3, flags: DIFlagPrototyped, spFlags: 0)
+!25 = !DISubroutineType(types: !26)
+!26 = !{null, !23, !27}
+!27 = !DIDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !17, size: 64)
+!28 = !DILocalVariable(name: "foo", scope: !14, file: !1, line: 17, type: !17)
+!29 = !DILocation(line: 17, scope: !14)
+!30 = !DILocation(line: 18, scope: !14)
+!31 = !DILocation(line: 19, scope: !14)
+!32 = !DILocation(line: 21, scope: !14)
+!33 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 23, type: !34, scopeLine: 23, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!34 = !DISubroutineType(types: !35)
+!35 = !{!11}
+!36 = !DILocalVariable(name: "bar", scope: !33, file: !1, line: 24, type: !17)
+!37 = !DILocation(line: 24, scope: !33)
+!38 = !DILocation(line: 25, scope: !33)
Index: llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
--- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1142,9 +1142,15 @@
     // If the variable has an attached offset expression, extract it.
     // FIXME: Try to handle DW_OP_deref as well.
     int64_t ExprOffset = 0;
-    if (VI.Expr)
-      if (!VI.Expr->extractIfOffset(ExprOffset))
+    bool Deref = false;
+    if (VI.Expr) {
+      // If there is one DW_OP_deref element, use offset of 0 and keep going.
+      if (VI.Expr->getNumElements() == 1 &&
+          VI.Expr->getElement(0) == llvm::dwarf::DW_OP_deref)
+        Deref = true;
+      else if (!VI.Expr->extractIfOffset(ExprOffset))
+    }
     // Get the frame register used and the offset.
     unsigned FrameReg = 0;
@@ -1154,6 +1160,7 @@
     // Calculate the label ranges.
     LocalVarDefRange DefRange =
         createDefRangeMem(CVReg, FrameOffset + ExprOffset);
     for (const InsnRange &Range : Scope->getRanges()) {
       const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
       const MCSymbol *End = getLabelAfterInsn(Range.second);
@@ -1164,6 +1171,9 @@
     LocalVariable Var;
     Var.DIVar = VI.Var;
+    if (Deref)
+      Var.UseReferenceType = true;
     recordLocalVariable(std::move(Var), Scope);
Index: debuginfo-tests/win_cdb/nrvo.cpp
--- /dev/null
+++ debuginfo-tests/win_cdb/nrvo.cpp
@@ -0,0 +1,49 @@
+// This ensures that DW_OP_deref is inserted when necessary, such as when NRVO
+// of a string object occurs in C++.
+// RUN: %clang_cl %s -o %t.exe -fuse-ld=lld -Z7
+// RUN: grep DE[B]UGGER: %s | sed -e 's/.*DE[B]UGGER: //' > %t.script
+// RUN: %cdb -cf %t.script %t.exe | FileCheck %s --check-prefixes=DEBUGGER,CHECK
+struct string {
+  string() {}
+  string(int i) : i(i) {}
+  ~string() {}
+  int i = 0;
+string get_string() {
+  string unused;
+  string result = 3;
+  __debugbreak();
+  return result;
+void some_function(int) {}
+struct string2 {
+  string2() = default;
+  string2(string2 &&other) { i = other.i; }
+  int i;
+string2 get_string2() {
+  string2 result;
+  result.i = 5;
+  some_function(result.i);
+  // Test that the debugger can get the value of result after another
+  // function is called.
+  __debugbreak();
+  return result;
+int main() {
+  get_string();
+  get_string2();
+// DEBUGGER: ?? result
+// CHECK: struct string *
+// CHECK:    +0x000 i : 0n3
+// DEBUGGER: ?? result
+// CHECK: struct string2 *
+// CHECK:    +0x000 i : 0n5
Index: debuginfo-tests/nrvo-string.cpp
--- debuginfo-tests/nrvo-string.cpp
+++ debuginfo-tests/nrvo-string.cpp
@@ -17,11 +17,32 @@
 string get_string() {
   string unused;
   string result = 3;
-// DEBUGGER: break 21
+  // DEBUGGER: break 21
   return result;
-int main() { get_string(); }
+void some_function(int) {}
+struct string2 {
+  string2() = default;
+  string2(string2 &&other) { i = other.i; }
+  int i;
+string2 get_string2() {
+  string2 result;
+  result.i = 5;
+  some_function(result.i);
+  // Test that the debugger can get the value of result after another
+  // function is called.
+  // DEBUGGER: break 35
+  return result;
+int main() {
+  get_string();
+  get_string2();
 // DEBUGGER: print result.i
 // CHECK:  = 3
+// DEBUGGER: print result.i
+// CHECK:  = 5
Index: clang/test/CodeGenObjC/objc-non-trivial-struct-nrvo.m
--- clang/test/CodeGenObjC/objc-non-trivial-struct-nrvo.m
+++ clang/test/CodeGenObjC/objc-non-trivial-struct-nrvo.m
@@ -38,7 +38,6 @@
 void func1(TrivialBig *);
 // CHECK: define void @testTrivialBig(%[[STRUCT_TRIVIALBIG]]* noalias sret %[[AGG_RESULT:.*]])
-// CHECK-NOT: alloca
 // CHECK: call void @func1(%[[STRUCT_TRIVIALBIG]]* %[[AGG_RESULT]])
 // CHECK-NEXT: ret void
Index: clang/test/CodeGenCXX/lambda-expressions.cpp
--- clang/test/CodeGenCXX/lambda-expressions.cpp
+++ clang/test/CodeGenCXX/lambda-expressions.cpp
@@ -195,7 +195,6 @@
 // CHECK-NEXT: ret i32
 // CHECK-LABEL: define internal void @"_ZZ1hvEN4$_118__invokeEv"(%struct.A* noalias sret %agg.result) {{.*}} {
-// CHECK-NOT: =
 // CHECK: call void @"_ZZ1hvENK4$_11clEv"(%struct.A* sret %agg.result,
 // CHECK-NEXT: ret void
 struct A { ~A(); };
Index: clang/test/CodeGenCXX/debug-info-nrvo.cpp
--- /dev/null
+++ clang/test/CodeGenCXX/debug-info-nrvo.cpp
@@ -0,0 +1,28 @@
+// RUN: %clangxx -target x86_64-unknown-unknown -g %s -emit-llvm -S -o - | FileCheck %s
+// RUN: %clangxx -target x86_64-unknown-unknown -g -fno-elide-constructors %s -emit-llvm -S -o - | FileCheck %s -check-prefix=NOELIDE
+struct Foo {
+  Foo() = default;
+  Foo(Foo &&other) { x = other.x; }
+  int x;
+void some_function(int);
+Foo getFoo() {
+  Foo foo;
+  foo.x = 41;
+  some_function(foo.x);
+  return foo;
+int main() {
+  Foo bar = getFoo();
+  return bar.x;
+// Check that an NRVO variable built in the sret return slot is described via a
+// pointer (%result.ptr) with DW_OP_deref; NOELIDE keeps a plain alloca.
+// CHECK: %result.ptr = alloca i8*, align 8
+// CHECK: call void @llvm.dbg.declare(metadata i8** %result.ptr,
+// CHECK-SAME: metadata !DIExpression(DW_OP_deref)
+// NOELIDE: call void @llvm.dbg.declare(metadata %struct.Foo* %foo,
+// NOELIDE-SAME:                        metadata !DIExpression()
Index: clang/test/CodeGenCXX/conditional-gnu-ext.cpp
--- clang/test/CodeGenCXX/conditional-gnu-ext.cpp
+++ clang/test/CodeGenCXX/conditional-gnu-ext.cpp
@@ -79,7 +79,7 @@
   B test0(B &x) {
     // CHECK-LABEL:    define void @_ZN5test35test0ERNS_1BE(
     // CHECK:      [[X:%.*]] = alloca [[B:%.*]]*,
-    // CHECK-NEXT: store [[B]]* {{%.*}}, [[B]]** [[X]]
+    // CHECK:      store [[B]]* {{%.*}}, [[B]]** [[X]]
     // CHECK-NEXT: [[T0:%.*]] = load [[B]]*, [[B]]** [[X]]
     // CHECK-NEXT: [[BOOL:%.*]] = call zeroext i1 @_ZN5test31BcvbEv([[B]]* [[T0]])
     // CHECK-NEXT: br i1 [[BOOL]]
@@ -94,7 +94,7 @@
   B test1() {
     // CHECK-LABEL:    define void @_ZN5test35test1Ev(
     // CHECK:      [[TEMP:%.*]] = alloca [[B]],
-    // CHECK-NEXT: call  void @_ZN5test312test1_helperEv([[B]]* sret [[TEMP]])
+    // CHECK:      call  void @_ZN5test312test1_helperEv([[B]]* sret [[TEMP]])
     // CHECK-NEXT: [[BOOL:%.*]] = call zeroext i1 @_ZN5test31BcvbEv([[B]]* [[TEMP]])
     // CHECK-NEXT: br i1 [[BOOL]]
     // CHECK:      call void @_ZN5test31BC1ERKS0_([[B]]* [[RESULT:%.*]], [[B]]* dereferenceable({{[0-9]+}}) [[TEMP]])
@@ -111,7 +111,7 @@
   A test2(B &x) {
     // CHECK-LABEL:    define void @_ZN5test35test2ERNS_1BE(
     // CHECK:      [[X:%.*]] = alloca [[B]]*,
-    // CHECK-NEXT: store [[B]]* {{%.*}}, [[B]]** [[X]]
+    // CHECK:      store [[B]]* {{%.*}}, [[B]]** [[X]]
     // CHECK-NEXT: [[T0:%.*]] = load [[B]]*, [[B]]** [[X]]
     // CHECK-NEXT: [[BOOL:%.*]] = call zeroext i1 @_ZN5test31BcvbEv([[B]]* [[T0]])
     // CHECK-NEXT: br i1 [[BOOL]]
@@ -126,7 +126,7 @@
   A test3() {
     // CHECK-LABEL:    define void @_ZN5test35test3Ev(
     // CHECK:      [[TEMP:%.*]] = alloca [[B]],
-    // CHECK-NEXT: call  void @_ZN5test312test3_helperEv([[B]]* sret [[TEMP]])
+    // CHECK:      call  void @_ZN5test312test3_helperEv([[B]]* sret [[TEMP]])
     // CHECK-NEXT: [[BOOL:%.*]] = call zeroext i1 @_ZN5test31BcvbEv([[B]]* [[TEMP]])
     // CHECK-NEXT: br i1 [[BOOL]]
     // CHECK:      call void @_ZN5test31BcvNS_1AEEv([[A]]* sret [[RESULT:%.*]], [[B]]* [[TEMP]])
Index: clang/test/CodeGen/arm64-microsoft-arguments.cpp
--- clang/test/CodeGen/arm64-microsoft-arguments.cpp
+++ clang/test/CodeGen/arm64-microsoft-arguments.cpp
@@ -43,7 +43,7 @@
 // Pass and return aggregate (of size < 16 bytes) with non-trivial destructor.
 // Passed directly but returned indirectly.
 // CHECK: define {{.*}} void {{.*}}f4{{.*}}(%struct.S4* inreg noalias sret %agg.result)
-// CHECK: call void {{.*}}func4{{.*}}(%struct.S4* inreg sret %agg.result, [2 x i64] %4)
+// CHECK: call void {{.*}}func4{{.*}}(%struct.S4* inreg sret %agg.result, [2 x i64] %5)
 struct S4 {
   int a[3];
@@ -57,7 +57,7 @@
 // Pass and return from instance method called from instance method.
 // CHECK: define {{.*}} void @{{.*}}bar@Q1{{.*}}(%class.Q1* %this, %class.P1* inreg noalias sret %agg.result)
-// CHECK: call void {{.*}}foo@P1{{.*}}(%class.P1* %ref.tmp, %class.P1* inreg sret %agg.result, i8 %0)
+// CHECK: call void {{.*}}foo@P1{{.*}}(%class.P1* %ref.tmp, %class.P1* inreg sret %agg.result, i8 %1)
 class P1 {
Index: clang/lib/CodeGen/CodeGenFunction.h
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -327,6 +327,10 @@
   /// value. This is invalid iff the function has no return value.
   Address ReturnValue = Address::invalid();
+  /// ReturnValuePointer - The temporary alloca (or, for inalloca, the argument
+  /// struct slot) holding a pointer to sret. Invalid if sret is not in use.
+  Address ReturnValuePointer = Address::invalid();
   /// Return true if a label was seen in the current scope.
   bool hasLabelBeenSeenInCurrentScope() const {
     if (CurLexicalScope)
Index: clang/lib/CodeGen/CodeGenFunction.cpp
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -895,6 +895,12 @@
     if (CurFnInfo->getReturnInfo().isSRetAfterThis())
     ReturnValue = Address(&*AI, CurFnInfo->getReturnInfo().getIndirectAlign());
+    if (!CurFnInfo->getReturnInfo().getIndirectByVal()) {
+      ReturnValuePointer = CreateDefaultAlignTempAlloca(Int8PtrTy, "result.ptr");
+      Builder.CreateStore(Builder.CreatePointerBitCastOrAddrSpaceCast(
+                              ReturnValue.getPointer(), Int8PtrTy),
+                          ReturnValuePointer);
+    }
   } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::InAlloca &&
              !hasScalarEvaluationKind(CurFnInfo->getReturnType())) {
     // Load the sret pointer from the argument struct and return into that.
@@ -902,6 +908,7 @@
     llvm::Function::arg_iterator EI = CurFn->arg_end();
     llvm::Value *Addr = Builder.CreateStructGEP(nullptr, &*EI, Idx);
+    ReturnValuePointer = Address(Addr, getPointerAlign());
     Addr = Builder.CreateAlignedLoad(Addr, getPointerAlign(), "agg.result");
     ReturnValue = Address(Addr, getNaturalTypeAlignment(RetTy));
   } else {
Index: clang/lib/CodeGen/CGDecl.cpp
--- clang/lib/CodeGen/CGDecl.cpp
+++ clang/lib/CodeGen/CGDecl.cpp
@@ -1403,12 +1403,11 @@
           ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
           : Address::invalid();
+  bool NRVO = getLangOpts().ElideConstructors && D.isNRVOVariable();
   if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
     address = OpenMPLocalAddr;
   } else if (Ty->isConstantSizeType()) {
-    bool NRVO = getLangOpts().ElideConstructors &&
-      D.isNRVOVariable();
     // If this value is an array or struct with a statically determinable
     // constant initializer, there are optimizations we can do.
@@ -1561,8 +1560,16 @@
   // Emit debug info for local var declaration.
   if (EmitDebugInfo && HaveInsertPoint()) {
+    Address DebugAddr = address;
+    bool UsePointerValue = NRVO && ReturnValuePointer.isValid();
-    (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder);
+    // If NRVO, point the debug info at the slot holding the sret pointer.
+    if (UsePointerValue)
+      DebugAddr = ReturnValuePointer;
+    (void)DI->EmitDeclareOfAutoVariable(&D, DebugAddr.getPointer(), Builder,
+                                        UsePointerValue);
   if (D.hasAttr<AnnotateAttr>() && HaveInsertPoint())
Index: clang/lib/CodeGen/CGDebugInfo.h
--- clang/lib/CodeGen/CGDebugInfo.h
+++ clang/lib/CodeGen/CGDebugInfo.h
@@ -422,9 +422,10 @@
   /// declaration.
   /// Returns a pointer to the DILocalVariable associated with the
   /// llvm.dbg.declare, or nullptr otherwise.
-  llvm::DILocalVariable *EmitDeclareOfAutoVariable(const VarDecl *Decl,
-                                                   llvm::Value *AI,
-                                                   CGBuilderTy &Builder);
+  llvm::DILocalVariable *
+  EmitDeclareOfAutoVariable(const VarDecl *Decl, llvm::Value *AI,
+                            CGBuilderTy &Builder,
+                            const bool UsePointerValue = false);
   /// Emit call to \c llvm.dbg.label for an label.
   void EmitLabel(const LabelDecl *D, CGBuilderTy &Builder);
@@ -507,7 +508,8 @@
   /// llvm.dbg.declare, or nullptr otherwise.
   llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI,
                                      llvm::Optional<unsigned> ArgNo,
-                                     CGBuilderTy &Builder);
+                                     CGBuilderTy &Builder,
+                                     const bool UsePointerValue = false);
   struct BlockByRefType {
     /// The wrapper struct used inside the __block_literal struct.
Index: clang/lib/CodeGen/CGDebugInfo.cpp
--- clang/lib/CodeGen/CGDebugInfo.cpp
+++ clang/lib/CodeGen/CGDebugInfo.cpp
@@ -3835,7 +3835,8 @@
 llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
                                                 llvm::Value *Storage,
                                                 llvm::Optional<unsigned> ArgNo,
-                                                CGBuilderTy &Builder) {
+                                                CGBuilderTy &Builder,
+                                                const bool UsePointerValue) {
   assert(DebugKind >= codegenoptions::LimitedDebugInfo);
   assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
   if (VD->hasAttr<NoDebugAttr>())
@@ -3940,6 +3941,16 @@
+  // Clang stores the sret pointer provided by the caller in a static alloca.
+  // Use DW_OP_deref to tell the debugger to load the pointer and treat it as
+  // the address of the variable.
+  if (UsePointerValue) {
+    assert(std::find(Expr.begin(), Expr.end(), llvm::dwarf::DW_OP_deref) ==
+               Expr.end() &&
+           "Debug info already contains DW_OP_deref.");
+    Expr.push_back(llvm::dwarf::DW_OP_deref);
+  }
   // Create the descriptor for the variable.
   auto *D = ArgNo ? DBuilder.createParameterVariable(
                         Scope, Name, *ArgNo, Unit, Line, Ty,
@@ -3958,9 +3969,10 @@
 llvm::DILocalVariable *
 CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage,
-                                       CGBuilderTy &Builder) {
+                                       CGBuilderTy &Builder,
+                                       const bool UsePointerValue) {
   assert(DebugKind >= codegenoptions::LimitedDebugInfo);
-  return EmitDeclare(VD, Storage, llvm::None, Builder);
+  return EmitDeclare(VD, Storage, llvm::None, Builder, UsePointerValue);
 void CGDebugInfo::EmitLabel(const LabelDecl *D, CGBuilderTy &Builder) {
