I can now successfully pass doubles to/from C functions on armhf. I've written a test program that passes lots of different combinations of single/double/longint/int64 to C code and all combinations that do not involve singles are working.

A patch with the current state of the work is attached.
Index: rtl/arm/setjump.inc
===================================================================
--- rtl/arm/setjump.inc	(revision 20511)
+++ rtl/arm/setjump.inc	(working copy)
@@ -16,7 +16,7 @@
 
 function fpc_setjmp(var S : jmp_buf) : longint;assembler;[Public, alias : 'FPC_SETJMP'];nostackframe; compilerproc;
   asm
-    {$if defined(FPUVFPV2) or defined(FPUVFPV3)}
+    {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     {$if defined(CPUARMV3) or defined(CPUARMV4) or defined(CPUARMV5)}
     fstmiax r0!, {d8-d15}
     {$else}
@@ -46,7 +46,7 @@
     movs    r0, r1
     it eq
     moveq   r0, #1
-    {$if defined(FPUVFPV2) or defined(FPUVFPV3)}
+    {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     fldmiad ip!, {d8-d15}
     {$endif}
     ldmia   ip,{v1-v6, sl, fp}
@@ -57,7 +57,7 @@
     mov     ip, r0
     movs    r0, r1
     moveq   r0, #1
-    {$if defined(FPUVFPV2) or defined(FPUVFPV3)}
+    {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     {$if defined(CPUARMV3) or defined(CPUARMV4) or defined(CPUARMV5)}
     fldmiax ip!, {d8-d15}
     {$else}
Index: rtl/arm/math.inc
===================================================================
--- rtl/arm/math.inc	(revision 20511)
+++ rtl/arm/math.inc	(working copy)
@@ -14,7 +14,7 @@
 
  **********************************************************************}
 
-{$if defined(FPUFPA) or defined(FPUFPA10) or defined(FPUFPA11) or defined(FPUVFPV2) or defined(FPUVFPV3)}
+{$if defined(FPUFPA) or defined(FPUFPA10) or defined(FPUFPA11) or defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     {$define FPC_SYSTEM_HAS_ABS}
     function fpc_abs_real(d : ValReal) : ValReal;compilerproc;
     begin
Index: rtl/arm/mathu.inc
===================================================================
--- rtl/arm/mathu.inc	(revision 20511)
+++ rtl/arm/mathu.inc	(working copy)
@@ -177,7 +177,7 @@
 begin
 end;
 
-{$elseif defined(darwin) or defined(FPUVFPV2) or defined(FPUVFPV3)}
+{$elseif defined(darwin) or defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_d16)}
 
 const
   _VFP_ENABLE_IM  =  1 shl 8;         { invalid operation      }
Index: rtl/arm/arm.inc
===================================================================
--- rtl/arm/arm.inc	(revision 20511)
+++ rtl/arm/arm.inc	(working copy)
@@ -30,7 +30,7 @@
 {$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
 
 {$define FPC_SYSTEM_HAS_SYSINITFPU}
-{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3)}
+{$if not defined(darwin) and not defined(FPUVFPV2) and not defined(FPUVFPV3) and not defined(FPUVFPV3_D16)}
 Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 begin
   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
Index: rtl/arm/setjumph.inc
===================================================================
--- rtl/arm/setjumph.inc	(revision 20511)
+++ rtl/arm/setjumph.inc	(working copy)
@@ -16,7 +16,7 @@
 
 type
    jmp_buf = packed record
-{$if defined(FPUVFPV2) or defined(FPUVFPV3)}
+{$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
       d8,d9,d10,d11,d12,d13,d14,d15: double;
 {$endif}
       v1,v2,v3,v4,v5,v6,sl,fp,sp,pc : dword;
Index: compiler/ninl.pas
===================================================================
--- compiler/ninl.pas	(revision 20511)
+++ compiler/ninl.pas	(working copy)
@@ -3094,7 +3094,7 @@
             internalerror(200104047);
 
           in_slice_x:
-            internalerror(2005101501);
+            internalerror(2005101502);
 
           in_ord_x,
           in_chr_byte:
Index: compiler/verbose.pas
===================================================================
--- compiler/verbose.pas	(revision 20511)
+++ compiler/verbose.pas	(working copy)
@@ -533,6 +533,8 @@
         UpdateStatus;
         do_internalerror(i);
         inc(status.errorcount);
+        //deliberately segfault for easier debugging
+        pchar(0)^ := 'A';
         raise ECompilerAbort.Create;
       end;
 
Index: compiler/cgobj.pas
===================================================================
--- compiler/cgobj.pas	(revision 20511)
+++ compiler/cgobj.pas	(working copy)
@@ -740,7 +740,10 @@
     procedure tcg.ungetcpuregister(list:TAsmList;r:Tregister);
       begin
         if not assigned(rg[getregtype(r)]) then
-          internalerror(200312126);
+          begin
+            writeln(ord(getregtype(r)));
+            internalerror(200312126);
+          end;
         rg[getregtype(r)].ungetcpuregister(list,r);
       end;
 
Index: compiler/systems.inc
===================================================================
--- compiler/systems.inc	(revision 20511)
+++ compiler/systems.inc	(working copy)
@@ -210,7 +210,7 @@
 
        tabi = (abi_default
             ,abi_powerpc_sysv,abi_powerpc_aix
-            ,abi_eabi,abi_armeb
+            ,abi_eabi,abi_armeb,abi_eabihf
        );
 
 
Index: compiler/fpcdefs.inc
===================================================================
--- compiler/fpcdefs.inc	(revision 20511)
+++ compiler/fpcdefs.inc	(working copy)
@@ -130,17 +130,21 @@
   {$define cputargethasfixedstack}
   {$define cpurefshaveindexreg}
   { default to armel }
-  {$if not(defined(CPUARM)) and not(defined(CPUARMEB)) and not(defined(FPC_OARM)) and not(defined(FPC_ARMEB))}
+  {$if not(defined(CPUARM)) and not(defined(CPUARMEB)) and not(defined(FPC_OARM)) and not(defined(FPC_ARMEB)) and not(defined(FPC_ARMHF))}
     {$define FPC_ARMEL}
   {$endif}
   { inherit FPC_ARMEL? }
-  {$if defined(CPUARMEL) and not(defined(FPC_OARM)) and not(defined(FPC_ARMEB))}
+  {$if defined(CPUARMEL) and not(defined(FPC_OARM)) and not(defined(FPC_ARMEB)) and not(defined(FPC_ARMHF))}
     {$define FPC_ARMEL}
   {$endif}
   { inherit FPC_ARMEB? }
-  {$if defined(CPUARMEB) and not(defined(FPC_OARM)) and not(defined(FPC_ARMEL))}
+  {$if defined(CPUARMEB) and not(defined(FPC_OARM)) and not(defined(FPC_ARMEL)) and not(defined(FPC_ARMHF))}
     {$define FPC_ARMEB}
   {$endif}
+  { inherit FPC_ARMHF? }
+  {$if defined(CPUARMHF) and not(defined(FPC_OARM)) and not(defined(FPC_ARMEL)) and not(defined(FPC_ARMEB))}
+    {$define FPC_ARMHF}
+  {$endif}
 {$endif arm}
 
 {$ifdef m68k}
Index: compiler/ncgutil.pas
===================================================================
--- compiler/ncgutil.pas	(revision 20511)
+++ compiler/ncgutil.pas	(working copy)
@@ -1977,6 +1977,10 @@
                         cg64.a_load64_ref_reg(list,href,destloc.register64);
                         unget_para(paraloc^);
                       end;
+                    LOC_FPUREGISTER:
+                      begin
+                        internalerror(2012031001);
+                      end;
                     else
                       internalerror(2005101501);
                   end
@@ -1995,22 +1999,25 @@
           LOC_CFPUREGISTER :
             begin
 {$if defined(sparc) or defined(arm)}
-              { Arm and Sparc passes floats in int registers, when loading to fpu register
-                we need a temp }
-              sizeleft := TCGSize2Size[destloc.size];
-              tg.GetTemp(list,sizeleft,sizeleft,tt_normal,tempref);
-              href:=tempref;
-              while assigned(paraloc) do
+              { Arm (with softfloat ABI) and Sparc passes floats in int registers, 
+                when loading to fpu register we need a temp }
+              if (paraloc^.loc = LOC_REGISTER) then 
                 begin
-                  unget_para(paraloc^);
-                  cg.a_load_cgparaloc_ref(list,paraloc^,href,sizeleft,destloc.reference.alignment);
-                  inc(href.offset,TCGSize2Size[paraloc^.size]);
-                  dec(sizeleft,TCGSize2Size[paraloc^.size]);
-                  paraloc:=paraloc^.next;
-                end;
-              gen_alloc_regloc(list,destloc);
-              cg.a_loadfpu_ref_reg(list,destloc.size,destloc.size,tempref,destloc.register);
-              tg.UnGetTemp(list,tempref);
+                  sizeleft := TCGSize2Size[destloc.size];
+                  tg.GetTemp(list,sizeleft,sizeleft,tt_normal,tempref);
+                  href:=tempref;
+                  while assigned(paraloc) do
+                    begin
+                      unget_para(paraloc^);
+                      cg.a_load_cgparaloc_ref(list,paraloc^,href,sizeleft,destloc.reference.alignment);
+                      inc(href.offset,TCGSize2Size[paraloc^.size]);
+                      dec(sizeleft,TCGSize2Size[paraloc^.size]);
+                      paraloc:=paraloc^.next;
+                    end;
+                  gen_alloc_regloc(list,destloc);
+                  cg.a_loadfpu_ref_reg(list,destloc.size,destloc.size,tempref,destloc.register);
+                  tg.UnGetTemp(list,tempref);
+               end;
 {$else sparc}
               unget_para(paraloc^);
               gen_alloc_regloc(list,destloc);
@@ -2047,7 +2054,9 @@
               else
 {$endif not cpu64bitalu}
                 begin
+                  //writeln('about to unget parameter, paraloc^.loc=',ord(paraloc^.loc),' getregtype(paraloc.register)=',ord(getregtype(paraloc^.register)));
                   unget_para(paraloc^);
+                  //writeln('parameter unget complete');
                   gen_alloc_regloc(list,destloc);
                   { from register to register -> alignment is irrelevant }
                   cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,0);
@@ -2116,7 +2125,9 @@
         for i:=0 to current_procinfo.procdef.paras.count-1 do
           begin
             currpara:=tparavarsym(current_procinfo.procdef.paras[i]);
+            //writeln('calling gen_load_cgpara_loc from ncgutil.pas');
             gen_load_cgpara_loc(list,currpara.vardef,currpara.paraloc[calleeside],currpara.initialloc,paramanager.param_use_paraloc(currpara.paraloc[calleeside]));
+            //writeln('called gen_load_cgpara_loc from ncgutil.pas');
             { gen_load_cgpara_loc() already allocated the initialloc
               -> don't allocate again }
             if currpara.initialloc.loc in [LOC_CREGISTER,LOC_CFPUREGISTER,LOC_CMMREGISTER] then
Index: compiler/systems.pas
===================================================================
--- compiler/systems.pas	(revision 20511)
+++ compiler/systems.pas	(working copy)
@@ -84,7 +84,7 @@
           id          : tasm;
           idtxt       : string[12];
           asmbin      : string[8];
-          asmcmd      : string[50];
+          asmcmd      : string[100];
           supported_targets : set of tsystem;
           flags        : set of tasmflags;
           labelprefix : string[3];
@@ -314,7 +314,7 @@
              'mips','arm', 'powerpc64', 'avr', 'mipsel');
 
        abi2str : array[tabi] of string[10] =
-         ('DEFAULT','SYSV','AIX','EABI','ARMEB');
+         ('DEFAULT','SYSV','AIX','EABI','ARMEB','EABIHF');
 
     var
        targetinfos   : array[tsystem] of psysteminfo;
Index: compiler/pp.pas
===================================================================
--- compiler/pp.pas	(revision 20511)
+++ compiler/pp.pas	(working copy)
@@ -43,6 +43,7 @@
   FPC_ARMEB           create an arm big endian compiler
   FPC_OARM            create an arm oabi compiler, only needed when the host
                       compiler is ARMEL or ARMEB
+  FPC_ARMHF           create an armhf (eabi vfp variant) compiler
   -----------------------------------------------------------------
   cpuflags            The target processor has status flags (on by default)
   cpufpemu            The target compiler will also support emitting software
Index: compiler/arm/narminl.pas
===================================================================
--- compiler/arm/narminl.pas	(revision 20511)
+++ compiler/arm/narminl.pas	(working copy)
@@ -89,7 +89,8 @@
                end;
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location_copy(location,left.location);
@@ -118,7 +119,8 @@
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112401);
@@ -140,7 +142,8 @@
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112402);
@@ -162,7 +165,8 @@
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112403);
@@ -213,7 +217,8 @@
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)));
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               if singleprec then
                 op:=A_FABSS
@@ -239,7 +244,8 @@
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)));
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               if singleprec then
                 op:=A_FMULS
@@ -265,7 +271,8 @@
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)));
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               if singleprec then
                 op:=A_FSQRTS
Index: compiler/arm/cgcpu.pas
===================================================================
--- compiler/arm/cgcpu.pas	(revision 20511)
+++ compiler/arm/cgcpu.pas	(working copy)
@@ -226,7 +226,7 @@
           non-overlapping subregs per register, so we can only use
           half the single precision registers for now (as sub registers of the
           double precision ones). }
-        if current_settings.fputype=fpu_vfpv3 then
+        if (current_settings.fputype=fpu_vfpv3) then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
@@ -1438,7 +1438,8 @@
                       end;
                 end;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 begin;
                   mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
                 end;
@@ -1509,7 +1510,7 @@
              begin
                reference_reset(ref,4);
                if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
-                  (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3]) then
+                  (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
                  begin
                    if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
                      begin
@@ -1537,7 +1538,8 @@
                        lastfloatreg-firstfloatreg+1,ref));
                    end;
                  fpu_vfpv2,
-                 fpu_vfpv3:
+                 fpu_vfpv3,
+                 fpu_vfpv3_d16:
                    begin
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
@@ -1591,7 +1593,8 @@
                       end;
                 end;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 begin;
                   { restore vfp registers? }
                   mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
@@ -1603,7 +1606,7 @@
               begin
                 reference_reset(ref,4);
                 if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
-                   (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3]) then
+                   (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
                   begin
                     if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
                       begin
@@ -1630,7 +1633,8 @@
                         lastfloatreg-firstfloatreg+1,ref));
                     end;
                   fpu_vfpv2,
-                  fpu_vfpv3:
+                  fpu_vfpv3,
+                  fpu_vfpv3_d16:
                     begin
                       ref.index:=ref.base;
                       ref.base:=NR_NO;
Index: compiler/arm/narmcnv.pas
===================================================================
--- compiler/arm/narmcnv.pas	(revision 20511)
+++ compiler/arm/narmcnv.pas	(working copy)
@@ -116,7 +116,8 @@
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
-              fpu_vfpv3:
+              fpu_vfpv3,
+              fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112702);
@@ -195,7 +196,8 @@
               end;
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
Index: compiler/arm/narmcal.pas
===================================================================
--- compiler/arm/narmcal.pas	(revision 20511)
+++ compiler/arm/narmcal.pas	(working copy)
@@ -41,13 +41,14 @@
     cgbase,
     cpubase,cpuinfo,
     ncgutil,
-    paramgr;
+    paramgr,
+    systems;
 
   procedure tarmcallnode.set_result_location(realresdef: tstoreddef);
     begin
-      if (realresdef.typ=floatdef) and
+      if (realresdef.typ=floatdef) and (target_info.abi <> abi_eabihf) and
          ((cs_fp_emulation in current_settings.moduleswitches) or
-          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3])) then
+          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16])) then
         begin
           { keep the fpu values in integer registers for now, the code
             generator will move them to memory or an mmregister when necessary
Index: compiler/arm/agarmgas.pas
===================================================================
--- compiler/arm/agarmgas.pas	(revision 20511)
+++ compiler/arm/agarmgas.pas	(working copy)
@@ -80,9 +80,14 @@
         result:=inherited MakeCmdLine;
         if (current_settings.fputype = fpu_soft) then
           result:='-mfpu=softvfp '+result;
-
+        if (current_settings.fputype = fpu_vfpv3) then
+          result:='-mfpu=vfpv3 '+result;
+        if (current_settings.fputype = fpu_vfpv3_d16) then
+          result:='-mfpu=vfpv3-d16 '+result;
         if current_settings.cputype = cpu_armv7m then
           result:='-march=armv7m -mthumb -mthumb-interwork '+result;
+        if target_info.abi = abi_eabihf then 
+          result:='-march=armv7-a -mfloat-abi=hard -meabi=5 '+result;
       end;
 
     procedure TArmGNUAssembler.WriteExtraHeader;
Index: compiler/arm/cpubase.pas
===================================================================
--- compiler/arm/cpubase.pas	(revision 20511)
+++ compiler/arm/cpubase.pas	(working copy)
@@ -286,7 +286,7 @@
 
       NR_FPU_RESULT_REG = NR_F0;
 
-      NR_MM_RESULT_REG  = NR_NO;
+      NR_MM_RESULT_REG  = NR_D0;
 
       NR_RETURN_ADDRESS_REG = NR_FUNCTION_RETURN_REG;
 
Index: compiler/arm/narmmat.pas
===================================================================
--- compiler/arm/narmmat.pas	(revision 20511)
+++ compiler/arm/narmmat.pas	(working copy)
@@ -331,7 +331,8 @@
                 cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location:=left.location;
Index: compiler/arm/cpuinfo.pas
===================================================================
--- compiler/arm/cpuinfo.pas	(revision 20511)
+++ compiler/arm/cpuinfo.pas	(working copy)
@@ -56,7 +56,8 @@
       fpu_fpa10,
       fpu_fpa11,
       fpu_vfpv2,
-      fpu_vfpv3
+      fpu_vfpv3,
+      fpu_vfpv3_d16
      );
 
    tcontrollertype =
@@ -197,14 +198,15 @@
      'ARMV7M'
    );
 
-   fputypestr : array[tfputype] of string[6] = ('',
+   fputypestr : array[tfputype] of string[9] = ('',
      'SOFT',
      'LIBGCC',
      'FPA',
      'FPA10',
      'FPA11',
      'VFPV2',
-     'VFPV3'
+     'VFPV3',
+     'VFPV3_D16'
    );
 
 
@@ -1015,7 +1017,7 @@
         )
     );
 
-   vfp_scalar = [fpu_vfpv2,fpu_vfpv3];
+   vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16];
 
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+
Index: compiler/arm/narmadd.pas
===================================================================
--- compiler/arm/narmadd.pas	(revision 20511)
+++ compiler/arm/narmadd.pas	(working copy)
@@ -164,7 +164,8 @@
                  cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
@@ -248,7 +249,8 @@
                    cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
Index: compiler/arm/cpupi.pas
===================================================================
--- compiler/arm/cpupi.pas	(revision 20511)
+++ compiler/arm/cpupi.pas	(working copy)
@@ -106,7 +106,8 @@
                 floatsavesize:=(lastfloatreg-firstfloatreg+1)*12;
             end;
           fpu_vfpv2,
-          fpu_vfpv3:
+          fpu_vfpv3,
+          fpu_vfpv3_d16:
             begin
               floatsavesize:=0;
               regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
Index: compiler/arm/cpupara.pas
===================================================================
--- compiler/arm/cpupara.pas	(revision 20511)
+++ compiler/arm/cpupara.pas	(working copy)
@@ -46,7 +46,7 @@
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-            var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword):longint;
+            var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; isvariadic: boolean):longint;
           procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
        end;
 
@@ -55,7 +55,8 @@
     uses
        verbose,systems,cutils,
        rgobj,
-       defutil,symsym;
+       defutil,symsym,
+       sysutils; //for inttohex in debug code, remove later
 
 
     function tarmparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
@@ -110,7 +111,7 @@
       end;
 
 
-    function getparaloc(calloption : tproccalloption; p : tdef) : tcgloc;
+    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
            if push_addr_param for the def is true
@@ -119,11 +120,15 @@
             orddef:
               getparaloc:=LOC_REGISTER;
             floatdef:
-              if (calloption in [pocall_cdecl,pocall_cppdecl,pocall_softfloat]) or
+              if (target_info.abi = abi_eabihf) and
+                 (not isvariadic) then
+                getparaloc:=LOC_MMREGISTER
+              else if (calloption in [pocall_cdecl,pocall_cppdecl,pocall_softfloat]) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
-                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3]) then
+                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
                 { the ARM eabi also allows passing VFP values via VFP registers,
-                  but at least neither Mac OS X nor Linux seems to do that }
+                  but Mac OS X doesn't seem to do that and linux only does it if
+                  built with the "-mfloat-abi=hard" option }
                 getparaloc:=LOC_REGISTER
               else
                 getparaloc:=LOC_FPUREGISTER;
@@ -223,7 +228,7 @@
 
 
     function tarmparamanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-        var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword):longint;
+        var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; isvariadic: boolean):longint;
 
       var
         nextintreg,nextfloatreg,nextmmreg : tsuperregister;
@@ -302,7 +307,7 @@
                   paralen := paradef.size
                 else
                   paralen := tcgsize2size[def_cgsize(paradef)];
-                loc := getparaloc(p.proccalloption,paradef);
+                loc := getparaloc(p.proccalloption,paradef,isvariadic);
                 if (paradef.typ in [objectdef,arraydef,recorddef]) and
                   not is_special_array(paradef) and
                   (hp.varspez in [vs_value,vs_const]) then
@@ -349,7 +354,7 @@
                     LOC_REGISTER:
                       begin
                         { align registers for eabi }
-                        if (target_info.abi=abi_eabi) and
+                        if ((target_info.abi=abi_eabi) or (target_info.abi=abi_eabihf)) and
                            firstparaloc and
                            (paradef.alignment=8) then
                           begin
@@ -405,6 +410,39 @@
                             end;
                           end;
                       end;
+                    LOC_MMREGISTER:
+                      begin
+                        if nextmmreg<=RS_D7 then
+                          begin
+                            paraloc^.loc:=LOC_MMREGISTER;
+                            case paraloc^.size of
+                              OS_F32:
+                                //FIXME: add support for dealing with "spare single registers"
+                                paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFS);
+                              OS_F64:
+                                begin
+                                  paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFD);
+                                  //writeln('paraloc^.register=',inttohex(longint(paraloc^.register),8));
+                                end;
+                              else
+                                internalerror(2012031601);
+                            end;
+                            
+                            inc(nextmmreg);
+                          end
+                        else
+                          begin
+                            //FIXME: set spare single register to none here
+                            { LOC_REFERENCE always contains everything that's left }
+                            paraloc^.loc:=LOC_REFERENCE;
+                            paraloc^.size:=int_cgsize(paralen);
+                            if (side=callerside) then
+                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                            paraloc^.reference.offset:=stack_offset;
+                            inc(stack_offset,align(paralen,4));
+                            paralen:=0;
+                         end;
+                      end;
                     LOC_REFERENCE:
                       begin
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
@@ -415,7 +453,7 @@
                         else
                           begin
                             { align stack for eabi }
-                            if (target_info.abi=abi_eabi) and
+                            if ((target_info.abi=abi_eabi) or (target_info.abi=abi_eabihf)) and
                                firstparaloc and
                                (paradef.alignment=8) then
                               stack_offset:=align(stack_offset,8);
@@ -499,9 +537,28 @@
         { Return in FPU register? }
         if def.typ=floatdef then
           begin
-            if (p.proccalloption in [pocall_softfloat]) or
+            if target_info.abi = abi_eabihf then 
+              begin
+                paraloc^.loc:=LOC_MMREGISTER;
+                case retcgsize of
+                  OS_64,
+                  OS_F64:
+                    begin
+                      paraloc^.register:=NR_MM_RESULT_REG;
+                    end;
+                  OS_32,
+                  OS_F32:
+                    begin
+                      paraloc^.register:=NR_S0;   
+                    end;
+                  else
+                    internalerror(2005082603);
+                end;
+                paraloc^.size:=retcgsize;
+              end
+            else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3]) then
+               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
               begin
                 case retcgsize of
                   OS_64,
@@ -566,7 +623,7 @@
       begin
         init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset);
 
-        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset);
+        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,false);
 
         create_funcretloc_info(p,side);
      end;
@@ -579,10 +636,10 @@
       begin
         init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset);
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset);
+        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,true);
         if (p.proccalloption in [pocall_cdecl,pocall_cppdecl]) then
           { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset)
+          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,true)
         else
           internalerror(200410231);
       end;
Index: compiler/systems/i_linux.pas
===================================================================
--- compiler/systems/i_linux.pas	(revision 20511)
+++ compiler/systems/i_linux.pas	(working copy)
@@ -534,6 +534,70 @@
             abi : abi_default
           );
 
+{$ifdef FPC_ARMHF}
+       system_arm_linux_info : tsysteminfo =
+          (
+            system       : system_arm_Linux;
+            name         : 'Linux for ARMHF';
+            shortname    : 'Linux';
+            flags        : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive,
+                            tf_requires_proper_alignment,
+                            tf_smartlink_sections,tf_smartlink_library,tf_has_winlike_resources];
+            cpu          : cpu_arm;
+            unit_env     : 'LINUXUNITS';
+            extradefines : 'UNIX;HASUNIX;CPUARMHF';
+            exeext       : '';
+            defext       : '.def';
+            scriptext    : '.sh';
+            smartext     : '.sl';
+            unitext      : '.ppu';
+            unitlibext   : '.ppl';
+            asmext       : '.s';
+            objext       : '.o';
+            resext       : '.res';
+            resobjext    : '.or';
+            sharedlibext : '.so';
+            staticlibext : '.a';
+            staticlibprefix : 'libp';
+            sharedlibprefix : 'lib';
+            sharedClibext : '.so';
+            staticClibext : '.a';
+            staticClibprefix : 'lib';
+            sharedClibprefix : 'lib';
+            importlibprefix : 'libimp';
+            importlibext : '.a';
+            Cprefix      : '';
+            newline      : #10;
+            dirsep       : '/';
+            assem        : as_gas;
+            assemextern  : as_gas;
+            link         : nil;
+            linkextern   : nil;
+            ar           : ar_gnu_ar;
+            res          : res_elf;
+            dbg          : dbg_stabs;
+            script       : script_unix;
+            endian       : endian_little;
+            alignment    :
+              (
+                procalign       : 4;
+                loopalign       : 4;
+                jumpalign       : 0;
+                constalignmin   : 0;
+                constalignmax   : 8;
+                varalignmin     : 0;
+                varalignmax     : 8;
+                localalignmin   : 4;
+                localalignmax   : 8;
+                recordalignmin  : 0;
+                recordalignmax  : 8;
+                maxCrecordalign : 8
+              );
+            first_parm_offset : 8;
+            stacksize    : 8*1024*1024;
+            abi : abi_eabihf
+          );
+{$else FPC_ARMHF}
 {$ifdef FPC_ARMEL}
        system_arm_linux_info : tsysteminfo =
           (
@@ -726,6 +790,7 @@
           );
 {$endif FPC_ARMEB}
 {$endif FPC_ARMEL}
+{$endif FPC_ARMHF}
 
        system_mips_linux_info : tsysteminfo =
           (
Index: compiler/systems/t_linux.pas
===================================================================
--- compiler/systems/t_linux.pas	(revision 20511)
+++ compiler/systems/t_linux.pas	(working copy)
@@ -185,11 +185,15 @@
 {$endif powerpc64}
 
 {$ifdef arm}
+{$ifdef FPC_ARMHF}
+     defdynlinker:='/lib/ld-linux.so.3';
+{$else FPC_ARMHF}
 {$ifdef FPC_ARMEL}
      defdynlinker:='/lib/ld-linux.so.3';
 {$else FPC_ARMEL}
      defdynlinker:='/lib/ld-linux.so.2';
 {$endif FPC_ARMEL}
+{$endif FPC_ARMHF}
 {$endif arm}
 
 {$ifdef mips}
Index: compiler/ncgcal.pas
===================================================================
--- compiler/ncgcal.pas	(revision 20511)
+++ compiler/ncgcal.pas	(working copy)
@@ -369,7 +369,9 @@
             if (cnf_return_value_used in callnodeflags) or
                assigned(funcretnode) then
               begin
+                //writeln('calling gen_load_cgpara_loc from ncgcal.pas');
                 gen_load_cgpara_loc(current_asmdata.CurrAsmList,realresdef,retloc,location,false);
+                //writeln('called gen_load_cgpara_loc from ncgcal.pas');
 {$ifdef arm}
                 if (resultdef.typ=floatdef) and
                    (location.loc=LOC_REGISTER) and
Index: compiler/options.pas
===================================================================
--- compiler/options.pas	(revision 20511)
+++ compiler/options.pas	(working copy)
@@ -2883,6 +2883,10 @@
     undef_system_macro('FPC_ABI_'+abi2str[abi]);
   def_system_macro('FPC_ABI_'+abi2str[target_info.abi]);
 
+  { Define FPC_ABI_EABI in addition to FPC_ABI_EABIHF on EABI VFP hardfloat
+    systems, since most code needs to behave the same on both. }
+  if target_info.abi = abi_eabihf then def_system_macro('FPC_ABI_EABI');
+
   { Write logo }
   if option.ParaLogo then
     option.writelogo;
@@ -3051,6 +3055,22 @@
     end;
 
 {$ifdef arm}
+  if target_info.abi = abi_eabihf then begin
+    if not(option.FPUSetExplicitly) then begin
+      init_settings.fputype:=fpu_vfpv3_d16
+    end else begin
+      if not (init_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then begin
+        //fixme: figure out how to error out properly
+        writeln('You must use a FPU type of VFPV2, VFPV3 or VFPV3_D16 when using the EABIHF ABI target');
+        halt;
+      end;
+    end;
+ 
+  end;
+{$endif arm}
+  
+
+{$ifdef arm}
 { set default cpu type to ARMv6 for Darwin unless specified otherwise }
 if (target_info.system=system_arm_darwin) then
   begin
@@ -3059,6 +3079,16 @@
     if not option.OptCPUSetExplicitly then
       init_settings.optimizecputype:=cpu_armv6;
   end;
+
+{ set default cpu type to ARMv7 for ARMHF unless specified otherwise }
+if (target_info.abi = abi_eabihf) then 
+  begin
+    if not option.CPUSetExplicitly then
+      init_settings.cputype:=cpu_armv7;
+    if not option.OptCPUSetExplicitly then
+      init_settings.optimizecputype:=cpu_armv7;
+  end;
+
 {$endif arm}
 
   { now we can define cpu and fpu type }
_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-devel

Reply via email to