Dear Kyösti,

Am Samstag, den 16.04.2016, 14:16 +0300 schrieb Kyösti Mälkki:
> On Thu, Apr 14, 2016 at 6:17 PM, Kyösti Mälkki wrote:
> > On Mon, Apr 4, 2016 at 5:58 PM, Aaron Durbin:
> > > 
> > > From your "before" build we have the following where the
> > > addresses start diverging.
> > > 
> > > -02004072 W arch_segment_loaded
> > > -02004073 W platform_prog_run
> > > -02004074 T prog_run
> > > 
> > > +02004072 W platform_segment_loaded
> > > +02004073 W arch_segment_loaded
> > > +02004074 T prog_segment_loaded
> > > +020040a3 W platform_prog_run
> > > +020040a4 T prog_run
> > > 
> > > That's 0x30 hex bytes off starting from platform_prog_run(). You
> > > could try adding nops to try to re-align things to where they
> > > were. It'll be tedious, but that's the only thing I can think of
> > > at the moment.
> >
> > Hi
> > 
> > On pcengines/apu1 I got regression of 300ms on romstage too. The
> > delay was gone once I enabled serial console.
> > 
> > Attached is a patch that places newly introduce
> > prog_segment_loaded() and platform_segment_loaded() functions after
> > any AGESA executable in the binary.
> > This alone fixes the delay too. Looks a lot like alignment issue
> > now.
> > 
> > If we inject this 0x30 offset at different locations within
> > libagesa build, maybe we can narrow this down to few functions?
> 
> I narrowed it down Lib/amdlib.c where all the tiny low-level IO access
> functions live. Injecting extra 0x30 before and after this object in
> libagesa.fam14.o resulted with 500-600ms difference in romstage
> performance. Splitting something critical across cachelines perhaps?
> 
> Forcing function aligment alone seems to fix this, not much
> difference compared to build of Lib/amdlib.c with -O2.

Applying your three patches, indeed fix the regression in boot time by
the commit 096f4579 (lib/prog_loading: introduce prog_segment_loaded())
[1]. With your patches applied it’s even faster than before.

It’s now down from around 1,379 ms to 380 ms, which is even faster than
the 520 ms before the regression (and 495 ms) before that.

Thanks a lot!

Are the patches just proof of concept, or ready to be reviewed on
Gerrit?


Thanks,

Paul


[1] https://review.coreboot.org/14214
From f5f3d485100947193f05a486e8830053fe98cac9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ky=C3=B6sti=20M=C3=A4lkki?= <[email protected]>
Date: Sat, 16 Apr 2016 01:28:35 +0300
Subject: [PATCH 1/3] AGESA f14: Suppress maybe-uninitialized warnings

Compiling with -O2 throws errors on these.

Change-Id: I04afa42f0ac76677f859ca72f9df2e128762ad3c
---
 src/vendorcode/amd/agesa/f14/Lib/amdlib.c                |  2 +-
 src/vendorcode/amd/agesa/f14/Proc/Mem/Tech/DDR3/mtspd3.c | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/vendorcode/amd/agesa/f14/Lib/amdlib.c b/src/vendorcode/amd/agesa/f14/Lib/amdlib.c
index 24c162a..86e1b9e 100644
--- a/src/vendorcode/amd/agesa/f14/Lib/amdlib.c
+++ b/src/vendorcode/amd/agesa/f14/Lib/amdlib.c
@@ -561,7 +561,7 @@ LibAmdIoRMW (
 {
   UINT32  TempData = 0;
   UINT32  TempMask = 0;
-  UINT32  Value;
+  UINT32  Value = 0;
   LibAmdGetDataFromPtr (AccessWidth, Data,  DataMask, &TempData, &TempMask);
   LibAmdIoRead (AccessWidth, IoAddress, &Value, StdHeader);
   Value = (Value & (~TempMask)) | TempData;
diff --git a/src/vendorcode/amd/agesa/f14/Proc/Mem/Tech/DDR3/mtspd3.c b/src/vendorcode/amd/agesa/f14/Proc/Mem/Tech/DDR3/mtspd3.c
index ea9cabc..eb5b161 100644
--- a/src/vendorcode/amd/agesa/f14/Proc/Mem/Tech/DDR3/mtspd3.c
+++ b/src/vendorcode/amd/agesa/f14/Proc/Mem/Tech/DDR3/mtspd3.c
@@ -155,7 +155,7 @@ MemTDIMMPresence3 (
   UINT8 Channel;
   UINT8 i;
   MEM_PARAMETER_STRUCT *RefPtr;
-  UINT8 *SpdBufferPtr;
+  UINT8 *SpdBufferPtr = NULL;
   DIE_STRUCT *MCTPtr;
   DCT_STRUCT *DCTPtr;
   CH_DEF_STRUCT *ChannelPtr;
@@ -434,7 +434,7 @@ MemTSPDGetTargetSpeed3 (
   IN OUT   MEM_TECH_BLOCK *TechPtr
   )
 {
-  UINT8 *SpdBufferPtr;
+  UINT8 *SpdBufferPtr = NULL;
   UINT8 Dimm;
   UINT8 Dct;
   UINT8 Channel;
@@ -514,8 +514,8 @@ MemTSPDCalcWidth3 (
   IN OUT   MEM_TECH_BLOCK *TechPtr
   )
 {
-  UINT8 *SpdBufferAPtr;
-  UINT8 *SpdBufferBPtr;
+  UINT8 *SpdBufferAPtr = NULL;
+  UINT8 *SpdBufferBPtr = NULL;
   MEM_NB_BLOCK *NBPtr;
   DIE_STRUCT *MCTPtr;
   DCT_STRUCT *DCTPtr;
@@ -635,7 +635,7 @@ MemTAutoCycTiming3 (
     0
   };
 
-  UINT8  *SpdBufferPtr;
+  UINT8  *SpdBufferPtr = NULL;
   INT32  MiniMaxTmg[GET_SIZE_OF (SpdIndexes)];
   UINT8  MiniMaxTrfc[4];
 
@@ -761,7 +761,7 @@ MemTSPDSetBanks3 (
   IN OUT   MEM_TECH_BLOCK *TechPtr
   )
 {
-  UINT8 *SpdBufferPtr;
+  UINT8 *SpdBufferPtr = NULL;
   UINT8 i;
   UINT8 ChipSel;
   UINT8 DimmID;
@@ -971,7 +971,7 @@ MemTSPDGetTCL3 (
   IN OUT   MEM_TECH_BLOCK *TechPtr
   )
 {
-  UINT8  *SpdBufferPtr;
+  UINT8  *SpdBufferPtr = NULL;
   UINT8 CLdesired;
   UINT8 CLactual;
   UINT8 Dimm;
-- 
1.9.1

From 70e7def891cb14051b60b2e4378319ef229a7d3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ky=C3=B6sti=20M=C3=A4lkki?= <[email protected]>
Date: Sat, 16 Apr 2016 01:40:45 +0300
Subject: [PATCH 2/3] AGESA: Do not optimise amdlib.c for size

Using -Os disables -falign-functions, for unlucky builds this will
delay entry to ramstage by 600ms. This almost doubles the time spent
in romstage for builds where serial console is disabled.
---
 src/vendorcode/amd/agesa/f14/Makefile.inc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/vendorcode/amd/agesa/f14/Makefile.inc b/src/vendorcode/amd/agesa/f14/Makefile.inc
index a5f7561..1eee9bf 100644
--- a/src/vendorcode/amd/agesa/f14/Makefile.inc
+++ b/src/vendorcode/amd/agesa/f14/Makefile.inc
@@ -330,6 +330,10 @@ libagesa-y += Proc/GNB/Nb/NbPowerMgmt.c
 libagesa-y += Proc/Recovery/HT/htInitReset.c
 libagesa-y += Proc/Mem/Main/mu.c
 
+# Do not optimise performance-critical low-level IO for size with -Os,
+# request -O2 with -falign-functions.
+$(obj)/libagesa/vendorcode/amd/agesa/f14/Lib/amdlib.o: CFLAGS_libagesa += -O2
+
 $(obj)/libagesa.fam14.a: $$(libagesa-objs)
 	@printf " AGESA $(subst $(obj)/,,$(@))\n"
 	$(AR_romstage) rcs $@ $+
-- 
1.9.1

From 79b212053dbead3ee04984becb13009a3e637ecc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ky=C3=B6sti=20M=C3=A4lkki?= <[email protected]>
Date: Sat, 16 Apr 2016 13:58:28 +0300
Subject: [PATCH 3/3] AGESA: Align AMDlib functions

Change-Id: I2bf052a717c10760a530f7c4b9a472399e23c0f2
---
 src/vendorcode/amd/agesa/f14/Lib/amdlib.c | 36 +++++++++++++++++++++++++++++--
 src/vendorcode/amd/agesa/f14/Makefile.inc |  4 ----
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/vendorcode/amd/agesa/f14/Lib/amdlib.c b/src/vendorcode/amd/agesa/f14/Lib/amdlib.c
index 86e1b9e..8d821cb 100644
--- a/src/vendorcode/amd/agesa/f14/Lib/amdlib.c
+++ b/src/vendorcode/amd/agesa/f14/Lib/amdlib.c
@@ -101,6 +101,7 @@ LibAmdGetDataFromPtr (
  *----------------------------------------------------------------------------------------
  */
 UINT8
+__attribute__ ((aligned (32)))
 ReadIo8 (
   IN       UINT16 Address
   )
@@ -108,6 +109,7 @@ ReadIo8 (
   return __inbyte (Address);
 }
 UINT16
+__attribute__ ((aligned (32)))
 ReadIo16 (
   IN       UINT16 Address
   )
@@ -115,6 +117,7 @@ ReadIo16 (
   return __inword (Address);
 }
 UINT32
+__attribute__ ((aligned (32)))
 ReadIo32 (
   IN       UINT16 Address
   )
@@ -122,6 +125,7 @@ ReadIo32 (
   return __indword (Address);
 }
 VOID
+__attribute__ ((aligned (32)))
 WriteIo8 (
   IN       UINT16 Address,
   IN       UINT8 Data
@@ -130,6 +134,7 @@ WriteIo8 (
   __outbyte (Address, Data);
 }
 VOID
+__attribute__ ((aligned (32)))
 WriteIo16 (
   IN       UINT16 Address,
   IN       UINT16 Data
@@ -138,6 +143,7 @@ WriteIo16 (
   __outword (Address, Data);
 }
 VOID
+__attribute__ ((aligned (32)))
 WriteIo32 (
   IN       UINT16 Address,
   IN       UINT32 Data
@@ -166,6 +172,7 @@ RestoreHwcr (
   __writemsr (0xC0010015, value);
 }
 UINT8
+__attribute__ ((aligned (32)))
 Read64Mem8 (
   IN       UINT64 Address
   )
@@ -181,6 +188,7 @@ Read64Mem8 (
   return dataRead;
 }
 UINT16
+__attribute__ ((aligned (32)))
 Read64Mem16 (
   IN       UINT64 Address
   )
@@ -196,6 +204,7 @@ Read64Mem16 (
   return dataRead;
 }
 UINT32
+__attribute__ ((aligned (32)))
 Read64Mem32 (
   IN       UINT64 Address
   )
@@ -211,6 +220,7 @@ Read64Mem32 (
   return dataRead;
   }
 VOID
+__attribute__ ((aligned (32)))
 Write64Mem8 (
   IN       UINT64 Address,
   IN       UINT8 Data
@@ -227,6 +237,7 @@ Write64Mem8 (
   }
 }
 VOID
+__attribute__ ((aligned (32)))
 Write64Mem16 (
   IN       UINT64 Address,
   IN       UINT16 Data
@@ -243,6 +254,7 @@ Write64Mem16 (
  }
 }
 VOID
+__attribute__ ((aligned (32)))
 Write64Mem32 (
   IN       UINT64 Address,
   IN       UINT32 Data
@@ -363,6 +375,7 @@ LibAmdBitScanReverse (
 }
 
 VOID
+__attribute__ ((aligned (32)))
 LibAmdMsrRead (
   IN       UINT32 MsrAddress,
      OUT   UINT64 *Value,
@@ -372,6 +385,7 @@ LibAmdMsrRead (
   *Value = __readmsr (MsrAddress);
 }
 VOID
+__attribute__ ((aligned (32)))
 LibAmdMsrWrite (
   IN       UINT32 MsrAddress,
   IN       UINT64 *Value,
@@ -476,6 +490,7 @@ StopHere (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdIoRead (
   IN       ACCESS_WIDTH AccessWidth,
   IN       UINT16 IoAddress,
@@ -513,6 +528,7 @@ LibAmdIoRead (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdIoWrite (
   IN       ACCESS_WIDTH AccessWidth,
   IN       UINT16 IoAddress,
@@ -551,6 +567,7 @@ LibAmdIoWrite (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdIoRMW (
   IN       ACCESS_WIDTH AccessWidth,
   IN       UINT16 IoAddress,
@@ -583,6 +600,7 @@ LibAmdIoRMW (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdIoPoll (
   IN       ACCESS_WIDTH AccessWidth,
   IN       UINT16 IoAddress,
@@ -613,6 +631,7 @@ LibAmdIoPoll (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdMemRead (
   IN       ACCESS_WIDTH AccessWidth,
   IN       UINT64 MemAddress,
@@ -650,6 +669,7 @@ LibAmdMemRead (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdMemWrite (
   IN       ACCESS_WIDTH AccessWidth,
   IN       UINT64 MemAddress,
@@ -688,6 +708,7 @@ LibAmdMemWrite (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdMemRMW (
   IN       ACCESS_WIDTH AccessWidth,
   IN       UINT64 MemAddress,
@@ -720,6 +741,7 @@ LibAmdMemRMW (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdMemPoll (
   IN       ACCESS_WIDTH AccessWidth,
   IN       UINT64 MemAddress,
@@ -750,6 +772,7 @@ LibAmdMemPoll (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdPciRead (
   IN       ACCESS_WIDTH AccessWidth,
   IN       PCI_ADDR PciAddress,
@@ -799,6 +822,7 @@ LibAmdPciRead (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdPciWrite (
   IN       ACCESS_WIDTH AccessWidth,
   IN       PCI_ADDR PciAddress,
@@ -849,6 +873,7 @@ LibAmdPciWrite (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdPciRMW (
   IN       ACCESS_WIDTH AccessWidth,
   IN       PCI_ADDR PciAddress,
@@ -881,6 +906,7 @@ LibAmdPciRMW (
  *
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdPciPoll (
   IN       ACCESS_WIDTH AccessWidth,
   IN       PCI_ADDR PciAddress,
@@ -947,6 +973,7 @@ GetPciMmioAddress (
  * @param[in]   StdHeader     Standard configuration header
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdPciReadBits (
   IN       PCI_ADDR Address,
   IN       UINT8 Highbit,
@@ -980,6 +1007,7 @@ LibAmdPciReadBits (
  * @param[in]   StdHeader     Standard configuration header
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdPciWriteBits (
   IN       PCI_ADDR Address,
   IN       UINT8 Highbit,
@@ -1021,6 +1049,7 @@ LibAmdPciWriteBits (
  */
 
 VOID
+__attribute__ ((aligned (32)))
 LibAmdPciFindNextCap (
   IN OUT   PCI_ADDR *Address,
   IN       AMD_CONFIG_PARAMS *StdHeader
@@ -1092,6 +1121,7 @@ LibAmdPciFindNextCap (
  * @param[in]     StdHeader     Standard configuration header (Optional)
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdMemFill (
   IN       VOID *Destination,
   IN       UINT8 Value,
@@ -1118,6 +1148,7 @@ LibAmdMemFill (
  * @param[in]     StdHeader     Standard configuration header (Optional)
  */
 VOID
+__attribute__ ((aligned (32)))
 LibAmdMemCopy (
   IN       VOID *Destination,
   IN       CONST VOID *Source,
@@ -1256,8 +1287,8 @@ LibAmdGetPackageType (
  */
 
 
-VOID
-STATIC
+STATIC VOID
+__attribute__ ((aligned (32)))
 LibAmdGetDataFromPtr (
   IN       ACCESS_WIDTH AccessWidth,
   IN       CONST VOID   *Data,
@@ -1298,6 +1329,7 @@ LibAmdGetDataFromPtr (
 
 
 UINT8
+__attribute__ ((aligned (32)))
 LibAmdAccessWidth (
   IN       ACCESS_WIDTH AccessWidth
   )
diff --git a/src/vendorcode/amd/agesa/f14/Makefile.inc b/src/vendorcode/amd/agesa/f14/Makefile.inc
index 1eee9bf..a5f7561 100644
--- a/src/vendorcode/amd/agesa/f14/Makefile.inc
+++ b/src/vendorcode/amd/agesa/f14/Makefile.inc
@@ -330,10 +330,6 @@ libagesa-y += Proc/GNB/Nb/NbPowerMgmt.c
 libagesa-y += Proc/Recovery/HT/htInitReset.c
 libagesa-y += Proc/Mem/Main/mu.c
 
-# Do not optimise performance-critical low-level IO for size with -Os,
-# request -O2 with -falign-functions.
-$(obj)/libagesa/vendorcode/amd/agesa/f14/Lib/amdlib.o: CFLAGS_libagesa += -O2
-
 $(obj)/libagesa.fam14.a: $$(libagesa-objs)
 	@printf " AGESA $(subst $(obj)/,,$(@))\n"
 	$(AR_romstage) rcs $@ $+
-- 
1.9.1

Attachment: signature.asc
Description: This is a digitally signed message part

-- 
coreboot mailing list: [email protected]
https://www.coreboot.org/mailman/listinfo/coreboot

Reply via email to