[PATCH 4/6 v2] Add support for LZO-compressed kernels for ARM

2009-08-14 Thread Albin Tonnerre
This is the second part of the patch. This part includes:
 - changes to arch/arm/boot/compressed/Makefile to make it easier to add new
   compression types
 - new piggy.lzo.S necessary for lzo compression
 - changes in arch/arm/boot/compressed/misc.c to allow the use of lzo or
   gzip, depending on the config
 - Kconfig support

Signed-off-by: Albin Tonnerre <albin.tonne...@free-electrons.com>
---
Changes:
 Compiling with -Os failed due to missing __aeabi_uidivmod.
 Link using arch/arm/lib/lib1funcs.o which provides this symbol, and
 define a dummy __div0 function in arch/arm/boot/compressed/misc.c, as
 this symbol is required by lib1funcs.

 arch/arm/Kconfig  |2 +
 arch/arm/boot/compressed/Makefile |   31 ++---
 arch/arm/boot/compressed/misc.c   |  116 ++---
 arch/arm/boot/compressed/piggy.S  |6 --
 arch/arm/boot/compressed/piggy.gzip.S |6 ++
 arch/arm/boot/compressed/piggy.lzo.S  |6 ++
 6 files changed, 70 insertions(+), 97 deletions(-)
 delete mode 100644 arch/arm/boot/compressed/piggy.S
 create mode 100644 arch/arm/boot/compressed/piggy.gzip.S
 create mode 100644 arch/arm/boot/compressed/piggy.lzo.S

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index aef63c8..ea71c0c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -18,6 +18,8 @@ config ARM
select HAVE_KRETPROBES if (HAVE_KPROBES)
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
select HAVE_GENERIC_DMA_COHERENT
+   select HAVE_KERNEL_GZIP
+   select HAVE_KERNEL_LZO
help
  The ARM series is a line of low-power-consumption RISC chip designs
  licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/boot/compressed/Makefile 
b/arch/arm/boot/compressed/Makefile
index ce39dc5..5b4629b 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -63,8 +63,12 @@ endif
 
 SEDFLAGS   = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
 
-targets   := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \
-head.o misc.o $(OBJS)
+suffix_$(CONFIG_KERNEL_GZIP) = gzip
+suffix_$(CONFIG_KERNEL_LZO)  = lzo
+
+targets   := vmlinux vmlinux.lds \
+piggy.$(suffix_y) piggy.$(suffix_y).o \
+font.o font.c head.o misc.o $(OBJS)
 
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
@@ -87,22 +91,31 @@ endif
 ifneq ($(PARAMS_PHYS),)
 LDFLAGS_vmlinux += --defsym params_phys=$(PARAMS_PHYS)
 endif
-LDFLAGS_vmlinux += -p --no-undefined -X \
-   $(shell $(CC) $(KBUILD_CFLAGS) --print-libgcc-file-name) -T
+# ?
+LDFLAGS_vmlinux += -p
+# Report unresolved symbol references
+LDFLAGS_vmlinux += --no-undefined
+# Delete all temporary local symbols
+LDFLAGS_vmlinux += -X
+# Next argument is a linker script
+LDFLAGS_vmlinux += -T
+
+# For __aeabi_uidivmod
+lib1funcs = $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.o
 
 # Don't allow any static data in misc.o, which
 # would otherwise mess up our GOT table
 CFLAGS_misc.o := -Dstatic=
 
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \
-   $(addprefix $(obj)/, $(OBJS)) FORCE
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
+   $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE
$(call if_changed,ld)
@:
 
-$(obj)/piggy.gz: $(obj)/../Image FORCE
-   $(call if_changed,gzip)
+$(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE
+   $(call if_changed,$(suffix_y))
 
-$(obj)/piggy.o:  $(obj)/piggy.gz FORCE
+$(obj)/piggy.$(suffix_y).o:  $(obj)/piggy.$(suffix_y) FORCE
 
 CFLAGS_font.o := -Dstatic=
 
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 17153b5..57077c8 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -18,10 +18,15 @@
 
 unsigned int __machine_arch_type;
 
+#define _LINUX_STRING_H_
+
 #include linux/compiler.h/* for inline */
 #include linux/types.h   /* for size_t */
 #include linux/stddef.h  /* for NULL */
 #include asm/string.h
+#include linux/linkage.h
+
+#include asm/unaligned.h
 
 #ifdef STANDALONE_DEBUG
 #define putstr printf
@@ -188,34 +193,8 @@ static inline __ptr_t memcpy(__ptr_t __dest, __const 
__ptr_t __src,
 /*
  * gzip delarations
  */
-#define OF(args)  args
 #define STATIC static
 
-typedef unsigned char  uch;
-typedef unsigned short ush;
-typedef unsigned long  ulg;
-
-#define WSIZE 0x8000   /* Window size must be at least 32k, */
-   /* and a power of two */
-
-static uch *inbuf; /* input buffer */
-static uch window[WSIZE];  /* Sliding window buffer */
-
-static unsigned insize;/* valid bytes in inbuf */
-static unsigned inptr; /* index of next byte to be processed in inbuf 
*/
-static unsigned outcnt;/* bytes in output buffer */
-
-/* gzip flag byte */
-#define ASCII_FLAG   0x01 /* bit 0 set: file probably 

Re: New fast(?)-boot results on ARM

2009-08-14 Thread Zan Lynx

Robert Schwebel wrote:


- 2.4 s up from u-boot to the end of Uncompressing Linux
- 300 ms until ubifs initialization starts
- 3.7 s for ubifs, until mounted root

So we basically have 7 s for the kernel. The rest is userspace, which hasn't
seen much optimization yet, other than trying to start the GUI application as
early as possible, while doing all other init stuff in parallel. Adding quiet
brings us another 300 ms.

That's factor 70 away from the 110 ms boot time Tim has talked about some days
ago (and he measured on an ARM cpu which had almost half the speed of this
one), and I'm wondering what we can do to improve the boot time.


2.4s in uncompression? That seems like an obvious target for improvement.

Your kernel seems awfully large. 3104K code? You should definitely find 
out what is making it that big and cut out everything you do not need. 
You might even try some of the embedded system scripts that rip out all 
the printk strings.


If you get the kernel size way down, then use an uncompressed kernel and 
it should boot a lot faster if the bottleneck is CPU speed.


However, it is probably IO speed. There could be something really wrong 
and slow with your MTD. Does it DMA or is it doing something crazy like 
using the CPU to read a byte at a time?


Or maybe it's cheap and slow flash. In that case I think your only hope 
is to make all the code as small as possible and/or find a different 
flash filesystem that does not have to read so much of the device to 
mount. Perhaps use a read-only compressed filesystem for the system 
binaries and reflash it for software upgrades. Only init and mount the 
writable flash for user-storable data well after system boot has finished.

--
Zan Lynx
zl...@acm.org

Knowledge is Power.  Power Corrupts.  Study Hard.  Be Evil.
--
To unsubscribe from this list: send the line unsubscribe linux-embedded in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: New fast(?)-boot results on ARM

2009-08-14 Thread Robert Schwebel
Zan,

On Fri, Aug 14, 2009 at 12:19:48PM -0600, Zan Lynx wrote:
  That's factor 70 away from the 110 ms boot time Tim has talked about
  some days ago (and he measured on an ARM cpu which had almost half
  the speed of this one), and I'm wondering what we can do to improve
  the boot time.

 2.4s in uncompression? That seems like an obvious target for
 improvement.

Indeed, we'll check that.

However, I have the impression that most systems which are hyped as
"fast boot" out there are optimized so aggressively that they are not
really usable in real-life applications any more. So we try to
configure the systems in a realistic way. I know that we won't get the
last milliseconds that way - but I'd like to find out how far we can go.

 Your kernel seems awfully large. 3104K code? You should definitely find
 out what is making it that big and cut out everything you do not need.

Definitely, will audit again.

 You might even try some of the embedded system scripts that rip out
 all the printk strings.

Hmm, that's definitely in the last-minute-before-product category.

 If you get the kernel size way down then use a uncompressed kernel and
 it should boot a lot faster if the bottleneck is CPU speed.

I'll try that.

 However, it is probably IO speed. There could be something really wrong
 and slow with your MTD. Does it DMA or is it doing something crazy like
 using the CPU to read a byte at a time?

Will check.

 Or maybe its cheap and slow flash. In that case I think your only hope
 is to make all the code as small as possible and/or find a different
 flash filesystem that does not have to read so much of the device to
 mount. Perhaps use a read-only compressed filesystem for the system
 binaries and reflash it for software upgrades. Only init and mount the
 writable flash for user-storable data well after system boot has
 finished.

That would also be a last-minute change, but it is surely worth
evaluating.

We recently changed from jffs2 to ubifs and hoped to gain speed during
that step.

Thanks for your feedback!

rsc
-- 
Pengutronix e.K.   | |
Industrial Linux Solutions | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |
--
To unsubscribe from this list: send the line unsubscribe linux-embedded in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: New fast(?)-boot results on ARM

2009-08-14 Thread Denys Vlasenko
On Fri, Aug 14, 2009 at 7:02 PM, Robert
Schwebel <r.schwe...@pengutronix.de> wrote:
 So we basically have 7 s for the kernel. The rest is userspace, which hasn't
 seen much optimization yet, other than trying to start the GUI application as
 early as possible, while doing all other init stuff in parallel. Adding 
 quiet
 brings us another 300 ms.

 That's factor 70 away from the 110 ms boot time Tim has talked about some days
 ago (and he measured on an ARM cpu which had almost half the speed of this
 one), and I'm wondering what we can do to improve the boot time.

 Robert

 r...@thebe:~$ microcom | ptx_ts U-Boot 2.0.0-rc9
 [  2.395740]   2.395740
 [  2.395860]   0.000120
 [  0.11]   0.11 U-Boot 2.0.0-rc9 (Aug  5 2009 - 10:05:58)
 [  0.59]   0.48
 [  0.003823]   0.003764 Board: Phytec phyCORE-i.MX27
 [  0.010753]   0.006930 cfi_probe: cfi_flash base: 0xc000 size: 
 0x0200
 [  0.018711]   0.007958 NAND device: Manufacturer ID: 0x20, Chip ID: 0x36 
 (ST Micro NAND 64MiB 1,8V 8-bit)
 [  0.026592]   0.007881 im...@imxfb0: i.MX Framebuffer driver
 [  0.178655]   0.152063 dev_protect: currently broken
 [  0.178736]   0.81 Using environment in NOR Flash
 [  0.182577]   0.003841 initialising PLLs
 [  0.367142]   0.184565 Malloc space: 0xa3f0 - 0xa7f0 (size 64 MB)
 [  0.370568]   0.003426 Stack space : 0xa3ef8000 - 0xa3f0 (size 32 kB)
 [  0.445993]   0.075425 running /env/bin/init...
 [  0.870592]   0.424599
 [  0.874559]   0.003967 Hit any key to stop autoboot:  0

The boot loader is not fast. Considering its simple task,
it can be made faster.

 [  1.326621]   0.452062 loaded zImage from /dev/nand0.kernel.bb with size 
 1679656
 [  2.009996]   0.683375 Uncompressing 
 Linux...
  done, booting the kernel.
 [  2.416999]   0.407003 Linux version 2.6.31-rc4-g056f82f-dirty 
 (s...@octopus) (gcc version 4.3.2 (OSELAS.Toolchain-1.99.3) ) #1 PREEMPT Thu 
 Aug 6 08:37:19 CEST 2009

Other people already commented on this (the kernel is too big).

 [  2.418729]   0.001730 CPU: ARM926EJ-S [41069264] revision 4 (ARMv5TEJ), 
 cr=00053177
 [  2.423081]   0.004352 CPU: VIVT data cache, VIVT instruction cache
 [  2.426592]   0.003511 Machine: phyCORE-i.MX27
...
 [  2.742628]   0.016050 0x0036-0x0400 : root
 [  3.058610]   0.315982 UBI: attaching mtd7 to ubi0
 [  3.062878]   0.004268 UBI: physical eraseblock size:   16384 bytes (16 
 KiB)
 [  3.070601]   0.007723 UBI: logical eraseblock size:    15360 bytes
 [  3.070665]   0.64 UBI: smallest flash I/O unit:    512
 [  3.078564]   0.007899 UBI: VID header offset:          512 (aligned 512)
 [  3.078609]   0.45 UBI: data offset:                1024
 [  5.006609]   1.928000 UBI: attached mtd7 to ubi0
 [  5.013157]   0.006548 UBI: MTD device name:            root

As others commented, ubi looks slow and you probably need to find out why.

 [  5.014566]   0.001409 UBI: MTD device size:            60 MiB
 [  5.018660]   0.004094 UBI: number of good PEBs:        3880
 [  5.022585]   0.003925 UBI: number of bad PEBs:         0
 [  5.026797]   0.004212 UBI: max. allowed volumes:       89
 [  5.026849]   0.52 UBI: wear-leveling threshold:    4096
 [  5.030779]   0.003930 UBI: number of internal volumes: 1
 [  5.034583]   0.003804 UBI: number of user volumes:     1
 [  5.046572]   0.011989 UBI: available PEBs:             0
 [  5.046622]   0.50 UBI: total number of reserved PEBs: 3880
 [  5.046657]   0.35 UBI: number of PEBs reserved for bad PEB handling: 
 38
 [  5.050606]   0.003949 UBI: max/mean erase counter: 2/0
 [  5.050668]   0.62 UBI: image sequence number: 0
 [  5.058619]   0.007951 UBI: background thread ubi_bgt0d started, PID 215
 [  5.062620]   0.004001 oprofile: using timer interrupt.
 [  5.070584]   0.007964 TCP cubic registered
 [  5.070637]   0.53 NET: Registered protocol family 17
 [  5.074624]   0.003987 RPC: Registered udp transport module.
 [  5.082616]   0.007992 RPC: Registered tcp transport module.
 [  5.605159]   0.522543 eth0: config: auto-negotiation on, 100FDX, 100HDX, 
 10FDX, 10HDX.
 [  6.602621]   0.997462 IP-Config: Complete:
 [  6.606638]   0.004017      device=eth0, addr=192.168.23.197, 
 mask=255.255.0.0, gw=192.168.23.2,
 [  6.614588]   0.007950      host=192.168.23.197, domain=, 
 nis-domain=(none),
 [  6.618652]   0.004064      bootserver=192.168.23.2, 
 rootserver=192.168.23.2, rootpath=

Well, this ~1 second is not really the kernel's fault; it's the DHCP delay.
But do you need to do it at this moment?
You do not seem to be using networking filesystems.
You can run a DHCP client in userspace.

 [  6.630579]   0.011927 UBIFS: recovery needed
 [  6.662655]   0.032076 UBIFS: recovery completed
 [  6.666587]   0.003932 UBIFS: mounted UBI device 0, volume 1, name root
 [  6.670570]   0.003983 UBIFS: file system size:   58490880 bytes (57120 
 KiB, 55 MiB, 3808 LEBs)
 [  6.678572]   

Re: New fast(?)-boot results on ARM

2009-08-14 Thread Linus Walleij
2009/8/14 Robert Schwebel r.schwe...@pengutronix.de:
 On Fri, Aug 14, 2009 at 12:19:48PM -0600, Zan Lynx wrote:

  That's factor 70 away from the 110 ms boot time Tim has talked about
  some days ago (and he measured on an ARM cpu which had almost half
  the speed of this one), and I'm wondering what we can do to improve
  the boot time.

 2.4s in uncompression? That seems like an obvious target for
 improvement.

 Indeed, we'll check that.

We got rid of uncompression on a flash-based system, vastly improving
boot time. The reason is that compressed kernels are faster only when
the throughput to the persistent storage is lower than the decompression
throughput, and on typical embedded systems with DMA the throughput to
memory outperforms the CPU-based decompression.

Of course, it depends on a lot of things, such as the performance of the
flash controller, kernel storage filesystem performance, DMA controller
performance, cache architecture, etc., so it varies from system to system.

Linus Walleij
--
To unsubscribe from this list: send the line unsubscribe linux-embedded in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html