[PATCH 4/6 v2] Add support for LZO-compressed kernels for ARM
This is the second part of patch. This part includes: - changes to ach/arch/boot/Makefile to make it easier to add new compression types - new piggy.lzo.S necessary for lzo compression - changes in arch/arm/boot/compressed/misc.c to allow the use of lzo or gzip, depending on the config - Kconfig support Signed-off-by: Albin Tonnerre albin.tonne...@free-electrons.com --- Changes: Compiling with -Os failed due to missing __aeabi_uidivmod. Link using arch/arm/lib/lib1funcs.o which provides this symbol, and define a dummy __div0 function in arch/arm/boot/compressed/misc.c, as this symbol is required by lib1funcs. arch/arm/Kconfig |2 + arch/arm/boot/compressed/Makefile | 31 ++--- arch/arm/boot/compressed/misc.c | 116 ++--- arch/arm/boot/compressed/piggy.S |6 -- arch/arm/boot/compressed/piggy.gzip.S |6 ++ arch/arm/boot/compressed/piggy.lzo.S |6 ++ 6 files changed, 70 insertions(+), 97 deletions(-) delete mode 100644 arch/arm/boot/compressed/piggy.S create mode 100644 arch/arm/boot/compressed/piggy.gzip.S create mode 100644 arch/arm/boot/compressed/piggy.lzo.S diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index aef63c8..ea71c0c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -18,6 +18,8 @@ config ARM select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) select HAVE_GENERIC_DMA_COHERENT + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZO help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index ce39dc5..5b4629b 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -63,8 +63,12 @@ endif SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ -targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \ -head.o misc.o $(OBJS) +suffix_$(CONFIG_KERNEL_GZIP) = gzip +suffix_$(CONFIG_KERNEL_LZO) = lzo + +targets := vmlinux 
vmlinux.lds \ +piggy.$(suffix_y) piggy.$(suffix_y).o \ +font.o font.c head.o misc.o $(OBJS) ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) @@ -87,22 +91,31 @@ endif ifneq ($(PARAMS_PHYS),) LDFLAGS_vmlinux += --defsym params_phys=$(PARAMS_PHYS) endif -LDFLAGS_vmlinux += -p --no-undefined -X \ - $(shell $(CC) $(KBUILD_CFLAGS) --print-libgcc-file-name) -T +# ? +LDFLAGS_vmlinux += -p +# Report unresolved symbol references +LDFLAGS_vmlinux += --no-undefined +# Delete all temporary local symbols +LDFLAGS_vmlinux += -X +# Next argument is a linker script +LDFLAGS_vmlinux += -T + +# For __aeabi_uidivmod +lib1funcs = $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.o # Don't allow any static data in misc.o, which # would otherwise mess up our GOT table CFLAGS_misc.o := -Dstatic= -$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \ - $(addprefix $(obj)/, $(OBJS)) FORCE +$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ + $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE $(call if_changed,ld) @: -$(obj)/piggy.gz: $(obj)/../Image FORCE - $(call if_changed,gzip) +$(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE + $(call if_changed,$(suffix_y)) -$(obj)/piggy.o: $(obj)/piggy.gz FORCE +$(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) FORCE CFLAGS_font.o := -Dstatic= diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c index 17153b5..57077c8 100644 --- a/arch/arm/boot/compressed/misc.c +++ b/arch/arm/boot/compressed/misc.c @@ -18,10 +18,15 @@ unsigned int __machine_arch_type; +#define _LINUX_STRING_H_ + #include linux/compiler.h/* for inline */ #include linux/types.h /* for size_t */ #include linux/stddef.h /* for NULL */ #include asm/string.h +#include linux/linkage.h + +#include asm/unaligned.h #ifdef STANDALONE_DEBUG #define putstr printf @@ -188,34 +193,8 @@ static inline __ptr_t memcpy(__ptr_t __dest, __const __ptr_t __src, /* * gzip delarations */ -#define OF(args) args #define STATIC static 
-typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; - -#define WSIZE 0x8000 /* Window size must be at least 32k, */ - /* and a power of two */ - -static uch *inbuf; /* input buffer */ -static uch window[WSIZE]; /* Sliding window buffer */ - -static unsigned insize;/* valid bytes in inbuf */ -static unsigned inptr; /* index of next byte to be processed in inbuf */ -static unsigned outcnt;/* bytes in output buffer */ - -/* gzip flag byte */ -#define ASCII_FLAG 0x01 /* bit 0 set: file probably
Re: New fast(?)-boot results on ARM
Robert Schwebel wrote:
> - 2.4 s up from u-boot to the end of "Uncompressing Linux"
> - 300 ms until ubifs initialization starts
> - 3.7 s for ubifs, until mounted root
>
> So we basically have 7 s for the kernel. The rest is userspace, which hasn't seen much optimization yet, other than trying to start the GUI application as early as possible, while doing all other init stuff in parallel. Adding "quiet" brings us another 300 ms.
>
> That's factor 70 away from the 110 ms boot time Tim has talked about some days ago (and he measured on an ARM cpu which had almost half the speed of this one), and I'm wondering what we can do to improve the boot time.

2.4s in uncompression? That seems like an obvious target for improvement.

Your kernel seems awfully large. 3104K code? You should definitely find out what is making it that big and cut out everything you do not need. You might even try some of the embedded system scripts that rip out all the printk strings.

If you get the kernel size way down then use an uncompressed kernel and it should boot a lot faster if the bottleneck is CPU speed.

However, it is probably IO speed. There could be something really wrong and slow with your MTD. Does it DMA or is it doing something crazy like using the CPU to read a byte at a time?

Or maybe it's cheap and slow flash. In that case I think your only hope is to make all the code as small as possible and/or find a different flash filesystem that does not have to read so much of the device to mount. Perhaps use a read-only compressed filesystem for the system binaries and reflash it for software upgrades. Only init and mount the writable flash for user-storable data well after system boot has finished.

-- Zan Lynx zl...@acm.org Knowledge is Power. Power Corrupts. Study Hard. Be Evil.
-- To unsubscribe from this list: send the line unsubscribe linux-embedded in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: New fast(?)-boot results on ARM
Zan,

On Fri, Aug 14, 2009 at 12:19:48PM -0600, Zan Lynx wrote:
> > That's factor 70 away from the 110 ms boot time Tim has talked about some days ago (and he measured on an ARM cpu which had almost half the speed of this one), and I'm wondering what we can do to improve the boot time.
>
> 2.4s in uncompression? That seems like an obvious target for improvement.

Indeed, we'll check that. However, I somewhat have the impression that most systems which are hyped as "fast boot" out there are optimized so aggressively that they are not really usable in real-life applications any more. So we try to configure the systems in a realistic way. I know that we won't get the last milliseconds that way - but I'd like to find out how far we can go.

> Your kernel seems awfully large. 3104K code? You should definitely find out what is making it that big and cut out everything you do not need.

Definitely, will audit again.

> You might even try some of the embedded system scripts that rip out all the printk strings.

Hmm, that's definitely in the last-minute-before-product category.

> If you get the kernel size way down then use an uncompressed kernel and it should boot a lot faster if the bottleneck is CPU speed.

I'll try that.

> However, it is probably IO speed. There could be something really wrong and slow with your MTD. Does it DMA or is it doing something crazy like using the CPU to read a byte at a time?

Will check.

> Or maybe it's cheap and slow flash. In that case I think your only hope is to make all the code as small as possible and/or find a different flash filesystem that does not have to read so much of the device to mount. Perhaps use a read-only compressed filesystem for the system binaries and reflash it for software upgrades. Only init and mount the writable flash for user-storable data well after system boot has finished.

That would also be a last-minute change, but it is surely worth evaluating. We recently changed from jffs2 to ubifs and hoped to gain speed during that step.
Thanks for your feedback! rsc -- Pengutronix e.K. | | Industrial Linux Solutions | http://www.pengutronix.de/ | Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0| Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917- | -- To unsubscribe from this list: send the line unsubscribe linux-embedded in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: New fast(?)-boot results on ARM
On Fri, Aug 14, 2009 at 7:02 PM, Robert Schwebelr.schwe...@pengutronix.de wrote: So we basically have 7 s for the kernel. The rest is userspace, which hasn't seen much optimization yet, other than trying to start the GUI application as early as possible, while doing all other init stuff in parallel. Adding quiet brings us another 300 ms. That's factor 70 away from the 110 ms boot time Tim has talked about some days ago (and he measured on an ARM cpu which had almost half the speed of this one), and I'm wondering what we can do to improve the boot time. Robert r...@thebe:~$ microcom | ptx_ts U-Boot 2.0.0-rc9 [ 2.395740] 2.395740 [ 2.395860] 0.000120 [ 0.11] 0.11 U-Boot 2.0.0-rc9 (Aug 5 2009 - 10:05:58) [ 0.59] 0.48 [ 0.003823] 0.003764 Board: Phytec phyCORE-i.MX27 [ 0.010753] 0.006930 cfi_probe: cfi_flash base: 0xc000 size: 0x0200 [ 0.018711] 0.007958 NAND device: Manufacturer ID: 0x20, Chip ID: 0x36 (ST Micro NAND 64MiB 1,8V 8-bit) [ 0.026592] 0.007881 im...@imxfb0: i.MX Framebuffer driver [ 0.178655] 0.152063 dev_protect: currently broken [ 0.178736] 0.81 Using environment in NOR Flash [ 0.182577] 0.003841 initialising PLLs [ 0.367142] 0.184565 Malloc space: 0xa3f0 - 0xa7f0 (size 64 MB) [ 0.370568] 0.003426 Stack space : 0xa3ef8000 - 0xa3f0 (size 32 kB) [ 0.445993] 0.075425 running /env/bin/init... [ 0.870592] 0.424599 [ 0.874559] 0.003967 Hit any key to stop autoboot: 0 boot loader is not fast. considering its simple task, it can be made faster. [ 1.326621] 0.452062 loaded zImage from /dev/nand0.kernel.bb with size 1679656 [ 2.009996] 0.683375 Uncompressing Linux... done, booting the kernel. 
[ 2.416999] 0.407003 Linux version 2.6.31-rc4-g056f82f-dirty (s...@octopus) (gcc version 4.3.2 (OSELAS.Toolchain-1.99.3) ) #1 PREEMPT Thu Aug 6 08:37:19 CEST 2009 Other people already commented on this (kernel is too big) [ 2.418729] 0.001730 CPU: ARM926EJ-S [41069264] revision 4 (ARMv5TEJ), cr=00053177 [ 2.423081] 0.004352 CPU: VIVT data cache, VIVT instruction cache [ 2.426592] 0.003511 Machine: phyCORE-i.MX27 ... [ 2.742628] 0.016050 0x0036-0x0400 : root [ 3.058610] 0.315982 UBI: attaching mtd7 to ubi0 [ 3.062878] 0.004268 UBI: physical eraseblock size: 16384 bytes (16 KiB) [ 3.070601] 0.007723 UBI: logical eraseblock size: 15360 bytes [ 3.070665] 0.64 UBI: smallest flash I/O unit: 512 [ 3.078564] 0.007899 UBI: VID header offset: 512 (aligned 512) [ 3.078609] 0.45 UBI: data offset: 1024 [ 5.006609] 1.928000 UBI: attached mtd7 to ubi0 [ 5.013157] 0.006548 UBI: MTD device name: root As others commented, ubi looks slow and you probably need to find out why. [ 5.014566] 0.001409 UBI: MTD device size: 60 MiB [ 5.018660] 0.004094 UBI: number of good PEBs: 3880 [ 5.022585] 0.003925 UBI: number of bad PEBs: 0 [ 5.026797] 0.004212 UBI: max. allowed volumes: 89 [ 5.026849] 0.52 UBI: wear-leveling threshold: 4096 [ 5.030779] 0.003930 UBI: number of internal volumes: 1 [ 5.034583] 0.003804 UBI: number of user volumes: 1 [ 5.046572] 0.011989 UBI: available PEBs: 0 [ 5.046622] 0.50 UBI: total number of reserved PEBs: 3880 [ 5.046657] 0.35 UBI: number of PEBs reserved for bad PEB handling: 38 [ 5.050606] 0.003949 UBI: max/mean erase counter: 2/0 [ 5.050668] 0.62 UBI: image sequence number: 0 [ 5.058619] 0.007951 UBI: background thread ubi_bgt0d started, PID 215 [ 5.062620] 0.004001 oprofile: using timer interrupt. [ 5.070584] 0.007964 TCP cubic registered [ 5.070637] 0.53 NET: Registered protocol family 17 [ 5.074624] 0.003987 RPC: Registered udp transport module. [ 5.082616] 0.007992 RPC: Registered tcp transport module. 
[ 5.605159] 0.522543 eth0: config: auto-negotiation on, 100FDX, 100HDX, 10FDX, 10HDX. [ 6.602621] 0.997462 IP-Config: Complete: [ 6.606638] 0.004017 device=eth0, addr=192.168.23.197, mask=255.255.0.0, gw=192.168.23.2, [ 6.614588] 0.007950 host=192.168.23.197, domain=, nis-domain=(none), [ 6.618652] 0.004064 bootserver=192.168.23.2, rootserver=192.168.23.2, rootpath= Well, this ~1 second is not really kernel's fault, it's DHCP delay. But, do you need to do it at this moment? You do not seem to be using networking filesystems. You can run DHCP client in userspace. [ 6.630579] 0.011927 UBIFS: recovery needed [ 6.662655] 0.032076 UBIFS: recovery completed [ 6.666587] 0.003932 UBIFS: mounted UBI device 0, volume 1, name root [ 6.670570] 0.003983 UBIFS: file system size: 58490880 bytes (57120 KiB, 55 MiB, 3808 LEBs) [ 6.678572]
Re: New fast(?)-boot results on ARM
2009/8/14 Robert Schwebel r.schwe...@pengutronix.de:
> On Fri, Aug 14, 2009 at 12:19:48PM -0600, Zan Lynx wrote:
>>> That's factor 70 away from the 110 ms boot time Tim has talked about some days ago (and he measured on an ARM cpu which had almost half the speed of this one), and I'm wondering what we can do to improve the boot time.
>>
>> 2.4s in uncompression? That seems like an obvious target for improvement.
>
> Indeed, we'll check that.

We got rid of uncompression on a flash-based system, vastly improving boot time.

The reason is that compressed kernels are faster only when the throughput to the persistent storage is lower than the decompression throughput, and on typical embedded systems with DMA the throughput to memory outperforms the CPU-based decompression.

Of course, it depends on many factors, such as the performance of the flash controller, kernel storage filesystem performance, DMA controller performance, cache architecture, etc., so it varies from system to system.

Linus Walleij
-- To unsubscribe from this list: send the line unsubscribe linux-embedded in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html