This patch implements almost-in-place decompression of the kernel as
described in #985, with the caveat that the segments are decompressed
starting with the last one. A more detailed explanation of how
this new decompression strategy works can be found in the extensive
comments added to the fastlz source code.

This new decompression strategy has the following benefits:
- it minimizes the amount of RAM needed to execute OSv images (the new
minimum RAM to execute the native example with ROFS is 27.5 MB instead
of 31 MB); the difference is almost twofold when executing ramfs images
with java-example
- one no longer needs to manually adjust lzkernel_base to
build bigger ramfs images

Fixes #985

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 Makefile                    | 11 ++++--
 arch/x64/lzloader.ld        |  1 +
 fastlz/fastlz.h             | 20 +++++++++++
 fastlz/lz.cc                | 69 ++++++++++++++++++++++++++++++-------
 fastlz/lzloader.cc          | 61 +++++++++++++++++++++++++++-----
 scripts/check-image-size.sh |  9 ++---
 6 files changed, 140 insertions(+), 31 deletions(-)

diff --git a/Makefile b/Makefile
index 770b28d0..ac56cb5a 100644
--- a/Makefile
+++ b/Makefile
@@ -423,7 +423,7 @@ ifeq ($(arch),x64)
 # Below we verify that the compiled kernel isn't too big given the current
 # setting of these paramters; Otherwise we recommend to increase lzkernel_base.
 kernel_base := 0x200000
-lzkernel_base := 0x1800000
+lzkernel_base := 0x100000
 
 
 $(out)/boot.bin: arch/x64/boot16.ld $(out)/arch/x64/boot16.o
@@ -460,13 +460,20 @@ $(out)/fastlz/lzloader.o: fastlz/lzloader.cc | generated-headers
        $(makedir)
        $(call quiet, $(CXX) $(CXXFLAGS) -O0 -m32 -fno-instrument-functions -o $@ -c fastlz/lzloader.cc, CXX $<)
 
+# Below we employ a rather nasty dd-based hack to remove a 1MB hole from lzloader.elf.
+# This is necessary to make sure that code and data end up in the correct places in
+# memory after the file is loaded by the logic in boot16.S. Unfortunately, there is no
+# easy way to stop the linker from emitting the 1MB hole by specifying it somehow in
+# lzloader.ld.
 $(out)/lzloader.elf: $(out)/loader-stripped.elf.lz.o $(out)/fastlz/lzloader.o arch/x64/lzloader.ld \
        $(out)/fastlz/fastlz.o
-       $(call very-quiet, scripts/check-image-size.sh $(out)/loader-stripped.elf $(shell bash -c 'echo $$(($(lzkernel_base)-$(kernel_base)))'))
+       $(call very-quiet, scripts/check-image-size.sh $(out)/loader-stripped.elf)
        $(call quiet, $(LD) -o $@ --defsym=OSV_LZKERNEL_BASE=$(lzkernel_base) \
                -Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags \
                -T arch/x64/lzloader.ld \
                $(filter %.o, $^), LINK lzloader.elf)
+       $(call very-quiet, dd if=$(out)/lzloader.elf of=$(out)/lzloader-fixed.elf count=8)
+       $(call very-quiet, dd if=$(out)/lzloader.elf of=$(out)/lzloader-fixed.elf skip=2056 seek=8)
+       $(call quiet, mv $(out)/lzloader-fixed.elf $(out)/lzloader.elf, REMOVE 1MB HOLE)
        $(call quiet, truncate -s %32768 $@, ALIGN lzloader.elf)
 
 acpi-defines = -DACPI_MACHINE_WIDTH=64 -DACPI_USE_LOCAL_CACHE
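
For reference, the dd block arithmetic in the rule above works out as follows
(a minimal standalone sketch, not part of the patch; it assumes dd's default
512-byte block size, and that the first 4 KB of the file hold the ELF headers):

    // count=8: the first dd keeps the first 8 blocks, i.e. the 4 KB prefix
    // (presumably the ELF headers) of lzloader.elf
    static_assert(8 * 512 == 0x1000, "prefix kept by the first dd");
    // skip=2056/seek=8: the second dd resumes reading at file offset 0x101000
    // and writes at offset 0x1000, cutting out the hole that spans file
    // offsets 0x1000-0x101000 - exactly 1 MB
    static_assert(2056 * 512 == 0x1000 + 0x100000, "offset just past the 1MB hole");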
diff --git a/arch/x64/lzloader.ld b/arch/x64/lzloader.ld
index 992698b9..82d8ddcf 100644
--- a/arch/x64/lzloader.ld
+++ b/arch/x64/lzloader.ld
@@ -10,6 +10,7 @@ SECTIONS
 {
     . = OSV_LZKERNEL_BASE + 0x1000;
     .text : { *(.text) }
+    . = OSV_LZKERNEL_BASE + 0x3000;
     .data : { *(.data) }
     .bss : { *(.bss) }
     .edata = .;
diff --git a/fastlz/fastlz.h b/fastlz/fastlz.h
index 8ad3336a..6aa1db6d 100644
--- a/fastlz/fastlz.h
+++ b/fastlz/fastlz.h
@@ -17,6 +17,26 @@
 
 #define FASTLZ_VERSION_STRING "0.1.0"
 
+/**
+ * 1 MB was chosen for the segment size to make sure that
+ * a segment can fit into the 2nd MB of memory, just before the
+ * uncompressed kernel located at 0x200000
+ */
+#define SEGMENT_SIZE (1024 * 1024)
+/**
+ * The maximum compressed segment size needs to be slightly less
+ * than 1 MB so that, in the worst case, the first segment
+ * (which is decompressed last) does not overlap with its
+ * target decompressed area - the 2nd MB. It has to be smaller
+ * than 1 MB by 4 bytes plus 3 pages, because the first 3 pages of
+ * lzloader.elf are occupied by code and the next 4 bytes store the
+ * offset of the segment info table. If the first segment is stored
+ * at its original size and overlaps slightly with its target 2nd MB,
+ * the data is copied byte by byte, going backwards from the last byte
+ * towards the first one. All in all, this scheme guarantees that no
+ * data is overwritten in any scenario.
+ */
+#define MAX_COMPRESSED_SEGMENT_SIZE (SEGMENT_SIZE - sizeof(int) - 0x3000)
+
 /**
   Compress a block of data in the input buffer and returns the size of
   compressed block. The size of input buffer is specified by length. The
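
Spelled out numerically (a sketch only; it assumes a 4-byte int and 4 KB pages,
as on the x64 targets this code is built for):

    // 1 MB, minus the 4-byte table-offset field, minus 3 pages of loader code
    static_assert(1024 * 1024 - 4 - 3 * 4096 == 1036284,
                  "MAX_COMPRESSED_SEGMENT_SIZE in bytes");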
diff --git a/fastlz/lz.cc b/fastlz/lz.cc
index 9cd4dfd8..63680d4c 100644
--- a/fastlz/lz.cc
+++ b/fastlz/lz.cc
@@ -11,11 +11,25 @@
 
 using namespace std;
 
-// Compress the Kernel using fastlz algorithm
+// Compress the kernel by splitting it into smaller 1MB segments,
+// compressing each one using the fastlz algorithm and finally
+// combining them into a single output file.
+//
+// The resulting output file has the following structure:
+// - a 4-byte (int) field - the offset where the segment info table is stored
+// - followed by the content of each compressed segment
+// - followed by the segment info table, comprised of:
+//    - a 4-byte (int) field - the number of compressed segments
+//    - followed by 8 bytes (2 int-wide fields) per segment:
+//       - the uncompressed segment size in bytes
+//       - the compressed segment size in bytes
+//
+// If a segment's compressed size is greater than MAX_COMPRESSED_SEGMENT_SIZE
+// (meaning compression would have saved less than ~1%), the original segment
+// is placed into the output instead.
 int main(int argc, char* argv[])
 {
-    size_t input_length, output_length;
-    char *input, *output;
+    size_t input_length;
+    char *input;
 
     if (argc != 2) {
         cout << "usage: lz inputfile\n";
@@ -37,22 +51,51 @@ int main(int argc, char* argv[])
         return EXIT_FAILURE;
     }
 
-    output = new char[(int)(input_length*2)];
-    output_length = fastlz_compress(input, input_length, output);
-
     ofstream output_file((std::string(argv[1]) + ".lz").c_str(), ios::out|ios::binary|ios::trunc);
-
-    if (output_file) {
-        output_file.write(output, output_length);
-        output_file.close();
-    }
-    else {
+    if (!output_file) {
         cout << "Error opening output file\n";
         return EXIT_FAILURE;
     }
+    // Leave space for the offset of the segment info table
+    output_file.seekp(sizeof(int), ios::beg);
+
+    int segments_count = (input_length + SEGMENT_SIZE - 1) / SEGMENT_SIZE;
+    int *segment_sizes = new int[segments_count * 2];
+    char *compressed_segment = new char[SEGMENT_SIZE * 2];
+    //
+    // Iterate over each 1MB chunk (or less for the last chunk) of input data
+    // and append either its compressed form or the original data as is
+    for (auto segment = 0; segment < segments_count; segment++) {
+        auto bytes_to_compress = (segment < segments_count - 1) ? SEGMENT_SIZE : input_length - segment * SEGMENT_SIZE;
+        segment_sizes[segment * 2] = bytes_to_compress;
+
+        size_t compressed_segment_length =
+                fastlz_compress(input + segment * SEGMENT_SIZE, bytes_to_compress, compressed_segment);
+        //
+        // Check if we actually compressed anything
+        if (compressed_segment_length < bytes_to_compress && compressed_segment_length <= MAX_COMPRESSED_SEGMENT_SIZE) {
+            output_file.write(compressed_segment, compressed_segment_length);
+            segment_sizes[segment * 2 + 1] = compressed_segment_length;
+        }
+        // Otherwise write the original uncompressed segment data (compression
+        // would have saved less than ~1% of the segment size)
+        else {
+            output_file.write(input + segment * SEGMENT_SIZE, bytes_to_compress);
+            segment_sizes[segment * 2 + 1] = bytes_to_compress;
+        }
+    }
+    //
+    // Write the segment info table
+    int info_offset = output_file.tellp();
+    output_file.write(reinterpret_cast<const char*>(&segments_count), sizeof(segments_count));
+    output_file.write(reinterpret_cast<const char*>(segment_sizes), segments_count * 2 * sizeof(int));
+    //
+    // Write the offset of the segment info table at the beginning of the file
+    output_file.seekp(0, ios::beg);
+    output_file.write(reinterpret_cast<const char*>(&info_offset), sizeof(info_offset));
+    output_file.close();
 
     delete[] input;
-    delete[] output;
+    delete[] compressed_segment;
 
     return 0;
 }
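
To make the file format concrete, here is a minimal standalone sketch of a
reader that walks the layout produced by lz.cc above ("lzdump" is a
hypothetical tool name used only for illustration; it is not part of the
patch):

    #include <fstream>
    #include <iostream>
    #include <vector>

    int main(int argc, char* argv[])
    {
        if (argc != 2) {
            std::cout << "usage: lzdump inputfile.lz\n";
            return 1;
        }
        std::ifstream in(argv[1], std::ios::in | std::ios::binary);
        // The first 4 bytes hold the offset of the segment info table
        int info_offset = 0;
        in.read(reinterpret_cast<char*>(&info_offset), sizeof(info_offset));
        // The table starts with the number of segments ...
        in.seekg(info_offset, std::ios::beg);
        int segments_count = 0;
        in.read(reinterpret_cast<char*>(&segments_count), sizeof(segments_count));
        // ... followed by (uncompressed size, compressed size) int pairs
        std::vector<int> sizes(segments_count * 2);
        in.read(reinterpret_cast<char*>(sizes.data()), sizes.size() * sizeof(int));
        for (int s = 0; s < segments_count; s++) {
            std::cout << "segment " << s << ": " << sizes[2 * s + 1]
                      << " -> " << sizes[2 * s] << " bytes\n";
        }
        return 0;
    }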
diff --git a/fastlz/lzloader.cc b/fastlz/lzloader.cc
index e05387e7..563070fa 100644
--- a/fastlz/lzloader.cc
+++ b/fastlz/lzloader.cc
@@ -12,6 +12,10 @@
 
 #define BUFFER_OUT (char *)OSV_KERNEL_BASE
 
+#define INT_AT(byte_array,offset) (*(reinterpret_cast<int*>(byte_array + offset)))
+#define SEGMENT_INFO(info_array,segment) (reinterpret_cast<int*>(info_array + (1 + 2 * segment) * sizeof(int)))
+#define COPIED_SEGMENT_INFO(info_array,segment) (reinterpret_cast<int*>(info_array + 2 * segment * sizeof(int)))
+
 extern char _binary_loader_stripped_elf_lz_start;
 extern char _binary_loader_stripped_elf_lz_end;
 extern char _binary_loader_stripped_elf_lz_size;
@@ -24,13 +28,52 @@ extern "C" void *memset(void *s, int c, size_t n)
 
 extern "C" void uncompress_loader()
 {
-    // We do not know the exact uncompressed size, so don't really want to
-    // pass a the last (maxout) parameter of fastlz_decompress. Let it
-    // uncompress as much as it has input. The Makefile already verifies
-    // that the uncompressed kernel doesn't overwrite this uncompression code.
-    // Sadly, "INT_MAX" is the largest number we can pass. If we ever need
-    // more than 2GB here, it won't work.
-    fastlz_decompress(&_binary_loader_stripped_elf_lz_start,
-            (size_t) &_binary_loader_stripped_elf_lz_size,
-            BUFFER_OUT, INT_MAX);
+    char *compressed_input = &_binary_loader_stripped_elf_lz_start;
+    //
+    // Read the first four bytes to find the offset of the segment info table
+    int info_offset = INT_AT(compressed_input, 0);
+    char *segments_info_array = compressed_input + info_offset;
+    int segments_count = INT_AT(compressed_input, info_offset);
+    //
+    // Calculate the source and destination offsets just past the last segment
+    size_t src_offset = sizeof(int), dst_offset = 0;
+    for (auto segment = 0; segment < segments_count; segment++) {
+        int *segment_info = SEGMENT_INFO(segments_info_array, segment);
+        auto decomp_segment_size = *segment_info;
+        auto comp_segment_size = *(segment_info + 1);
+        src_offset += comp_segment_size;
+        dst_offset += decomp_segment_size;
+    }
+    auto uncompressed_size = dst_offset;
+    //
+    // Copy the info table to an area above the decompressed kernel;
+    // otherwise it would be overwritten while the segments are being
+    // decompressed
+    char *copied_info_array = BUFFER_OUT + (uncompressed_size + 10);
+    for (auto segment = 0; segment < segments_count; segment++) {
+        int *segment_info = SEGMENT_INFO(segments_info_array, segment);
+        int *target_segment_info = COPIED_SEGMENT_INFO(copied_info_array, segment);
+        *target_segment_info = *segment_info;
+        *(target_segment_info + 1) = *(segment_info + 1);
+    }
+    //
+    // Iterate over the segments and decompress them starting with the last one
+    for (auto segment = segments_count - 1; segment >= 0; segment--) {
+        int *segment_info = COPIED_SEGMENT_INFO(copied_info_array, segment);
+        src_offset -= *(segment_info + 1);
+        dst_offset -= *segment_info;
+        if (*(segment_info + 1) < *segment_info) {
+            fastlz_decompress(compressed_input + src_offset,
+                              *(segment_info + 1),
+                              BUFFER_OUT + dst_offset,
+                              INT_MAX);
+        }
+        else {
+            // TODO: Figure out how to copy faster, e.g. int by int
+            //
+            // This segment was stored uncompressed - copy it from the last
+            // byte to the first so a slightly overlapping destination is safe
+            for (auto off = *segment_info - 1; off >= 0; off--)
+                *(BUFFER_OUT + dst_offset + off) = *(compressed_input + src_offset + off);
+        }
+    }
 }
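
The backward byte copy in the uncompressed-segment branch above is what makes
a slightly overlapping destination safe - the same idea as memmove when the
destination lies above the source. A minimal standalone sketch of that
invariant (illustration only, not part of the patch):

    #include <cassert>
    #include <cstring>

    static void copy_backwards(char* dst, const char* src, int n)
    {
        // Copying from the last byte down means every source byte is read
        // before the copy can overwrite it (given dst > src)
        for (int off = n - 1; off >= 0; off--)
            dst[off] = src[off];
    }

    int main()
    {
        char buf[16] = "abcdefgh";
        copy_backwards(buf + 2, buf, 8); // destination overlaps source tail
        assert(memcmp(buf + 2, "abcdefgh", 8) == 0);
        return 0;
    }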
diff --git a/scripts/check-image-size.sh b/scripts/check-image-size.sh
index 0f972a7a..92677600 100755
--- a/scripts/check-image-size.sh
+++ b/scripts/check-image-size.sh
@@ -5,8 +5,8 @@
 # This work is open source software, licensed under the terms of the
 # BSD license as described in the LICENSE file in the top-level directory.
 #
-if [ "$#" -ne 2 ]; then
-    echo "usage: $0 file maxsize"
+if [ "$#" -ne 1 ]; then
+    echo "usage: $0 file"
     exit 1
 fi
 
@@ -17,9 +17,4 @@ if [ $size -ge 2147483647 ]; then
     echo "If you really need such a huge kernel, fix fastlz/lzloader.cc"
     exit 1
 fi
-if [ $size -ge "$2" ]; then
-    echo "$1 is bigger than $2 available for uncompressing kernel."
-    echo "Increase 'lzkernel_base' in Makefile to raise this limit."
-    exit 1
-fi
 exit 0
-- 
2.17.1
