Hello community, here is the log from the commit of package plzip for openSUSE:Factory checked in at 2015-02-10 20:22:13 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/plzip (Old) and /work/SRC/openSUSE:Factory/.plzip.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "plzip" Changes: -------- --- /work/SRC/openSUSE:Factory/plzip/plzip.changes 2014-10-06 12:06:27.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.plzip.new/plzip.changes 2015-02-10 20:22:14.000000000 +0100 @@ -1,0 +2,10 @@ +Sun Feb 8 14:39:20 UTC 2015 - jengelh@inai.de + +- Update to new upstream release 1.3 +* Testing of a non-seekable file or of standard input now uses up + to 30 MiB less memory per thread. +* "-dvvv" and "-tvvv" now show the dictionary size of the first + member, producing the same output as lzip for single-member + files. + +------------------------------------------------------------------- Old: ---- plzip-1.2.tar.gz plzip-1.2.tar.gz.sig New: ---- plzip-1.3.tar.gz plzip-1.3.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ plzip.spec ++++++ --- /var/tmp/diff_new_pack.xRmMFI/_old 2015-02-10 20:22:15.000000000 +0100 +++ /var/tmp/diff_new_pack.xRmMFI/_new 2015-02-10 20:22:15.000000000 +0100 @@ -2,7 +2,7 @@ # # spec file for package plzip # -# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2015 SUSE LINUX Products GmbH, Nuernberg, Germany. # Copyright (c) 2012 Pascal Bleser # # All modifications and additions to the file contributed by third parties @@ -19,7 +19,7 @@ Name: plzip -Version: 1.2 +Version: 1.3 Release: 0 Summary: Parallel LZMA Data Compressor License: GPL-2.0+ @@ -50,7 +50,10 @@ %setup -q %build -./configure \ +mkdir build +pushd build +# not autoconf +../configure \ --prefix="%{_prefix}" \ --bindir="%{_bindir}" \ --datadir="%{_datadir}" \ @@ -63,14 +66,18 @@ CXX="%__cxx" \ CPPFLAGS="%{optflags}" \ CXXFLAGS="%{optflags}" - -%__make %{?_smp_flags} +make %{?_smp_flags} +popd %install -%__make DESTDIR="%{buildroot}" LDCONFIG=echo install +pushd build +%make_install LDCONFIG=echo +popd %check -%__make check +pushd build +make check +popd %post %install_info --info-dir="%{_infodir}" "%{_infodir}/%{name}".info%{ext_info} ++++++ plzip-1.2.tar.gz -> plzip-1.3.tar.gz ++++++ ++++ 1609 lines of diff (skipped) ++++ retrying with extended exclude list diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/ChangeLog new/plzip-1.3/ChangeLog --- old/plzip-1.2/ChangeLog 2014-08-29 00:31:54.000000000 +0200 +++ new/plzip-1.3/ChangeLog 2015-01-22 12:22:17.000000000 +0100 @@ -1,3 +1,13 @@ +2015-01-22 Antonio Diaz Diaz + + * Version 1.3 released. + * dec_stream.cc: Do not use output packets or muxer when testing. + * Make '-dvvv' and '-tvvv' show dictionary size like lzip. + * lzip.h: Added missing 'const' to the declaration of 'compress'. + * Added chapters 'Memory requirements' and 'Minimum file sizes' + to manual. + * Makefile.in: Added new targets 'install*-compress'. + 2014-08-29 Antonio Diaz Diaz * Version 1.2 released. @@ -112,7 +122,7 @@ until something better appears on the net. -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/INSTALL new/plzip-1.3/INSTALL --- old/plzip-1.2/INSTALL 2014-08-25 02:32:05.000000000 +0200 +++ new/plzip-1.3/INSTALL 2015-01-08 12:54:16.000000000 +0100 @@ -34,6 +34,10 @@ 5. Type 'make install' to install the program and any data files and documentation. + Or type 'make install-compress', which additionally compresses the + info manual and the man page after installation. (Installing + compressed docs may become the default in the future). + You can install only the program, the info manual or the man page by typing 'make install-bin', 'make install-info' or 'make install-man' respectively. @@ -60,7 +64,7 @@ explained above. -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/NEWS new/plzip-1.3/NEWS --- old/plzip-1.2/NEWS 2014-08-20 20:47:36.000000000 +0200 +++ new/plzip-1.3/NEWS 2014-11-25 18:16:04.000000000 +0100 @@ -1,16 +1,14 @@ -Changes in version 1.2: +Changes in version 1.3: -Copying of file dates, permissions, and ownership now behaves like "cp -p". -(If the user ID or the group ID can't be duplicated, the file permission -bits S_ISUID and S_ISGID are cleared). +Testing of a non-seekable file or of standard input now uses up to 30 +MiB less memory per thread. -Individual limits have been set on the number of packets produced by -each decompresor worker thread to limit the amount of memory used in all -cases. +"-dvvv" and "-tvvv" now show the dictionary size of the first member, +producing the same output as lzip for single-member files. -The approximate amount of memory required has been documented in the -manual. +Chapters "Memory requirements" and "Minimum file sizes" have been added +to the manual. -"plzip.texinfo" has been renamed to "plzip.texi". - -The license has been changed to GPL version 2 or later. +The targets "install-compress", "install-strip-compress", +"install-info-compress" and "install-man-compress" have been added to +the Makefile. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/README new/plzip-1.3/README --- old/plzip-1.2/README 2014-08-29 13:58:32.000000000 +0200 +++ new/plzip-1.3/README 2015-01-21 18:21:28.000000000 +0100 @@ -6,9 +6,10 @@ Plzip can compress/decompress large files on multiprocessor machines much faster than lzip, at the cost of a slightly reduced compression -ratio. Note that the number of usable threads is limited by file size; -on files larger than a few GB plzip can use hundreds of processors, but -on files of only a few MB plzip is no faster than lzip. +ratio (0.4 to 2 percent larger compressed files). Note that the number +of usable threads is limited by file size; on files larger than a few GB +plzip can use hundreds of processors, but on files of only a few MB +plzip is no faster than lzip. When compressing, plzip divides the input file into chunks and compresses as many chunks simultaneously as worker threads are chosen, @@ -23,8 +24,9 @@ Plzip uses the lzip file format; the files produced by plzip are fully compatible with lzip-1.4 or newer, and can be rescued with lziprecover. -The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: +The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: * The lzip format provides very safe integrity checking and some data recovery means. The lziprecover program can repair bit-flip errors @@ -39,8 +41,8 @@ extract the data from a lzip file long after quantum computers eventually render LZMA obsolete. - * Additionally lzip is copylefted, which guarantees that it will - remain free forever. + * Additionally the lzip reference implementation is copylefted, which + guarantees that it will remain free forever. A nice feature of the lzip format is that a corrupt byte is easier to repair the nearer it is from the beginning of the file. Therefore, with @@ -86,7 +88,7 @@ compressed files is also supported. -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/arg_parser.cc new/plzip-1.3/arg_parser.cc --- old/plzip-1.2/arg_parser.cc 2014-08-24 17:52:51.000000000 +0200 +++ new/plzip-1.3/arg_parser.cc 2015-01-16 23:09:05.000000000 +0100 @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2014 Antonio Diaz Diaz. + Copyright (C) 2006-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/arg_parser.h new/plzip-1.3/arg_parser.h --- old/plzip-1.2/arg_parser.h 2014-08-24 17:52:51.000000000 +0200 +++ new/plzip-1.3/arg_parser.h 2015-01-16 23:09:05.000000000 +0100 @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2014 Antonio Diaz Diaz. + Copyright (C) 2006-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/compress.cc new/plzip-1.3/compress.cc --- old/plzip-1.2/compress.cc 2014-08-28 20:39:59.000000000 +0200 +++ new/plzip-1.3/compress.cc 2015-01-21 18:01:06.000000000 +0100 @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -156,9 +156,11 @@ struct Packet // data block with a serial number { - unsigned id; // serial number assigned as received uint8_t * data; int size; // number of bytes in data (if any) + unsigned id; // serial number assigned as received + Packet( uint8_t * const d, const int s, const unsigned i ) + : data( d ), size( s ), id( i ) {} }; @@ -207,10 +209,7 @@ // make a packet with data received from splitter void receive_packet( uint8_t * const data, const int size ) { - Packet * const ipacket = new Packet; - ipacket->id = receive_id++; - ipacket->data = data; - ipacket->size = size; + Packet * const ipacket = new Packet( data, size, receive_id++ ); slot_tally.get_slot(); // wait for a free slot xlock( &imutex ); packet_queue.push( ipacket ); @@ -310,6 +309,7 @@ const Pretty_print * pp; int infd; int data_size; + int offset; }; @@ -322,12 +322,13 @@ const Pretty_print & pp = *tmp.pp; const int infd = tmp.infd; const int data_size = tmp.data_size; + const int offset = tmp.offset; for( bool first_post = true; ; first_post = false ) { - uint8_t * const data = new( std::nothrow ) uint8_t[data_size]; + uint8_t * const data = new( std::nothrow ) uint8_t[offset+data_size]; if( !data ) { pp( mem_msg ); cleanup_and_fail(); } - const int size = readblock( infd, data, data_size ); + const int size = readblock( infd, data + offset, data_size ); if( size != data_size && errno ) { pp(); show_error( "Read error", errno ); cleanup_and_fail(); } @@ -354,6 +355,7 @@ const Pretty_print * pp; int dictionary_size; int match_len_limit; + int offset; }; @@ -366,15 +368,13 @@ const Pretty_print & pp = *tmp.pp; const int dictionary_size = tmp.dictionary_size; const int match_len_limit = tmp.match_len_limit; + const int offset = tmp.offset; while( true ) { Packet * const packet = courier.distribute_packet(); if( !packet ) break; // no more packets to process - const int max_compr_size = 42 + packet->size + ( ( packet->size + 7 ) / 8 ); - uint8_t * const new_data = new( std::nothrow ) uint8_t[max_compr_size]; - if( !new_data ) { pp( mem_msg ); cleanup_and_fail(); } const int dict_size = std::max( LZ_min_dictionary_size(), std::min( dictionary_size, packet->size ) ); LZ_Encoder * const encoder = @@ -396,16 +396,16 @@ { if( written < packet->size ) { - const int wr = LZ_compress_write( encoder, packet->data + written, + const int wr = LZ_compress_write( encoder, + packet->data + offset + written, packet->size - written ); if( wr < 0 ) internal_error( "library error (LZ_compress_write)" ); written += wr; } - if( written >= packet->size ) - { delete[] packet->data; LZ_compress_finish( encoder ); } + if( written >= packet->size ) LZ_compress_finish( encoder ); } - const int rd = LZ_compress_read( encoder, new_data + new_pos, - max_compr_size - new_pos ); + const int rd = LZ_compress_read( encoder, packet->data + new_pos, + offset + written - new_pos ); if( rd < 0 ) { pp(); @@ -415,7 +415,7 @@ cleanup_and_fail(); } new_pos += rd; - if( new_pos > max_compr_size ) + if( new_pos >= offset + written ) internal_error( "packet size exceeded in worker" ); if( LZ_compress_finished( encoder ) == 1 ) break; } @@ -423,8 +423,7 @@ if( LZ_compress_close( encoder ) < 0 ) { pp( "LZ_compress_close failed." ); cleanup_and_fail(); } - if( verbosity >= 2 && packet->size > 0 ) show_progress( packet->size ); - packet->data = new_data; + if( packet->size > 0 ) show_progress( packet->size ); packet->size = new_pos; courier.collect_packet( packet ); } @@ -447,12 +446,9 @@ const Packet * const opacket = packet_vector[i]; out_size += opacket->size; - if( outfd >= 0 ) - { - const int wr = writeblock( outfd, opacket->data, opacket->size ); - if( wr != opacket->size ) - { pp(); show_error( "Write error", errno ); cleanup_and_fail(); } - } + const int wr = writeblock( outfd, opacket->data, opacket->size ); + if( wr != opacket->size ) + { pp(); show_error( "Write error", errno ); cleanup_and_fail(); } delete[] opacket->data; delete opacket; } @@ -469,6 +465,7 @@ const int infd, const int outfd, const Pretty_print & pp, const int debug_level ) { + const int offset = data_size / 8; const int slots_per_worker = 2; const int num_slots = ( ( num_workers > 1 ) ? num_workers * slots_per_worker : 1 ); @@ -481,6 +478,7 @@ splitter_arg.pp = &pp; splitter_arg.infd = infd; splitter_arg.data_size = data_size; + splitter_arg.offset = offset; pthread_t splitter_thread; int errcode = pthread_create( &splitter_thread, 0, csplitter, &splitter_arg ); @@ -492,6 +490,7 @@ worker_arg.pp = &pp; worker_arg.dictionary_size = dictionary_size; worker_arg.match_len_limit = match_len_limit; + worker_arg.offset = offset; pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; if( !worker_threads ) { pp( mem_msg ); cleanup_and_fail(); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/dec_stdout.cc new/plzip-1.3/dec_stdout.cc --- old/plzip-1.2/dec_stdout.cc 2014-08-28 20:39:59.000000000 +0200 +++ new/plzip-1.3/dec_stdout.cc 2015-01-21 18:01:06.000000000 +0100 @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,6 +46,8 @@ { uint8_t * data; // data == 0 means end of member int size; // number of bytes in data (if any) + explicit Packet( uint8_t * const d = 0, const int s = 0 ) + : data( d ), size( s ) {} }; @@ -211,21 +213,16 @@ { if( new_pos > 0 ) // make data packet { - Packet * opacket = new Packet; - opacket->data = new_data; - opacket->size = new_pos; + Packet * const opacket = new Packet( new_data, new_pos ); courier.collect_packet( opacket, worker_id ); new_pos = 0; new_data = new( std::nothrow ) uint8_t[max_packet_size]; if( !new_data ) { pp( "Not enough memory." ); cleanup_and_fail(); } } if( LZ_decompress_finished( decoder ) == 1 ) - { + { // end of member token + courier.collect_packet( new Packet, worker_id ); LZ_decompress_reset( decoder ); // prepare for new member - Packet * opacket = new Packet; // end of member token - opacket->data = 0; - opacket->size = 0; - courier.collect_packet( opacket, worker_id ); break; } } @@ -250,15 +247,12 @@ { while( true ) { - Packet * opacket = courier.deliver_packet(); + Packet * const opacket = courier.deliver_packet(); if( !opacket ) break; // queue is empty. all workers exited - if( outfd >= 0 ) - { - const int wr = writeblock( outfd, opacket->data, opacket->size ); - if( wr != opacket->size ) - { pp(); show_error( "Write error", errno ); cleanup_and_fail(); } - } + const int wr = writeblock( outfd, opacket->data, opacket->size ); + if( wr != opacket->size ) + { pp(); show_error( "Write error", errno ); cleanup_and_fail(); } delete[] opacket->data; delete opacket; } @@ -311,7 +305,7 @@ (double)out_size / in_size, ( 8.0 * in_size ) / out_size, 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) ); - if( verbosity >= 3 ) + if( verbosity >= 4 ) std::fprintf( stderr, "decompressed size %9llu, size %9llu. ", out_size, in_size ); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/dec_stream.cc new/plzip-1.3/dec_stream.cc --- old/plzip-1.2/dec_stream.cc 2014-08-28 20:39:59.000000000 +0200 +++ new/plzip-1.3/dec_stream.cc 2015-01-21 18:01:06.000000000 +0100 @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,6 +47,8 @@ { uint8_t * data; // data == 0 means end of member int size; // number of bytes in data (if any) + explicit Packet( uint8_t * const d = 0, const int s = 0 ) + : data( d ), size( s ) {} }; @@ -102,9 +104,7 @@ // if data == 0, move to next queue void receive_packet( uint8_t * const data, const int size ) { - Packet * ipacket = new Packet; - ipacket->data = data; - ipacket->size = size; + Packet * const ipacket = new Packet( data, size ); if( data ) { in_size += size; slot_tally.get_slot(); } // wait for a free slot xlock( &imutex ); @@ -185,6 +185,13 @@ return opacket; } + void add_out_size( const unsigned long long partial_out_size ) + { + xlock( &omutex ); + out_size += partial_out_size; + xunlock( &omutex ); + } + void finish() // splitter has no more packets to send { xlock( &imutex ); @@ -206,7 +213,7 @@ // Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm) -// Return pos of found string or 'pos+size' if not found. +// Returns pos of found string or 'pos+size' if not found. // int find_magic( const uint8_t * const buffer, const int pos, const int size ) { @@ -269,6 +276,7 @@ header.version() ); } cleanup_and_fail( 2 ); } + show_header( header.dictionary_size() ); unsigned long long partial_member_size = 0; while( true ) @@ -337,22 +345,25 @@ Packet_courier * courier; const Pretty_print * pp; int worker_id; + bool testing; }; - // consume packets from courier, decompress their contents, and - // give the produced packets to courier. + // consume packets from courier, decompress their contents and, + // if not testing, give the produced packets to courier. extern "C" void * dworker_s( void * arg ) { const Worker_arg & tmp = *(Worker_arg *)arg; Packet_courier & courier = *tmp.courier; const Pretty_print & pp = *tmp.pp; const int worker_id = tmp.worker_id; + const bool testing = tmp.testing; uint8_t * new_data = new( std::nothrow ) uint8_t[max_packet_size]; LZ_Decoder * const decoder = LZ_decompress_open(); if( !new_data || !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { pp( "Not enough memory." ); cleanup_and_fail(); } + unsigned long long partial_out_size = 0; int new_pos = 0; bool trailing_garbage_found = false; @@ -391,24 +402,21 @@ if( new_pos == max_packet_size || trailing_garbage_found || LZ_decompress_finished( decoder ) == 1 ) { - if( new_pos > 0 ) // make data packet + if( !testing && new_pos > 0 ) // make data packet { - Packet * opacket = new Packet; - opacket->data = new_data; - opacket->size = new_pos; + Packet * const opacket = new Packet( new_data, new_pos ); courier.collect_packet( opacket, worker_id ); - new_pos = 0; new_data = new( std::nothrow ) uint8_t[max_packet_size]; if( !new_data ) { pp( "Not enough memory." ); cleanup_and_fail(); } } + partial_out_size += new_pos; + new_pos = 0; if( trailing_garbage_found || LZ_decompress_finished( decoder ) == 1 ) { + if( !testing ) // end of member token + courier.collect_packet( new Packet, worker_id ); LZ_decompress_reset( decoder ); // prepare for new member - Packet * opacket = new Packet; // end of member token - opacket->data = 0; - opacket->size = 0; - courier.collect_packet( opacket, worker_id ); break; } } @@ -421,6 +429,7 @@ } delete[] new_data; + courier.add_out_size( partial_out_size ); if( LZ_decompress_member_position( decoder ) != 0 ) { pp( "Error, some data remains in decoder." ); cleanup_and_fail(); } if( LZ_decompress_close( decoder ) < 0 ) @@ -435,17 +444,12 @@ { while( true ) { - Packet * opacket = courier.deliver_packet(); + Packet * const opacket = courier.deliver_packet(); if( !opacket ) break; // queue is empty. all workers exited - out_size += opacket->size; - - if( outfd >= 0 ) - { - const int wr = writeblock( outfd, opacket->data, opacket->size ); - if( wr != opacket->size ) - { pp(); show_error( "Write error", errno ); cleanup_and_fail(); } - } + const int wr = writeblock( outfd, opacket->data, opacket->size ); + if( wr != opacket->size ) + { pp(); show_error( "Write error", errno ); cleanup_and_fail(); } delete[] opacket->data; delete opacket; } @@ -454,11 +458,10 @@ } // end namespace - // init the courier, then start the splitter and the workers and - // call the muxer. + // init the courier, then start the splitter and the workers and, + // if not testing, call the muxer. int dec_stream( const int num_workers, const int infd, const int outfd, - const Pretty_print & pp, const int debug_level, - const bool testing ) + const Pretty_print & pp, const int debug_level ) { const int in_slots_per_worker = 2; const int out_slots = 32; @@ -487,12 +490,13 @@ worker_args[i].courier = &courier; worker_args[i].pp = &pp; worker_args[i].worker_id = i; + worker_args[i].testing = ( outfd < 0 ); errcode = pthread_create( &worker_threads[i], 0, dworker_s, &worker_args[i] ); if( errcode ) { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); } } - muxer( courier, pp, outfd ); + if( outfd >= 0 ) muxer( courier, pp, outfd ); for( int i = num_workers - 1; i >= 0; --i ) { @@ -512,11 +516,11 @@ (double)out_size / in_size, ( 8.0 * in_size ) / out_size, 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) ); - if( verbosity >= 3 ) + if( verbosity >= 4 ) std::fprintf( stderr, "decompressed size %9llu, size %9llu. ", out_size, in_size ); - if( verbosity >= 1 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" ); + if( verbosity >= 1 ) std::fprintf( stderr, (outfd < 0) ? "ok\n" : "done\n" ); if( debug_level & 1 ) std::fprintf( stderr, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/decompress.cc new/plzip-1.3/decompress.cc --- old/plzip-1.2/decompress.cc 2014-08-28 20:39:59.000000000 +0200 +++ new/plzip-1.3/decompress.cc 2015-01-21 18:01:06.000000000 +0100 @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -196,20 +196,21 @@ // start the workers and wait for them to finish. int decompress( int num_workers, const int infd, const int outfd, const Pretty_print & pp, const int debug_level, - const bool testing, const bool infd_isreg ) + const bool infd_isreg ) { if( !infd_isreg ) - return dec_stream( num_workers, infd, outfd, pp, debug_level, testing ); + return dec_stream( num_workers, infd, outfd, pp, debug_level ); const File_index file_index( infd ); if( file_index.retval() == 1 ) { lseek( infd, 0, SEEK_SET ); - return dec_stream( num_workers, infd, outfd, pp, debug_level, testing ); + return dec_stream( num_workers, infd, outfd, pp, debug_level ); } if( file_index.retval() != 0 ) { pp( file_index.error().c_str() ); return file_index.retval(); } + show_header( file_index.dictionary_size( 0 ) ); if( num_workers > file_index.members() ) num_workers = file_index.members(); @@ -255,11 +256,11 @@ (double)out_size / in_size, ( 8.0 * in_size ) / out_size, 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) ); - if( verbosity >= 3 ) + if( verbosity >= 4 ) std::fprintf( stderr, "decompressed size %9llu, size %9llu. ", out_size, in_size ); - if( verbosity >= 1 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" ); + if( verbosity >= 1 ) std::fprintf( stderr, (outfd < 0) ? "ok\n" : "done\n" ); return 0; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/doc/plzip.1 new/plzip-1.3/doc/plzip.1 --- old/plzip-1.2/doc/plzip.1 2014-08-29 16:56:22.000000000 +0200 +++ new/plzip-1.3/doc/plzip.1 2015-01-22 12:21:07.000000000 +0100 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH PLZIP "1" "August 2014" "plzip 1.2" "User Commands" +.TH PLZIP "1" "January 2015" "plzip 1.3" "User Commands" .SH NAME plzip \- reduces the size of files .SH SYNOPSIS @@ -70,8 +70,7 @@ The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR -options directly to achieve optimal performance. For example, \fB\-9m64\fR -usually compresses executables more (and faster) than \fB\-9\fR. +options directly to achieve optimal performance. .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or @@ -84,7 +83,7 @@ .SH COPYRIGHT Copyright \(co 2009 Laszlo Ersek. .br -Copyright \(co 2014 Antonio Diaz Diaz. +Copyright \(co 2015 Antonio Diaz Diaz. Using Lzlib 1.6 License GPLv2+: GNU GPL version 2 or later .br diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/doc/plzip.info new/plzip-1.3/doc/plzip.info --- old/plzip-1.2/doc/plzip.info 2014-08-29 16:34:40.000000000 +0200 +++ new/plzip-1.3/doc/plzip.info 2015-01-22 12:22:28.000000000 +0100 @@ -11,7 +11,7 @@ Plzip Manual ************ -This manual is for Plzip (version 1.2, 29 August 2014). +This manual is for Plzip (version 1.3, 22 January 2015). * Menu: @@ -19,11 +19,13 @@ * Program design:: Internal structure of plzip * Invoking plzip:: Command line interface * File format:: Detailed format of the compressed file +* Memory requirements:: Memory required to compress and decompress +* Minimum file sizes:: Minimum file sizes required for full speed * Problems:: Reporting bugs * Concept index:: Index of concepts - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -40,16 +42,18 @@ Plzip can compress/decompress large files on multiprocessor machines much faster than lzip, at the cost of a slightly reduced compression -ratio. Note that the number of usable threads is limited by file size; -on files larger than a few GB plzip can use hundreds of processors, but -on files of only a few MB plzip is no faster than lzip. +ratio (0.4 to 2 percent larger compressed files). Note that the number +of usable threads is limited by file size; on files larger than a few GB +plzip can use hundreds of processors, but on files of only a few MB +plzip is no faster than lzip (*note Minimum file sizes::). Plzip uses the lzip file format; the files produced by plzip are fully compatible with lzip-1.4 or newer, and can be rescued with lziprecover. - The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: + The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: * The lzip format provides very safe integrity checking and some data recovery means. The lziprecover program can repair bit-flip errors @@ -64,50 +68,23 @@ archaeologist to extract the data from a lzip file long after quantum computers eventually render LZMA obsolete. - * Additionally lzip is copylefted, which guarantees that it will - remain free forever. + * Additionally the lzip reference implementation is copylefted, which + guarantees that it will remain free forever. A nice feature of the lzip format is that a corrupt byte is easier to repair the nearer it is from the beginning of the file. Therefore, with the help of lziprecover, losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. - The member trailer stores the 32-bit CRC of the original data, the -size of the original data and the size of the member. These values, -together with the value remaining in the range decoder and the -end-of-stream marker, provide a 4 factor integrity checking which -guarantees that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, -and against undetected bugs in plzip (hopefully very unlikely). The -chances of data corruption going undetected are microscopic. Be aware, -though, that the check occurs upon decompression, so it can only tell -you that something is wrong. It can't help you recover the original -uncompressed data. - Plzip uses the same well-defined exit status values used by lzip and bzip2, which makes it safer than compressors returning ambiguous warning values (like gzip) when it is used as a back end for other programs like tar or zutils. - The amount of memory required *per thread* is approximately the -following: - - * For compression; 3 times the data size (*note --data-size::) plus - 11 times the dictionary size. - - * For decompression or testing of a non-seekable file or of standard - input; 2 times the dictionary size plus up to 32 MiB. - - * For decompression of a regular file to a non-seekable file or to - standard output; the dictionary size plus up to 32 MiB. - - * For decompression of a regular file to another regular file, or for - testing of a regular file; the dictionary size. - Plzip will automatically use the smallest possible dictionary size for each file without exceeding the given limit. Keep in mind that the decompression memory requirement is affected at compression time by the -choice of dictionary size limit. +choice of dictionary size limit (*note Memory requirements::). When compressing, plzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". @@ -245,8 +222,8 @@ value. Note that the number of usable threads is limited to - ceil( file_size / data_size ) during compression (*note - --data-size::), and to the number of members in the input during + ceil( file_size / data_size ) during compression (*note Minimum + file sizes::), and to the number of members in the input during decompression. '-o FILE' @@ -287,8 +264,8 @@ When compressing, show the compression ratio for each file processed. A second '-v' shows the progress of compression. When decompressing or testing, further -v's (up to 4) increase the - verbosity level, showing status, compression ratio, decompressed - size, and compressed size. + verbosity level, showing status, compression ratio, dictionary + size, decompressed size, and compressed size. '-1 .. -9' Set the compression parameters (dictionary size and match length @@ -299,8 +276,7 @@ linear scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the '--match-length' and '--dictionary-size' options directly to achieve optimal - performance. For example, '-9m64' usually compresses executables - more (and faster) than '-9'. + performance. Level Dictionary size Match length limit -1 1 MiB 5 bytes @@ -340,7 +316,7 @@ caused plzip to panic.  -File: plzip.info, Node: File format, Next: Problems, Prev: Invoking plzip, Up: Top +File: plzip.info, Node: File format, Next: Memory requirements, Prev: Invoking plzip, Up: Top 4 File format ************* @@ -413,9 +389,70 @@  -File: plzip.info, Node: Problems, Next: Concept index, Prev: File format, Up: Top +File: plzip.info, Node: Memory requirements, Next: Minimum file sizes, Prev: File format, Up: Top + +5 Memory required to compress and decompress +******************************************** + +The amount of memory required *per thread* is approximately the +following: + + * For compression; 11 times the dictionary size plus 3 times the + data size (*note --data-size::). + + * For decompression of a regular (seekable) file to another regular + file, or for testing of a regular file; the dictionary size. Note + that regular files with more than 1024 bytes of trailing garbage + are treated as non-seekable. + + * For testing of a non-seekable file or of standard input; the + dictionary size plus up to 5 MiB. + + * For decompression of a regular file to a non-seekable file or to + standard output; the dictionary size plus up to 32 MiB. + + * For decompression of a non-seekable file or of standard input; the + dictionary size plus up to 35 MiB. + + +File: plzip.info, Node: Minimum file sizes, Next: Problems, Prev: Memory requirements, Up: Top + +6 Minimum file sizes required for full compression speed +******************************************************** + +When compressing, plzip divides the input file into chunks and +compresses as many chunks simultaneously as worker threads are chosen, +creating a multi-member compressed file. + + For this to work as expected (and roughly multiply the compression +speed by the number of available processors), the uncompressed file +must be at least as large as the number of worker threads times the +chunk size (*note --data-size::). Else some processors will not get any +data to compress, and compression will be proportionally slower. The +maximum speed increase achievable on a given file is limited by the +ratio (file_size / data_size). + + The following table shows the minimum uncompressed file size needed +for full use of N processors at a given compression level, using the +default data size for each level: + +Processors 2 4 8 16 64 256 +------------------------------------------------------------------------- +Level +-1 4 MiB 8 MiB 16 MiB 32 MiB 128 MiB 512 MiB +-2 6 MiB 12 MiB 24 MiB 48 MiB 192 MiB 768 MiB +-3 8 MiB 16 MiB 32 MiB 64 MiB 256 MiB 1 GiB +-4 12 MiB 24 MiB 48 MiB 96 MiB 384 MiB 1.5 GiB +-5 16 MiB 32 MiB 64 MiB 128 MiB 512 MiB 2 GiB +-6 32 MiB 64 MiB 128 MiB 256 MiB 1 GiB 4 GiB +-7 64 MiB 128 MiB 256 MiB 512 MiB 2 GiB 8 GiB +-8 96 MiB 192 MiB 384 MiB 768 MiB 3 GiB 12 GiB +-9 128 MiB 256 MiB 512 MiB 1 GiB 4 GiB 16 GiB + + +File: plzip.info, Node: Problems, Next: Concept index, Prev: Minimum file sizes, Up: Top -5 Reporting bugs +7 Reporting bugs **************** There are probably bugs in plzip. There are certainly errors and @@ -441,6 +478,8 @@ * getting help: Problems. (line 6) * introduction: Introduction. (line 6) * invoking: Invoking plzip. (line 6) +* memory requirements: Memory requirements. (line 6) +* minimum file sizes: Minimum file sizes. (line 6) * options: Invoking plzip. (line 6) * program design: Program design. (line 6) * usage: Invoking plzip. (line 6) @@ -450,13 +489,15 @@  Tag Table: Node: Top221 -Node: Introduction847 -Node: Program design6279 -Node: Invoking plzip7868 -Ref: --data-size8313 -Node: File format13471 -Node: Problems15976 -Node: Concept index16505 +Node: Introduction988 +Node: Program design5284 +Node: Invoking plzip6873 +Ref: --data-size7287 +Node: File format12414 +Node: Memory requirements14930 +Node: Minimum file sizes15907 +Node: Problems17758 +Node: Concept index18294  End Tag Table diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/doc/plzip.texi new/plzip-1.3/doc/plzip.texi --- old/plzip-1.2/doc/plzip.texi 2014-08-29 13:58:32.000000000 +0200 +++ new/plzip-1.3/doc/plzip.texi 2015-01-22 12:22:17.000000000 +0100 @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 29 August 2014 -@set VERSION 1.2 +@set UPDATED 22 January 2015 +@set VERSION 1.3 @dircategory Data Compression @direntry @@ -39,12 +39,14 @@ * Program design:: Internal structure of plzip * Invoking plzip:: Command line interface * File format:: Detailed format of the compressed file +* Memory requirements:: Memory required to compress and decompress +* Minimum file sizes:: Minimum file sizes required for full speed * Problems:: Reporting bugs * Concept index:: Index of concepts @end menu @sp 1 -Copyright @copyright{} 2009-2014 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2015 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -60,15 +62,17 @@ Plzip can compress/decompress large files on multiprocessor machines much faster than lzip, at the cost of a slightly reduced compression -ratio. Note that the number of usable threads is limited by file size; -on files larger than a few GB plzip can use hundreds of processors, but -on files of only a few MB plzip is no faster than lzip. +ratio (0.4 to 2 percent larger compressed files). Note that the number +of usable threads is limited by file size; on files larger than a few GB +plzip can use hundreds of processors, but on files of only a few MB +plzip is no faster than lzip (@pxref{Minimum file sizes}). Plzip uses the lzip file format; the files produced by plzip are fully compatible with lzip-1.4 or newer, and can be rescued with lziprecover. -The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: +The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: @itemize @bullet @item @@ -87,8 +91,8 @@ LZMA obsolete. @item -Additionally lzip is copylefted, which guarantees that it will remain -free forever. +Additionally the lzip reference implementation is copylefted, which +guarantees that it will remain free forever. @end itemize A nice feature of the lzip format is that a corrupt byte is easier to @@ -96,47 +100,15 @@ the help of lziprecover, losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. -The member trailer stores the 32-bit CRC of the original data, the size -of the original data and the size of the member. These values, together -with the value remaining in the range decoder and the end-of-stream -marker, provide a 4 factor integrity checking which guarantees that the -decompressed version of the data is identical to the original. This -guards against corruption of the compressed data, and against undetected -bugs in plzip (hopefully very unlikely). The chances of data corruption -going undetected are microscopic. Be aware, though, that the check -occurs upon decompression, so it can only tell you that something is -wrong. It can't help you recover the original uncompressed data. - Plzip uses the same well-defined exit status values used by lzip and bzip2, which makes it safer than compressors returning ambiguous warning values (like gzip) when it is used as a back end for other programs like tar or zutils. -The amount of memory required @strong{per thread} is approximately the -following: - -@itemize @bullet -@item -For compression; 3 times the data size (@pxref{--data-size}) plus 11 -times the dictionary size. - -@item -For decompression or testing of a non-seekable file or of standard -input; 2 times the dictionary size plus up to 32 MiB. - -@item -For decompression of a regular file to a non-seekable file or to -standard output; the dictionary size plus up to 32 MiB. - -@item -For decompression of a regular file to another regular file, or for -testing of a regular file; the dictionary size. -@end itemize - Plzip will automatically use the smallest possible dictionary size for each file without exceeding the given limit. Keep in mind that the decompression memory requirement is affected at compression time by the -choice of dictionary size limit. +choice of dictionary size limit (@pxref{Memory requirements}). When compressing, plzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". @@ -232,9 +204,9 @@ @itemx --version Print the version number of plzip on the standard output and exit. +@anchor{--data-size} @item -B @var{bytes} @itemx --data-size=@var{bytes} -@anchor{--data-size} Set the size of the input data blocks, in bytes. The input file will be divided in chunks of this size before compression is performed. Valid values range from 8 KiB to 1 GiB. Default value is two times the @@ -277,8 +249,8 @@ value. @w{@samp{plzip --help}} shows the system's default value. Note that the number of usable threads is limited to @w{ceil( file_size -/ data_size )} during compression (@pxref{--data-size}), and to the -number of members in the input during decompression. +/ data_size )} during compression (@pxref{Minimum file sizes}), and to +the number of members in the input during decompression. @item -o @var{file} @itemx --output=@var{file} @@ -315,8 +287,8 @@ When compressing, show the compression ratio for each file processed. A second @samp{-v} shows the progress of compression.@* When decompressing or testing, further -v's (up to 4) increase the -verbosity level, showing status, compression ratio, decompressed size, -and compressed size. +verbosity level, showing status, compression ratio, dictionary size, +decompressed size, and compressed size. @item -1 .. -9 Set the compression parameters (dictionary size and match length limit) @@ -327,8 +299,7 @@ scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the @samp{--match-length} and @samp{--dictionary-size} options directly to achieve optimal -performance. For example, @samp{-9m64} usually compresses executables -more (and faster) than @samp{-9}. +performance. @multitable {Level} {Dictionary size} {Match length limit} @item Level @tab Dictionary size @tab Match length limit @@ -449,6 +420,73 @@ @end table +@node Memory requirements +@chapter Memory required to compress and decompress +@cindex memory requirements + +The amount of memory required @strong{per thread} is approximately the +following: + +@itemize @bullet +@item +For compression; 11 times the dictionary size plus 3 times the data size +(@pxref{--data-size}). + +@item +For decompression of a regular (seekable) file to another regular file, +or for testing of a regular file; the dictionary size. Note that regular +files with more than 1024 bytes of trailing garbage are treated as +non-seekable. + +@item +For testing of a non-seekable file or of standard input; the dictionary +size plus up to 5 MiB. + +@item +For decompression of a regular file to a non-seekable file or to +standard output; the dictionary size plus up to 32 MiB. + +@item +For decompression of a non-seekable file or of standard input; the +dictionary size plus up to 35 MiB. +@end itemize + + +@node Minimum file sizes +@chapter Minimum file sizes required for full compression speed +@cindex minimum file sizes + +When compressing, plzip divides the input file into chunks and +compresses as many chunks simultaneously as worker threads are chosen, +creating a multi-member compressed file. + +For this to work as expected (and roughly multiply the compression speed +by the number of available processors), the uncompressed file must be at +least as large as the number of worker threads times the chunk size +(@pxref{--data-size}). Else some processors will not get any data to +compress, and compression will be proportionally slower. The maximum +speed increase achievable on a given file is limited by the ratio +@w{(file_size / data_size)}. + +The following table shows the minimum uncompressed file size needed for +full use of N processors at a given compression level, using the default +data size for each level: + +@multitable {Processors} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} +@headitem Processors @tab 2 @tab 4 @tab 8 @tab 16 @tab 64 @tab 256 +@item Level +@item -1 @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 128 MiB @tab 512 MiB +@item -2 @tab 6 MiB @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 192 MiB @tab 768 MiB +@item -3 @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 256 MiB @tab 1 GiB +@item -4 @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 96 MiB @tab 384 MiB @tab 1.5 GiB +@item -5 @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 512 MiB @tab 2 GiB +@item -6 @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 1 GiB @tab 4 GiB +@item -7 @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 2 GiB @tab 8 GiB +@item -8 @tab 96 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab 3 GiB @tab 12 GiB +@item -9 @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 1 GiB @tab 4 GiB @tab 16 GiB +@end multitable + + @node Problems @chapter Reporting bugs @cindex bugs @@ -461,7 +499,7 @@ If you find a bug in plzip, please send electronic mail to @email{lzip-bug@@nongnu.org}. Include the version number, which you can -find by running @w{@samp{plzip --version}}. +find by running @w{@code{plzip --version}}. @node Concept index diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/file_index.cc new/plzip-1.3/file_index.cc --- old/plzip-1.2/file_index.cc 2014-08-29 16:34:36.000000000 +0200 +++ new/plzip-1.3/file_index.cc 2015-01-21 18:01:06.000000000 +0100 @@ -1,5 +1,5 @@ /* Plzip - Parallel compressor compatible with lzip - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -50,7 +50,7 @@ char buf[80]; snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 ); error_ = buf; - retval_ = 2; + retval_ = member_vector.empty() ? 1 : 2; // maybe trailing garbage } @@ -74,9 +74,10 @@ retval_ = 2; return; } if( !header.verify_version() ) { set_num_error( "Version ", header.version(), - " member format not supported." ); return; } + " member format not supported." ); retval_ = 2; return; } long long pos = isize; // always points to a header or to EOF + const long long max_garbage = 1024; while( pos >= min_member_size ) { File_trailer trailer; @@ -86,8 +87,8 @@ const long long member_size = trailer.member_size(); if( member_size < min_member_size || member_size > pos ) { - if( member_vector.empty() ) // maybe trailing garbage - { --pos; continue; } + if( member_vector.empty() && isize - pos < max_garbage ) + { --pos; continue; } // maybe trailing garbage set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); break; } @@ -96,11 +97,12 @@ { set_errno_error( "Error reading member header: " ); break; } if( !header.verify_magic() || !header.verify_version() ) { - if( member_vector.empty() ) // maybe trailing garbage - { --pos; continue; } + if( member_vector.empty() && isize - pos < max_garbage ) + { --pos; continue; } // maybe trailing garbage set_num_error( "Bad header at pos ", pos - member_size ); break; } + const unsigned dictionary_size = header.dictionary_size(); if( member_vector.empty() && isize - pos > File_header::size && seek_read( infd, header.data, File_header::size, pos ) == File_header::size && header.verify_magic() && header.verify_version() ) @@ -110,7 +112,7 @@ } pos -= member_size; member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size ) ); + pos, member_size, dictionary_size ) ); } if( pos != 0 || member_vector.empty() ) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/file_index.h new/plzip-1.3/file_index.h --- old/plzip-1.2/file_index.h 2014-08-28 20:39:59.000000000 +0200 +++ new/plzip-1.3/file_index.h 2015-01-21 18:01:06.000000000 +0100 @@ -1,5 +1,5 @@ /* Plzip - Parallel compressor compatible with lzip - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,10 +41,11 @@ struct Member { Block dblock, mblock; // data block, member block + unsigned dictionary_size; Member( const long long dp, const long long ds, - const long long mp, const long long ms ) - : dblock( dp, ds ), mblock( mp, ms ) {} + const long long mp, const long long ms, const unsigned dict_size ) + : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {} }; std::vector< Member > member_vector; @@ -74,4 +75,6 @@ { return member_vector[i].dblock; } const Block & mblock( const long i ) const { return member_vector[i].mblock; } + unsigned dictionary_size( const long i ) const + { return member_vector[i].dictionary_size; } }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/lzip.h new/plzip-1.3/lzip.h --- old/plzip-1.2/lzip.h 2014-08-28 20:39:59.000000000 +0200 +++ new/plzip-1.3/lzip.h 2015-01-21 18:01:06.000000000 +0100 @@ -1,5 +1,5 @@ /* Plzip - Parallel compressor compatible with lzip - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -162,7 +162,7 @@ void xsignal( pthread_cond_t * const cond ); void xbroadcast( pthread_cond_t * const cond ); int compress( const int data_size, const int dictionary_size, - const int match_len_limit, int num_workers, + const int match_len_limit, const int num_workers, const int infd, const int outfd, const Pretty_print & pp, const int debug_level ); @@ -176,8 +176,7 @@ // defined in dec_stream.cc int dec_stream( const int num_workers, const int infd, const int outfd, - const Pretty_print & pp, const int debug_level, - const bool testing ); + const Pretty_print & pp, const int debug_level ); // defined in decompress.cc int preadblock( const int fd, uint8_t * const buf, const int size, @@ -188,11 +187,12 @@ const Pretty_print & pp, const int worker_id ); int decompress( int num_workers, const int infd, const int outfd, const Pretty_print & pp, const int debug_level, - const bool testing, const bool infd_isreg ); + const bool infd_isreg ); // defined in main.cc extern int verbosity; void cleanup_and_fail( const int retval = 1 ); // terminate the program +void show_header( const unsigned dictionary_size ); void show_error( const char * const msg, const int errcode = 0, const bool help = false ); void internal_error( const char * const msg ); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/main.cc new/plzip-1.3/main.cc --- old/plzip-1.2/main.cc 2014-08-28 20:39:59.000000000 +0200 +++ new/plzip-1.3/main.cc 2015-01-08 12:49:40.000000000 +0100 @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -72,7 +72,7 @@ const char * const Program_name = "Plzip"; const char * const program_name = "plzip"; -const char * const program_year = "2014"; +const char * const program_year = "2015"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -119,7 +119,7 @@ " -1 .. -9 set compression level [default 6]\n" " --fast alias for -1\n" " --best alias for -9\n", num_online ); - if( verbosity > 0 ) + if( verbosity >= 1 ) { std::printf( " -D, --debug= (0-1) print debug statistics to stderr\n" ); } @@ -130,8 +130,7 @@ "The bidimensional parameter space of LZMA can't be mapped to a linear\n" "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" - "options directly to achieve optimal performance. For example, -9m64\n" - "usually compresses executables more (and faster) than -9.\n" + "options directly to achieve optimal performance.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "invalid input file, 3 for an internal consistency error (eg, bug) which\n" @@ -152,6 +151,28 @@ "There is NO WARRANTY, to the extent permitted by law.\n" ); } +} // end namespace + +void show_header( const unsigned dictionary_size ) + { + if( verbosity >= 3 ) + { + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { factor = 1024 }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size; + bool exact = ( num % factor == 0 ); + + for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + } + } + +namespace { unsigned long long getnum( const char * const ptr, const unsigned long long llimit, @@ -323,7 +344,7 @@ bool check_tty( const int infd, const Mode program_mode ) { - if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) ) + if( program_mode == m_compress && isatty( outfd ) ) { show_error( "I won't write compressed data to a terminal.", 0, true ); return false; @@ -337,6 +358,32 @@ return true; } +} // end namespace + +// This can be called from any thread, main thread or sub-threads alike, +// since they all call common helper functions that call cleanup_and_fail() +// in case of an error. +// +void cleanup_and_fail( const int retval ) + { + // only one thread can delete and exit + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + pthread_mutex_lock( &mutex ); // ignore errors to avoid loop + if( delete_output_on_interrupt ) + { + delete_output_on_interrupt = false; + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + program_name, output_filename.c_str() ); + if( outfd >= 0 ) { close( outfd ); outfd = -1; } + if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) + show_error( "WARNING: deletion of output file (apparently) failed." ); + } + std::exit( retval ); + } + +namespace { // Set permissions, owner and times. void close_and_set_permissions( const struct stat * const in_statsp ) @@ -431,30 +478,6 @@ } -// This can be called from any thread, main thread or sub-threads alike, -// since they all call common helper functions that call cleanup_and_fail() -// in case of an error. -// -void cleanup_and_fail( const int retval ) - { - // only one thread can delete and exit - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - pthread_mutex_lock( &mutex ); // ignore errors to avoid loop - if( delete_output_on_interrupt ) - { - delete_output_on_interrupt = false; - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", - program_name, output_filename.c_str() ); - if( outfd >= 0 ) { close( outfd ); outfd = -1; } - if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) - show_error( "WARNING: deletion of output file (apparently) failed." ); - } - std::exit( retval ); - } - - void show_progress( const int packet_size, const Pretty_print * const p, const unsigned long long cfile_size ) @@ -464,17 +487,20 @@ static const Pretty_print * pp = 0; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - if( p ) // initialize static vars - { csize = cfile_size; pos = 0; pp = p; } - if( pp ) - { - xlock( &mutex ); - pos += packet_size; - if( csize > 0 ) - std::fprintf( stderr, "%4llu%%", pos / csize ); - std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); - pp->reset(); (*pp)(); // restore cursor position - xunlock( &mutex ); + if( verbosity >= 2 ) + { + if( p ) // initialize static vars + { csize = cfile_size; pos = 0; pp = p; } + if( pp ) + { + xlock( &mutex ); + pos += packet_size; + if( csize > 0 ) + std::fprintf( stderr, "%4llu%%", pos / csize ); + std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + pp->reset(); (*pp)(); // restore cursor position + xunlock( &mutex ); + } } } @@ -688,15 +714,13 @@ int tmp; if( program_mode == m_compress ) { - if( verbosity >= 2 ) // init - show_progress( 0, &pp, infd_isreg ? in_statsp->st_size / 100 : 0 ); + show_progress( 0, &pp, infd_isreg ? in_statsp->st_size / 100 : 0 ); // init tmp = compress( data_size, encoder_options.dictionary_size, encoder_options.match_len_limit, num_workers, infd, outfd, pp, debug_level ); } else - tmp = decompress( num_workers, infd, outfd, pp, debug_level, - program_mode == m_test, infd_isreg ); + tmp = decompress( num_workers, infd, outfd, pp, debug_level, infd_isreg ); if( tmp > retval ) retval = tmp; if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/plzip-1.2/testsuite/check.sh new/plzip-1.3/testsuite/check.sh --- old/plzip-1.2/testsuite/check.sh 2014-08-24 18:36:14.000000000 +0200 +++ new/plzip-1.3/testsuite/check.sh 2015-01-21 18:01:06.000000000 +0100 @@ -1,6 +1,6 @@ #! /bin/sh # check script for Plzip - Parallel compressor compatible with lzip -# Copyright (C) 2009-2014 Antonio Diaz Diaz. +# Copyright (C) 2009-2015 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it.