On 2014-07-04 19:27:10 +0530, Rahila Syed wrote:
> +	/*
> +	 * Allocate memory for compressed backup blocks according to the
> +	 * compression algorithm used, once per session at the time of
> +	 * insertion of the first XLOG record.  This memory stays until the
> +	 * end of the session.  OOM is handled by making the code proceed
> +	 * without FPW compression.
> +	 */
> +	static char *compressed_pages[XLR_MAX_BKP_BLOCKS];
> +	static bool compressed_pages_allocated = false;
> +
> +	if (compress_backup_block != BACKUP_BLOCK_COMPRESSION_OFF &&
> +		!compressed_pages_allocated)
> +	{
> +		size_t		buffer_size = VARHDRSZ;
> +		int			j;
> +
> +		if (compress_backup_block == BACKUP_BLOCK_COMPRESSION_SNAPPY)
> +			buffer_size += snappy_max_compressed_length(BLCKSZ);
> +		else if (compress_backup_block == BACKUP_BLOCK_COMPRESSION_LZ4)
> +			buffer_size += LZ4_compressBound(BLCKSZ);
> +		else if (compress_backup_block == BACKUP_BLOCK_COMPRESSION_PGLZ)
> +			buffer_size += PGLZ_MAX_OUTPUT(BLCKSZ);
> +
> +		for (j = 0; j < XLR_MAX_BKP_BLOCKS; j++)
> +		{
> +			compressed_pages[j] = (char *) malloc(buffer_size);
> +			if (compressed_pages[j] == NULL)
> +			{
> +				compress_backup_block = BACKUP_BLOCK_COMPRESSION_OFF;
> +				break;
> +			}
> +		}
> +		compressed_pages_allocated = true;
> +	}

Why not do this in InitXLOGAccess() or similar?
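
As a rough sketch of what I mean (placement is hypothetical, and
AllocateBackupBlockBuffers() is a made-up helper wrapping the malloc
loop above):

void
InitXLOGAccess(void)
{
	/* ... existing per-backend initialization ... */

	/*
	 * Set up the compression buffers once per backend instead of
	 * testing a static flag on every XLogInsert() call.
	 */
	if (compress_backup_block != BACKUP_BLOCK_COMPRESSION_OFF)
		AllocateBackupBlockBuffers();
}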

>       /*
>        * Make additional rdata chain entries for the backup blocks, so that we
>        * don't need to special-case them in the write loop.  This modifies the
> @@ -1015,11 +1048,32 @@ begin:;
>               rdt->next = &(dtbuf_rdt2[i]);
>               rdt = rdt->next;
>  
> +		if (compress_backup_block != BACKUP_BLOCK_COMPRESSION_OFF)
> +		{
> +			/* Compress the backup block before including it in the rdata chain */
> +			rdt->data = CompressBackupBlock(page, BLCKSZ - bkpb->hole_length,
> +											compressed_pages[i], &(rdt->len));
> +			if (rdt->data != NULL)
> +			{
> +				/*
> +				 * write_len is the length of the compressed block and its
> +				 * varlena header
> +				 */
> +				write_len += rdt->len;
> +				bkpb->hole_length = BLCKSZ - rdt->len;
> +
> +				/* Add information about compression to the backup block header */
> +				bkpb->block_compression = compress_backup_block;
> +				rdt->next = NULL;
> +				continue;
> +			}
> +		}
> +

So, you're compressing backup blocks one by one. I wonder if that's the
right idea and if we shouldn't instead compress all of them in one run to
increase the compression ratio.
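
Roughly like this (illustrative only; the variable names here are made
up, not the actual xlog.c ones):

/* Append every backup block image into one scratch buffer ... */
uint32	total_len = 0;
int		i;

for (i = 0; i < nbackup_blocks; i++)
{
	memcpy(scratch + total_len, block_image[i], block_len[i]);
	total_len += block_len[i];
}

/*
 * ... and compress once.  Redundancy shared between blocks (repeated
 * page header patterns, say) then compresses away, which a per-block
 * call cannot exploit.
 */
rdt->data = CompressBackupBlock(scratch, total_len,
								compressed_pages[0], &(rdt->len));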


> +/*
>   * Get a pointer to the right location in the WAL buffer containing the
>   * given XLogRecPtr.
>   *
> @@ -4061,6 +4174,50 @@ RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
>  	{
>  		memcpy((char *) page, blk, BLCKSZ);
>  	}
> +	/* Decompress if the backup block is compressed */
> +	else if (VARATT_IS_COMPRESSED((struct varlena *) blk) &&
> +			 bkpb.block_compression != BACKUP_BLOCK_COMPRESSION_OFF)
> +	{
> +		if (bkpb.block_compression == BACKUP_BLOCK_COMPRESSION_SNAPPY)
> +		{
> +			int			ret;
> +			size_t		compressed_length = VARSIZE((struct varlena *) blk) - VARHDRSZ;
> +			char	   *compressed_data = (char *) VARDATA((struct varlena *) blk);
> +			size_t		s_uncompressed_length;
> +
> +			ret = snappy_uncompressed_length(compressed_data,
> +											 compressed_length,
> +											 &s_uncompressed_length);
> +			if (!ret)
> +				elog(ERROR, "snappy: failed to determine compression length");
> +			if (BLCKSZ != s_uncompressed_length)
> +				elog(ERROR, "snappy: compression size mismatch %d != %zu",
> +					 BLCKSZ, s_uncompressed_length);
> +
> +			ret = snappy_uncompress(compressed_data, compressed_length, page);
> +			if (ret != 0)
> +				elog(ERROR, "snappy: decompression failed: %d", ret);
> +		}
> +		else if (bkpb.block_compression == BACKUP_BLOCK_COMPRESSION_LZ4)
> +		{
> +			int			ret;
> +			size_t		compressed_length = VARSIZE((struct varlena *) blk) - VARHDRSZ;
> +			char	   *compressed_data = (char *) VARDATA((struct varlena *) blk);
> +
> +			ret = LZ4_decompress_fast(compressed_data, page, BLCKSZ);
> +			if (ret != compressed_length)
> +				elog(ERROR, "lz4: decompression size mismatch: %d vs %zu",
> +					 ret, compressed_length);
> +		}
> +		else if (bkpb.block_compression == BACKUP_BLOCK_COMPRESSION_PGLZ)
> +		{
> +			pglz_decompress((PGLZ_Header *) blk, (char *) page);
> +		}
> +		else
> +			elog(ERROR, "wrong value for compress_backup_block GUC");
> +	}
>       else
>       {
>               memcpy((char *) page, blk, bkpb.hole_offset);

So why aren't we compressing the hole here instead of compressing the
parts that the current logic deems to be filled with important information?
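
The compressed path should presumably mirror the else-branch below,
i.e. decompress into a scratch buffer and then reinsert the zero-filled
hole. A sketch, taking pglz and assuming 'uncompressed' is a
BLCKSZ-sized scratch buffer:

pglz_decompress((PGLZ_Header *) blk, uncompressed);
memcpy((char *) page, uncompressed, bkpb.hole_offset);
/* must zero-fill the hole */
MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
	   uncompressed + bkpb.hole_offset,
	   BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));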

>  /*
>   * Options for enum values stored in other modules
>   */
> @@ -3498,6 +3512,16 @@ static struct config_enum ConfigureNamesEnum[] =
>               NULL, NULL, NULL
>       },
>  
> +	{
> +		{"compress_backup_block", PGC_SIGHUP, WAL_SETTINGS,
> +			gettext_noop("Compress backup block in WAL using specified compression algorithm."),
> +			NULL
> +		},
> +		&compress_backup_block,
> +		BACKUP_BLOCK_COMPRESSION_OFF, backup_block_compression_options,
> +		NULL, NULL, NULL
> +	},
> +

This should be named 'compress_full_page_writes' or so, even if it's
only a temporary GUC. There's already the 'full_page_writes' GUC, and I
see little reason to deviate from its name.
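
For illustration, in postgresql.conf that would then read something
like (hypothetical name per the above; the values mirror the patch's
backup_block_compression_options):

compress_full_page_writes = pglz	# off, pglz, snappy, lz4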

Greetings,

Andres Freund

