⬅ Download proof of concept
This is a proof of concept micro-benchmark compressing the
37_640 bytes symbol from the bug report linked above with both
id_compress from the dmd backend and lz4 (without dictionary).
The lz4 result is additionally transformed to 7-bits similar
to base-64.
Original size: 37640 bytes
id_compress : 5243 ms for 10_000 iterations, 800 bytes
lz4 : 71 ms for 10_000 iterations, 389 bytes
That roughly corresponds to a 2x higher compression ratio at
70x faster compression speed.
--
Marco
import std.bitmanip;
import core.stdc.string;
struct S(T)
{
void foo(){ throw new Exception("1");}
}
auto s(T)(T t)
{
struct Result
{
void foo(){ throw new Exception("2");}
}
return Result();
}
void main()
{
import core.stdc.stdlib;
import std.stdio;
import std.datetime;
string text = 1.s.s.s.s.s.s.s.s.s.s.foo.mangleof;
writefln("Original size: %s", text.length);
size_t comprSize, comprSize2;
char* buffer;
StopWatch sw;
sw.start();
foreach (i; 0 .. 10_000)
{
buffer = cast(char*)malloc((LZ4_compressBound(text.length) * 8 + 6) / 7);
comprSize = LZ4_compress_fast(text.ptr, buffer, text.length);
comprSize2 = (comprSize * 8 + 6) / 7;
foreach_reverse (k; 0 .. comprSize2)
{
size_t idx = k * 7 / 8;
ubyte bit = k * 7 % 8;
ubyte _7bit = buffer[idx] >> bit;
if (bit > 1)
_7bit |= buffer[idx+1] << (8 - bit);
_7bit &= 0x7F;
_7bit += '!';
buffer[k] = _7bit;
}
}
sw.stop();
writefln("%s ms for 10_000 iterations", sw.peek.msecs);
writefln("Compressed size: %s", comprSize2);
writefln("Compressed bytes: %s", cast(ubyte[])buffer[0 .. comprSize2]);
stdout.flush();
sw.start();
foreach (i; 0 .. 10_000)
id_compress(text.ptr, text.length, &comprSize);
sw.stop();
writefln("%s ms for 10_000 iterations", sw.peek.msecs);
writefln("Compresed size (id_compress): %s", comprSize);
}
// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
enum LZ4_MEMORY_USAGE = 14;
enum LZ4_HASHLOG = LZ4_MEMORY_USAGE-2;
enum HASH_SIZE_U32 = (1 << LZ4_MEMORY_USAGE) / uint.sizeof;
enum COPYLENGTH = 8;
enum MINMATCH = 4;
enum MFLIMIT = COPYLENGTH + MINMATCH;
enum LZ4_minLength = MFLIMIT + 1;
enum LASTLITERALS = 5;
// Increase this value ==> compression run slower on incompressible data
enum LZ4_skipTrigger = 6U;
enum MAXD_LOG = 16;
enum MAX_DISTANCE = (1 << MAXD_LOG) - 1;
enum LZ4_MAX_INPUT_SIZE = 0x7E000000; /* 2 113 929 216 bytes */
enum LZ4_64Klimit = 64*1024 + (MFLIMIT-1);
enum tableType_t { byPtr, byU32, byU16 };
struct Token
{
mixin(bitfields!(
size_t, "matchLength", 4,
size_t, "literalLength", 4,
));
}
struct LZ4_stream
{
uint[HASH_SIZE_U32] hashTable;
uint currentOffset;
uint initCheck;
const char* dictionary;
uint dictSize;
}
size_t LZ4_compressBound(size_t isize)
{
return isize > LZ4_MAX_INPUT_SIZE ? 0 : isize + (isize/255) + 16;
}
ushort LZ4_read16(in void* memPtr)
{
ushort val16;
memcpy(&val16, memPtr, 2);
return val16;
}
uint LZ4_read32(in void* memPtr)
{
uint val32;
memcpy(&val32, memPtr, 4);
return val32;
}
size_t LZ4_read_ARCH(in void* p)
{
size_t result = void;
memcpy(&result, p, (void*).sizeof);
return result;
}
uint LZ4_hashSequence(size_t sequence, const tableType_t tableType)
{
immutable hashLog = (tableType == tableType_t.byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
version (D_LP64)
{
enum prime5bytes = 889523592379UL;
immutable hashMask = (1<<hashLog) - 1;
return (sequence * prime5bytes >> 40 - hashLog) & hashMask;
}
else
{
return sequence * 2654435761 >> MINMATCH * 8 - hashLog;
}
}
uint LZ4_hashPosition(in void* p, tableType_t tableType)
{
return LZ4_hashSequence(LZ4_read_ARCH(p), tableType);
}
void LZ4_putPositionOnHash(in char* p, uint h, ref LZ4_stream tableBase,
const tableType_t tableType, in char* srcBase)
{
with (tableType_t) final switch (tableType)
{
case byPtr:
const(char)** hashTable = cast(const(char)**)tableBase.hashTable.ptr;
hashTable[h] = p;
return;
case byU32:
uint* hashTable = cast(uint*)tableBase.hashTable.ptr;
hashTable[h] = cast(uint) (p-srcBase);
return;
case byU16:
ushort* hashTable = cast(ushort*)tableBase.hashTable.ptr;
hashTable[h] = cast(ushort)(p-srcBase);
return;
}
}
void LZ4_putPosition(in char* p, ref LZ4_stream tableBase, const tableType_t tableType,
in char* srcBase)
{
uint h = LZ4_hashPosition(p, tableType);
LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
}
const(char*) LZ4_getPositionOnHash(uint h, ref const LZ4_stream tableBase,
const tableType_t tableType, in char* srcBase)
{
with (tableType_t) final switch (tableType)
{
case tableType_t.byPtr:
const char** hashTable = cast(const char**)tableBase.hashTable.ptr;
return hashTable[h];
case tableType_t.byU32:
uint* hashTable = cast(uint*)tableBase.hashTable.ptr;
return hashTable[h] + srcBase;
case tableType_t.byU16:
ushort* hashTable = cast(ushort*)tableBase.hashTable.ptr;
return hashTable[h] + srcBase;
}
}
const(char*) LZ4_getPosition(in char* p, ref const LZ4_stream tableBase,
const tableType_t tableType, in char* srcBase)
{
uint h = LZ4_hashPosition(p, tableType);
return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
}
/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */
void LZ4_wildCopy(void* dstPtr, scope const(void)* srcPtr, in void* dstEnd)
{
do
{
memcpy(dstPtr, srcPtr, 8);
dstPtr += 8; srcPtr += 8;
}
while (dstPtr < dstEnd);
}
void LZ4_writeLE16(void* memPtr, ushort value)
{
version (LittleEndian)
{
memcpy(memPtr, &value, 2);
}
else
{
ubyte* p = cast(ubyte*)memPtr;
p[0] = cast(ubyte)value;
p[1] = value >> 8;
}
}
size_t LZ4_NbCommonBytes(size_t val)
{
import core.bitop;
version (LittleEndian)
return bsf(val) >> 3;
else /* Big Endian CPU */
return bsr(val) >> 3;
}
size_t LZ4_count(scope const(char)* pIn, scope const(char)* pMatch, in char* pInLimit)
{
const char* pStart = pIn;
while (pIn < pInLimit-(size_t.sizeof-1))
{
size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
if (!diff) { pIn+=size_t.sizeof; pMatch+=size_t.sizeof; continue; }
pIn += LZ4_NbCommonBytes(diff);
return pIn - pStart;
}
version (D_LP64) if (pIn < (pInLimit-3) && LZ4_read32(pMatch) == LZ4_read32(pIn))
{
pIn+=4; pMatch+=4;
}
if (pIn < (pInLimit-1) && LZ4_read16(pMatch) == LZ4_read16(pIn))
{
pIn+=2; pMatch+=2;
}
if (pIn < pInLimit && *pMatch == *pIn)
pIn++;
return pIn - pStart;
}
size_t LZ4_compress_generic(ref LZ4_stream ctx, in char* source, char* dest,
const size_t inputSize, const tableType_t tableType)
in
{
assert (inputSize <= LZ4_MAX_INPUT_SIZE, "Unsupported input size, too large");
assert (tableType != tableType_t.byU16 || inputSize < LZ4_64Klimit, "Size too large (not within 64K limit)");
}
body
{
const(char)*
ip = source,
anchor = source;
const char*
lowRefLimit = ip - ctx.dictSize,
dictionary = ctx.dictionary,
dictEnd = dictionary + ctx.dictSize,
iend = ip + inputSize,
mflimit = iend - MFLIMIT,
matchlimit = iend - LASTLITERALS;
immutable dictDelta = dictEnd - source;
char*
op = dest;
uint forwardH;
size_t refDelta = 0;
const char* base = source, lowLimit = source;
/* Input too small, no compression (all literals) */
if (inputSize < LZ4_minLength)
goto _last_literals;
/* First Byte */
LZ4_putPosition(ip, ctx, tableType, base);
ip++; forwardH = LZ4_hashPosition(ip, tableType);
/* Main Loop */
for ( ; ; )
{
const(char)* match;
Token* token;
{
const(char)* forwardIp = ip;
uint step = 1;
uint searchMatchNb = 1 << LZ4_skipTrigger;
/* Find a match */
do
{
uint h = forwardH;
ip = forwardIp;
forwardIp += step;
step = (searchMatchNb++ >> LZ4_skipTrigger);
if (forwardIp > mflimit)
goto _last_literals;
match = LZ4_getPositionOnHash(h, ctx, tableType, base);
forwardH = LZ4_hashPosition(forwardIp, tableType);
LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
}
while (tableType == tableType_t.byU16
? (LZ4_read32(match+refDelta) != LZ4_read32(ip))
: (match + MAX_DISTANCE < ip));
}
/* Catch up */
while (ip > anchor && match+refDelta > lowLimit && ip[-1] == match[refDelta-1])
{
ip--; match--;
}
{
/* Encode Literal length */
size_t litLength = ip - anchor;
token = cast(Token*)op++;
if (litLength >= Token.literalLength_max)
{
token.literalLength = Token.literalLength_max;
size_t len = litLength - Token.literalLength_max;
for (; len >= 255 ; len -= 255)
*op++ = 255;
*op++ = cast(ubyte)len;
}
else token.literalLength = litLength;
/* Copy Literals */
LZ4_wildCopy(op, anchor, op+litLength);
op += litLength;
}
_next_match:
/* Encode Offset */
LZ4_writeLE16(op, cast(ushort)(ip-match)); op += 2;
/* Encode MatchLength */
{
size_t matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
ip += MINMATCH + matchLength;
if (matchLength >= Token.matchLength_max)
{
token.matchLength = Token.matchLength_max;
matchLength -= Token.matchLength_max;
for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; }
if (matchLength >= 255) { matchLength-=255; *op++ = 255; }
*op++ = cast(ubyte)matchLength;
}
else token.matchLength = cast(ubyte)matchLength;
}
anchor = ip;
/* Test end of chunk */
if (ip > mflimit) break;
/* Fill table */
LZ4_putPosition(ip-2, ctx, tableType, base);
/* Test next position */
match = LZ4_getPosition(ip, ctx, tableType, base);
LZ4_putPosition(ip, ctx, tableType, base);
if (match + MAX_DISTANCE >= ip && LZ4_read32(match+refDelta) == LZ4_read32(ip))
{
token = cast(Token*)op++;
*token = Token.init;
goto _next_match;
}
/* Prepare next loop */
forwardH = LZ4_hashPosition(++ip, tableType);
}
_last_literals:
/* Encode Last Literals */
{
Token* token = cast(Token*)op++;
const lastRun = iend - anchor;
if (lastRun >= Token.literalLength_max)
{
token.literalLength = Token.literalLength_max;
size_t accumulator = lastRun - Token.literalLength_max;
for (; accumulator >= 255 ; accumulator -= 255)
*op++ = 255;
*op++ = cast(ubyte)accumulator;
}
else token.literalLength = lastRun;
memcpy(op, anchor, lastRun);
op += lastRun;
}
/* End */
return op - dest;
}
version (GNU)
{
import gcc.attribute;
enum noinline = attribute("noinline");
}
else
enum noinline;
@noinline
size_t LZ4_compress_fast(in char* source, char* dest, size_t inputSize)
{
version (GNU) {} else pragma(inline, false);
LZ4_stream state;
if (inputSize < LZ4_64Klimit)
return LZ4_compress_generic(state, source, dest, inputSize, tableType_t.byU16);
else version (D_LP64)
return LZ4_compress_generic(state, source, dest, inputSize, tableType_t.byU32);
else
return LZ4_compress_generic(state, source, dest, inputSize, tableType_t.byPtr);
}
// DMD CODE HERE
static int longest_match(in char* dict, size_t dlen, in char* pattern, size_t plen,
int *pmatchoff, int *pmatchlen)
{
int matchlen = 0;
int matchoff;
int i;
int j;
for (i = 0; i < dlen; i++)
{
if (dict[i] == pattern[0])
{
for (j = 1; 1; j++)
{
if (i + j == dlen || j == plen)
break;
if (dict[i + j] != pattern[j])
break;
}
if (j >= matchlen)
{
matchlen = j;
matchoff = i;
}
}
}
if (matchlen > 1)
{
*pmatchlen = matchlen;
*pmatchoff = matchoff;
return 1; // found a match
}
return 0; // no match
}
/******************************************
* Compress an identifier for name mangling purposes.
* Format is if ASCII, then it's just the char.
* If high bit set, then it's a length/offset pair
*
* Params:
* id = string to compress
* idlen = length of id
* plen = where to store length of compressed result
* Returns:
* malloc'd compressed 0-terminated identifier
*/
char* id_compress(in char* id, size_t idlen, scope size_t* plen)
{
import core.stdc.stdlib;
int count = 0;
char* p = cast(char*)malloc(idlen + 1);
for (size_t i = 0; i < idlen; i++)
{
int matchoff;
int matchlen;
size_t j = 0;
if (i > 1023)
j = i - 1023;
if (longest_match(id + j, i - j, id + i, idlen - i, &matchoff, &matchlen))
{
matchoff += j;
size_t off = i - matchoff;
//printf("matchoff = %3d, matchlen = %2d, off = %d\n", matchoff, matchlen, off);
assert(off >= matchlen);
if (off <= 8 && matchlen <= 8)
{
p[count] = cast(char)(0xC0 | ((off - 1) << 3) | (matchlen - 1));
count++;
i += matchlen - 1;
continue;
}
else if (matchlen > 2 && off < 1024)
{
if (matchlen >= 1024)
matchlen = 1023; // longest representable match
p[count + 0] = 0x80 | ((matchlen >> 4) & 0x38) | ((off >> 7) & 7);
p[count + 1] = cast(char)(0x80 | matchlen);
p[count + 2] = cast(char)(0x80 | off);
count += 3;
i += matchlen - 1;
continue;
}
}
p[count] = id[i];
count++;
}
p[count] = 0;
//printf("old size = %d, new size = %d\n", idlen, count);
*plen = count;
return p;
}