Putting metadata bytes in record data is INCREDIBLY efficient.

The heavily overloaded byte-code scheme that I have used for my last two database systems has, for example, byte codes for:

1. Integers from -10 to 32 (totally arbitrary)
2. Integers of significant length 1 to 8
3. UTF strings from length of 0 to 32 bytes (also arbitrary)
4. UTF strings with encoded lengths from 1 to 4 bytes.

Run-length encoding works very well for squeezing out blanks in char fields, De Witt's stupid benchmark, and very little else.  No rational person would ever use fixed-length character strings for De Witt's benchmarks.

The overloaded byte code scheme was not my idea but a suggestion from a very, very Vulcan follower.  Unfortunately, that was too many companies back and I no longer have his name.  It was a superb suggestion. I'll drop in the codes that I use for Amorphous.

National character sets belong in client side conversions and, indirectly, in collations.  We're in an era where national boundaries have no part of data (or, unfortunately, armies).


On 6/9/2022 8:36 AM, Adriano dos Santos Fernandes wrote:
On 09/06/2022 09:29, Dimitry Sibiryakov wrote:
Adriano dos Santos Fernandes wrote 09.06.2022 14:16:
What do you think and are there any active work in this regard?
   Using of record encoding instead of record compressing was suggested
years ago by Ann and Jim.
Yes, but, suggesting things and doing no action during more than 10
years does not make users happy.


   Self-descriptive record format which makes RDB$FORMATS obsolete and
solve problems with garbage collection etc was suggested by me (for
replication block buffer in the first place but storage can use it as
well).

Putting metadata bytes in each record is not efficient.


Adriano


Firebird-Devel mailing list, web interface
at https://lists.sourceforge.net/lists/listinfo/firebird-devel
--
Jim Starkey, AmorphousDB, LLC
// Copyright (c) 2014 by James A. Starkey.  All rights reserved.


#pragma once

#include "Stream.h"
#include "StringClass.h"
#include "UUId.h"
#include "Date.h"
#include "Timestamp.h"

class Opaque;

// Value type tags, numbered consecutively from 1 to 16.
// NOTE(review): presumably these are the values stored in
// EncodedStream::valueType after a decode -- confirm against the .cpp.
static const int typeNull = 1;
static const int typeEnd = 2;
static const int typeBoolean = 3;
static const int typeClassId = 4;
static const int typeAttributeId = 5;
static const int typeObjectId = 6;
static const int typeList = 7;
static const int typeInt = 8;
static const int typeLong = 9;
static const int typeDouble = 10;
static const int typeDate = 11;
static const int typeString = 12;
static const int typeOpaque = 13;
static const int typeUUId = 14;
static const int typeTimestamp = 15;
static const int typeScaledInt = 16;
        

// Byte codes of the encoded stream.  Each family below occupies a
// consecutive run of values; the whole table spans 1..170 with the
// values 127..130 left unassigned (see the date family).

// --- Null / end markers ---
static const int codeNull = 1;
static const int codeEnd = 2;

// --- Booleans: one code per value ---
static const int codeBoolean0 = 3;
static const int codeBoolean1 = 4;

// --- Class codes 0..16, followed by four "count" codes ---
// NOTE(review): presumably codeClassN carries the class number in the code
// itself and codeClassCountN is followed by an N-byte value -- confirm.
static const int codeClass0 = 5;
static const int codeClass1 = 6;
static const int codeClass2 = 7;
static const int codeClass3 = 8;
static const int codeClass4 = 9;
static const int codeClass5 = 10;
static const int codeClass6 = 11;
static const int codeClass7 = 12;
static const int codeClass8 = 13;
static const int codeClass9 = 14;
static const int codeClass10 = 15;
static const int codeClass11 = 16;
static const int codeClass12 = 17;
static const int codeClass13 = 18;
static const int codeClass14 = 19;
static const int codeClass15 = 20;
static const int codeClass16 = 21;
static const int codeClassCount1 = 22;
static const int codeClassCount2 = 23;
static const int codeClassCount3 = 24;
static const int codeClassCount4 = 25;

// --- Small integers -10..32: the value is carried by the code itself ---
static const int codeIntMinus10 = 26;
static const int codeIntMinus9 = 27;
static const int codeIntMinus8 = 28;
static const int codeIntMinus7 = 29;
static const int codeIntMinus6 = 30;
static const int codeIntMinus5 = 31;
static const int codeIntMinus4 = 32;
static const int codeIntMinus3 = 33;
static const int codeIntMinus2 = 34;
static const int codeIntMinus1 = 35;
static const int codeInt0 = 36;
static const int codeInt1 = 37;
static const int codeInt2 = 38;
static const int codeInt3 = 39;
static const int codeInt4 = 40;
static const int codeInt5 = 41;
static const int codeInt6 = 42;
static const int codeInt7 = 43;
static const int codeInt8 = 44;
static const int codeInt9 = 45;
static const int codeInt10 = 46;
static const int codeInt11 = 47;
static const int codeInt12 = 48;
static const int codeInt13 = 49;
static const int codeInt14 = 50;
static const int codeInt15 = 51;
static const int codeInt16 = 52;
static const int codeInt17 = 53;
static const int codeInt18 = 54;
static const int codeInt19 = 55;
static const int codeInt20 = 56;
static const int codeInt21 = 57;
static const int codeInt22 = 58;
static const int codeInt23 = 59;
static const int codeInt24 = 60;
static const int codeInt25 = 61;
static const int codeInt26 = 62;
static const int codeInt27 = 63;
static const int codeInt28 = 64;
static const int codeInt29 = 65;
static const int codeInt30 = 66;
static const int codeInt31 = 67;
static const int codeInt32 = 68;

// --- Integers of significant length 1..8 bytes ---
static const int codeIntLen1 = 69;
static const int codeIntLen2 = 70;
static const int codeIntLen3 = 71;
static const int codeIntLen4 = 72;
static const int codeIntLen5 = 73;
static const int codeIntLen6 = 74;
static const int codeIntLen7 = 75;
static const int codeIntLen8 = 76;

// --- Scaled integers, lengths 1..8 ---
static const int codeScaledIntLen1 = 77;
static const int codeScaledIntLen2 = 78;
static const int codeScaledIntLen3 = 79;
static const int codeScaledIntLen4 = 80;
static const int codeScaledIntLen5 = 81;
static const int codeScaledIntLen6 = 82;
static const int codeScaledIntLen7 = 83;
static const int codeScaledIntLen8 = 84;

// --- UTF-8 strings of length 0..32 bytes: the length is carried by the code ---
static const int codeUtf8Len0 = 85;
static const int codeUtf8Len1 = 86;
static const int codeUtf8Len2 = 87;
static const int codeUtf8Len3 = 88;
static const int codeUtf8Len4 = 89;
static const int codeUtf8Len5 = 90;
static const int codeUtf8Len6 = 91;
static const int codeUtf8Len7 = 92;
static const int codeUtf8Len8 = 93;
static const int codeUtf8Len9 = 94;
static const int codeUtf8Len10 = 95;
static const int codeUtf8Len11 = 96;
static const int codeUtf8Len12 = 97;
static const int codeUtf8Len13 = 98;
static const int codeUtf8Len14 = 99;
static const int codeUtf8Len15 = 100;
static const int codeUtf8Len16 = 101;
static const int codeUtf8Len17 = 102;
static const int codeUtf8Len18 = 103;
static const int codeUtf8Len19 = 104;
static const int codeUtf8Len20 = 105;
static const int codeUtf8Len21 = 106;
static const int codeUtf8Len22 = 107;
static const int codeUtf8Len23 = 108;
static const int codeUtf8Len24 = 109;
static const int codeUtf8Len25 = 110;
static const int codeUtf8Len26 = 111;
static const int codeUtf8Len27 = 112;
static const int codeUtf8Len28 = 113;
static const int codeUtf8Len29 = 114;
static const int codeUtf8Len30 = 115;
static const int codeUtf8Len31 = 116;
static const int codeUtf8Len32 = 117;

// --- UTF-8 strings whose length is itself encoded in 1..4 bytes ---
static const int codeUtf8Count1 = 118;
static const int codeUtf8Count2 = 119;
static const int codeUtf8Count3 = 120;
static const int codeUtf8Count4 = 121;

// --- Dates, lengths 0..4 (lengths 5..8 are disabled; 127..130 stay unused) ---
static const int codeDateLen0 = 122;
static const int codeDateLen1 = 123;
static const int codeDateLen2 = 124;
static const int codeDateLen3 = 125;
static const int codeDateLen4 = 126;
//static const int codeDateLen5 = 127;
//static const int codeDateLen6 = 128;
//static const int codeDateLen7 = 129;
//static const int codeDateLen8 = 130;

// --- Doubles, codes 0..8 ---
// NOTE(review): the suffix presumably is the number of significant bytes
// that follow -- confirm against encodeDouble().
static const int codeDouble0 = 131;
static const int codeDouble1 = 132;
static const int codeDouble2 = 133;
static const int codeDouble3 = 134;
static const int codeDouble4 = 135;
static const int codeDouble5 = 136;
static const int codeDouble6 = 137;
static const int codeDouble7 = 138;
static const int codeDouble8 = 139;

// --- Opaque data, lengths 0..16, followed by four "count" codes ---
static const int codeOpaqueLen0 = 140;
static const int codeOpaqueLen1 = 141;
static const int codeOpaqueLen2 = 142;
static const int codeOpaqueLen3 = 143;
static const int codeOpaqueLen4 = 144;
static const int codeOpaqueLen5 = 145;
static const int codeOpaqueLen6 = 146;
static const int codeOpaqueLen7 = 147;
static const int codeOpaqueLen8 = 148;
static const int codeOpaqueLen9 = 149;
static const int codeOpaqueLen10 = 150;
static const int codeOpaqueLen11 = 151;
static const int codeOpaqueLen12 = 152;
static const int codeOpaqueLen13 = 153;
static const int codeOpaqueLen14 = 154;
static const int codeOpaqueLen15 = 155;
static const int codeOpaqueLen16 = 156;
static const int codeOpaqueCount1 = 157;
static const int codeOpaqueCount2 = 158;
static const int codeOpaqueCount3 = 159;
static const int codeOpaqueCount4 = 160;

// --- UUID: a single code ---
static const int codeUuid = 161;

// --- Timestamps, lengths 0..8 ---
static const int codeTimestampLen0 = 162;
static const int codeTimestampLen1 = 163;
static const int codeTimestampLen2 = 164;
static const int codeTimestampLen3 = 165;
static const int codeTimestampLen4 = 166;
static const int codeTimestampLen5 = 167;
static const int codeTimestampLen6 = 168;
static const int codeTimestampLen7 = 169;
static const int codeTimestampLen8 = 170;
        
        

// A Stream specialization that declares encode*/get* operations for typed
// values (integers, strings, opaque data, UUIDs, dates, timestamps, doubles).
// Only getByte() is defined here; every other member function is defined
// outside this header.
// NOTE(review): presumably the encoders emit the code* byte codes declared
// in this header and decode() fills the public value fields below --
// confirm against the implementation file.
class EncodedStream : public Stream
{
public:
        EncodedStream(void);
        virtual ~EncodedStream(void);

        // Decoding and stream-management helpers.
        void            decode(void);
        int                     byteCount(int64_t n);
        void            putNumber(int count, int64_t number);
        void            decodeError(const char* expectedType);
        int64_t         decodeInteger(int count);
        void            setEncodedStream(EncodedStream* stream);
        void            setData(int length, const UCHAR* data);
        void            traceFn(const char* format, ...);
        size_t          getPosition();
        bool            isEOF();
        void            prepare();

        // Encoders, one (or one overload set) per value kind.
        void            encodeNull();
        void            encodeEnd();
        void            encodeBoolean(bool value);
        void            encodeInt(int64_t value);
//      void            encodeString(String value);
        void            encodeString(const char* string);
        void            encodeString(int length, const char* string);
        void            encodeUUId(UUId* uuid);
        void            encodeOpaque(int length, const UCHAR* data);
        void            encodeOpaque(Stream* source);
        void            encodeOpaque(Opaque* source);
        void            encodeScaledInt(int scale, int64_t number);
        void            encodeDate(Date value);
        void            encodeTimestamp(Timestamp timestamp);
        void            encodeDouble(double value);

        // Typed getters.
        int16_t         getInt16(void);
        int32_t         getInt32(void);
        int64_t         getInt64(void);
        UUId            getUUId(void);

        // Opaque-data getters in several shapes (by value, into a Stream,
        // length only, into a caller buffer, by pointer).
        String          getOpaque(void);
        void            getOpaque(Stream* opaque);
        int                     getOpaqueLength();
        void            getOpaqueData(void* buffer);
        const UCHAR* getOpaquePointer();

        // Named getEncodedString because getString conflicts with
        // Stream::getString.  (This remark used to be a trailing comment that
        // had been wrapped onto a second, uncommented line, which broke
        // compilation of the class.)
        String          getEncodedString(void);
        int64_t         getScaledInt(int* scale);
        bool            getBoolean(void);
        double          getDouble(void);
        Date            getDate(void);
        Timestamp       getTimestamp(void);

        // Overridable hooks.
        virtual void    readData(void);
        virtual void    readData(int length, void* buffer);
        virtual void    decodeUnimplemented(int code);
        virtual void    encodeUnimplemented(const char* text);

        // Returns the byte at readPtr and advances readPtr by one.  When
        // readPtr has reached readEnd, readData() is called first.
        // NOTE(review): this relies on readData() leaving readPtr pointing at
        // readable data (or not returning) -- confirm in the implementation.
        inline UCHAR getByte()
                {
                if (readPtr >= readEnd)
                        readData();

                return *readPtr++;
                }

        // Public state.
        // NOTE(review): presumably valueType holds one of the type* tags and
        // code the most recently read byte code -- confirm.
        int                     valueType;
        int                             opaqueLength;
        int64_t                 number;
        String                  string;
        bool                    boolean;
        bool                    trace;
        int                             code;
        int                             scale;
        size_t                  offset;
        UUId                    uuid;
        double                  dbl;
        Date                    date;

        // Read cursor: getByte() reads at readPtr and refills at readEnd.
        const UCHAR*    readPtr;
        const UCHAR*    readEnd;
        const UCHAR*    readStart;
};

Firebird-Devel mailing list, web interface at 
https://lists.sourceforge.net/lists/listinfo/firebird-devel

Reply via email to