CODE TRANSFORMATION AND FINITE AUTOMATONS
                  -----------------------------------------


   There are such interesting object as finite automaton.
   This automaton has some internal states (just such variables),
   so when they changed, some action is performed.
   So, there is set of actions, and each action is performed
   when some automaton's state is changed to some specified value,
   and there are matrix(es), which defines state changes.

   Here is simple task:
   to convert string like "1,3-7,9-100,105" into binary form.
   Lets consider implementation, based on finite automaton.
   Lets introduce single internal state S, and its values will be:

   0 - before first (before '-') number
   1 - inside of first number
   2 - before second (after '-') number
   3 - inside of second number
   4 - end of string (after last char)
   5 - syntax error

   so, program will look as follows:

   c  - pointer to current character
   *c - current character's value
   A  - action #

   change   char   A  action:        situation:

   0-->1    0..9   1  l=*c-'0';      new pair, got digit
   0-->5   other                     new pair, got NOT a digit
   1-->0     ,     2  store(l,l);    single number, ends with ','
   1-->1    0..9   3  l=l*10+*c-'0'  adding new digit to first number
   1-->2     -                       two numbers, 1st one ends with '-'
   1-->4    eoln   2  store(l,l);    single number, ends with eoln
   1-->5   other                     unavailable char within first number
   2-->3    0..9   4  h=*c-'0';      two numbers, got digit after '-'
   2-->5   other                     two numbers, got NOT a digit after '-'
   3-->0     ,     5  store(l,h);    two numbers, ends with ','
   3-->3    0..9   6  h=h*10+*c-'0'; adding new digit to second number
   3-->4    eoln   5  store(l,h);    two number, ends with eoln
   3-->5   other                     unavailable char within first number
   4-->            7  exit();        eoln, all ok
   5-->            8  error();       syntax error

   so, matrix which describes state changes and corresponding actions,
   will look as follows:

                      next state:
         c         0  1  2  3  4  5
         u s           action #:
         r t   0   -  1  -  -  -  2
         r a   1   3  4  5  -  3  2
         e t   2   -  -  -  6  -  2
         n e   3   7  -  -  8  7  2
         t     4   -  -  -  -  -  -
               5   -  -  -  -  -  -

   Simple source in C will look as follows:

    int S = 0;
    char*c = "1,3-7,9-100,105";

    for(;;)
    {
      switch(S)
      {
        case 0:
                if ( isdigit(*c)  ) { S=1; l=*c-'0';      } else
                                    { S=5;                };
                break;
        case 1:
                if ( *c == ','    ) { S=0; store(l,l);    } else
                if ( isdigit(*c)  ) { S=1; l=l*10+*c-'0'; } else
                if ( *c == '-'    ) { S=2;                } else
                if ( *c == 0      ) { S=4; store(l,l);    } else
                                    { S=5;                };
                break;
        case 2:
                if ( isdigit(*c)  ) { S=3; h=*c-'0';      } else
                                    { S=5;                };
                break;
        case 3:
                if ( *c == ','    ) { S=0; store(l,h);    } else
                if ( isdigit(*c)  ) { S=3; h=h*10+*c-'0'; } else
                if ( *c == 0      ) { S=4; store(l,h);    } else
                                    { S=5;                };
                break;
        case 4:
                exit();
                break;
        case 5:
                error();
                break;
      }
      c++;
    }

   Now, if we will make source listed above a bit more low-level,
   and remove state-variables, we will get the following:

x:
  x0:
                if ( isdigit(*c)  ) { l=*c-'0';      c++; goto x1; } else
                                    {                c++; goto x5; };
  x1:
                if ( *c == ','    ) { store(l,l);    c++; goto x0; } else
                if ( isdigit(*c)  ) { l=l*10+*c-'0'; c++; goto x1; } else
                if ( *c == '-'    ) {                     goto x2; } else
                if ( *c == 0      ) { store(l,l);    c++; goto x4; } else
                                    {                     goto x5; };
  x2:
                if ( isdigit(*c)  ) { h=*c-'0';      c++; goto x3; } else
                                    {                     goto x5; };
  x3:
                if ( *c == ','    ) { store(l,h);    c++; goto x0; } else
                if ( isdigit(*c)  ) { h=h*10+*c-'0'; c++; goto x3; } else
                if ( *c == 0      ) { store(l,h);    c++; goto x4; } else
                                    {                     goto x5; };
  x4:           exit();
  x5:           error();
        goto x

   Now, if somebody will try to reverse this code into classical C
   constructions, such as for, while, if-else, there will remain many
   redundand goto(jmp) commands, and if number of them will be big enough,
   it will be very hard to understand what this code does.

   Execution graph for such code will differ from execution graph
   for classical code by larger amount of crossed links.

   To reverse this code into its source form,
   you should select constant code blocks, enum them, then introduce
   states, and only after that it will be possible to continue
   understanding this code. But this looks as a hard task, because
   while optimization, original code blocks can be divided into parts,
   mixed and merged.

   Now, lets consider a bit more complicated source:

    int T[256] =
    {
      4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      //                      , -      0 1 2 3 4 5 6 7 8 9
      0,0,0,0,0,0,0,0,0,0,0,0,2,3,0,0, 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    };

    for(;;)
    {
      int A;
      switch( S )
      {
        case 0:
                switch( T[*c] )
                {
                  case 1:  { S=1; A=1; }; break;
                  default: { S=5; A=2; }; break;
                }
                break;
        case 1:
                switch( T[*c] )
                {
                  case 2:  { S=0; A=2; }; break;
                  case 1:  { S=1; A=3; }; break;
                  case 3:  { S=2;      }; break;
                  case 4:  { S=4; A=2; }; break;
                  case 0:  { S=5;      }; break;
                }
                break;
        case 2:
                switch( T[*c] )
                {
                  case 1:  { S=3; A=4; }; break;
                  default: { S=5;      }; break;
                }
                break;
        case 3:
                switch( T[*c] )
                {
                  case 2:  { S=0; A=5; }; break;
                  case 1:  { S=3; A=6; }; break;
                  case 4:  { S=4; A=5; }; break;
                  default: { S=5;      }; break;
                }
                break;
        case 4:
                A = 7;
                break;
        case 5:
                A = 8;
                break;
      }
      switch( A )
      {
        case 1: { l=*c-'0';      c++; }; break;
        case 2: { store(l,l);    c++; }; break;
        case 3: { l=l*10+*c-'0'; c++; }; break;
        case 4: { h=*c-'0';      c++; }; break;
        case 5: { store(l,h);    c++; }; break;
        case 6: { h=h*10+*c-'0'; c++; }; break;
        case 7: { exit();             }; break;
        case 8: { error();            }; break;
      }
    }

    As you can see, this sample is written without comparison commands,
    and conditional jmps; everything is based on switch'es which is
    equivalent to tables.

    In such form our program is a cycle, consisting of three parts:
    in first part, external data is analyzed (here it is string of chars),
    and converted into some internal variables (here it is shown
    indirectly using matrix T[]), then state changes are calculated
    (here it is first switch), and at the same time action # is
    calculated (here it is once again first switch and variable A),
    and in third part, some action is performed (here it is 2nd switch).

    And here is interesting moment: now, instead of program, we have
    action #'s (indexes of code blocks),
    and sequence of these indexes is the core of our program.

    Now lets consider the following question: how next action # is generated.
    It is generated depending on current automaton state values and some
    other data.

    This means that we can use tables, which will tell us which state is
    next, and which action should be executed on such state change.

    Such tables can be converted into a function.

    Requirements for this function are simple:
    argument of this function is a number, which contains current state values,
    and probably some identifier, and result of this function is a number,
    which contains next state values, probably another identifier,
    and action index.

    As such, our program will just iterate the following cycle:
    1. Call main function, passing state values as argument;
       then extract new state values and action index from the result.
    2. Execute action by given index,
       which will probably change state values too.

    Lets consider the following sample:
    States are register values, from EAX to EDI, and action number
    is an instruction value, padded with NOP's or RETN.

    Then, ALL main part our program will be a ...function.
    Lets initially pass to function:
    EAX = 2, ECX = 3, other regs = 0, command is NOP.
    So, argument will be:
              00000000...00000003000000020000C390
              <--EDI->...<--ECX-><--EAX-><--cmd->
    Lets function result then will be:
              00000000...00000003000000020000C341
              <--EDI->...<--ECX-><--EAX-><--cmd->
    which means same state values, and command = inc ecx.
    So, we execute given command, and ecx state is changed,
    and then we pass into a function this argument:
            00000000...00000004000000020000C341
            <--EDI->...<--ECX-><--EAX-><--cmd->
    and so on.
    For sure, argument and result values can also contain
    unique instruction id, in case when some opcodes are duplicated.

    Interesting fact is that our function can indirectly encode
    many instructions, such as jumps and arithmetic commands.
    The only commands can not be encoded are commands modifying
    memory, working with other external devices and api-calls.

    So, the question appears: how to build such function.

    There can be many variants, from simple tables up to neural networks.

    In the sample given above, it is impossible, for sure,
    because there are many registers, and they can have many
    values, so function will be so big so it will not fit into
    existing computers.

    So, lets consider simplified sample.

    Argument of function is just a current state number.
    Result is WORD.
    High result byte is first opcode byte.
    Low result byte is state number,
    and low state bit is value of ZF flag.
    As such, we can indirectly encode JZ/JNZ commands.
    Also, state number is equal to action index.
    Program will just encrypt/decrypt some asciiz text string.

    Source:

           ACTIONS:           STATES/ACTIONS AND STATE CHANGES:
                                      NZ          ZR
xormsg:                            -1 --> 00             1ST BYTE:
         xor     edx, edx          00 --> 02   01 --> 02    33
cycle1:  lea     esi, msg          02 --> 04   03 --> 04    BE
         mov     ecx, msg_size     04 --> 06   05 --> 06    B9
cycle2:  xor     [esi], dh         06 --> 08   07 --> 08    30
         sub     dh, dl            08 --> 10   09 --> 10    2A
         inc     esi               10 --> 12   11 --> 12    46
         dec     ecx               12 --> 06   13 --> 14    49
         jnz     cycle2
         inc     dl                14 --> 16   15 --> 16    FE
         cmp     dl, 'Z'           16 --> 02   17 --> 18    80
         jne     cycle1
         retn                      18

    Here is a function, defined by arguments and results:

       Argument   Result

        f( -1 ) = 0x3300
        f(0x00) = 0xBE02
        f(0x01) = 0xBE02
        f(0x02) = 0xB904
        f(0x03) = 0xB904
        f(0x04) = 0x3006
        f(0x05) = 0x3006
        f(0x06) = 0x2A08
        f(0x07) = 0x2A08
        f(0x08) = 0x460A
        f(0x09) = 0x460A
        f(0x0A) = 0x490C
        f(0x0B) = 0x490C
        f(0x0C) = 0x3006
        f(0x0D) = 0xFE0E
        f(0x0E) = 0x8010
        f(0x0F) = 0x8010
        f(0x10) = 0xBE02
        f(0x11) = 0x0012

    Now lets generate a function.
    Let it be polynomial.

      f(X) = SUMi( C[i]*X^i ), i=0..18

    Here X is function argument, which, as we decided, is current state,
    and function result contains next state and its first opcode byte.

    Simple program to calculate coefficients is here: see (1).

    So, now we have everything to implement function:
    Here it is:

  float calc(float X)
  {
    float y = 0;
    for(int j=0; j<N; j++)
      y += pow(X,j) * C[j];
    return y;
  }

   Or, in more nice form:

N                       equ     19

go_next_state:          pushf  ; IN/OUT: EBX=current/next state
                        pusha
                        finit
                        fild    dword ptr [esp+4*4]     ; pusha.ebx
                        push    N
                        pop     ecx
                        lea     edx, C_table
                        fldz            ; st(1)
                        fld1            ; st(0)
__c1:                   fld     st(0)
                        fld     tbyte ptr [edx]
                        fmulp
                        faddp   st(2),st(0)
                        fmul    st(0),st(2)
                        add     edx, 10
                        loop    __c1
                        fistp   dword ptr [esp+4*4]     ; pusha.ebx
                        fistp   dword ptr [esp+4*4]     ; pusha.ebx
                        popa
                        popf
                        retn

C_table                 label   tbyte
  dt      4.864199999999999986e+04   ; C[ 0]
  dt      1.052028658321440037e+06   ; C[ 1]
  dt     -2.226893544084362929e+06   ; C[ 2]
  dt      1.054917653361728822e+06   ; C[ 3]
  dt      1.030581898921544049e+06   ; C[ 4]
  dt     -1.641889337850409245e+06   ; C[ 5]
  dt      1.056816179457771135e+06   ; C[ 6]
  dt     -4.226269487577827341e+05   ; C[ 7]
  dt      1.179126264336835917e+05   ; C[ 8]
  dt     -2.418954943314347526e+04   ; C[ 9]
  dt      3.749247680199179089e+03   ; C[10]
  dt     -4.446691826539333110e+02   ; C[11]
  dt      4.044639078866551414e+01   ; C[12]
  dt     -2.801020107772053989e+00   ; C[13]
  dt      1.450610807381378830e-01   ; C[14]
  dt     -5.436999378694686739e-03   ; C[15]
  dt      1.391774067561251339e-04   ; C[16]
  dt     -2.174850123595773034e-06   ; C[17]
  dt      1.563443629925163634e-08   ; C[18]

    Since first opcode byte is encoded within high byte of function result,
    it will be defined in our program as ?:

                        .data

msg                     db      'Hello world!',0
msg_size                equ     $-msg

start:
                        call    xormsg

                        push    0
                        push    offset msg
                        push    offset msg
                        push    0
                        callW   MessageBoxA

                        call    xormsg

                        push    0
                        push    offset msg
                        push    offset msg
                        push    0
                        callW   MessageBoxA

                        push    -1
                        callW   ExitProcess

xormsg:
                        xor     ebx, ebx    ; initial state = -1
                        dec     ebx

                        jmp     begin

cycle:                  pushf               ; copy ZF into bit 0 of EBX
                        push    eax         ;
                        lahf                ;
                        shr     ah, 6 ; ZF  ;
                        and     ah, 1       ;
                        and     bl, 0FEh    ;
                        or      bl, ah      ;
                        pop     eax         ;
                        popf                ;
begin:
                        call    go_next_state ; get next state

                        cmp     bl, 18      ; exit if last state is reached
                        jae     quit

                        push    eax ecx     ; take 1st opcode byte from
                        mov     cl, bh      ; function result, and patch code
                        mov     bh, 0
                        mov     eax, jtab[ebx*4]
                        mov     [eax], cl
                        pop     ecx eax

                        jmp     jtab[ebx*4] ; go to action

quit:                   retn

jtab                    label   dword       ; table of actions
                                            ; index = action # = state #
                        dd      s00,s01
                        dd      s02,s03
                        dd      s04,s05
                        dd      s06,s07
                        dd      s08,s09
                        dd      s10,s11
                        dd      s12,s13
                        dd      s14,s15
                        dd      s16,s17
s00:
s01:                  ; xor     edx, edx
                        db      ?,0D2h
                        jmp     cycle
s02:
s03:                  ; lea     esi, msg
                        db      ?
                        dd      offset msg
                        jmp     cycle
s04:
s05:                  ; mov     ecx, msg_size
                        db      ?
                        dd      msg_size
                        jmp     cycle
s06:
s07:                  ; xor     [esi], dh
                        db      ?,36h
                        jmp     cycle
s08:
s09:                  ; sub     dh, dl
                        db      ?,0F2h
                        jmp     cycle
s10:
s11:                  ; inc     esi
                        db      ?
                        jmp     cycle
s12:
s13:                  ; dec     ecx
                        db      ?
                        jmp     cycle
s14:
s15:                  ; inc     dl
                        db      ?,0C2h
                        jmp     cycle
s16:
s17:                  ; cmp     dl, 'Z'
                        db      ?,0FAh,'Z'
                        jmp     cycle

    Now, to understate logic of this compiled program, you should calculate
    all function results for all state values, then build table of state
    changes, and insert JZ/JNZ into corresponding program places.
    After that, if program is NOT builded using method shown in the
    1st part of this article, i.e. not based on finite automaton conception,
    it will be possible to reverse it into classical C constructions, such as
    if, for, while.

    Now, a bit of theory.

    All these code transformation can be applied to mostly all
    blocks of existing code, i.e. any linear code can be converted
    into finite automaton, and then state change table can be
    converted into a function.

    Lets assume, that variable state of automaton's state is not ZF flag,
    as it show above, but AL register value.
    This means, that to extract all information encoded in our
    function, it is required to iterate 256 variants.
    If this would be AX register, then, for example on checking
    two first bytes of some file for MZ sign, current state will be
    changed into one state in 65534 cases, and into another state
    in 2 cases. The bigger argument size is, the more hardly
    is to iterate it; and in some cases some state changes will be
    lost, and part of code, which, for example, follows MZ-check
    will not be extracted.

    Ideally, such function is a black box, which has some value
    on input, and outputs some other value, but extract all the
    information is impossible.

    Well, enough words has been said, and i'll go drink some beer,
    while you will think about all that crap. I hope, it will help
    you somewhere.

--------------------------------------------------------------------------------

  Appendix (1) - program to calculate polynomial coefficients.

  #include <windows.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <assert.h>
  #include <io.h>
  #include <math.h>
  #pragma hdrstop

  #define float    long double

  int N = 19;

  float X[100] = {-1,0,2,4,6, 8,10,12,14,16,1,3,5,7, 9,11,13,15,17 };
  float Y[100] = {
    0+0x3300,
    2+0xBE00,
    4+0xB900,
    6+0x3000,
    8+0x2A00,
   10+0x4600,
   12+0x4900,
    6+0x3000,
   16+0x8000,
    2+0xBE00,
    2+0xBE00,
    4+0xB900,
    6+0x3000,
    8+0x2A00,
   10+0x4600,
   12+0x4900,
   14+0xFE00,
   16+0x8000,
   18+0x0000 };

  float C[100];

  float calc(float X)
  {
    float y = 0;
    for(int j=0; j<N; j++)
      y += pow(X,j) * C[j];
    return y;
  }

  void init()
  {
    float* Z = new float[ N*(N+1) ];
    assert(Z);

  #define Zyx(y,x)       Z[y*(N+1)+x]

    for(int y=0; y<N; y++)
    {
      for(int x=0; x<N; x++)
        Zyx(y,x) = pow(X[y], x);
      Zyx(y,N) = Y[y];
    }
    for(int n=0; n<N-1; n++)
    for(int y=n+1; y<N; y++)
    {
      float t = Zyx(y,n) / Zyx(n,n);
      for(int x=0; x<=N; x++)
        Zyx(y,x) -= Zyx(n,x) * t;
    }
    for(int n=N-1; n>=0; n--)
    {
      float s = 0;
      for(int t=n+1; t<=N-1; t++)
        s += Zyx(n,t) * C[t];
      C[n] = (Zyx(n,N) - s) / Zyx(n,n);
    }

    delete Z;

    for(int i=0; i<N; i++)
      assert(abs(calc(X[i]) - Y[i]) < 0.0001);
  }

  void main()
  {
    init();

    for(int n=0; n<N; n++)
      printf("f(%5.2Lf) (%02X) = %5.2Lf (%04X)\n", X[n],
      (int)ceil(X[n]), Y[n], (int)ceil(Y[n]));
    printf("f(X) = SUMi( C[i]*X^i ), i=0..%i\n", N-1);
    for(int n=0; n<N; n++)
      printf("dt %30.19Le  ; C[%2i]\n", C[n], n);

  }

--------------------------------------------------------------------------------

[linuxkernelnewbies] CODE TRANSFORMATION AND FINITE AUTOMATONS

Reply via email to