> and also why are you looping 25 + 8 + 8 times in Process() function
It looks like the code computes 4 iterations (clockings) of A5/1 in each loop
cycle. The tables were generated with 100 dummy clockings (25*4 = 100), and
after that you need 64 bits of keystream ((8+8)*4 = 64).
> Can you take some time out and let me know how you are using these masks for
> keysearching
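This is an optimization that computes 4 iterations in one cycle: the mask
table maps the next 4 clocking bits of each of the three registers to
per-register clock masks for those 4 steps.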
It would be interesting to test whether it makes sense on current SIMD
hardware, or if bitslicing is faster.
On 7.4.2016 14:05, Ajay Fuloria wrote:
> Hi!!
>
> I went through the kraken code and I must commend you for your work. I was
> trying to understand the code so that I can make use of it.
>
> I am having trouble understanding the CalcTables() function in A5cpu.cpp
> ... You create a mask lookup in this function. Can you take some time out and
> let me know how you are using these masks for keysearching and also why are
> you looping 25 + 8 + 8 times in Process() function.
>
> Thanks and regards,
>
> Ajay
>
>
> For your quick reference the code of these two functions is as below :
>
>
> void A5Cpu::CalcTables(void)
> {
> /* Calculate clocking table */
> for(int i=0; i< 16 ; i++) {
> for(int j=0; j< 16 ; j++) {
> for(int k=0; k< 16 ; k++) {
> /* Copy input */
> int m1 = i;
> int m2 = j;
> int m3 = k;
> /* Generate masks */
> int cm1 = 0;
> int cm2 = 0;
> int cm3 = 0;
> /* Counter R2 */
> int r2count = 0;
> for (int l = 0; l < 4 ; l++ ) {
> cm1 = cm1 << 1;
> cm2 = cm2 << 1;
> cm3 = cm3 << 1;
> int maj = ((m1>>3)+(m2>>3)+(m3>>3))>>1;
> if ((m1>>3)==maj) {
> m1 = (m1<<1)&0x0f;
> cm1 |= 0x01;
> }
> if ((m2>>3)==maj) {
> m2 = (m2<<1)&0x0f;
> cm2 |= 0x01;
> r2count++;
> }
> if ((m3>>3)==maj) {
> m3 = (m3<<1)&0x0f;
> cm3 |= 0x01;
> }
> }
> // printf( "%x %x %x -> %x:%x:%x\n", i,j,k, cm1, cm2, cm3);
> int index = i*16*16+j*16+k;
> mClockMask[index] = (r2count<<12) | (cm1<<8) | (cm2<<4) | cm3;
> }
> }
> }
>
> /* Calculate 111000 + clock mask table */
> for (int i=0; i < 64 ; i++ ) {
> for(int j=0; j<16; j++) {
> int count = PopcountNibble(j);
> int feedback = 0;
> int data = i;
> for (int k=0; k<count; k++) {
> feedback = feedback << 1;
> int v = (data>>5) ^ (data>>4) ^ (data>>3);
> data = data << 1;
> feedback ^= (v&0x01);
> }
> data = i;
> int mask = j;
> int output = 0;
> for (int k=0; k<4; k++) {
> output = (output<<1) ^ ((data>>5)&0x01);
> if (mask&0x08) {
> data = data << 1;
> }
> mask = mask << 1;
> }
> int index = i * 16 + j;
> mTable6bit[index] = (feedback<<4) | output;
> // printf("%02x:%x -> %x %x\n", i,j,feedback, output);
> }
> }
>
> /* Calculate 11000 + clock mask table */
> for (int i=0; i < 32 ; i++ ) {
> for(int j=0; j<16; j++) {
> int count = PopcountNibble(j);
> int feedback = 0;
> int data = i;
> for (int k=0; k<count; k++) {
> feedback = feedback << 1;
> int v = (data>>4) ^ (data>>3);
> data = data << 1;
> feedback ^= (v&0x01);
> }
> data = i;
> int mask = j;
> int output = 0;
> for (int k=0; k<4; k++) {
> output = (output<<1) ^ ((data>>4)&0x01);
> if (mask&0x08) {
> data = data << 1;
> }
> mask = mask << 1;
> }
> int index = i * 16 + j;
> mTable5bit[index] = (feedback<<4) | output;
> // printf("%02x:%x -> %x %x\n", i,j,feedback, output);
> }
> }
>
> /* Calculate 1000 + clock mask table */
> for (int i=0; i < 16 ; i++ ) {
> for(int j=0; j<16; j++) {
> int count = PopcountNibble(j);
> int feedback = 0;
> int data = i;
> for (int k=0; k<count; k++) {
> feedback = feedback << 1;
> int v = (data>>3);
> data = data << 1;
> feedback ^= (v&0x01);
> }
> int index = i * 16 + j;
> mTable4bit[index] = (count<<4)|feedback;
> // printf("%02x:%x -> %x\n", i,j,feedback );
> }
> }
> }
>
>
>
> ---------------------------------------------------------
>
>
> void A5Cpu::Process(void)
> {
> bool active = false;
> struct timeval tStart;
> struct timeval tEnd;
>
> uint64_t start_point;
> uint64_t target;
> uint64_t start_point_r;
> int32_t start_round;
> int32_t stop_round;
> uint32_t advance;
> const uint32_t* RFtable;
> void* context;
>
> for(;;) {
> if (!mRunning) break;
>
> /* Get input */
> sem_wait(&mMutex);
> if (mInputStart.size()) {
> start_point = mInputStart.front();
> mInputStart.pop_front();
> target = mInputTarget.front();
> mInputTarget.pop_front();
> start_point_r = ReverseBits(start_point);
> start_round = mInputRound.front();
> mInputRound.pop_front();
> stop_round = mInputRoundStop.front();
> mInputRoundStop.pop_front();
> advance = mInputAdvance.front();
> mInputAdvance.pop_front();
> context = mInputContext.front();
> mInputContext.pop_front();
> map< uint32_t, class Advance* >::iterator it = mAdvances.find(advance);
> if (it==mAdvances.end()) {
> class Advance* adv = new Advance(advance, mMaxRound);
> mAdvances[advance] = adv;
> RFtable = adv->getRFtable();
> } else {
> RFtable = (*it).second->getRFtable();
> }
> active = true;
> // printf("Insert\n");
> }
> sem_post(&mMutex);
>
> if (!active) {
> /* Don't use CPU while idle */
> usleep(250);
> continue;
> }
>
> gettimeofday( &tStart, NULL );
> /* Do something */
> unsigned int out_hi = start_point_r>>32;
> unsigned int out_lo = start_point_r;
>
> unsigned int target_lo = target;
> unsigned int target_hi = target >> 32;
>
> unsigned int last_key_lo;
> unsigned int last_key_hi;
>
> bool keysearch = (target != 0ULL);
>
> for (int round=start_round; round < stop_round; ) {
> out_lo = out_lo ^ RFtable[2*round];////not convibced
> out_hi = out_hi ^ RFtable[2*round+1];
>
> if ((out_hi>>mCondition)==0) {// check for distinguished points else
> new round
> // uint64_t res = (((uint64_t)out_hi)<<32)|out_lo;
> // res = ReverseBits(res);
> // printf("New round %i %016llx %08x:%08x\n", round, res, out_hi,
> out_lo);
> round++;
> if (round>=stop_round) break;
> }
>
> unsigned int lfsr1 = out_lo;
> unsigned int lfsr2 = (out_hi << 13) | (out_lo >> 19);
> unsigned int lfsr3 = out_hi >> 9;
>
> last_key_hi = out_hi;
> last_key_lo =out_lo;
>
> for (int i=0; i<25 ; i++) {
> int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) |
> ((lfsr3>>7)&0xf);
> int masks = mClockMask[clocks];
>
> /* lfsr1 */
> unsigned int tmask = (masks>>8)&0x0f;
> unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
> unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
> lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
>
> /* lfsr2 */
> tmask = (masks>>4)&0x0f;
> tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
> out_hi = out_hi ^ (tval&0x0f);
> lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);
>
> /* lfsr3 */
> tmask = masks & 0x0f;
> tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
> tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
> lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
> }
> for (int i=0; i<8 ; i++) {
> int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) |
> ((lfsr3>>7)&0xf);
> int masks = mClockMask[clocks];
>
> /* lfsr1 */
> unsigned int tmask = (masks>>8)&0x0f;
> unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
> out_hi = (out_hi << 4) | (tval&0x0f);
> unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
> lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
>
> /* lfsr2 */
> tmask = (masks>>4)&0x0f;
> tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
> out_hi = out_hi ^ (tval&0x0f);
> lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);
>
> /* lfsr3 */
> tmask = masks & 0x0f;
> tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
> out_hi = out_hi ^ (tval&0x0f);
> tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
> lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
> }
> for (int i=0; i<8 ; i++) {
> int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) |
> ((lfsr3>>7)&0xf);
> int masks = mClockMask[clocks];
>
> /* lfsr1 */
> unsigned int tmask = (masks>>8)&0x0f;
> unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
> out_lo = (out_lo << 4) | (tval&0x0f);
> unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
> lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
>
> /* lfsr2 */
> tmask = (masks>>4)&0x0f;
> tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
> out_lo = out_lo ^ (tval&0x0f);
> lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);
>
> /* lfsr3 */
> tmask = masks & 0x0f;
> tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
> out_lo = out_lo ^ (tval&0x0f);
> tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
> lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
> }
> if (keysearch&&(target_hi==out_hi)&&(target_lo==out_lo)) {
> /* report key as finishing state */
> out_hi = last_key_hi;
> out_lo = last_key_lo;
> start_round = -1;
> break;
> }
> }
>
>
> /////////////////////////////
> gettimeofday( &tEnd, NULL );
> unsigned int uSecs = 1000000 * (tEnd.tv_sec - tStart.tv_sec);
> uSecs += (tEnd.tv_usec - tStart.tv_usec);
>
> // printf("Completed in %i ms\n", uSecs/1000);
>
> /* Report completed chains */
> sem_wait(&mMutex);
>
> uint64_t res = (((uint64_t)out_hi)<<32)|out_lo;
> res = ReverseBits(res);
> mOutput.push( pair<uint64_t,uint64_t>(start_point,res) );
> mOutputStartRound.push( start_round );
> mOutputContext.push( context );
> active = false;
>
> sem_post(&mMutex);
>
> }
> }
>
>
>
> --
> Ajay Fuloria
> merlinsignals.blogspot.in <http://merlinsignals.blogspot.in>
> ᐧ
>
>
> _______________________________________________
> A51 mailing list
> [email protected]
> https://lists.srlabs.de/cgi-bin/mailman/listinfo/a51
>
--
Jan Hrach | http://jenda.hrach.eu/
GPG CD98 5440 4372 0C6D 164D A24D F019 2F8E 6527 282E
_______________________________________________
A51 mailing list
[email protected]
https://lists.srlabs.de/cgi-bin/mailman/listinfo/a51