http://www.daniweb.com/forums/thread195386.html#

How many registers can I use on an Intel i7?

 
Up Vote 0 Down Vote
  #1
Jun 2nd, 2009
Hi,
How many 64-bit registers can I use inside an Intel i7 CPU for storage purposes, so that I can feed them into the XMM registers later? I currently use only XMM0-15, MM0-7 and R8-15. I know I can use RAX, RBX, RCX, RDX and the eight registers inside the FPU (ST0-ST7), but what others can I use? Can I use the stack registers? Thanks in advance.

I have attached my application code in case it is needed.
  1. ///////////////////////////////////////////
  2. pipe_line_math.h
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <time.h>
  6.  
  7. void pipe_mult_ushort(ushort *data,ushort *rands) // in place: data[k] = low 16 bits of data[k]*rands[k] for k = 0..95
  8. { // both pointers must be 16-byte aligned (movdqa faults otherwise) and address at least 0xC0 bytes (96 ushorts); x86-64 only
  9. __asm__ __volatile__(".intel_syntax noprefix\n\t" // NOTE(review): never switched back to AT&T syntax; presumably built with -masm=intel -- confirm
  10. //// in this section we pull as much data as we can into the CPU
  11. //// to minimize the DRAM delay and store it where we can
  12. //// addresses use the full 64-bit rdi/rsi: the 32-bit edi/esi forms drop the upper half of the pointer
  13. "movdqa xmm0,[rdi]\n\t" // load xmm0 & xmm1
  14. "movdqa xmm1,[rsi]\n\t"
  15.  
  16. "movdqa xmm2,0x10[rdi]\n\t" // load xmm2 & xmm3
  17. "movdqa xmm3,0x10[rsi]\n\t"
  18.  
  19. "movdqa xmm4,0x20[rdi]\n\t" // load xmm4 & xmm5
  20. "movdqa xmm5,0x20[rsi]\n\t"
  21.  
  22. "movdqa xmm6,0x30[rdi]\n\t" // load xmm6 & xmm7
  23. "movdqa xmm7,0x30[rsi]\n\t"
  24.  
  25. "movdqa xmm8,0x40[rdi]\n\t" // load xmm8 & xmm9
  26. "movdqa xmm9,0x40[rsi]\n\t"
  27.  
  28. "movdqa xmm10,0x50[rdi]\n\t" // load xmm10 & xmm11
  29. "movdqa xmm11,0x50[rsi]\n\t"
  30.  
  31. "movdqa xmm12,0x60[rdi]\n\t" // load xmm12 & xmm13
  32. "movdqa xmm13,0x60[rsi]\n\t"
  33.  
  34. "movdqa xmm14,0x70[rdi]\n\t" // load xmm14 & xmm15
  35. "movdqa xmm15,0x70[rsi]\n\t"
  36.  
  37. "movq mm0,0x80[rdi]\n\t" // load mm0-mm7: even registers from data, odd registers from rands
  38. "movq mm1,0x80[rsi]\n\t"
  39. "movq mm2,0x88[rdi]\n\t"
  40. "movq mm3,0x88[rsi]\n\t"
  41. "movq mm4,0x90[rdi]\n\t"
  42. "movq mm5,0x90[rsi]\n\t"
  43. "movq mm6,0x98[rdi]\n\t"
  44. "movq mm7,0x98[rsi]\n\t"
  45.  
  46. "movq r8,0xA0[rdi]\n\t" // park four more operand pairs in r8-r15 until the mm registers are free
  47. "movq r9,0xA0[rsi]\n\t"
  48. "movq r10,0xA8[rdi]\n\t"
  49. "movq r11,0xA8[rsi]\n\t"
  50. "movq r12,0xB0[rdi]\n\t"
  51. "movq r13,0xB0[rsi]\n\t"
  52. "movq r14,0xB8[rdi]\n\t"
  53. "movq r15,0xB8[rsi]\n\t"
  54.  
  55. // all available registers where data can be stored are now filled, proceed with calcs
  56. // calc xmms first
  57. "pmullw xmm0,xmm1\n\t" // calc xmm0
  58. "pmullw xmm2,xmm3\n\t" // calc xmm2
  59. "pmullw xmm4,xmm5\n\t" // calc xmm4
  60. "pmullw xmm6,xmm7\n\t" // calc xmm6
  61. "pmullw xmm8,xmm9\n\t" // calc xmm8
  62. "pmullw xmm10,xmm11\n\t" // calc xmm10
  63. "pmullw xmm12,xmm13\n\t" // calc xmm12
  64. "pmullw xmm14,xmm15\n\t" // calc xmm14
  65.  
  66. // calc mms second
  67. "pmullw mm0,mm1\n\t" // calc mm0
  68. "pmullw mm2,mm3\n\t" // calc mm2
  69. "pmullw mm4,mm5\n\t" // calc mm4
  70. "pmullw mm6,mm7\n\t" // calc mm6
  71.  
  72. // send xmm values to memory
  73. "movdqa [rdi],xmm0\n\t" // xmm0 -> memory
  74. "movdqa 0x10[rdi],xmm2\n\t" // xmm2 -> memory
  75. "movdqa 0x20[rdi],xmm4\n\t" // xmm4 -> memory
  76. "movdqa 0x30[rdi],xmm6\n\t" // xmm6 -> memory
  77. "movdqa 0x40[rdi],xmm8\n\t" // xmm8 -> memory
  78. "movdqa 0x50[rdi],xmm10\n\t" // xmm10 -> memory
  79. "movdqa 0x60[rdi],xmm12\n\t" // xmm12 -> memory
  80. "movdqa 0x70[rdi],xmm14\n\t" // xmm14 -> memory
  81.  
  82. // send mm values to memory
  83. "movq 0x80[rdi],mm0\n\t" // mm0 -> memory
  84. "movq 0x88[rdi],mm2\n\t" // mm2 -> memory
  85. "movq 0x90[rdi],mm4\n\t" // mm4 -> memory
  86. "movq 0x98[rdi],mm6\n\t" // mm6 -> memory
  87.  
  88. // xmms & mms are free now
  89. // load mms from 'r's
  90. "movq mm0,r8\n\t" // move saved 'r' to mm
  91. "movq mm1,r9\n\t" // move saved 'r' to mm
  92. "movq mm2,r10\n\t" // move saved 'r' to mm
  93. "movq mm3,r11\n\t" // move saved 'r' to mm
  94. "movq mm4,r12\n\t" // move saved 'r' to mm
  95. "movq mm5,r13\n\t" // move saved 'r' to mm
  96. "movq mm6,r14\n\t" // move saved 'r' to mm
  97. "movq mm7,r15\n\t" // move saved 'r' to mm
  98. // calc mms
  99. "pmullw mm0,mm1\n\t" // calc mms copied from 'r's
  100. "pmullw mm2,mm3\n\t" // calc mms copied from 'r's
  101. "pmullw mm4,mm5\n\t" // calc mms copied from 'r's
  102. "pmullw mm6,mm7\n\t" // calc mms copied from 'r's
  103. // send mm values to memory
  104. "movq 0xA0[rdi],mm0\n\t" // mm0 -> memory
  105. "movq 0xA8[rdi],mm2\n\t" // mm2 -> memory
  106. "movq 0xB0[rdi],mm4\n\t" // mm4 -> memory
  107. "movq 0xB8[rdi],mm6\n\t" // mm6 -> memory
  108. "emms\n\t" // leave MMX state: the mm registers alias the x87 stack, so later x87 code needs the tag word cleared
  109. : // no output operands; results are stored through the data pointer, covered by the "memory" clobber
  110. : "D" (data) ,"S" (rands) // data arrives in rdi, rands in rsi
  111. : "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
  112. "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15",
  113. "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7",
  114. "r8","r9","r10","r11","r12","r13","r14","r15",
  115. "memory");
  116.  
  117. }
  118.  
  119.  
  120. ///////////////////////////////////////////
  121. #include <stdio.h>
  122. #include <stdlib.h>
  123. #include <time.h>
  124.  
  125. #include "pipe_line_math.h"
  126.  
  127. #define _ARRAY_SIZE_ (256*256*256*24) // element count of each array; parenthesized so it expands safely inside any expression
  128. #define _ELTS_PER_PIPE_ 112 // element stride between pipe_mult_ushort calls; NOTE(review): each call multiplies only 96 elements -- confirm the gap of 16 is intended
  129. ushort __attribute__ ((aligned (16))) rands[_ARRAY_SIZE_]; // multipliers; 16-byte aligned for the movdqa loads
  130. ushort __attribute__ ((aligned (16))) data[_ARRAY_SIZE_]; // multiplicands, overwritten in place with the products
  131.  
  132. struct timespec tspec1; // timestamp taken before the timed passes
  133. struct timespec tspec2; // timestamp taken after the timed passes
  134.  
  135.  
  136. int main(void) { // benchmark driver: fill both arrays, time two multiply passes over them, print the first 64 results
  137. ulong i,max;
  138. double diff;
  139.  
  140. for (i=0;i<_ARRAY_SIZE_;i++) { /// fill with any data
  141. rands[i]=i%4;
  142. data[i]=i*2+i; // value is truncated to 16 bits on assignment
  143. }
  144.  
  145. max=_ARRAY_SIZE_/_ELTS_PER_PIPE_; // number of whole chunks that fit in the arrays
  146. clock_gettime(CLOCK_REALTIME,&tspec1);
  147. for (i=0;i<max;i++) pipe_mult_ushort(&data[i*_ELTS_PER_PIPE_],&rands[i*_ELTS_PER_PIPE_]); // i is a chunk index; 112 ushorts = 224 bytes keeps every chunk 16-byte aligned
  148. for (i=0;i<max;i++) pipe_mult_ushort(&data[i*_ELTS_PER_PIPE_],&rands[i*_ELTS_PER_PIPE_]); // one more time
  149. clock_gettime(CLOCK_REALTIME,&tspec2);
  150. diff=((double)tspec2.tv_sec+(double)tspec2.tv_nsec/1000000000.0)-((double)tspec1.tv_sec+tspec1.tv_nsec/1000000000.0);
  151. printf("time pipeline multiply:\nstart: %ld:%ld\n end: %ld:%ld ; total diff: %f\n",(long)tspec1.tv_sec,(long)tspec1.tv_nsec,(long)tspec2.tv_sec,(long)tspec2.tv_nsec,diff); // %ld with casts: tv_sec/tv_nsec are not int
  152. printf("sample data:\n");
  153. for (i=0;i<64;i++) {
  154. printf("%d,",data[i]);
  155. if (!((i+1)%16)) printf("\n"); // 16 values per output row
  156.  
  157. }
  158.  
  159. return(0);
  160. }



Reply via email to