this is the long overdue TSU notification for my patch to enhance
bn_mul_add_words with SSE2 support.

unfortunately i never got the time to make the various improvements
suggested on the mailing list, but i'm hoping that by clearing up the
export requirements on this patch someone else will be able to clean
things up for final inclusion.

thanks
-dean

SUBMISSION TYPE: TSU
SUBMITTED BY: dean gaudet
SUBMITTED FOR: dean gaudet
POINT OF CONTACT: [EMAIL PROTECTED]
PHONE and/or FAX: 408 919 3086
MANUFACTURER: n/a
PRODUCT NAME/MODEL #: openssl
ECCN: 5D002

this patch introduces dynamic selection of the bn_mul_add_words inner
loop according to whether the processor supports SSE2 or not.  run

        ./config CPUID

to enable this support.  (this is all that's required to get the SSE2
support.)

the cpuid.c file definitely needs porting to windoze (due to the need
to trap SIGILL during the test).  i've tested it on recent linux,
freebsd and netbsd; and on p4, xeon, athlon, opteron, pentium-m, p3,
p3m, and efficeon.

i'm not really sure where to cause my CRYPTO_cpuid_init() routine
to be called.  i've just tacked in a couple calls here in BN_new()
and BN_init() ... but it needs some place more central.

see http://arctic.org/~dean/crypto/rsa.html for the latest info.

-dean

===================================================================
RCS file: /home/dean/openssl/Repository/openssl/Configure,v
retrieving revision 1.419
diff -u -r1.419 Configure
--- Configure.orig      30 Nov 2003 23:29:26 -0000      1.419
+++ Configure   8 Dec 2003 03:24:48 -0000
@@ -90,6 +90,7 @@
 # MD5_ASM      use some extra md5 assember,
 # SHA1_ASM     use some extra sha1 assember, must define L_ENDIAN for x86
 # RMD160_ASM   use some extra ripemd160 assember,
+# CPUID                use x86 cpuid to select MMX/SSE2 code at run-time

 my $x86_gcc_des="DES_PTR DES_RISC1 DES_UNROLL";

@@ -636,6 +637,7 @@
 my $sha1_obj="";
 my $rmd160_obj="";
 my $processor="";
+my $cpuid=0;
 my $default_ranlib;
 my $perl;

@@ -832,6 +834,8 @@
                        }
                elsif (/^386$/)
                        { $processor=386; }
+               elsif (/^CPUID$/)
+                       { $cpuid=1; }
                elsif (/^rsaref$/)
                        {
                        # No RSAref support any more since it's not needed.
@@ -1200,6 +1204,7 @@
 #      $rmd160_obj=$rmd160_enc;
        $cflags.=" -DRMD160_ASM";
        }
+$cflags.=" -DCPUID" if ($cpuid);

 # "Stringify" the C flags string.  This permits it to be made part of a string
 # and works as well on command lines.
===================================================================
RCS file: /home/dean/openssl/Repository/openssl/crypto/Makefile.ssl,v
retrieving revision 1.101
diff -u -r1.101 Makefile.ssl
--- crypto/Makefile.ssl.orig    3 Dec 2003 16:29:41 -0000       1.101
+++ crypto/Makefile.ssl 8 Dec 2003 04:01:37 -0000
@@ -38,8 +38,8 @@

 LIB= $(TOP)/libcrypto.a
 SHARED_LIB= libcrypto$(SHLIB_EXT)
-LIBSRC=        cryptlib.c mem.c mem_clr.c mem_dbg.c cversion.c ex_data.c tmdiff.c cpt_err.c ebcdic.c uid.c o_time.c o_str.c
-LIBOBJ= cryptlib.o mem.o mem_clr.o mem_dbg.o cversion.o ex_data.o tmdiff.o cpt_err.o ebcdic.o uid.o o_time.o o_str.o
+LIBSRC=        cryptlib.c mem.c mem_clr.c mem_dbg.c cversion.c ex_data.c tmdiff.c cpt_err.c ebcdic.c uid.c o_time.c o_str.c cpuid.c
+LIBOBJ= cryptlib.o mem.o mem_clr.o mem_dbg.o cversion.o ex_data.o tmdiff.o cpt_err.o ebcdic.o uid.o o_time.o o_str.o cpuid.o

 SRC= $(LIBSRC)

===================================================================
RCS file: crypto/cpuid.c
diff -N crypto/cpuid.c
--- crypto/cpuid.c.orig 1 Jan 1970 00:00:00 -0000
+++ crypto/cpuid.c      8 Dec 2003 05:56:16 -0000
@@ -0,0 +1,158 @@
+/*
+ * This x86 CPUID code is derived from intel appnote 241618.
+ *
+ * This code is public domain.
+ */
+
+/*
+ * Unfortunately, this code isn't portable.  Naively it would
+ * seem like this stuff belongs in one of the perlasm files,
+ * however there's a nasty SSE detection problem:  you
+ * need to execute a possibly faulting instruction in order
+ * to distinguish the setting of cr4.osfxsr on CPUs which
+ * claim to support SSE.  If the OS kernel has not set
+ * cr4.osfxsr=1 (to indicate that it will save/restore the
+ * extra registers via FXSAVE/FXRESTOR on context switches)
+ * then SSE/SSE2 instructions will fault, even though cpuid
+ * indicates their availability.
+ *
+ * Intercepting the fault requires OS interfacing... on unix
+ * it requires trapping SIGILL.  On Win32, I'm not sure what
+ * to do.  At any rate, it would be a royal pain to do all
+ * the signal (and threadsafe) foo from assembly, so this
+ * code is in C with embedded asm.
+ *
+ * - [EMAIL PROTECTED]
+ */
+
+#ifdef CPUID
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#ifdef _REENTRANT
+#include <pthread.h>
+#endif
+
+#include "cpuid.h"
+
+unsigned CRYPTO_cpuid_value;
+
+
+static sigjmp_buf CRYPTO_cpuid_jmp;
+
+static void CRYPTO_cpuid_sigill(int signum)
+{
+       siglongjmp(CRYPTO_cpuid_jmp, 1);
+}
+
+static unsigned CRYPTO_cpuid_init_internal(void)
+{
+       char *e;
+       unsigned eax, ebx, ecx, edx;
+
+       CRYPTO_cpuid_value = CRYPTO_CPUID_ONCE;
+
+       e = getenv("CRYPTO_CPUID");
+       if (e) {
+               CRYPTO_cpuid_value = strtoul(e, 0, 0) | CRYPTO_CPUID_ONCE;
+               return CRYPTO_cpuid_value;
+       }
+
+       /* test for the existence of the CPUID instruction
+        * by toggling the ID bit in eflags register.
+        * see intel application note 241618
+        */
+       __asm volatile(
+               "\n     pushf"
+               "\n     pop %0"
+               "\n     mov %0,%1"
+               "\n     xor $0x200000,%0"
+               "\n     push %0"
+               "\n     popf"
+               "\n     pushf"
+               "\n     pop %0"
+               "\n     xor %0,%1"
+               "\n     mov $0,%0"
+               "\n     jz 1f"
+               "\n     mov $1,%0"
+               "\n1:"
+               : "=&r" (eax), "=&r" (ecx) :: "memory");
+       if (!eax) {
+               return CRYPTO_cpuid_value;
+       }
+
+       /* get results of cpuid level 1 */
+       __asm volatile(
+               "cpuid"
+               : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+               : "0" (1));
+
+       if (edx & 0x800000) {
+               CRYPTO_cpuid_value |= CRYPTO_CPUID_MMX;
+       }
+
+       /* here is the extended test for SSE which checks if an
+        * SSE instruction traps.
+        */
+       if (edx & 0x2000000) {
+               struct sigaction sa, sa_save;
+
+               if (sigsetjmp(CRYPTO_cpuid_jmp, 1) == 0) {
+                       sa.sa_handler = CRYPTO_cpuid_sigill;
+                       sa.sa_flags = 0;
+                       sigemptyset(&sa.sa_mask);
+                       if (sigaction(SIGILL, &sa, &sa_save)) {
+                               return CRYPTO_cpuid_value;
+                       }
+
+                       /* this is "xorps %xmm0,%xmm0" ... in a manner
+                        * which all as(1) should accept.
+                        */
+                       __asm volatile(".byte 0x0f,0x57,0xc0");
+               }
+               else {
+                       sigaction(SIGILL, &sa_save, NULL);
+                       return CRYPTO_cpuid_value;
+               }
+               sigaction(SIGILL, &sa_save, NULL);
+
+               CRYPTO_cpuid_value |= CRYPTO_CPUID_SSE;
+       }
+
+       if (edx & 0x4000000) {
+               CRYPTO_cpuid_value |= CRYPTO_CPUID_SSE2;
+       }
+
+       if (ecx & 1) {
+               CRYPTO_cpuid_value |= CRYPTO_CPUID_SSE3;
+       }
+
+       return CRYPTO_cpuid_value;
+}
+
+
+#ifdef _REENTRANT
+static pthread_once_t CRYPTO_cpuid_once_control = PTHREAD_ONCE_INIT;
+
+unsigned CRYPTO_cpuid_init(void)
+{
+       if (CRYPTO_cpuid_value) {
+               return CRYPTO_cpuid_value;
+       }
+       pthread_once(&CRYPTO_cpuid_once_control,
+                       (void (*)(void))CRYPTO_cpuid_init_internal);
+       return CRYPTO_cpuid_value;
+}
+#else
+unsigned CRYPTO_cpuid_init(void)
+{
+       if (CRYPTO_cpuid_value) {
+               return CRYPTO_cpuid_value;
+       }
+       return CRYPTO_cpuid_init_internal();
+}
+#endif
+
+#else
+int CPUID_dummy;
+#endif
===================================================================
RCS file: crypto/cpuid.h
diff -N crypto/cpuid.h
--- crypto/cpuid.h.orig 1 Jan 1970 00:00:00 -0000
+++ crypto/cpuid.h      8 Dec 2003 05:21:57 -0000
@@ -0,0 +1,28 @@
+#ifndef HEADER_CPUID_H
+#define HEADER_CPUID_H
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#define CRYPTO_CPUID_ONCE              (0x01)
+#define CRYPTO_CPUID_MMX               (0x02)
+#define CRYPTO_CPUID_SSE               (0x04)
+#define CRYPTO_CPUID_SSE2              (0x08)
+#define CRYPTO_CPUID_SSE3              (0x10)
+
+#ifdef CPUID
+extern unsigned CRYPTO_cpuid_init(void);
+extern unsigned CRYPTO_cpuid_value;
+
+#define CRYPTO_cpuid() (CRYPTO_cpuid_value ? CRYPTO_cpuid_value : CRYPTO_cpuid_init())
+
+#else
+#define CRYPTO_cpuid() (0)
+#endif
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
===================================================================
RCS file: /home/dean/openssl/Repository/openssl/crypto/bn/bn_lib.c,v
retrieving revision 1.55
diff -u -r1.55 bn_lib.c
--- crypto/bn/bn_lib.c.orig     2 Dec 2003 20:01:30 -0000       1.55
+++ crypto/bn/bn_lib.c  8 Dec 2003 04:25:01 -0000
@@ -66,6 +66,7 @@
 #include <stdio.h>
 #include "cryptlib.h"
 #include "bn_lcl.h"
+#include "cpuid.h"

 const char *BN_version="Big Number" OPENSSL_VERSION_PTEXT;

@@ -288,6 +289,7 @@

 void BN_init(BIGNUM *a)
        {
+       (void)CRYPTO_cpuid();
        memset(a,0,sizeof(BIGNUM));
        bn_check_top(a);
        }
@@ -296,6 +298,7 @@
        {
        BIGNUM *ret;

+       (void)CRYPTO_cpuid();
        if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL)
                {
                BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
===================================================================
RCS file: /home/dean/openssl/Repository/openssl/crypto/bn/asm/bn-586.pl,v
retrieving revision 1.5
diff -u -r1.5 bn-586.pl
--- crypto/bn/asm/bn-586.pl.orig        6 Dec 2000 16:30:23 -0000       1.5
+++ crypto/bn/asm/bn-586.pl     8 Dec 2003 04:45:43 -0000
@@ -42,6 +42,82 @@

        &jz(&label("maw_finish"));

+       if ($cpuid) {
+               &test(&DWP("CRYPTO_cpuid_value","","",0),$cpuid_sse2);
+               &jz(&label("maw_loop"));
+
+               &movd("mm0",$w);                # mm0 = w
+               &pxor("mm1","mm1");             # mm1 = carry_in
+
+               &set_label("maw_sse2_loop",0);
+               &movd("mm3",&DWP(0,$r,"",0));   # mm3 = r[0]
+               &paddq("mm1","mm3");            # mm1 = carry_in + r[0]
+               &movd("mm2",&DWP(0,$a,"",0));   # mm2 = a[0]
+               &pmuludq("mm2","mm0");          # mm2 = w*a[0]
+               &movd("mm4",&DWP(4,$a,"",0));   # mm4 = a[1]
+               &pmuludq("mm4","mm0");          # mm4 = w*a[1]
+               &movd("mm6",&DWP(8,$a,"",0));   # mm6 = a[2]
+               &pmuludq("mm6","mm0");          # mm6 = w*a[2]
+               &movd("mm7",&DWP(12,$a,"",0));  # mm7 = a[3]
+               &pmuludq("mm7","mm0");          # mm7 = w*a[3]
+               &paddq("mm1","mm2");            # mm1 = carry_in + r[0] + w*a[0]
+               &movd("mm3",&DWP(4,$r,"",0));   # mm3 = r[1]
+               &paddq("mm3","mm4");            # mm3 = r[1] + w*a[1]
+               &movd("mm5",&DWP(8,$r,"",0));   # mm5 = r[2]
+               &paddq("mm5","mm6");            # mm5 = r[2] + w*a[2]
+               &movd("mm4",&DWP(12,$r,"",0));  # mm4 = r[3]
+               &paddq("mm7","mm4");            # mm7 = r[3] + w*a[3]
+               &movd(&DWP(0,$r,"",0),"mm1");
+               &movd("mm2",&DWP(16,$a,"",0));  # mm2 = a[4]
+               &pmuludq("mm2","mm0");          # mm2 = w*a[4]
+               &psrlq("mm1",32);               # mm1 = carry0
+               &movd("mm4",&DWP(20,$a,"",0));  # mm4 = a[5]
+               &pmuludq("mm4","mm0");          # mm4 = w*a[5]
+               &paddq("mm1","mm3");            # mm1 = carry0 + r[1] + w*a[1]
+               &movd("mm6",&DWP(24,$a,"",0));  # mm6 = a[6]
+               &pmuludq("mm6","mm0");          # mm6 = w*a[6]
+               &movd(&DWP(4,$r,"",0),"mm1");
+               &psrlq("mm1",32);               # mm1 = carry1
+               &movd("mm3",&DWP(28,$a,"",0));  # mm3 = a[7]
+               &add($a,32);
+               &pmuludq("mm3","mm0");          # mm3 = w*a[7]
+               &paddq("mm1","mm5");            # mm1 = carry1 + r[2] + w*a[2]
+               &movd("mm5",&DWP(16,$r,"",0));  # mm5 = r[4]
+               &paddq("mm2","mm5");            # mm2 = r[4] + w*a[4]
+               &movd(&DWP(8,$r,"",0),"mm1");
+               &psrlq("mm1",32);               # mm1 = carry2
+               &paddq("mm1","mm7");            # mm1 = carry2 + r[3] + w*a[3]
+               &movd("mm5",&DWP(20,$r,"",0));  # mm5 = r[5]
+               &paddq("mm4","mm5");            # mm4 = r[5] + w*a[5]
+               &movd(&DWP(12,$r,"",0),"mm1");
+               &psrlq("mm1",32);               # mm1 = carry3
+               &paddq("mm1","mm2");            # mm1 = carry3 + r[4] + w*a[4]
+               &movd("mm5",&DWP(24,$r,"",0));  # mm5 = r[6]
+               &paddq("mm6","mm5");            # mm6 = r[6] + w*a[6]
+               &movd(&DWP(16,$r,"",0),"mm1");
+               &psrlq("mm1",32);               # mm1 = carry4
+               &paddq("mm1","mm4");            # mm1 = carry4 + r[5] + w*a[5]
+               &movd("mm5",&DWP(28,$r,"",0));  # mm5 = r[7]
+               &paddq("mm3","mm5");            # mm3 = r[7] + w*a[7]
+               &movd(&DWP(20,$r,"",0),"mm1");
+               &psrlq("mm1",32);               # mm1 = carry5
+               &paddq("mm1","mm6");            # mm1 = carry5 + r[6] + w*a[6]
+               &movd(&DWP(24,$r,"",0),"mm1");
+               &psrlq("mm1",32);               # mm1 = carry6
+               &paddq("mm1","mm3");            # mm1 = carry6 + r[7] + w*a[7]
+               &movd(&DWP(28,$r,"",0),"mm1");
+               &add($r,32);
+               &psrlq("mm1",32);               # mm1 = carry_out
+
+               &sub("ecx",8);
+               &jnz(&label("maw_sse2_loop"));
+
+               &movd($c,"mm1");                # c = carry_out
+               &emms();
+
+               &jmp(&label("maw_finish"));
+       }
+
        &set_label("maw_loop",0);

        &mov(&swtmp(0),"ecx");  #
@@ -50,14 +126,14 @@
                {
                &comment("Round $i");

-                &mov("eax",&DWP($i,$a,"",0));  # *a
+               &mov("eax",&DWP($i,$a,"",0));   # *a
                &mul($w);                       # *a * w
                &add("eax",$c);         # L(t)+= *r
-                &mov($c,&DWP($i,$r,"",0));     # L(t)+= *r
+               &mov($c,&DWP($i,$r,"",0));      # L(t)+= *r
                &adc("edx",0);                  # H(t)+=carry
-                &add("eax",$c);                # L(t)+=c
+               &add("eax",$c);         # L(t)+=c
                &adc("edx",0);                  # H(t)+=carry
-                &mov(&DWP($i,$r,"",0),"eax");  # *r= L(t);
+               &mov(&DWP($i,$r,"",0),"eax");   # *r= L(t);
                &mov($c,"edx");                 # c=  H(t);
                }

===================================================================
RCS file: /home/dean/openssl/Repository/openssl/crypto/perlasm/x86asm.pl,v
retrieving revision 1.10
diff -u -r1.10 x86asm.pl
--- crypto/perlasm/x86asm.pl.orig       28 Nov 2003 13:10:47 -0000      1.10
+++ crypto/perlasm/x86asm.pl    8 Dec 2003 04:26:27 -0000
@@ -54,7 +54,11 @@
                }

        $pic=0;
-       for (@ARGV) {   $pic=1 if (/\-[fK]PIC/i);       }
+       $cpuid=0;
+       for (@ARGV) {
+               $pic=1 if (/\-[fK]PIC/i);
+               $cpuid=1 if (/\-DCPUID/);
+       }

        &asm_init_output();

===================================================================
RCS file: /home/dean/openssl/Repository/openssl/crypto/perlasm/x86ms.pl,v
retrieving revision 1.8
diff -u -r1.8 x86ms.pl
--- crypto/perlasm/x86ms.pl.orig        27 Sep 2003 22:14:39 -0000      1.8
+++ crypto/perlasm/x86ms.pl     8 Dec 2003 04:34:19 -0000
@@ -120,6 +120,7 @@
 sub main'mov   { &out2("mov",@_); }
 sub main'movb  { &out2("mov",@_); }
 sub main'and   { &out2("and",@_); }
+sub main'test  { &out2("test",@_); }
 sub main'or    { &out2("or",@_); }
 sub main'shl   { &out2("shl",@_); }
 sub main'shr   { &out2("shr",@_); }
@@ -161,6 +162,13 @@
 sub main'ret   { &out0("ret"); }
 sub main'nop   { &out0("nop"); }

+sub main'emms  { &out0("emms"); }
+sub main'movd  { &out2("movd",@_); }
+sub main'paddq { &out2("paddq",@_); }
+sub main'pmuludq{ &out2("pmuludq",@_); }
+sub main'psrlq { &out2("psrlq",@_); }
+sub main'pxor  { &out2("pxor",@_); }
+
 sub out2
        {
        local($name,$p1,$p2)=@_;
===================================================================
RCS file: /home/dean/openssl/Repository/openssl/crypto/perlasm/x86mwasm_nw.pl,v
retrieving revision 1.1
diff -u -r1.1 x86mwasm_nw.pl
--- crypto/perlasm/x86mwasm_nw.pl.orig  28 Nov 2003 13:10:47 -0000      1.1
+++ crypto/perlasm/x86mwasm_nw.pl       8 Dec 2003 04:34:54 -0000
@@ -134,6 +134,7 @@
 sub main'mov   { &out2("mov",@_); }
 sub main'movb  { &out2("mov",@_); }
 sub main'and   { &out2("and",@_); }
+sub main'test  { &out2("test",@_); }
 sub main'or    { &out2("or",@_); }
 sub main'shl   { &out2("shl",@_); }
 sub main'shr   { &out2("shr",@_); }
@@ -177,6 +178,13 @@
 sub main'ret   { &out0("ret"); }
 sub main'nop   { &out0("nop"); }

+sub main'emms  { &out0("emms"); }
+sub main'movd  { &out2("movd",@_); }
+sub main'paddq { &out2("paddq",@_); }
+sub main'pmuludq{ &out2("pmuludq",@_); }
+sub main'psrlq { &out2("psrlq",@_); }
+sub main'pxor  { &out2("pxor",@_); }
+
 sub out2
        {
        my($name,$p1,$p2)=@_;
===================================================================
RCS file: /home/dean/openssl/Repository/openssl/crypto/perlasm/x86nasm.pl,v
retrieving revision 1.8
diff -u -r1.8 x86nasm.pl
--- crypto/perlasm/x86nasm.pl.orig      27 Sep 2003 22:14:39 -0000      1.8
+++ crypto/perlasm/x86nasm.pl   8 Dec 2003 04:34:02 -0000
@@ -126,6 +126,7 @@
 sub main'mov   { &out2("mov",@_); }
 sub main'movb  { &out2("mov",@_); }
 sub main'and   { &out2("and",@_); }
+sub main'test  { &out2("test",@_); }
 sub main'or    { &out2("or",@_); }
 sub main'shl   { &out2("shl",@_); }
 sub main'shr   { &out2("shr",@_); }
@@ -170,6 +171,13 @@
 sub main'ret   { &out0("ret"); }
 sub main'nop   { &out0("nop"); }

+sub main'emms  { &out0("emms"); }
+sub main'movd  { &out2("movd",@_); }
+sub main'paddq { &out2("paddq",@_); }
+sub main'pmuludq{ &out2("pmuludq",@_); }
+sub main'psrlq { &out2("psrlq",@_); }
+sub main'pxor  { &out2("pxor",@_); }
+
 sub out2
        {
        my($name,$p1,$p2)=@_;
===================================================================
RCS file: /home/dean/openssl/Repository/openssl/crypto/perlasm/x86unix.pl,v
retrieving revision 1.23
diff -u -r1.23 x86unix.pl
--- crypto/perlasm/x86unix.pl.orig      27 Sep 2003 22:14:39 -0000      1.23
+++ crypto/perlasm/x86unix.pl   8 Dec 2003 04:33:48 -0000
@@ -51,6 +51,14 @@
        'edi',  '%edi',
        'ebp',  '%ebp',
        'esp',  '%esp',
+       'mm0',  '%mm0',
+       'mm1',  '%mm1',
+       'mm2',  '%mm2',
+       'mm3',  '%mm3',
+       'mm4',  '%mm4',
+       'mm5',  '%mm5',
+       'mm6',  '%mm6',
+       'mm7',  '%mm7',
        );

 %reg_val=(
@@ -132,6 +140,7 @@
 sub main'mov   { &out2("movl",@_); }
 sub main'movb  { &out2("movb",@_); }
 sub main'and   { &out2("andl",@_); }
+sub main'test  { &out2("testl",@_); }
 sub main'or    { &out2("orl",@_); }
 sub main'shl   { &out2("sall",@_); }
 sub main'shr   { &out2("shrl",@_); }
@@ -174,6 +183,13 @@
 sub main'ret   { &out0("ret"); }
 sub main'nop   { &out0("nop"); }

+sub main'emms  { &out0("emms"); }
+sub main'movd  { &out2("movd",@_); }
+sub main'paddq { &out2("paddq",@_); }
+sub main'pmuludq{ &out2("pmuludq",@_); }
+sub main'psrlq { &out2("psrlq",@_); }
+sub main'pxor  { &out2("pxor",@_); }
+
 # The bswapl instruction is new for the 486. Emulate if i386.
 sub main'bswap
        {
@@ -586,3 +602,8 @@
        }

 sub main'blindpop { &out1("popl",@_); }
+
+$main'cpuid_mmx = 2;
+$main'cpuid_sse = 4;
+$main'cpuid_sse2 = 8;
+$main'cpuid_sse3 = 16;

______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       [EMAIL PROTECTED]
Automated List Manager                           [EMAIL PROTECTED]

Reply via email to