Module Name:    src
Committed By:   christos
Date:           Sun Mar  4 04:00:45 UTC 2018

Added Files:
        src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha: Makefile
            alpha-mont.S alphacpuid.S asm.h crypto.inc ghash-alpha.S modes.inc
            regdef.h sha.inc sha1-alpha.S

Log Message:
Add OpenSSL assembler support for Alpha: Montgomery multiplication
(alpha-mont.S), GHASH (ghash-alpha.S), SHA-1 (sha1-alpha.S), CPU support
routines (alphacpuid.S), plus the Makefile and .inc glue needed to build them.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/Makefile \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/alpha-mont.S \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/alphacpuid.S \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/asm.h \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/crypto.inc \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/ghash-alpha.S \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/modes.inc \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/regdef.h \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/sha.inc \
    src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/sha1-alpha.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Added files:

Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/Makefile
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/Makefile:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/Makefile	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,15 @@
+#	$NetBSD: Makefile,v 1.1 2018/03/04 04:00:45 christos Exp $
+
+.include "bsd.own.mk"
+
+CRYPTODIST=${NETBSDSRCDIR}/crypto
+.include "${NETBSDSRCDIR}/crypto/Makefile.openssl"
+
+regen:
+	for i in $$(find ${OPENSSLSRC} -name \*alpha\*.pl); do \
+		case $$i in \
+		(*) perl -I${OPENSSLSRC}/crypto/perlasm \
+		-I${OPENSSLSRC}/crypto/bn/asm $$i linux /dev/stdout \
+		> $$(basename $$i .pl).S;; \
+		esac; \
+	done
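
For reference, the regen target above just runs each OpenSSL perlasm script
matching *alpha*.pl through perl and captures the emitted assembly next to
this Makefile.  A minimal sketch of the command it effectively executes for
one of the scripts, assuming ${OPENSSLSRC} points at the imported OpenSSL
sources and that the Montgomery script lives at crypto/bn/asm/alpha-mont.pl
there:

    # run from the arch/alpha directory so the output lands beside the Makefile
    cd src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha
    perl -I${OPENSSLSRC}/crypto/perlasm -I${OPENSSLSRC}/crypto/bn/asm \
        ${OPENSSLSRC}/crypto/bn/asm/alpha-mont.pl linux /dev/stdout \
        > alpha-mont.S

The Makefile passes "linux" and /dev/stdout as the script's arguments; the
__linux__/regdef.h conditionals at the top of the generated files let the same
output assemble on NetBSD using the asm.h and regdef.h committed here.
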
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/alpha-mont.S
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/alpha-mont.S:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/alpha-mont.S	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,274 @@
+#ifdef __linux__
+#include <asm/regdef.h>
+#else
+#include <asm.h>
+#include <regdef.h>
+#endif
+
+.text
+
+.set	noat
+.set	noreorder
+
+.globl	bn_mul_mont
+.align	5
+.ent	bn_mul_mont
+bn_mul_mont:
+	lda	sp,-48(sp)
+	stq	ra,0(sp)
+	stq	s3,8(sp)
+	stq	s4,16(sp)
+	stq	s5,24(sp)
+	stq	fp,32(sp)
+	mov	sp,fp
+	.mask	0x0400f000,-48
+	.frame	fp,48,ra
+	.prologue 0
+
+	.align	4
+	.set	reorder
+	sextl	a5,a5
+	mov	0,v0
+	cmplt	a5,4,AT
+	bne	AT,.Lexit
+
+	ldq	t1,0(a1)	# ap[0]
+	s8addq	a5,16,AT
+	ldq	t4,8(a1)
+	subq	sp,AT,sp
+	ldq	t5,0(a2)	# bp[0]
+	lda	AT,-4096(zero)	# mov	-4096,AT
+	ldq	a4,0(a4)
+	and	sp,AT,sp
+
+	mulq	t1,t5,t0
+	ldq	t3,0(a3)	# np[0]
+	umulh	t1,t5,t1
+	ldq	t6,8(a3)
+
+	mulq	t0,a4,s5
+
+	mulq	t3,s5,t2
+	umulh	t3,s5,t3
+
+	addq	t2,t0,t2
+	cmpult	t2,t0,AT
+	addq	t3,AT,t3
+
+	mulq	t4,t5,t8
+	mov	2,s4
+	umulh	t4,t5,t9
+	mov	sp,t7
+
+	mulq	t6,s5,t10
+	s8addq	s4,a1,t4
+	umulh	t6,s5,t11
+	s8addq	s4,a3,t6
+.align	4
+.L1st:
+	.set	noreorder
+	ldq	t4,0(t4)
+	addl	s4,1,s4
+	ldq	t6,0(t6)
+	lda	t7,8(t7)
+
+	addq	t8,t1,t0
+	mulq	t4,t5,t8
+	cmpult	t0,t1,AT
+	addq	t10,t3,t2
+
+	mulq	t6,s5,t10
+	addq	t9,AT,t1
+	cmpult	t2,t3,v0
+	cmplt	s4,a5,t12
+
+	umulh	t4,t5,t9
+	addq	t11,v0,t3
+	addq	t2,t0,t2
+	s8addq	s4,a1,t4
+
+	umulh	t6,s5,t11
+	cmpult	t2,t0,v0
+	addq	t3,v0,t3
+	s8addq	s4,a3,t6
+
+	stq	t2,-8(t7)
+	nop
+	unop
+	bne	t12,.L1st
+	.set	reorder
+
+	addq	t8,t1,t0
+	addq	t10,t3,t2
+	cmpult	t0,t1,AT
+	cmpult	t2,t3,v0
+	addq	t9,AT,t1
+	addq	t11,v0,t3
+
+	addq	t2,t0,t2
+	cmpult	t2,t0,v0
+	addq	t3,v0,t3
+
+	stq	t2,0(t7)
+
+	addq	t3,t1,t3
+	cmpult	t3,t1,AT
+	stq	t3,8(t7)
+	stq	AT,16(t7)
+
+	mov	1,s3
+.align	4
+.Louter:
+	s8addq	s3,a2,t5
+	ldq	t1,0(a1)
+	ldq	t4,8(a1)
+	ldq	t5,0(t5)
+	ldq	t3,0(a3)
+	ldq	t6,8(a3)
+	ldq	t12,0(sp)
+
+	mulq	t1,t5,t0
+	umulh	t1,t5,t1
+
+	addq	t0,t12,t0
+	cmpult	t0,t12,AT
+	addq	t1,AT,t1
+
+	mulq	t0,a4,s5
+
+	mulq	t3,s5,t2
+	umulh	t3,s5,t3
+
+	addq	t2,t0,t2
+	cmpult	t2,t0,AT
+	mov	2,s4
+	addq	t3,AT,t3
+
+	mulq	t4,t5,t8
+	mov	sp,t7
+	umulh	t4,t5,t9
+
+	mulq	t6,s5,t10
+	s8addq	s4,a1,t4
+	umulh	t6,s5,t11
+.align	4
+.Linner:
+	.set	noreorder
+	ldq	t12,8(t7)	#L0
+	nop			#U1
+	ldq	t4,0(t4)	#L1
+	s8addq	s4,a3,t6	#U0
+
+	ldq	t6,0(t6)	#L0
+	nop			#U1
+	addq	t8,t1,t0	#L1
+	lda	t7,8(t7)
+
+	mulq	t4,t5,t8	#U1
+	cmpult	t0,t1,AT	#L0
+	addq	t10,t3,t2	#L1
+	addl	s4,1,s4
+
+	mulq	t6,s5,t10	#U1
+	addq	t9,AT,t1	#L0
+	addq	t0,t12,t0	#L1
+	cmpult	t2,t3,v0	#U0
+
+	umulh	t4,t5,t9	#U1
+	cmpult	t0,t12,AT	#L0
+	addq	t2,t0,t2	#L1
+	addq	t11,v0,t3	#U0
+
+	umulh	t6,s5,t11	#U1
+	s8addq	s4,a1,t4	#L0
+	cmpult	t2,t0,v0	#L1
+	cmplt	s4,a5,t12	#U0	# borrow t12
+
+	addq	t1,AT,t1	#L0
+	addq	t3,v0,t3	#U1
+	stq	t2,-8(t7)	#L1
+	bne	t12,.Linner	#U0
+	.set	reorder
+
+	ldq	t12,8(t7)
+	addq	t8,t1,t0
+	addq	t10,t3,t2
+	cmpult	t0,t1,AT
+	cmpult	t2,t3,v0
+	addq	t9,AT,t1
+	addq	t11,v0,t3
+
+	addq	t0,t12,t0
+	cmpult	t0,t12,AT
+	addq	t1,AT,t1
+
+	ldq	t12,16(t7)
+	addq	t2,t0,s4
+	cmpult	s4,t0,v0
+	addq	t3,v0,t3
+
+	addq	t3,t1,t2
+	stq	s4,0(t7)
+	cmpult	t2,t1,t3
+	addq	t2,t12,t2
+	cmpult	t2,t12,AT
+	addl	s3,1,s3
+	addq	t3,AT,t3
+	stq	t2,8(t7)
+	cmplt	s3,a5,t12	# borrow t12
+	stq	t3,16(t7)
+	bne	t12,.Louter
+
+	s8addq	a5,sp,t12	# &tp[num]
+	mov	a0,a2		# put rp aside
+	mov	sp,t7
+	mov	sp,a1
+	mov	0,t1		# clear borrow bit
+
+.align	4
+.Lsub:	ldq	t0,0(t7)
+	ldq	t2,0(a3)
+	lda	t7,8(t7)
+	lda	a3,8(a3)
+	subq	t0,t2,t2	# tp[i]-np[i]
+	cmpult	t0,t2,AT
+	subq	t2,t1,t0
+	cmpult	t2,t0,t1
+	or	t1,AT,t1
+	stq	t0,0(a0)
+	cmpult	t7,t12,v0
+	lda	a0,8(a0)
+	bne	v0,.Lsub
+
+	subq	t3,t1,t1	# handle upmost overflow bit
+	mov	sp,t7
+	mov	a2,a0		# restore rp
+
+	and	sp,t1,a1
+	bic	a2,t1,a2
+	bis	a2,a1,a1	# ap=borrow?tp:rp
+
+.align	4
+.Lcopy:	ldq	t4,0(a1)	# copy or in-place refresh
+	lda	t7,8(t7)
+	lda	a0,8(a0)
+	lda	a1,8(a1)
+	stq	zero,-8(t7)	# zap tp
+	cmpult	t7,t12,AT
+	stq	t4,-8(a0)
+	bne	AT,.Lcopy
+	mov	1,v0
+
+.Lexit:
+	.set	noreorder
+	mov	fp,sp
+	/*ldq	ra,0(sp)*/
+	ldq	s3,8(sp)
+	ldq	s4,16(sp)
+	ldq	s5,24(sp)
+	ldq	fp,32(sp)
+	lda	sp,48(sp)
+	ret	(ra)
+.end	bn_mul_mont
+.ascii	"Montgomery Multiplication for Alpha, CRYPTOGAMS by <ap...@openssl.org>"
+.align	2
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/alphacpuid.S
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/alphacpuid.S:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/alphacpuid.S	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,232 @@
+.text
+
+.set	noat
+
+.globl	OPENSSL_cpuid_setup
+.ent	OPENSSL_cpuid_setup
+OPENSSL_cpuid_setup:
+	.frame	$30,0,$26
+	.prologue 0
+	ret	($26)
+.end	OPENSSL_cpuid_setup
+
+.globl	OPENSSL_wipe_cpu
+.ent	OPENSSL_wipe_cpu
+OPENSSL_wipe_cpu:
+	.frame	$30,0,$26
+	.prologue 0
+	clr	$1
+	clr	$2
+	clr	$3
+	clr	$4
+	clr	$5
+	clr	$6
+	clr	$7
+	clr	$8
+	clr	$16
+	clr	$17
+	clr	$18
+	clr	$19
+	clr	$20
+	clr	$21
+	clr	$22
+	clr	$23
+	clr	$24
+	clr	$25
+	clr	$27
+	clr	$at
+	clr	$29
+	fclr	$f0
+	fclr	$f1
+	fclr	$f10
+	fclr	$f11
+	fclr	$f12
+	fclr	$f13
+	fclr	$f14
+	fclr	$f15
+	fclr	$f16
+	fclr	$f17
+	fclr	$f18
+	fclr	$f19
+	fclr	$f20
+	fclr	$f21
+	fclr	$f22
+	fclr	$f23
+	fclr	$f24
+	fclr	$f25
+	fclr	$f26
+	fclr	$f27
+	fclr	$f28
+	fclr	$f29
+	fclr	$f30
+	mov	$sp,$0
+	ret	($26)
+.end	OPENSSL_wipe_cpu
+
+.globl	OPENSSL_atomic_add
+.ent	OPENSSL_atomic_add
+OPENSSL_atomic_add:
+	.frame	$30,0,$26
+	.prologue 0
+1:	ldl_l	$0,0($16)
+	addl	$0,$17,$1
+	stl_c	$1,0($16)
+	beq	$1,1b
+	addl	$0,$17,$0
+	ret	($26)
+.end	OPENSSL_atomic_add
+
+.globl	OPENSSL_rdtsc
+.ent	OPENSSL_rdtsc
+OPENSSL_rdtsc:
+	.frame	$30,0,$26
+	.prologue 0
+	rpcc	$0
+	ret	($26)
+.end	OPENSSL_rdtsc
+
+.globl	OPENSSL_cleanse
+.ent	OPENSSL_cleanse
+OPENSSL_cleanse:
+	.frame	$30,0,$26
+	.prologue 0
+	beq	$17,.Ldone
+	and	$16,7,$0
+	bic	$17,7,$at
+	beq	$at,.Little
+	beq	$0,.Laligned
+
+.Little:
+	subq	$0,8,$0
+	ldq_u	$1,0($16)
+	mov	$16,$2
+.Lalign:
+	mskbl	$1,$16,$1
+	lda	$16,1($16)
+	subq	$17,1,$17
+	addq	$0,1,$0
+	beq	$17,.Lout
+	bne	$0,.Lalign
+.Lout:	stq_u	$1,0($2)
+	beq	$17,.Ldone
+	bic	$17,7,$at
+	beq	$at,.Little
+
+.Laligned:
+	stq	$31,0($16)
+	subq	$17,8,$17
+	lda	$16,8($16)
+	bic	$17,7,$at
+	bne	$at,.Laligned
+	bne	$17,.Little
+.Ldone: ret	($26)
+.end	OPENSSL_cleanse
+
+.globl	CRYPTO_memcmp
+.ent	CRYPTO_memcmp
+CRYPTO_memcmp:
+	.frame	$30,0,$26
+	.prologue 0
+	xor	$0,$0,$0
+	beq	$18,.Lno_data
+
+	xor	$1,$1,$1
+	nop
+.Loop_cmp:
+	ldq_u	$2,0($16)
+	subq	$18,1,$18
+	ldq_u	$3,0($17)
+	extbl	$2,$16,$2
+	lda	$16,1($16)
+	extbl	$3,$17,$3
+	lda	$17,1($17)
+	xor	$3,$2,$2
+	or	$2,$0,$0
+	bne	$18,.Loop_cmp
+
+	subq	$31,$0,$0
+	srl	$0,63,$0
+.Lno_data:
+	ret	($26)
+.end	CRYPTO_memcmp
+.globl	OPENSSL_instrument_bus
+.ent	OPENSSL_instrument_bus
+OPENSSL_instrument_bus:
+	.frame	$30,0,$26
+	.prologue 0
+	mov	$17,$0
+
+	rpcc	$20
+	mov	0,$21
+
+	ecb	($16)
+	ldl_l	$19,0($16)
+	addl	$21,$19,$19
+	mov	$19,$21
+	stl_c	$19,0($16)
+	stl	$21,0($16)
+
+.Loop:	rpcc	$19
+	subq	$19,$20,$21
+	mov	$19,$20
+
+	ecb	($16)
+	ldl_l	$19,0($16)
+	addl	$21,$19,$19
+	mov	$19,$21
+	stl_c	$19,0($16)
+	stl	$21,0($16)
+
+	subl	$17,1,$17
+	lda	$16,4($16)
+	bne	$17,.Loop
+
+	ret	($26)
+.end	OPENSSL_instrument_bus
+
+.globl	OPENSSL_instrument_bus2
+.ent	OPENSSL_instrument_bus2
+OPENSSL_instrument_bus2:
+	.frame	$30,0,$26
+	.prologue 0
+	mov	$17,$0
+
+	rpcc	$20
+	mov	0,$21
+
+	ecb	($16)
+	ldl_l	$19,0($16)
+	addl	$21,$19,$19
+	mov	$19,$21
+	stl_c	$19,0($16)
+	stl	$21,0($16)
+
+	rpcc	$19
+	subq	$19,$20,$21
+	mov	$19,$20
+	mov	$21,$22
+.Loop2:
+	ecb	($16)
+	ldl_l	$19,0($16)
+	addl	$21,$19,$19
+	mov	$19,$21
+	stl_c	$19,0($16)
+	stl	$21,0($16)
+
+	subl	$18,1,$18
+	beq	$18,.Ldone2
+
+	rpcc	$19
+	subq	$19,$20,$21
+	mov	$19,$20
+	subq	$22,$21,$19
+	mov	$21,$22
+	cmovne	$19,1,$19
+	subl	$17,$19,$17
+	s4addq	$19,$16,$16
+	bne	$17,.Loop2
+
+.Ldone2:
+	subl	$0,$17,$0
+	ret	($26)
+.end	OPENSSL_instrument_bus2
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/crypto.inc
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/crypto.inc:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/crypto.inc	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,10 @@
+.PATH.S: ${.PARSEDIR}
+
+ASMDIR:=${.PARSEDIR}
+
+CPUID_SRCS += alpha-mont.S alphacpuid.S
+CPUID = yes
+CPPFLAGS += -DOPENSSL_BN_ASM_MONT -I${ASMDIR}
+
+.include "../../crypto.inc"
+
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/ghash-alpha.S
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/ghash-alpha.S:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/ghash-alpha.S	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,576 @@
+#ifdef __linux__
+#include <asm/regdef.h>
+#else
+#include <asm.h>
+#include <regdef.h>
+#endif
+
+.text
+
+.set	noat
+.set	noreorder
+.globl	gcm_gmult_4bit
+.align	4
+.ent	gcm_gmult_4bit
+gcm_gmult_4bit:
+	.frame	sp,0,ra
+	.prologue 0
+
+	ldq	t11,8(a0)
+	ldq	t10,0(a0)
+
+	bsr	t0,picmeup
+	nop
+.align	4
+	extbl	t11,7,a4
+	and	a4,0xf0,a5
+	sll	a4,4,a4
+	and	a4,0xf0,a4
+
+	addq	a4,a1,a4
+	ldq	t9,8(a4)
+	addq	a5,a1,a5
+	ldq	t8,0(a4)
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	lda	v0,6(zero)
+	extbl	t11,6,a4
+
+	ldq	t6,8(a5)
+	s8addq	t12,AT,t12
+	ldq	t5,0(a5)
+	srl	t9,4,t9
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+	and	a4,0xf0,a5
+
+	xor	t6,t9,t9
+	sll	a4,4,a4
+	xor	t5,t8,t8
+	and	a4,0xf0,a4
+
+	addq	a4,a1,a4
+	ldq	t4,8(a4)
+	addq	a5,a1,a5
+	ldq	t3,0(a4)
+
+.Looplo1:
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	subq	v0,1,v0
+	srl	t9,4,t9
+
+	ldq	t6,8(a5)
+	xor	t7,t8,t8
+	ldq	t5,0(a5)
+	s8addq	t12,AT,t12
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+	extbl	t11,v0,a4
+
+	and	a4,0xf0,a5
+	xor	t3,t8,t8
+	xor	t4,t9,t9
+	sll	a4,4,a4
+
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	and	a4,0xf0,a4
+	srl	t9,4,t9
+
+	s8addq	t12,AT,t12
+	xor	t7,t8,t8
+	addq	a4,a1,a4
+	addq	a5,a1,a5
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	ldq	t4,8(a4)
+	xor	t0,t9,t9
+
+	xor	t6,t9,t9
+	xor	t5,t8,t8
+	ldq	t3,0(a4)
+	bne	v0,.Looplo1
+
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	lda	v0,7(zero)
+	srl	t9,4,t9
+
+	ldq	t6,8(a5)
+	xor	t7,t8,t8
+	ldq	t5,0(a5)
+	s8addq	t12,AT,t12
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+	extbl	t10,v0,a4
+
+	and	a4,0xf0,a5
+	xor	t3,t8,t8
+	xor	t4,t9,t9
+	sll	a4,4,a4
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	and	a4,0xf0,a4
+	srl	t9,4,t9
+
+	s8addq	t12,AT,t12
+	xor	t7,t8,t8
+	addq	a4,a1,a4
+	addq	a5,a1,a5
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	ldq	t4,8(a4)
+	xor	t0,t9,t9
+
+	xor	t6,t9,t9
+	xor	t5,t8,t8
+	ldq	t3,0(a4)
+	unop
+
+
+.Loophi1:
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	subq	v0,1,v0
+	srl	t9,4,t9
+
+	ldq	t6,8(a5)
+	xor	t7,t8,t8
+	ldq	t5,0(a5)
+	s8addq	t12,AT,t12
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+	extbl	t10,v0,a4
+
+	and	a4,0xf0,a5
+	xor	t3,t8,t8
+	xor	t4,t9,t9
+	sll	a4,4,a4
+
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	and	a4,0xf0,a4
+	srl	t9,4,t9
+
+	s8addq	t12,AT,t12
+	xor	t7,t8,t8
+	addq	a4,a1,a4
+	addq	a5,a1,a5
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	ldq	t4,8(a4)
+	xor	t0,t9,t9
+
+	xor	t6,t9,t9
+	xor	t5,t8,t8
+	ldq	t3,0(a4)
+	bne	v0,.Loophi1
+
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	srl	t9,4,t9
+
+	ldq	t6,8(a5)
+	xor	t7,t8,t8
+	ldq	t5,0(a5)
+	s8addq	t12,AT,t12
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+
+	xor	t4,t9,t9
+	xor	t3,t8,t8
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	srl	t9,4,t9
+
+	s8addq	t12,AT,t12
+	xor	t7,t8,t8
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t6,t9,t9
+	xor	t5,t8,t8
+	xor	t0,t9,t9
+	xor	t7,t8,t8
+	srl	t9,24,t0	# byte swap
+	srl	t9,8,t1
+
+	sll	t9,8,t2
+	sll	t9,24,t9
+	zapnot	t0,0x11,t0
+	zapnot	t1,0x22,t1
+
+	zapnot	t9,0x88,t9
+	or	t0,t1,t0
+	zapnot	t2,0x44,t2
+
+	or	t9,t0,t9
+	srl	t8,24,t0
+	srl	t8,8,t1
+
+	or	t9,t2,t9
+	sll	t8,8,t2
+	sll	t8,24,t8
+
+	srl	t9,32,t11
+	sll	t9,32,t9
+
+	zapnot	t0,0x11,t0
+	zapnot	t1,0x22,t1
+	or	t9,t11,t11
+
+	zapnot	t8,0x88,t8
+	or	t0,t1,t0
+	zapnot	t2,0x44,t2
+
+	or	t8,t0,t8
+	or	t8,t2,t8
+
+	srl	t8,32,t10
+	sll	t8,32,t8
+
+	or	t8,t10,t10
+	stq	t11,8(a0)
+	stq	t10,0(a0)
+
+	ret	(ra)
+.end	gcm_gmult_4bit
+.globl	gcm_ghash_4bit
+.align	4
+.ent	gcm_ghash_4bit
+gcm_ghash_4bit:
+	lda	sp,-32(sp)
+	stq	ra,0(sp)
+	stq	s0,8(sp)
+	stq	s1,16(sp)
+	.mask	0x04000600,-32
+	.frame	sp,32,ra
+	.prologue 0
+
+	ldq_u	s0,0(a2)
+	ldq_u	t3,7(a2)
+	ldq_u	s1,8(a2)
+	ldq_u	t4,15(a2)
+	ldq	t10,0(a0)
+	ldq	t11,8(a0)
+
+	bsr	t0,picmeup
+	nop
+
+.Louter:
+	extql	s0,a2,s0
+	extqh	t3,a2,t3
+	or	s0,t3,s0
+	lda	a2,16(a2)
+
+	extql	s1,a2,s1
+	extqh	t4,a2,t4
+	or	s1,t4,s1
+	subq	a3,16,a3
+
+	xor	t11,s1,t11
+	xor	t10,s0,t10
+.align	4
+	extbl	t11,7,a4
+	and	a4,0xf0,a5
+	sll	a4,4,a4
+	and	a4,0xf0,a4
+
+	addq	a4,a1,a4
+	ldq	t9,8(a4)
+	addq	a5,a1,a5
+	ldq	t8,0(a4)
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	lda	v0,6(zero)
+	extbl	t11,6,a4
+
+	ldq	t6,8(a5)
+	s8addq	t12,AT,t12
+	ldq	t5,0(a5)
+	srl	t9,4,t9
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+	and	a4,0xf0,a5
+
+	xor	t6,t9,t9
+	sll	a4,4,a4
+	xor	t5,t8,t8
+	and	a4,0xf0,a4
+
+	addq	a4,a1,a4
+	ldq	t4,8(a4)
+	addq	a5,a1,a5
+	ldq	t3,0(a4)
+
+.Looplo2:
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	subq	v0,1,v0
+	srl	t9,4,t9
+
+	ldq	t6,8(a5)
+	xor	t7,t8,t8
+	ldq	t5,0(a5)
+	s8addq	t12,AT,t12
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+	extbl	t11,v0,a4
+
+	and	a4,0xf0,a5
+	xor	t3,t8,t8
+	xor	t4,t9,t9
+	sll	a4,4,a4
+
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	and	a4,0xf0,a4
+	srl	t9,4,t9
+
+	s8addq	t12,AT,t12
+	xor	t7,t8,t8
+	addq	a4,a1,a4
+	addq	a5,a1,a5
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	ldq	t4,8(a4)
+	xor	t0,t9,t9
+
+	xor	t6,t9,t9
+	xor	t5,t8,t8
+	ldq	t3,0(a4)
+	bne	v0,.Looplo2
+
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	lda	v0,7(zero)
+	srl	t9,4,t9
+
+	ldq	t6,8(a5)
+	xor	t7,t8,t8
+	ldq	t5,0(a5)
+	s8addq	t12,AT,t12
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+	extbl	t10,v0,a4
+
+	and	a4,0xf0,a5
+	xor	t3,t8,t8
+	xor	t4,t9,t9
+	sll	a4,4,a4
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	and	a4,0xf0,a4
+	srl	t9,4,t9
+
+	s8addq	t12,AT,t12
+	xor	t7,t8,t8
+	addq	a4,a1,a4
+	addq	a5,a1,a5
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	ldq	t4,8(a4)
+	xor	t0,t9,t9
+
+	xor	t6,t9,t9
+	xor	t5,t8,t8
+	ldq	t3,0(a4)
+	unop
+
+
+.Loophi2:
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	subq	v0,1,v0
+	srl	t9,4,t9
+
+	ldq	t6,8(a5)
+	xor	t7,t8,t8
+	ldq	t5,0(a5)
+	s8addq	t12,AT,t12
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+	extbl	t10,v0,a4
+
+	and	a4,0xf0,a5
+	xor	t3,t8,t8
+	xor	t4,t9,t9
+	sll	a4,4,a4
+
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	and	a4,0xf0,a4
+	srl	t9,4,t9
+
+	s8addq	t12,AT,t12
+	xor	t7,t8,t8
+	addq	a4,a1,a4
+	addq	a5,a1,a5
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	ldq	t4,8(a4)
+	xor	t0,t9,t9
+
+	xor	t6,t9,t9
+	xor	t5,t8,t8
+	ldq	t3,0(a4)
+	bne	v0,.Loophi2
+
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	srl	t9,4,t9
+
+	ldq	t6,8(a5)
+	xor	t7,t8,t8
+	ldq	t5,0(a5)
+	s8addq	t12,AT,t12
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t0,t9,t9
+
+	xor	t4,t9,t9
+	xor	t3,t8,t8
+
+	and	t9,0x0f,t12
+	sll	t8,60,t0
+	srl	t9,4,t9
+
+	s8addq	t12,AT,t12
+	xor	t7,t8,t8
+
+	ldq	t7,0(t12)
+	srl	t8,4,t8
+	xor	t6,t9,t9
+	xor	t5,t8,t8
+	xor	t0,t9,t9
+	xor	t7,t8,t8
+	srl	t9,24,t0	# byte swap
+	srl	t9,8,t1
+
+	sll	t9,8,t2
+	sll	t9,24,t9
+	zapnot	t0,0x11,t0
+	zapnot	t1,0x22,t1
+
+	zapnot	t9,0x88,t9
+	or	t0,t1,t0
+	zapnot	t2,0x44,t2
+
+	or	t9,t0,t9
+	srl	t8,24,t0
+	srl	t8,8,t1
+
+	or	t9,t2,t9
+	sll	t8,8,t2
+	sll	t8,24,t8
+
+	srl	t9,32,t11
+	sll	t9,32,t9
+	beq	a3,.Ldone
+
+	zapnot	t0,0x11,t0
+	zapnot	t1,0x22,t1
+	or	t9,t11,t11
+	ldq_u	s0,0(a2)
+
+	zapnot	t8,0x88,t8
+	or	t0,t1,t0
+	zapnot	t2,0x44,t2
+	ldq_u	t3,7(a2)
+
+	or	t8,t0,t8
+	or	t8,t2,t8
+	ldq_u	s1,8(a2)
+	ldq_u	t4,15(a2)
+
+	srl	t8,32,t10
+	sll	t8,32,t8
+
+	or	t8,t10,t10
+	br	zero,.Louter
+
+.Ldone:
+	zapnot	t0,0x11,t0
+	zapnot	t1,0x22,t1
+	or	t9,t11,t11
+
+	zapnot	t8,0x88,t8
+	or	t0,t1,t0
+	zapnot	t2,0x44,t2
+
+	or	t8,t0,t8
+	or	t8,t2,t8
+
+	srl	t8,32,t10
+	sll	t8,32,t8
+
+	or	t8,t10,t10
+
+	stq	t11,8(a0)
+	stq	t10,0(a0)
+
+	.set	noreorder
+	/*ldq	ra,0(sp)*/
+	ldq	s0,8(sp)
+	ldq	s1,16(sp)
+	lda	sp,32(sp)
+	ret	(ra)
+.end	gcm_ghash_4bit
+
+.align	4
+.ent	picmeup
+picmeup:
+	.frame	sp,0,t0
+	.prologue 0
+	br	AT,.Lpic
+.Lpic:	lda	AT,12(AT)
+	ret	(t0)
+.end	picmeup
+	nop
+rem_4bit:
+	.long	0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16
+	.long	0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16
+	.long	0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16
+	.long	0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16
+.ascii	"GHASH for Alpha, CRYPTOGAMS by <ap...@openssl.org>"
+.align	4
+
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/modes.inc
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/modes.inc:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/modes.inc	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,6 @@
+.PATH.S: ${.PARSEDIR}
+
+MODES_SRCS += ghash-alpha.S
+MODESCPPFLAGS = -DGHASH_ASM
+
+.include "../../modes.inc"
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/regdef.h
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/regdef.h:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/regdef.h	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,49 @@
+#define v0      $0
+
+#define t0      $1
+#define t1      $2
+#define t2      $3
+#define t3      $4
+#define t4      $5
+#define t5      $6
+#define t6      $7
+#define t7      $8
+
+#define s0      $9
+#define s1      $10
+#define s2      $11
+#define s3      $12
+#define s4      $13
+#define s5      $14
+#define s6      $15
+#define fp      s6
+
+#define a0      $16
+#define a1      $17
+#define a2      $18
+#define a3      $19
+#define a4      $20
+#define a5      $21
+
+#define t8      $22
+#define t9      $23
+#define t10     $24
+#define t11     $25
+#define ra      $26
+#define t12     $27
+
+#define pv      t12
+#define AT      $at
+#define gp      $29
+#define sp      $30
+#define zero    $31
+
+#define ta 	t10
+#define tb	t11
+#define tc	t12
+#define td	AT
+
+#define te	a5
+#define tf	a4
+#define tg	a3
+#define th	v0
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/sha.inc
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/sha.inc:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/sha.inc	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,6 @@
+.PATH.S: ${.PARSEDIR}
+
+SHA_SRCS = sha1-alpha.S
+SHACPPFLAGS = -DSHA1_ASM
+
+.include "../../sha.inc"
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/sha1-alpha.S
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/sha1-alpha.S:1.1
--- /dev/null	Sat Mar  3 23:00:45 2018
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/alpha/sha1-alpha.S	Sat Mar  3 23:00:45 2018
@@ -0,0 +1,2175 @@
+#ifdef __linux__
+#include <asm/regdef.h>
+#else
+#include <asm.h>
+#include <regdef.h>
+#endif
+
+.text
+
+.set	noat
+.set	noreorder
+.globl	sha1_block_data_order
+.align	5
+.ent	sha1_block_data_order
+sha1_block_data_order:
+	lda	sp,-64(sp)
+	stq	ra,0(sp)
+	stq	s0,8(sp)
+	stq	s1,16(sp)
+	stq	s2,24(sp)
+	stq	s3,32(sp)
+	stq	s4,40(sp)
+	stq	s5,48(sp)
+	stq	fp,56(sp)
+	.mask	0x0400fe00,-64
+	.frame	sp,64,ra
+	.prologue 0
+
+	ldl	a3,0(a0)
+	ldl	a4,4(a0)
+	sll	a2,6,a2
+	ldl	a5,8(a0)
+	ldl	t8,12(a0)
+	ldl	t9,16(a0)
+	addq	a1,a2,a2
+
+.Lloop:
+	.set	noreorder
+	ldah	AT,23170(zero)
+	zapnot	a4,0xf,a4
+	lda	AT,31129(AT)	# K_00_19
+	ldq_u	$0,0+0(a1)
+	ldq_u	$1,0+7(a1)
+	ldq_u	$2,(0+2)*4+0(a1)
+	ldq_u	$3,(0+2)*4+7(a1)
+	extql	$0,a1,$0
+	extqh	$1,a1,$1
+
+	or	$1,$0,$0	# pair of 32-bit values are fetched
+
+	srl	$0,24,t10		# vectorized byte swap
+	srl	$0,8,ra
+
+	sll	$0,8,t12
+	sll	$0,24,$0
+	zapnot	t10,0x11,t10
+	zapnot	ra,0x22,ra
+
+	zapnot	$0,0x88,$0
+	or	t10,ra,t10
+	zapnot	t12,0x44,t12
+	sll	a3,5,t11
+
+	or	$0,t10,$0
+	addl	AT,t9,t9
+	and	a4,a5,ra
+	zapnot	a3,0xf,a3
+
+	or	$0,t12,$0
+	srl	a3,27,t10
+	bic	t8,a4,t12
+	sll	a4,30,a4
+
+	extll	$0,4,$1	# extract upper half
+	or	ra,t12,ra
+	addl	$0,t9,t9
+
+	addl	t11,t9,t9
+	srl	a4,32,t12
+	zapnot	$0,0xf,$0
+
+	addl	t10,t9,t9
+	addl	ra,t9,t9
+	or	t12,a4,a4
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	and	a3,a4,ra
+	zapnot	t9,0xf,t9
+
+	srl	t9,27,t10
+	addl	$1,t8,t8
+	bic	a5,a3,t12
+	sll	a3,30,a3
+
+	or	ra,t12,ra
+	addl	t11,t8,t8
+	srl	a3,32,t12
+	zapnot	$1,0xf,$1
+
+	addl	t10,t8,t8
+	addl	ra,t8,t8
+	or	t12,a3,a3
+	ldq_u	$4,(2+2)*4+0(a1)
+	ldq_u	$5,(2+2)*4+7(a1)
+	extql	$2,a1,$2
+	extqh	$3,a1,$3
+
+	or	$3,$2,$2	# pair of 32-bit values are fetched
+
+	srl	$2,24,t10		# vectorized byte swap
+	srl	$2,8,ra
+
+	sll	$2,8,t12
+	sll	$2,24,$2
+	zapnot	t10,0x11,t10
+	zapnot	ra,0x22,ra
+
+	zapnot	$2,0x88,$2
+	or	t10,ra,t10
+	zapnot	t12,0x44,t12
+	sll	t8,5,t11
+
+	or	$2,t10,$2
+	addl	AT,a5,a5
+	and	t9,a3,ra
+	zapnot	t8,0xf,t8
+
+	or	$2,t12,$2
+	srl	t8,27,t10
+	bic	a4,t9,t12
+	sll	t9,30,t9
+
+	extll	$2,4,$3	# extract upper half
+	or	ra,t12,ra
+	addl	$2,a5,a5
+
+	addl	t11,a5,a5
+	srl	t9,32,t12
+	zapnot	$2,0xf,$2
+
+	addl	t10,a5,a5
+	addl	ra,a5,a5
+	or	t12,t9,t9
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	and	t8,t9,ra
+	zapnot	a5,0xf,a5
+
+	srl	a5,27,t10
+	addl	$3,a4,a4
+	bic	a3,t8,t12
+	sll	t8,30,t8
+
+	or	ra,t12,ra
+	addl	t11,a4,a4
+	srl	t8,32,t12
+	zapnot	$3,0xf,$3
+
+	addl	t10,a4,a4
+	addl	ra,a4,a4
+	or	t12,t8,t8
+	ldq_u	$6,(4+2)*4+0(a1)
+	ldq_u	$7,(4+2)*4+7(a1)
+	extql	$4,a1,$4
+	extqh	$5,a1,$5
+
+	or	$5,$4,$4	# pair of 32-bit values are fetched
+
+	srl	$4,24,t10		# vectorized byte swap
+	srl	$4,8,ra
+
+	sll	$4,8,t12
+	sll	$4,24,$4
+	zapnot	t10,0x11,t10
+	zapnot	ra,0x22,ra
+
+	zapnot	$4,0x88,$4
+	or	t10,ra,t10
+	zapnot	t12,0x44,t12
+	sll	a4,5,t11
+
+	or	$4,t10,$4
+	addl	AT,a3,a3
+	and	a5,t8,ra
+	zapnot	a4,0xf,a4
+
+	or	$4,t12,$4
+	srl	a4,27,t10
+	bic	t9,a5,t12
+	sll	a5,30,a5
+
+	extll	$4,4,$5	# extract upper half
+	or	ra,t12,ra
+	addl	$4,a3,a3
+
+	addl	t11,a3,a3
+	srl	a5,32,t12
+	zapnot	$4,0xf,$4
+
+	addl	t10,a3,a3
+	addl	ra,a3,a3
+	or	t12,a5,a5
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	and	a4,a5,ra
+	zapnot	a3,0xf,a3
+
+	srl	a3,27,t10
+	addl	$5,t9,t9
+	bic	t8,a4,t12
+	sll	a4,30,a4
+
+	or	ra,t12,ra
+	addl	t11,t9,t9
+	srl	a4,32,t12
+	zapnot	$5,0xf,$5
+
+	addl	t10,t9,t9
+	addl	ra,t9,t9
+	or	t12,a4,a4
+	ldq_u	$8,(6+2)*4+0(a1)
+	ldq_u	$9,(6+2)*4+7(a1)
+	extql	$6,a1,$6
+	extqh	$7,a1,$7
+
+	or	$7,$6,$6	# pair of 32-bit values are fetched
+
+	srl	$6,24,t10		# vectorized byte swap
+	srl	$6,8,ra
+
+	sll	$6,8,t12
+	sll	$6,24,$6
+	zapnot	t10,0x11,t10
+	zapnot	ra,0x22,ra
+
+	zapnot	$6,0x88,$6
+	or	t10,ra,t10
+	zapnot	t12,0x44,t12
+	sll	t9,5,t11
+
+	or	$6,t10,$6
+	addl	AT,t8,t8
+	and	a3,a4,ra
+	zapnot	t9,0xf,t9
+
+	or	$6,t12,$6
+	srl	t9,27,t10
+	bic	a5,a3,t12
+	sll	a3,30,a3
+
+	extll	$6,4,$7	# extract upper half
+	or	ra,t12,ra
+	addl	$6,t8,t8
+
+	addl	t11,t8,t8
+	srl	a3,32,t12
+	zapnot	$6,0xf,$6
+
+	addl	t10,t8,t8
+	addl	ra,t8,t8
+	or	t12,a3,a3
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	and	t9,a3,ra
+	zapnot	t8,0xf,t8
+
+	srl	t8,27,t10
+	addl	$7,a5,a5
+	bic	a4,t9,t12
+	sll	t9,30,t9
+
+	or	ra,t12,ra
+	addl	t11,a5,a5
+	srl	t9,32,t12
+	zapnot	$7,0xf,$7
+
+	addl	t10,a5,a5
+	addl	ra,a5,a5
+	or	t12,t9,t9
+	ldq_u	$10,(8+2)*4+0(a1)
+	ldq_u	$11,(8+2)*4+7(a1)
+	extql	$8,a1,$8
+	extqh	$9,a1,$9
+
+	or	$9,$8,$8	# pair of 32-bit values are fetched
+
+	srl	$8,24,t10		# vectorized byte swap
+	srl	$8,8,ra
+
+	sll	$8,8,t12
+	sll	$8,24,$8
+	zapnot	t10,0x11,t10
+	zapnot	ra,0x22,ra
+
+	zapnot	$8,0x88,$8
+	or	t10,ra,t10
+	zapnot	t12,0x44,t12
+	sll	a5,5,t11
+
+	or	$8,t10,$8
+	addl	AT,a4,a4
+	and	t8,t9,ra
+	zapnot	a5,0xf,a5
+
+	or	$8,t12,$8
+	srl	a5,27,t10
+	bic	a3,t8,t12
+	sll	t8,30,t8
+
+	extll	$8,4,$9	# extract upper half
+	or	ra,t12,ra
+	addl	$8,a4,a4
+
+	addl	t11,a4,a4
+	srl	t8,32,t12
+	zapnot	$8,0xf,$8
+
+	addl	t10,a4,a4
+	addl	ra,a4,a4
+	or	t12,t8,t8
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	and	a5,t8,ra
+	zapnot	a4,0xf,a4
+
+	srl	a4,27,t10
+	addl	$9,a3,a3
+	bic	t9,a5,t12
+	sll	a5,30,a5
+
+	or	ra,t12,ra
+	addl	t11,a3,a3
+	srl	a5,32,t12
+	zapnot	$9,0xf,$9
+
+	addl	t10,a3,a3
+	addl	ra,a3,a3
+	or	t12,a5,a5
+	ldq_u	$12,(10+2)*4+0(a1)
+	ldq_u	$13,(10+2)*4+7(a1)
+	extql	$10,a1,$10
+	extqh	$11,a1,$11
+
+	or	$11,$10,$10	# pair of 32-bit values are fetched
+
+	srl	$10,24,t10		# vectorized byte swap
+	srl	$10,8,ra
+
+	sll	$10,8,t12
+	sll	$10,24,$10
+	zapnot	t10,0x11,t10
+	zapnot	ra,0x22,ra
+
+	zapnot	$10,0x88,$10
+	or	t10,ra,t10
+	zapnot	t12,0x44,t12
+	sll	a3,5,t11
+
+	or	$10,t10,$10
+	addl	AT,t9,t9
+	and	a4,a5,ra
+	zapnot	a3,0xf,a3
+
+	or	$10,t12,$10
+	srl	a3,27,t10
+	bic	t8,a4,t12
+	sll	a4,30,a4
+
+	extll	$10,4,$11	# extract upper half
+	or	ra,t12,ra
+	addl	$10,t9,t9
+
+	addl	t11,t9,t9
+	srl	a4,32,t12
+	zapnot	$10,0xf,$10
+
+	addl	t10,t9,t9
+	addl	ra,t9,t9
+	or	t12,a4,a4
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	and	a3,a4,ra
+	zapnot	t9,0xf,t9
+
+	srl	t9,27,t10
+	addl	$11,t8,t8
+	bic	a5,a3,t12
+	sll	a3,30,a3
+
+	or	ra,t12,ra
+	addl	t11,t8,t8
+	srl	a3,32,t12
+	zapnot	$11,0xf,$11
+
+	addl	t10,t8,t8
+	addl	ra,t8,t8
+	or	t12,a3,a3
+	ldq_u	$14,(12+2)*4+0(a1)
+	ldq_u	$15,(12+2)*4+7(a1)
+	extql	$12,a1,$12
+	extqh	$13,a1,$13
+
+	or	$13,$12,$12	# pair of 32-bit values are fetched
+
+	srl	$12,24,t10		# vectorized byte swap
+	srl	$12,8,ra
+
+	sll	$12,8,t12
+	sll	$12,24,$12
+	zapnot	t10,0x11,t10
+	zapnot	ra,0x22,ra
+
+	zapnot	$12,0x88,$12
+	or	t10,ra,t10
+	zapnot	t12,0x44,t12
+	sll	t8,5,t11
+
+	or	$12,t10,$12
+	addl	AT,a5,a5
+	and	t9,a3,ra
+	zapnot	t8,0xf,t8
+
+	or	$12,t12,$12
+	srl	t8,27,t10
+	bic	a4,t9,t12
+	sll	t9,30,t9
+
+	extll	$12,4,$13	# extract upper half
+	or	ra,t12,ra
+	addl	$12,a5,a5
+
+	addl	t11,a5,a5
+	srl	t9,32,t12
+	zapnot	$12,0xf,$12
+
+	addl	t10,a5,a5
+	addl	ra,a5,a5
+	or	t12,t9,t9
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	and	t8,t9,ra
+	zapnot	a5,0xf,a5
+
+	srl	a5,27,t10
+	addl	$13,a4,a4
+	bic	a3,t8,t12
+	sll	t8,30,t8
+
+	or	ra,t12,ra
+	addl	t11,a4,a4
+	srl	t8,32,t12
+	zapnot	$13,0xf,$13
+
+	addl	t10,a4,a4
+	addl	ra,a4,a4
+	or	t12,t8,t8
+	extql	$14,a1,$14
+	extqh	$15,a1,$15
+
+	or	$15,$14,$14	# pair of 32-bit values are fetched
+
+	srl	$14,24,t10		# vectorized byte swap
+	srl	$14,8,ra
+
+	sll	$14,8,t12
+	sll	$14,24,$14
+	zapnot	t10,0x11,t10
+	zapnot	ra,0x22,ra
+
+	zapnot	$14,0x88,$14
+	or	t10,ra,t10
+	zapnot	t12,0x44,t12
+	sll	a4,5,t11
+
+	or	$14,t10,$14
+	addl	AT,a3,a3
+	and	a5,t8,ra
+	zapnot	a4,0xf,a4
+
+	or	$14,t12,$14
+	srl	a4,27,t10
+	bic	t9,a5,t12
+	sll	a5,30,a5
+
+	extll	$14,4,$15	# extract upper half
+	or	ra,t12,ra
+	addl	$14,a3,a3
+
+	addl	t11,a3,a3
+	srl	a5,32,t12
+	zapnot	$14,0xf,$14
+
+	addl	t10,a3,a3
+	addl	ra,a3,a3
+	or	t12,a5,a5
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	and	a4,a5,ra
+	xor	$2,$0,$0
+
+	zapnot	a3,0xf,a3
+	addl	$15,t9,t9
+	bic	t8,a4,t12
+	xor	$8,$0,$0
+
+	srl	a3,27,t10
+	addl	t11,t9,t9
+	or	ra,t12,ra
+	xor	$13,$0,$0
+
+	sll	a4,30,a4
+	addl	t10,t9,t9
+	srl	$0,31,t11
+
+	addl	ra,t9,t9
+	srl	a4,32,t12
+	addl	$0,$0,$0
+
+	or	t12,a4,a4
+	zapnot	$15,0xf,$15
+	or	t11,$0,$0
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	and	a3,a4,ra
+	xor	$3,$1,$1
+
+	zapnot	t9,0xf,t9
+	addl	$0,t8,t8
+	bic	a5,a3,t12
+	xor	$9,$1,$1
+
+	srl	t9,27,t10
+	addl	t11,t8,t8
+	or	ra,t12,ra
+	xor	$14,$1,$1
+
+	sll	a3,30,a3
+	addl	t10,t8,t8
+	srl	$1,31,t11
+
+	addl	ra,t8,t8
+	srl	a3,32,t12
+	addl	$1,$1,$1
+
+	or	t12,a3,a3
+	zapnot	$0,0xf,$0
+	or	t11,$1,$1
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	and	t9,a3,ra
+	xor	$4,$2,$2
+
+	zapnot	t8,0xf,t8
+	addl	$1,a5,a5
+	bic	a4,t9,t12
+	xor	$10,$2,$2
+
+	srl	t8,27,t10
+	addl	t11,a5,a5
+	or	ra,t12,ra
+	xor	$15,$2,$2
+
+	sll	t9,30,t9
+	addl	t10,a5,a5
+	srl	$2,31,t11
+
+	addl	ra,a5,a5
+	srl	t9,32,t12
+	addl	$2,$2,$2
+
+	or	t12,t9,t9
+	zapnot	$1,0xf,$1
+	or	t11,$2,$2
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	and	t8,t9,ra
+	xor	$5,$3,$3
+
+	zapnot	a5,0xf,a5
+	addl	$2,a4,a4
+	bic	a3,t8,t12
+	xor	$11,$3,$3
+
+	srl	a5,27,t10
+	addl	t11,a4,a4
+	or	ra,t12,ra
+	xor	$0,$3,$3
+
+	sll	t8,30,t8
+	addl	t10,a4,a4
+	srl	$3,31,t11
+
+	addl	ra,a4,a4
+	srl	t8,32,t12
+	addl	$3,$3,$3
+
+	or	t12,t8,t8
+	zapnot	$2,0xf,$2
+	or	t11,$3,$3
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	and	a5,t8,ra
+	xor	$6,$4,$4
+
+	zapnot	a4,0xf,a4
+	addl	$3,a3,a3
+	bic	t9,a5,t12
+	xor	$12,$4,$4
+
+	srl	a4,27,t10
+	addl	t11,a3,a3
+	or	ra,t12,ra
+	xor	$1,$4,$4
+
+	sll	a5,30,a5
+	addl	t10,a3,a3
+	srl	$4,31,t11
+
+	addl	ra,a3,a3
+	srl	a5,32,t12
+	addl	$4,$4,$4
+
+	or	t12,a5,a5
+	zapnot	$3,0xf,$3
+	or	t11,$4,$4
+	ldah	AT,28378(zero)
+	lda	AT,-5215(AT)	# K_20_39
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$7,$5,$5
+
+	sll	a4,30,t12
+	addl	t11,t9,t9
+	xor	a4,a5,ra
+	xor	$13,$5,$5
+
+	srl	a4,2,a4
+	addl	$4,t9,t9
+	xor	t8,ra,ra
+	xor	$2,$5,$5
+
+	srl	$5,31,t11
+	addl	ra,t9,t9
+	srl	a3,27,t10
+	addl	$5,$5,$5
+
+	or	t12,a4,a4
+	addl	t10,t9,t9
+	or	t11,$5,$5
+	zapnot	$4,0xf,$4
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$8,$6,$6
+
+	sll	a3,30,t12
+	addl	t11,t8,t8
+	xor	a3,a4,ra
+	xor	$14,$6,$6
+
+	srl	a3,2,a3
+	addl	$5,t8,t8
+	xor	a5,ra,ra
+	xor	$3,$6,$6
+
+	srl	$6,31,t11
+	addl	ra,t8,t8
+	srl	t9,27,t10
+	addl	$6,$6,$6
+
+	or	t12,a3,a3
+	addl	t10,t8,t8
+	or	t11,$6,$6
+	zapnot	$5,0xf,$5
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$9,$7,$7
+
+	sll	t9,30,t12
+	addl	t11,a5,a5
+	xor	t9,a3,ra
+	xor	$15,$7,$7
+
+	srl	t9,2,t9
+	addl	$6,a5,a5
+	xor	a4,ra,ra
+	xor	$4,$7,$7
+
+	srl	$7,31,t11
+	addl	ra,a5,a5
+	srl	t8,27,t10
+	addl	$7,$7,$7
+
+	or	t12,t9,t9
+	addl	t10,a5,a5
+	or	t11,$7,$7
+	zapnot	$6,0xf,$6
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$10,$8,$8
+
+	sll	t8,30,t12
+	addl	t11,a4,a4
+	xor	t8,t9,ra
+	xor	$0,$8,$8
+
+	srl	t8,2,t8
+	addl	$7,a4,a4
+	xor	a3,ra,ra
+	xor	$5,$8,$8
+
+	srl	$8,31,t11
+	addl	ra,a4,a4
+	srl	a5,27,t10
+	addl	$8,$8,$8
+
+	or	t12,t8,t8
+	addl	t10,a4,a4
+	or	t11,$8,$8
+	zapnot	$7,0xf,$7
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$11,$9,$9
+
+	sll	a5,30,t12
+	addl	t11,a3,a3
+	xor	a5,t8,ra
+	xor	$1,$9,$9
+
+	srl	a5,2,a5
+	addl	$8,a3,a3
+	xor	t9,ra,ra
+	xor	$6,$9,$9
+
+	srl	$9,31,t11
+	addl	ra,a3,a3
+	srl	a4,27,t10
+	addl	$9,$9,$9
+
+	or	t12,a5,a5
+	addl	t10,a3,a3
+	or	t11,$9,$9
+	zapnot	$8,0xf,$8
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$12,$10,$10
+
+	sll	a4,30,t12
+	addl	t11,t9,t9
+	xor	a4,a5,ra
+	xor	$2,$10,$10
+
+	srl	a4,2,a4
+	addl	$9,t9,t9
+	xor	t8,ra,ra
+	xor	$7,$10,$10
+
+	srl	$10,31,t11
+	addl	ra,t9,t9
+	srl	a3,27,t10
+	addl	$10,$10,$10
+
+	or	t12,a4,a4
+	addl	t10,t9,t9
+	or	t11,$10,$10
+	zapnot	$9,0xf,$9
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$13,$11,$11
+
+	sll	a3,30,t12
+	addl	t11,t8,t8
+	xor	a3,a4,ra
+	xor	$3,$11,$11
+
+	srl	a3,2,a3
+	addl	$10,t8,t8
+	xor	a5,ra,ra
+	xor	$8,$11,$11
+
+	srl	$11,31,t11
+	addl	ra,t8,t8
+	srl	t9,27,t10
+	addl	$11,$11,$11
+
+	or	t12,a3,a3
+	addl	t10,t8,t8
+	or	t11,$11,$11
+	zapnot	$10,0xf,$10
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$14,$12,$12
+
+	sll	t9,30,t12
+	addl	t11,a5,a5
+	xor	t9,a3,ra
+	xor	$4,$12,$12
+
+	srl	t9,2,t9
+	addl	$11,a5,a5
+	xor	a4,ra,ra
+	xor	$9,$12,$12
+
+	srl	$12,31,t11
+	addl	ra,a5,a5
+	srl	t8,27,t10
+	addl	$12,$12,$12
+
+	or	t12,t9,t9
+	addl	t10,a5,a5
+	or	t11,$12,$12
+	zapnot	$11,0xf,$11
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$15,$13,$13
+
+	sll	t8,30,t12
+	addl	t11,a4,a4
+	xor	t8,t9,ra
+	xor	$5,$13,$13
+
+	srl	t8,2,t8
+	addl	$12,a4,a4
+	xor	a3,ra,ra
+	xor	$10,$13,$13
+
+	srl	$13,31,t11
+	addl	ra,a4,a4
+	srl	a5,27,t10
+	addl	$13,$13,$13
+
+	or	t12,t8,t8
+	addl	t10,a4,a4
+	or	t11,$13,$13
+	zapnot	$12,0xf,$12
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$0,$14,$14
+
+	sll	a5,30,t12
+	addl	t11,a3,a3
+	xor	a5,t8,ra
+	xor	$6,$14,$14
+
+	srl	a5,2,a5
+	addl	$13,a3,a3
+	xor	t9,ra,ra
+	xor	$11,$14,$14
+
+	srl	$14,31,t11
+	addl	ra,a3,a3
+	srl	a4,27,t10
+	addl	$14,$14,$14
+
+	or	t12,a5,a5
+	addl	t10,a3,a3
+	or	t11,$14,$14
+	zapnot	$13,0xf,$13
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$1,$15,$15
+
+	sll	a4,30,t12
+	addl	t11,t9,t9
+	xor	a4,a5,ra
+	xor	$7,$15,$15
+
+	srl	a4,2,a4
+	addl	$14,t9,t9
+	xor	t8,ra,ra
+	xor	$12,$15,$15
+
+	srl	$15,31,t11
+	addl	ra,t9,t9
+	srl	a3,27,t10
+	addl	$15,$15,$15
+
+	or	t12,a4,a4
+	addl	t10,t9,t9
+	or	t11,$15,$15
+	zapnot	$14,0xf,$14
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$2,$0,$0
+
+	sll	a3,30,t12
+	addl	t11,t8,t8
+	xor	a3,a4,ra
+	xor	$8,$0,$0
+
+	srl	a3,2,a3
+	addl	$15,t8,t8
+	xor	a5,ra,ra
+	xor	$13,$0,$0
+
+	srl	$0,31,t11
+	addl	ra,t8,t8
+	srl	t9,27,t10
+	addl	$0,$0,$0
+
+	or	t12,a3,a3
+	addl	t10,t8,t8
+	or	t11,$0,$0
+	zapnot	$15,0xf,$15
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$3,$1,$1
+
+	sll	t9,30,t12
+	addl	t11,a5,a5
+	xor	t9,a3,ra
+	xor	$9,$1,$1
+
+	srl	t9,2,t9
+	addl	$0,a5,a5
+	xor	a4,ra,ra
+	xor	$14,$1,$1
+
+	srl	$1,31,t11
+	addl	ra,a5,a5
+	srl	t8,27,t10
+	addl	$1,$1,$1
+
+	or	t12,t9,t9
+	addl	t10,a5,a5
+	or	t11,$1,$1
+	zapnot	$0,0xf,$0
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$4,$2,$2
+
+	sll	t8,30,t12
+	addl	t11,a4,a4
+	xor	t8,t9,ra
+	xor	$10,$2,$2
+
+	srl	t8,2,t8
+	addl	$1,a4,a4
+	xor	a3,ra,ra
+	xor	$15,$2,$2
+
+	srl	$2,31,t11
+	addl	ra,a4,a4
+	srl	a5,27,t10
+	addl	$2,$2,$2
+
+	or	t12,t8,t8
+	addl	t10,a4,a4
+	or	t11,$2,$2
+	zapnot	$1,0xf,$1
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$5,$3,$3
+
+	sll	a5,30,t12
+	addl	t11,a3,a3
+	xor	a5,t8,ra
+	xor	$11,$3,$3
+
+	srl	a5,2,a5
+	addl	$2,a3,a3
+	xor	t9,ra,ra
+	xor	$0,$3,$3
+
+	srl	$3,31,t11
+	addl	ra,a3,a3
+	srl	a4,27,t10
+	addl	$3,$3,$3
+
+	or	t12,a5,a5
+	addl	t10,a3,a3
+	or	t11,$3,$3
+	zapnot	$2,0xf,$2
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$6,$4,$4
+
+	sll	a4,30,t12
+	addl	t11,t9,t9
+	xor	a4,a5,ra
+	xor	$12,$4,$4
+
+	srl	a4,2,a4
+	addl	$3,t9,t9
+	xor	t8,ra,ra
+	xor	$1,$4,$4
+
+	srl	$4,31,t11
+	addl	ra,t9,t9
+	srl	a3,27,t10
+	addl	$4,$4,$4
+
+	or	t12,a4,a4
+	addl	t10,t9,t9
+	or	t11,$4,$4
+	zapnot	$3,0xf,$3
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$7,$5,$5
+
+	sll	a3,30,t12
+	addl	t11,t8,t8
+	xor	a3,a4,ra
+	xor	$13,$5,$5
+
+	srl	a3,2,a3
+	addl	$4,t8,t8
+	xor	a5,ra,ra
+	xor	$2,$5,$5
+
+	srl	$5,31,t11
+	addl	ra,t8,t8
+	srl	t9,27,t10
+	addl	$5,$5,$5
+
+	or	t12,a3,a3
+	addl	t10,t8,t8
+	or	t11,$5,$5
+	zapnot	$4,0xf,$4
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$8,$6,$6
+
+	sll	t9,30,t12
+	addl	t11,a5,a5
+	xor	t9,a3,ra
+	xor	$14,$6,$6
+
+	srl	t9,2,t9
+	addl	$5,a5,a5
+	xor	a4,ra,ra
+	xor	$3,$6,$6
+
+	srl	$6,31,t11
+	addl	ra,a5,a5
+	srl	t8,27,t10
+	addl	$6,$6,$6
+
+	or	t12,t9,t9
+	addl	t10,a5,a5
+	or	t11,$6,$6
+	zapnot	$5,0xf,$5
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$9,$7,$7
+
+	sll	t8,30,t12
+	addl	t11,a4,a4
+	xor	t8,t9,ra
+	xor	$15,$7,$7
+
+	srl	t8,2,t8
+	addl	$6,a4,a4
+	xor	a3,ra,ra
+	xor	$4,$7,$7
+
+	srl	$7,31,t11
+	addl	ra,a4,a4
+	srl	a5,27,t10
+	addl	$7,$7,$7
+
+	or	t12,t8,t8
+	addl	t10,a4,a4
+	or	t11,$7,$7
+	zapnot	$6,0xf,$6
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$10,$8,$8
+
+	sll	a5,30,t12
+	addl	t11,a3,a3
+	xor	a5,t8,ra
+	xor	$0,$8,$8
+
+	srl	a5,2,a5
+	addl	$7,a3,a3
+	xor	t9,ra,ra
+	xor	$5,$8,$8
+
+	srl	$8,31,t11
+	addl	ra,a3,a3
+	srl	a4,27,t10
+	addl	$8,$8,$8
+
+	or	t12,a5,a5
+	addl	t10,a3,a3
+	or	t11,$8,$8
+	zapnot	$7,0xf,$7
+	ldah	AT,-28900(zero)
+	lda	AT,-17188(AT)	# K_40_59
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$11,$9,$9
+
+	srl	a3,27,t10
+	and	a4,a5,ra
+	and	a4,t8,t12
+	xor	$1,$9,$9
+
+	sll	a4,30,a4
+	addl	t11,t9,t9
+	xor	$6,$9,$9
+
+	srl	$9,31,t11
+	addl	t10,t9,t9
+	or	ra,t12,ra
+	and	a5,t8,t12
+
+	or	ra,t12,ra
+	srl	a4,32,t12
+	addl	$8,t9,t9
+	addl	$9,$9,$9
+
+	or	t12,a4,a4
+	addl	ra,t9,t9
+	or	t11,$9,$9
+	zapnot	$8,0xf,$8
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$12,$10,$10
+
+	srl	t9,27,t10
+	and	a3,a4,ra
+	and	a3,a5,t12
+	xor	$2,$10,$10
+
+	sll	a3,30,a3
+	addl	t11,t8,t8
+	xor	$7,$10,$10
+
+	srl	$10,31,t11
+	addl	t10,t8,t8
+	or	ra,t12,ra
+	and	a4,a5,t12
+
+	or	ra,t12,ra
+	srl	a3,32,t12
+	addl	$9,t8,t8
+	addl	$10,$10,$10
+
+	or	t12,a3,a3
+	addl	ra,t8,t8
+	or	t11,$10,$10
+	zapnot	$9,0xf,$9
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$13,$11,$11
+
+	srl	t8,27,t10
+	and	t9,a3,ra
+	and	t9,a4,t12
+	xor	$3,$11,$11
+
+	sll	t9,30,t9
+	addl	t11,a5,a5
+	xor	$8,$11,$11
+
+	srl	$11,31,t11
+	addl	t10,a5,a5
+	or	ra,t12,ra
+	and	a3,a4,t12
+
+	or	ra,t12,ra
+	srl	t9,32,t12
+	addl	$10,a5,a5
+	addl	$11,$11,$11
+
+	or	t12,t9,t9
+	addl	ra,a5,a5
+	or	t11,$11,$11
+	zapnot	$10,0xf,$10
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$14,$12,$12
+
+	srl	a5,27,t10
+	and	t8,t9,ra
+	and	t8,a3,t12
+	xor	$4,$12,$12
+
+	sll	t8,30,t8
+	addl	t11,a4,a4
+	xor	$9,$12,$12
+
+	srl	$12,31,t11
+	addl	t10,a4,a4
+	or	ra,t12,ra
+	and	t9,a3,t12
+
+	or	ra,t12,ra
+	srl	t8,32,t12
+	addl	$11,a4,a4
+	addl	$12,$12,$12
+
+	or	t12,t8,t8
+	addl	ra,a4,a4
+	or	t11,$12,$12
+	zapnot	$11,0xf,$11
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$15,$13,$13
+
+	srl	a4,27,t10
+	and	a5,t8,ra
+	and	a5,t9,t12
+	xor	$5,$13,$13
+
+	sll	a5,30,a5
+	addl	t11,a3,a3
+	xor	$10,$13,$13
+
+	srl	$13,31,t11
+	addl	t10,a3,a3
+	or	ra,t12,ra
+	and	t8,t9,t12
+
+	or	ra,t12,ra
+	srl	a5,32,t12
+	addl	$12,a3,a3
+	addl	$13,$13,$13
+
+	or	t12,a5,a5
+	addl	ra,a3,a3
+	or	t11,$13,$13
+	zapnot	$12,0xf,$12
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$0,$14,$14
+
+	srl	a3,27,t10
+	and	a4,a5,ra
+	and	a4,t8,t12
+	xor	$6,$14,$14
+
+	sll	a4,30,a4
+	addl	t11,t9,t9
+	xor	$11,$14,$14
+
+	srl	$14,31,t11
+	addl	t10,t9,t9
+	or	ra,t12,ra
+	and	a5,t8,t12
+
+	or	ra,t12,ra
+	srl	a4,32,t12
+	addl	$13,t9,t9
+	addl	$14,$14,$14
+
+	or	t12,a4,a4
+	addl	ra,t9,t9
+	or	t11,$14,$14
+	zapnot	$13,0xf,$13
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$1,$15,$15
+
+	srl	t9,27,t10
+	and	a3,a4,ra
+	and	a3,a5,t12
+	xor	$7,$15,$15
+
+	sll	a3,30,a3
+	addl	t11,t8,t8
+	xor	$12,$15,$15
+
+	srl	$15,31,t11
+	addl	t10,t8,t8
+	or	ra,t12,ra
+	and	a4,a5,t12
+
+	or	ra,t12,ra
+	srl	a3,32,t12
+	addl	$14,t8,t8
+	addl	$15,$15,$15
+
+	or	t12,a3,a3
+	addl	ra,t8,t8
+	or	t11,$15,$15
+	zapnot	$14,0xf,$14
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$2,$0,$0
+
+	srl	t8,27,t10
+	and	t9,a3,ra
+	and	t9,a4,t12
+	xor	$8,$0,$0
+
+	sll	t9,30,t9
+	addl	t11,a5,a5
+	xor	$13,$0,$0
+
+	srl	$0,31,t11
+	addl	t10,a5,a5
+	or	ra,t12,ra
+	and	a3,a4,t12
+
+	or	ra,t12,ra
+	srl	t9,32,t12
+	addl	$15,a5,a5
+	addl	$0,$0,$0
+
+	or	t12,t9,t9
+	addl	ra,a5,a5
+	or	t11,$0,$0
+	zapnot	$15,0xf,$15
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$3,$1,$1
+
+	srl	a5,27,t10
+	and	t8,t9,ra
+	and	t8,a3,t12
+	xor	$9,$1,$1
+
+	sll	t8,30,t8
+	addl	t11,a4,a4
+	xor	$14,$1,$1
+
+	srl	$1,31,t11
+	addl	t10,a4,a4
+	or	ra,t12,ra
+	and	t9,a3,t12
+
+	or	ra,t12,ra
+	srl	t8,32,t12
+	addl	$0,a4,a4
+	addl	$1,$1,$1
+
+	or	t12,t8,t8
+	addl	ra,a4,a4
+	or	t11,$1,$1
+	zapnot	$0,0xf,$0
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$4,$2,$2
+
+	srl	a4,27,t10
+	and	a5,t8,ra
+	and	a5,t9,t12
+	xor	$10,$2,$2
+
+	sll	a5,30,a5
+	addl	t11,a3,a3
+	xor	$15,$2,$2
+
+	srl	$2,31,t11
+	addl	t10,a3,a3
+	or	ra,t12,ra
+	and	t8,t9,t12
+
+	or	ra,t12,ra
+	srl	a5,32,t12
+	addl	$1,a3,a3
+	addl	$2,$2,$2
+
+	or	t12,a5,a5
+	addl	ra,a3,a3
+	or	t11,$2,$2
+	zapnot	$1,0xf,$1
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$5,$3,$3
+
+	srl	a3,27,t10
+	and	a4,a5,ra
+	and	a4,t8,t12
+	xor	$11,$3,$3
+
+	sll	a4,30,a4
+	addl	t11,t9,t9
+	xor	$0,$3,$3
+
+	srl	$3,31,t11
+	addl	t10,t9,t9
+	or	ra,t12,ra
+	and	a5,t8,t12
+
+	or	ra,t12,ra
+	srl	a4,32,t12
+	addl	$2,t9,t9
+	addl	$3,$3,$3
+
+	or	t12,a4,a4
+	addl	ra,t9,t9
+	or	t11,$3,$3
+	zapnot	$2,0xf,$2
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$6,$4,$4
+
+	srl	t9,27,t10
+	and	a3,a4,ra
+	and	a3,a5,t12
+	xor	$12,$4,$4
+
+	sll	a3,30,a3
+	addl	t11,t8,t8
+	xor	$1,$4,$4
+
+	srl	$4,31,t11
+	addl	t10,t8,t8
+	or	ra,t12,ra
+	and	a4,a5,t12
+
+	or	ra,t12,ra
+	srl	a3,32,t12
+	addl	$3,t8,t8
+	addl	$4,$4,$4
+
+	or	t12,a3,a3
+	addl	ra,t8,t8
+	or	t11,$4,$4
+	zapnot	$3,0xf,$3
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$7,$5,$5
+
+	srl	t8,27,t10
+	and	t9,a3,ra
+	and	t9,a4,t12
+	xor	$13,$5,$5
+
+	sll	t9,30,t9
+	addl	t11,a5,a5
+	xor	$2,$5,$5
+
+	srl	$5,31,t11
+	addl	t10,a5,a5
+	or	ra,t12,ra
+	and	a3,a4,t12
+
+	or	ra,t12,ra
+	srl	t9,32,t12
+	addl	$4,a5,a5
+	addl	$5,$5,$5
+
+	or	t12,t9,t9
+	addl	ra,a5,a5
+	or	t11,$5,$5
+	zapnot	$4,0xf,$4
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$8,$6,$6
+
+	srl	a5,27,t10
+	and	t8,t9,ra
+	and	t8,a3,t12
+	xor	$14,$6,$6
+
+	sll	t8,30,t8
+	addl	t11,a4,a4
+	xor	$3,$6,$6
+
+	srl	$6,31,t11
+	addl	t10,a4,a4
+	or	ra,t12,ra
+	and	t9,a3,t12
+
+	or	ra,t12,ra
+	srl	t8,32,t12
+	addl	$5,a4,a4
+	addl	$6,$6,$6
+
+	or	t12,t8,t8
+	addl	ra,a4,a4
+	or	t11,$6,$6
+	zapnot	$5,0xf,$5
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$9,$7,$7
+
+	srl	a4,27,t10
+	and	a5,t8,ra
+	and	a5,t9,t12
+	xor	$15,$7,$7
+
+	sll	a5,30,a5
+	addl	t11,a3,a3
+	xor	$4,$7,$7
+
+	srl	$7,31,t11
+	addl	t10,a3,a3
+	or	ra,t12,ra
+	and	t8,t9,t12
+
+	or	ra,t12,ra
+	srl	a5,32,t12
+	addl	$6,a3,a3
+	addl	$7,$7,$7
+
+	or	t12,a5,a5
+	addl	ra,a3,a3
+	or	t11,$7,$7
+	zapnot	$6,0xf,$6
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$10,$8,$8
+
+	srl	a3,27,t10
+	and	a4,a5,ra
+	and	a4,t8,t12
+	xor	$0,$8,$8
+
+	sll	a4,30,a4
+	addl	t11,t9,t9
+	xor	$5,$8,$8
+
+	srl	$8,31,t11
+	addl	t10,t9,t9
+	or	ra,t12,ra
+	and	a5,t8,t12
+
+	or	ra,t12,ra
+	srl	a4,32,t12
+	addl	$7,t9,t9
+	addl	$8,$8,$8
+
+	or	t12,a4,a4
+	addl	ra,t9,t9
+	or	t11,$8,$8
+	zapnot	$7,0xf,$7
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$11,$9,$9
+
+	srl	t9,27,t10
+	and	a3,a4,ra
+	and	a3,a5,t12
+	xor	$1,$9,$9
+
+	sll	a3,30,a3
+	addl	t11,t8,t8
+	xor	$6,$9,$9
+
+	srl	$9,31,t11
+	addl	t10,t8,t8
+	or	ra,t12,ra
+	and	a4,a5,t12
+
+	or	ra,t12,ra
+	srl	a3,32,t12
+	addl	$8,t8,t8
+	addl	$9,$9,$9
+
+	or	t12,a3,a3
+	addl	ra,t8,t8
+	or	t11,$9,$9
+	zapnot	$8,0xf,$8
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$12,$10,$10
+
+	srl	t8,27,t10
+	and	t9,a3,ra
+	and	t9,a4,t12
+	xor	$2,$10,$10
+
+	sll	t9,30,t9
+	addl	t11,a5,a5
+	xor	$7,$10,$10
+
+	srl	$10,31,t11
+	addl	t10,a5,a5
+	or	ra,t12,ra
+	and	a3,a4,t12
+
+	or	ra,t12,ra
+	srl	t9,32,t12
+	addl	$9,a5,a5
+	addl	$10,$10,$10
+
+	or	t12,t9,t9
+	addl	ra,a5,a5
+	or	t11,$10,$10
+	zapnot	$9,0xf,$9
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$13,$11,$11
+
+	srl	a5,27,t10
+	and	t8,t9,ra
+	and	t8,a3,t12
+	xor	$3,$11,$11
+
+	sll	t8,30,t8
+	addl	t11,a4,a4
+	xor	$8,$11,$11
+
+	srl	$11,31,t11
+	addl	t10,a4,a4
+	or	ra,t12,ra
+	and	t9,a3,t12
+
+	or	ra,t12,ra
+	srl	t8,32,t12
+	addl	$10,a4,a4
+	addl	$11,$11,$11
+
+	or	t12,t8,t8
+	addl	ra,a4,a4
+	or	t11,$11,$11
+	zapnot	$10,0xf,$10
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$14,$12,$12
+
+	srl	a4,27,t10
+	and	a5,t8,ra
+	and	a5,t9,t12
+	xor	$4,$12,$12
+
+	sll	a5,30,a5
+	addl	t11,a3,a3
+	xor	$9,$12,$12
+
+	srl	$12,31,t11
+	addl	t10,a3,a3
+	or	ra,t12,ra
+	and	t8,t9,t12
+
+	or	ra,t12,ra
+	srl	a5,32,t12
+	addl	$11,a3,a3
+	addl	$12,$12,$12
+
+	or	t12,a5,a5
+	addl	ra,a3,a3
+	or	t11,$12,$12
+	zapnot	$11,0xf,$11
+	ldah	AT,-13725(zero)
+	lda	AT,-15914(AT)	# K_60_79
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$15,$13,$13
+
+	sll	a4,30,t12
+	addl	t11,t9,t9
+	xor	a4,a5,ra
+	xor	$5,$13,$13
+
+	srl	a4,2,a4
+	addl	$12,t9,t9
+	xor	t8,ra,ra
+	xor	$10,$13,$13
+
+	srl	$13,31,t11
+	addl	ra,t9,t9
+	srl	a3,27,t10
+	addl	$13,$13,$13
+
+	or	t12,a4,a4
+	addl	t10,t9,t9
+	or	t11,$13,$13
+	zapnot	$12,0xf,$12
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$0,$14,$14
+
+	sll	a3,30,t12
+	addl	t11,t8,t8
+	xor	a3,a4,ra
+	xor	$6,$14,$14
+
+	srl	a3,2,a3
+	addl	$13,t8,t8
+	xor	a5,ra,ra
+	xor	$11,$14,$14
+
+	srl	$14,31,t11
+	addl	ra,t8,t8
+	srl	t9,27,t10
+	addl	$14,$14,$14
+
+	or	t12,a3,a3
+	addl	t10,t8,t8
+	or	t11,$14,$14
+	zapnot	$13,0xf,$13
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$1,$15,$15
+
+	sll	t9,30,t12
+	addl	t11,a5,a5
+	xor	t9,a3,ra
+	xor	$7,$15,$15
+
+	srl	t9,2,t9
+	addl	$14,a5,a5
+	xor	a4,ra,ra
+	xor	$12,$15,$15
+
+	srl	$15,31,t11
+	addl	ra,a5,a5
+	srl	t8,27,t10
+	addl	$15,$15,$15
+
+	or	t12,t9,t9
+	addl	t10,a5,a5
+	or	t11,$15,$15
+	zapnot	$14,0xf,$14
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$2,$0,$0
+
+	sll	t8,30,t12
+	addl	t11,a4,a4
+	xor	t8,t9,ra
+	xor	$8,$0,$0
+
+	srl	t8,2,t8
+	addl	$15,a4,a4
+	xor	a3,ra,ra
+	xor	$13,$0,$0
+
+	srl	$0,31,t11
+	addl	ra,a4,a4
+	srl	a5,27,t10
+	addl	$0,$0,$0
+
+	or	t12,t8,t8
+	addl	t10,a4,a4
+	or	t11,$0,$0
+	zapnot	$15,0xf,$15
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$3,$1,$1
+
+	sll	a5,30,t12
+	addl	t11,a3,a3
+	xor	a5,t8,ra
+	xor	$9,$1,$1
+
+	srl	a5,2,a5
+	addl	$0,a3,a3
+	xor	t9,ra,ra
+	xor	$14,$1,$1
+
+	srl	$1,31,t11
+	addl	ra,a3,a3
+	srl	a4,27,t10
+	addl	$1,$1,$1
+
+	or	t12,a5,a5
+	addl	t10,a3,a3
+	or	t11,$1,$1
+	zapnot	$0,0xf,$0
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$4,$2,$2
+
+	sll	a4,30,t12
+	addl	t11,t9,t9
+	xor	a4,a5,ra
+	xor	$10,$2,$2
+
+	srl	a4,2,a4
+	addl	$1,t9,t9
+	xor	t8,ra,ra
+	xor	$15,$2,$2
+
+	srl	$2,31,t11
+	addl	ra,t9,t9
+	srl	a3,27,t10
+	addl	$2,$2,$2
+
+	or	t12,a4,a4
+	addl	t10,t9,t9
+	or	t11,$2,$2
+	zapnot	$1,0xf,$1
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$5,$3,$3
+
+	sll	a3,30,t12
+	addl	t11,t8,t8
+	xor	a3,a4,ra
+	xor	$11,$3,$3
+
+	srl	a3,2,a3
+	addl	$2,t8,t8
+	xor	a5,ra,ra
+	xor	$0,$3,$3
+
+	srl	$3,31,t11
+	addl	ra,t8,t8
+	srl	t9,27,t10
+	addl	$3,$3,$3
+
+	or	t12,a3,a3
+	addl	t10,t8,t8
+	or	t11,$3,$3
+	zapnot	$2,0xf,$2
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$6,$4,$4
+
+	sll	t9,30,t12
+	addl	t11,a5,a5
+	xor	t9,a3,ra
+	xor	$12,$4,$4
+
+	srl	t9,2,t9
+	addl	$3,a5,a5
+	xor	a4,ra,ra
+	xor	$1,$4,$4
+
+	srl	$4,31,t11
+	addl	ra,a5,a5
+	srl	t8,27,t10
+	addl	$4,$4,$4
+
+	or	t12,t9,t9
+	addl	t10,a5,a5
+	or	t11,$4,$4
+	zapnot	$3,0xf,$3
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$7,$5,$5
+
+	sll	t8,30,t12
+	addl	t11,a4,a4
+	xor	t8,t9,ra
+	xor	$13,$5,$5
+
+	srl	t8,2,t8
+	addl	$4,a4,a4
+	xor	a3,ra,ra
+	xor	$2,$5,$5
+
+	srl	$5,31,t11
+	addl	ra,a4,a4
+	srl	a5,27,t10
+	addl	$5,$5,$5
+
+	or	t12,t8,t8
+	addl	t10,a4,a4
+	or	t11,$5,$5
+	zapnot	$4,0xf,$4
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$8,$6,$6
+
+	sll	a5,30,t12
+	addl	t11,a3,a3
+	xor	a5,t8,ra
+	xor	$14,$6,$6
+
+	srl	a5,2,a5
+	addl	$5,a3,a3
+	xor	t9,ra,ra
+	xor	$3,$6,$6
+
+	srl	$6,31,t11
+	addl	ra,a3,a3
+	srl	a4,27,t10
+	addl	$6,$6,$6
+
+	or	t12,a5,a5
+	addl	t10,a3,a3
+	or	t11,$6,$6
+	zapnot	$5,0xf,$5
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$9,$7,$7
+
+	sll	a4,30,t12
+	addl	t11,t9,t9
+	xor	a4,a5,ra
+	xor	$15,$7,$7
+
+	srl	a4,2,a4
+	addl	$6,t9,t9
+	xor	t8,ra,ra
+	xor	$4,$7,$7
+
+	srl	$7,31,t11
+	addl	ra,t9,t9
+	srl	a3,27,t10
+	addl	$7,$7,$7
+
+	or	t12,a4,a4
+	addl	t10,t9,t9
+	or	t11,$7,$7
+	zapnot	$6,0xf,$6
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$10,$8,$8
+
+	sll	a3,30,t12
+	addl	t11,t8,t8
+	xor	a3,a4,ra
+	xor	$0,$8,$8
+
+	srl	a3,2,a3
+	addl	$7,t8,t8
+	xor	a5,ra,ra
+	xor	$5,$8,$8
+
+	srl	$8,31,t11
+	addl	ra,t8,t8
+	srl	t9,27,t10
+	addl	$8,$8,$8
+
+	or	t12,a3,a3
+	addl	t10,t8,t8
+	or	t11,$8,$8
+	zapnot	$7,0xf,$7
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$11,$9,$9
+
+	sll	t9,30,t12
+	addl	t11,a5,a5
+	xor	t9,a3,ra
+	xor	$1,$9,$9
+
+	srl	t9,2,t9
+	addl	$8,a5,a5
+	xor	a4,ra,ra
+	xor	$6,$9,$9
+
+	srl	$9,31,t11
+	addl	ra,a5,a5
+	srl	t8,27,t10
+	addl	$9,$9,$9
+
+	or	t12,t9,t9
+	addl	t10,a5,a5
+	or	t11,$9,$9
+	zapnot	$8,0xf,$8
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$12,$10,$10
+
+	sll	t8,30,t12
+	addl	t11,a4,a4
+	xor	t8,t9,ra
+	xor	$2,$10,$10
+
+	srl	t8,2,t8
+	addl	$9,a4,a4
+	xor	a3,ra,ra
+	xor	$7,$10,$10
+
+	srl	$10,31,t11
+	addl	ra,a4,a4
+	srl	a5,27,t10
+	addl	$10,$10,$10
+
+	or	t12,t8,t8
+	addl	t10,a4,a4
+	or	t11,$10,$10
+	zapnot	$9,0xf,$9
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	xor	$13,$11,$11
+
+	sll	a5,30,t12
+	addl	t11,a3,a3
+	xor	a5,t8,ra
+	xor	$3,$11,$11
+
+	srl	a5,2,a5
+	addl	$10,a3,a3
+	xor	t9,ra,ra
+	xor	$8,$11,$11
+
+	srl	$11,31,t11
+	addl	ra,a3,a3
+	srl	a4,27,t10
+	addl	$11,$11,$11
+
+	or	t12,a5,a5
+	addl	t10,a3,a3
+	or	t11,$11,$11
+	zapnot	$10,0xf,$10
+	sll	a3,5,t11
+	addl	AT,t9,t9
+	zapnot	a3,0xf,a3
+	xor	$14,$12,$12
+
+	sll	a4,30,t12
+	addl	t11,t9,t9
+	xor	a4,a5,ra
+	xor	$4,$12,$12
+
+	srl	a4,2,a4
+	addl	$11,t9,t9
+	xor	t8,ra,ra
+	xor	$9,$12,$12
+
+	srl	$12,31,t11
+	addl	ra,t9,t9
+	srl	a3,27,t10
+	addl	$12,$12,$12
+
+	or	t12,a4,a4
+	addl	t10,t9,t9
+	or	t11,$12,$12
+	zapnot	$11,0xf,$11
+	sll	t9,5,t11
+	addl	AT,t8,t8
+	zapnot	t9,0xf,t9
+	xor	$15,$13,$13
+
+	sll	a3,30,t12
+	addl	t11,t8,t8
+	xor	a3,a4,ra
+	xor	$5,$13,$13
+
+	srl	a3,2,a3
+	addl	$12,t8,t8
+	xor	a5,ra,ra
+	xor	$10,$13,$13
+
+	srl	$13,31,t11
+	addl	ra,t8,t8
+	srl	t9,27,t10
+	addl	$13,$13,$13
+
+	or	t12,a3,a3
+	addl	t10,t8,t8
+	or	t11,$13,$13
+	zapnot	$12,0xf,$12
+	sll	t8,5,t11
+	addl	AT,a5,a5
+	zapnot	t8,0xf,t8
+	xor	$0,$14,$14
+
+	sll	t9,30,t12
+	addl	t11,a5,a5
+	xor	t9,a3,ra
+	xor	$6,$14,$14
+
+	srl	t9,2,t9
+	addl	$13,a5,a5
+	xor	a4,ra,ra
+	xor	$11,$14,$14
+
+	srl	$14,31,t11
+	addl	ra,a5,a5
+	srl	t8,27,t10
+	addl	$14,$14,$14
+
+	or	t12,t9,t9
+	addl	t10,a5,a5
+	or	t11,$14,$14
+	sll	a5,5,t11
+	addl	AT,a4,a4
+	zapnot	a5,0xf,a5
+	xor	$1,$15,$15
+
+	sll	t8,30,t12
+	addl	t11,a4,a4
+	xor	t8,t9,ra
+	xor	$7,$15,$15
+
+	srl	t8,2,t8
+	addl	$14,a4,a4
+	xor	a3,ra,ra
+	xor	$12,$15,$15
+
+	srl	$15,31,t11
+	addl	ra,a4,a4
+	srl	a5,27,t10
+	addl	$15,$15,$15
+
+	or	t12,t8,t8
+	addl	t10,a4,a4
+	or	t11,$15,$15
+	sll	a4,5,t11
+	addl	AT,a3,a3
+	zapnot	a4,0xf,a4
+	ldl	$0,0(a0)
+
+	sll	a5,30,t12
+	addl	t11,a3,a3
+	xor	a5,t8,ra
+	ldl	$1,4(a0)
+
+	srl	a5,2,a5
+	addl	$15,a3,a3
+	xor	t9,ra,ra
+	ldl	$2,8(a0)
+
+	srl	a4,27,t10
+	addl	ra,a3,a3
+	ldl	$3,12(a0)
+
+	or	t12,a5,a5
+	addl	t10,a3,a3
+	ldl	$4,16(a0)
+	addl	$0,a3,a3
+	addl	$1,a4,a4
+	addl	$2,a5,a5
+	addl	$3,t8,t8
+	addl	$4,t9,t9
+	stl	a3,0(a0)
+	stl	a4,4(a0)
+	addq	a1,64,a1
+	stl	a5,8(a0)
+	stl	t8,12(a0)
+	stl	t9,16(a0)
+	cmpult	a1,a2,t11
+	bne	t11,.Lloop
+
+	.set	noreorder
+	ldq	ra,0(sp)
+	ldq	s0,8(sp)
+	ldq	s1,16(sp)
+	ldq	s2,24(sp)
+	ldq	s3,32(sp)
+	ldq	s4,40(sp)
+	ldq	s5,48(sp)
+	ldq	fp,56(sp)
+	lda	sp,64(sp)
+	ret	(ra)
+.end	sha1_block_data_order
+.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <ap...@openssl.org>"
+.align	2
