BTW, have you considered synergetic implementation, which would work as
following. Arrange an intermediate buffer followed by non-accessible
page [commonly would be done with anonymous mmap of two pages followed
by mprotect(PROT_NONE) for the second page]. Upon *_init we call
software SHA*_Init. Then all short inputs go directly through software
SHA*_Update, while everything that is larger than certain value, say 256
bytes, is treated as following. Input stream is first "purged/aligned"
by running single pass of SHA*_Update till SHA*_CTX->data is full. Then
available 64-byte chunks are copied to the *bottom* of first page
mentioned above. Then we set up SEGV signal handler, let hardware suffer
from page fault and collect the intermediate hash values. The procedure
is repeated if more than pagesize was available at a time.
SHA*_CTX->Nl,Nh are adjusted accordingly and remaining bytes [if any] are
fed again to software SHA*_Update. Upon *_final we just call *software*
SHA*_Final.

Yep it works. Proof of concept at http://www.logix.cz/michal/devel/padlock/phe_sum.c It isn't optimized at all, does finalizing in HW so it can be compiled without OpenSSL and only works for files <512MB. But it actually works, which is a good start ;-)

Attached is my take on the problem:-) My implementation is dependent on software, OpenSSL in particular, but can be adapted for any software implementation. It's using siglongjmp in signal handler on Unix for better portability. PHE-enabled update routines are meant to be reusable, e.g. they permit arbitrary mix of chunk sizes and are self-contained in respect to signal handling. For remaining details see commentary section. A.

/*
 * Drop-in replacement for sha1sum from GNU coreutils, by Andy Polyakov
 * <[EMAIL PROTECTED]>. Unlike the original this code:
 *
 * - detects and if available deploys VIA PadLock Hash Engine, i.e. it
 *   can still be executed even on non-VIA processors;
 * - adds support for SHA256;
 * - natively supports Win32;
 * - supports both GNU C and Microsoft Visual C compiler;
 *
 ***********************************************************************
 *
 * PadLock Hash Engine, PHE, in VIA Esther CPU turned out to be not as
 * useful as one would normally wish. Trouble is that xsha instruction
 * insists on padding the input message with its length and finalizing
 * the SHA calculation. This means that the whole message is expected to
 * be passed at once. But this is not how for example MAC algorithms use
 * hashing functions... Not to mention that the message might not even
 * fit in memory, e.g. DVD image... They really should have implemented
 * simpler non-finalizing instruction and let us, rather our software,
 * cope with it. We'd manage just fine! But anyway...
 *
 * Basic idea behind this synergetic implementation is to prevent PHE
 * from finalizing SHA output by literally crashing the xsha instruction
 * into a non-accessible page. But instead of terminating the app we
 * collect the intermediate hash value and pass it as initial vector to
 * next crash round or software. Every segmentation violation naturally
 * implies overhead, therefore we use readily available non hardware-
 * assisted routines to handle short chunks, as well as to finalize the
 * resulting hash value.
 *
 * Asymptotic PadLock SHA1 performance is over 4x of hand-coded OpenSSL
 * assembler, while SHA256 - 13x of gcc-compiled C code:-) If compared
 * to each other, PadLock SHA1 and SHA256 performances are virtually
 * same and of 2Gbps order of magnitude on 1.5GHz Esther processor. To
 * give you something else to compare with, PadLock SHA1 performance on
 * just mentioned VIA CPU is ~20% better than hand-coded OpenSSL assem-
 * bler on 2.4GHz Intel P4, while SHA256 is over 300% (>4x) faster.
 */

#define _LARGEFILE_SOURCE 
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include <openssl/sha.h>

#ifndef _WIN32
#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>
#include <setjmp.h>
#include <signal.h>
#else
#include <windows.h>
#include <io.h>
#include <excpt.h>
#include <malloc.h>
#define ssize_t LONG_PTR
#endif

#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif
#ifndef O_BINARY
#define O_BINARY    0
#endif
#ifndef O_TEXT
#define O_TEXT      0
#endif

/* number of pages in Buffer; more is better for performance, but values
 * above 2 don't give as much boost as transition from 1 to 2 */
#define NPAGES 3

/* Staging area handed to the hash engine; main() allocates it with an
 * inaccessible guard page right behind it. */
static unsigned char	*Buffer;
/* Number of usable bytes in Buffer, computed in main(). */
static size_t		BufSiz;
/* Non-zero selects the software-only SHA routines; main() clears it when
 * phe_available() succeeds. */
static int		SoftSHA=1;
/* omode is OR-ed into the open() flags; cmode is the type character
 * printed between checksum and file name (`*' binary, ` ' text). */
#if O_BINARY
static int		omode = O_BINARY;
static int		cmode = '*';
#else
static int		omode = O_TEXT;
static char		cmode = ' ';
#endif
/* --warn/-w: report malformed lines on stderr while checking. */
static int		warn  = 0;
/* --status: suppress per-file and summary output while checking. */
static int		quiet = 0;
/* Digest width in bits: 160 selects SHA1, 256 selects SHA256. */
static int		bits  = 160;

#ifndef _WIN32
/* why siglongjmp and not adjusting %eip? the former is just more portable
   among Unix flavors :-) */
/* Jump target armed with sigsetjmp() by the padlock_sha* routines right
   before the xsha instruction is issued. */
static sigjmp_buf xsha_jmp;
/* SIGSEGV handler: unwind straight back to the sigsetjmp() point, handing
   over the signal number as its (non-zero) return value. */
static void segv_handler (int sig)		{ siglongjmp (xsha_jmp,sig); }
/* Disposition installed with sigaction() around every xsha invocation;
   every member other than sa_handler is left zero-initialized. */
static const struct sigaction segv_action =	{ .sa_handler = segv_handler };
#else
/*
 * Win32 structured-exception handler registered by the xsha1/xsha256
 * wrappers.  For an access violation it advances the saved instruction
 * pointer by 4 bytes (the length of the "repz xsha" encoding emitted by
 * those wrappers) and resumes execution; every other exception code is
 * passed on to the next handler in the chain.
 */
static EXCEPTION_DISPOSITION __cdecl segv_handler(
	struct _EXCEPTION_RECORD *ExceptionRecord,void *EstablisherFrame,
	struct _CONTEXT          *ContextRecord,  void *DispatcherContext)
{   if (ExceptionRecord->ExceptionCode == STATUS_ACCESS_VIOLATION)
    {	ContextRecord->Eip += 4;	/* skip over repz xsha */
    	return ExceptionContinueExecution;
    }
  /* not ours: let the search continue */
  return ExceptionContinueSearch;
}
# ifdef _MSC_VER
#  pragma warning(disable:4733)
# endif
#endif

/*
 * Return a read-only descriptor for NAME.  The one-character name "-"
 * selects standard input (switched to the current omode on Win32 and
 * Cygwin); anything else is opened with O_RDONLY, O_LARGEFILE and omode.
 * The value is passed through from fileno()/open() unchanged, so a
 * failed open() shows up as a negative result.
 */
static int open_wrapper (const char *name)
{
    int handle;

    /* ordinary path name */
    if (name[0]!='-' || name[1]!='\0')
	return open (name,O_RDONLY|O_LARGEFILE|omode);

    /* "-" means standard input */
    handle = fileno(stdin);
#if defined(_WIN32) || defined(__CYGWIN__)
    _setmode (handle,omode);
#endif

    return handle;
}

#ifdef _MSC_VER
/*
 * MSVC flavour of the MMX block copy.  The assembly uses ecx as the
 * destination base and edx as the source base (presumably the two
 * register arguments of the __fastcall convention, i.e. dst and src --
 * confirm against the compiler documentation) and loads len into eax.
 * Each pass moves the 64 bytes ending at offset eax and then lowers eax
 * by 64, so the data is copied from its end towards its start.
 * len has to be a non-zero multiple of 64: the loop only terminates when
 * the subtraction yields exactly zero.
 */
static void __fastcall chunkmove (void *dst,const void *src,size_t len)
{   _asm {
	mov	eax,len
	/* load one 64-byte chunk into mm0..mm7 ... */
spin:	movq	mm0,QWORD PTR[eax+edx-64]
	movq	mm1,QWORD PTR[eax+edx-56]
	movq	mm2,QWORD PTR[eax+edx-48]
	movq	mm3,QWORD PTR[eax+edx-40]
	movq	mm4,QWORD PTR[eax+edx-32]
	movq	mm5,QWORD PTR[eax+edx-24]
	movq	mm6,QWORD PTR[eax+edx-16]
	movq	mm7,QWORD PTR[eax+edx-8]
	/* ... and store it at the same offset in the destination */
	movq	QWORD PTR[eax+ecx-64],mm0
	movq	QWORD PTR[eax+ecx-56],mm1
	movq	QWORD PTR[eax+ecx-48],mm2
	movq	QWORD PTR[eax+ecx-40],mm3
	movq	QWORD PTR[eax+ecx-32],mm4
	movq	QWORD PTR[eax+ecx-24],mm5
	movq	QWORD PTR[eax+ecx-16],mm6
	movq	QWORD PTR[eax+ecx-8],mm7
	sub	eax,64
	jnz	spin
	/* leave MMX state so the x87 FPU is usable again */
	emms
    }
}

/* Borrowed from <openssl>/crypto/engine/eng_padlock.c */
/*
 * MSVC flavour of the PadLock Hash Engine probe.  There is no return
 * statement: the result is whatever the assembly leaves in eax, which is
 * 1 only if every step below succeeds and 0 when a step after the first
 * CPUID bails out to "noluck".
 *
 *  1. toggle bit 21 of EFLAGS and verify that the change sticks;
 *  2. CPUID leaf 0: compare the vendor registers against the three
 *     multi-character constants below;
 *  3. CPUID leaf 0xC0000000: require the returned maximum to be at least
 *     0xC0000001;
 *  4. CPUID leaf 0xC0000001: require bits 10 and 11 of edx to be set.
 */
static int
phe_available(void)
{   _asm {
	pushfd
	pop	eax
	mov	ecx,eax
	xor	eax,1<<21
	push	eax
	popfd
	pushfd
	pop	eax
	xor	eax,ecx
	bt	eax,21
	jnc	noluck
	mov	eax,0
	cpuid
	xor	eax,eax
	cmp	ebx,'tneC'
	jne	noluck
	cmp	edx,'Hrua'
	jne	noluck
	cmp	ecx,'slua'
	jne	noluck
	mov	eax,0xC0000000
	cpuid
	mov	edx,eax
	xor	eax,eax
	cmp	edx,0xC0000001
	jb	noluck
	mov	eax,0xC0000001
	cpuid
	xor	eax,eax
	bt	edx,10
	jnc	noluck
	bt	edx,11
	jnc	noluck
	inc	eax
noluck:
    }
}
#else
/*
 * Copy LEN bytes from SRC to DST through the MMX registers, 64 bytes per
 * iteration, starting with the highest-addressed chunk and working down.
 * Because a whole chunk is loaded before it is stored and chunks are
 * handled top-down, an overlapping move towards higher addresses
 * (dst >= src) behaves like memmove().
 *
 * LEN must be a multiple of 64; a zero length is a no-op.
 *
 * The asm statement declares "memory" and "cc" clobbers: it reads and
 * writes memory that is not named in its operand list and the
 * subtraction alters the flags, so without them the compiler would be
 * free to move surrounding loads/stores across the copy.  The __asm__
 * spelling and the suffix-less "sub" keep the routine usable in strict
 * ISO modes and with any register width chosen for LEN.
 */
static inline void chunkmove (void *dst,const void *src,size_t len)
{
    if (len == 0)		/* the countdown below would wrap around */
	return;

    __asm__ __volatile__ (
	"1:"
	"movq	-64(%1,%0),%%mm0\n"
	"movq	-56(%1,%0),%%mm1\n"
	"movq	-48(%1,%0),%%mm2\n"
	"movq	-40(%1,%0),%%mm3\n"
	"movq	-32(%1,%0),%%mm4\n"
	"movq	-24(%1,%0),%%mm5\n"
	"movq	-16(%1,%0),%%mm6\n"
	"movq	-8(%1,%0),%%mm7\n"
	"movq	%%mm0,-64(%2,%0)\n"
	"movq	%%mm1,-56(%2,%0)\n"
	"movq	%%mm2,-48(%2,%0)\n"
	"movq	%%mm3,-40(%2,%0)\n"
	"movq	%%mm4,-32(%2,%0)\n"
	"movq	%%mm5,-24(%2,%0)\n"
	"movq	%%mm6,-16(%2,%0)\n"
	"movq	%%mm7,-8(%2,%0)\n"
	"sub	$64,%0\n"
	"jnz	1b\n"
	"emms"
	: "+r"(len) : "r"(src),"r"(dst) : "mm0","mm1","mm2","mm3",
					  "mm4","mm5","mm6","mm7",
					  "memory","cc");
}

/* Two below are borrowed from <openssl>/crypto/engine/eng_padlock.c */
/* Helper function - check if a CPUID instruction
   is available on this CPU.
   Returns 1 when bit 21 of EFLAGS could be toggled, 0 otherwise.
   The sequence uses 32-bit pushes/pops (pushf/popl), so it is i386-only. */
static int
padlock_insn_cpuid_available(void)
{ int result = -1;

    /* We're checking if the bit #21 of EFLAGS 
       can be toggled. If yes = CPUID is available. */
    asm volatile (
	/* eax = EFLAGS with bit 21 flipped, ecx = the value bit 21
	   is expected to have afterwards */
	"pushf\n"
	"popl %%eax\n"
	"xorl $0x200000, %%eax\n"
	"movl %%eax, %%ecx\n"
	"andl $0x200000, %%ecx\n"
	/* write the modified flags and read them back */
	"pushl %%eax\n"
	"popf\n"
	"pushf\n"
	"popl %%eax\n"
	/* result = expected bit XOR actual bit, i.e. 0 if the flip stuck */
	"andl $0x200000, %%eax\n"
	"xorl %%eax, %%ecx\n"
	"movl %%ecx, %0\n"
	: "=r" (result) : : "eax", "ecx");
	
  return (result == 0);
}
/* Load supported features of the CPU to see if
   the PadLock Hash Engine is available and engaged.

   Returns 1 only when CPUID exists, the vendor string reads
   "CentaurHauls", the Centaur extended leaf 0xC0000001 is implemented
   and bits 10 and 11 of its edx result are both set; 0 otherwise.

   %ebx is saved and restored by hand around every CPUID instead of
   being listed as clobbered.  The first asm statement stores the vendor
   string through %edi, i.e. into memory that is not one of its output
   operands, hence the "memory" clobber: without it the compiler may
   assume vendor_string is never written and evaluate the strcmp()
   against stale contents. */
static int
phe_available(void)
{ char vendor_string[16] = {0};
  unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (! padlock_insn_cpuid_available())
	return 0;

    /* Are we running on the Centaur (VIA) CPU? */
    eax = 0x00000000;
    vendor_string[12] = 0;
    __asm__ __volatile__ (
	"pushl	%%ebx\n"
	"cpuid\n"
	"movl	%%ebx,(%%edi)\n"
	"movl	%%edx,4(%%edi)\n"
	"movl	%%ecx,8(%%edi)\n"
	"popl	%%ebx"
	: "+a"(eax) : "D"(vendor_string) : "ecx", "edx", "memory");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
	return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    __asm__ __volatile__ ("pushl %%ebx; cpuid; popl	%%ebx"
	: "+a"(eax) : : "ecx", "edx");
    if (eax < 0xC0000001)
	return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx"
	: "+a"(eax), "=d"(edx) : : "ecx");

  /* both bit 10 and bit 11 must be set */
  return ((edx & (0x3<<10)) == (0x3<<10));
}
#endif

/*
 * Final step shared by both digest flavours.
 *
 *   name - file name to print next to the digest (print mode only)
 *   md   - binary digest, sz bytes long
 *   sz   - digest length in bytes
 *   inp  - NULL to print "<hex digest> <cmode><name>" on stdout, or a
 *          string that starts with the 2*sz hex digits of the expected
 *          digest to compare against md
 *
 * Returns 0 after printing or when the digest matches, 1 when it does
 * not match.  A reference string that ends early or contains a non-hex
 * character counts as a mismatch; the digits are validated before they
 * are converted, so the string is never read past its terminator and no
 * unconverted value is ever compared.
 */
static int tail_process (const char *name,
			const unsigned char *md,size_t sz,const char *inp)
{ size_t i;

    if (inp)
    {	for (i=0;i<sz;i++,inp+=2)
	{ unsigned int byte;

	    /* && short-circuits on the terminating NUL */
	    if (!isxdigit ((unsigned char)inp[0]) ||
		!isxdigit ((unsigned char)inp[1]))
		return 1;
	    if (sscanf (inp,"%2x",&byte) != 1 || byte != md[i])
		return 1;
	}
	return 0;
    }

    for (i=0;i<sz;i++) printf ("%02x",md[i]);
    printf (" %c%s\n",cmode,name);

  return 0;
}

#ifdef _MSC_VER
/*
 * MSVC wrapper around the PadLock "repz xsha1" instruction (emitted as
 * raw bytes f3 0f a6 c8).  edi is loaded from edx and esi from inp, eax
 * is zeroed; ecx is not touched by the code and presumably still holds
 * the first __fastcall argument, num -- TODO confirm against the calling
 * convention.  segv_handler is linked in as the innermost SEH frame via
 * fs:[0] for the duration of the instruction and unlinked afterwards.
 */
static void __fastcall xsha1 (size_t num,unsigned int *ctx,const void *inp)
{   _asm mov	edi,edx
    _asm mov	esi,inp
    _asm xor	eax,eax

    /* push an exception registration record {previous, handler} and
       make it the head of the chain */
    _asm push	OFFSET segv_handler
    _asm push	DWORD PTR fs:[0]
    _asm mov	DWORD PTR fs:[0],esp

    _asm _emit	0xf3 _asm _emit	0x0f _asm _emit	0xa6 _asm _emit	0xc8

    /* restore the previous chain head and drop the handler slot */
    _asm pop	DWORD PTR fs:[0]
    _asm add	esp,4
}
#else
static inline void xsha1 (size_t num,unsigned int *ctx,const void *inp)
{ unsigned int eax=0;
 
   asm volatile (
#ifdef _WIN32
	"pushl	%4\n"
	"pushl	%%fs:0\n"
	"movl	%%esp,%%fs:0\n"
#endif
	".byte	0xf3,0x0f,0xa6,0xc8\n"  
#ifdef _WIN32
	"popl	%%fs:0\n"
	"addl	$4,%%esp"
#endif
	: "+a"(eax),"+S"(inp),"+c"(num) : "D"(ctx),"p"(segv_handler));
}
#endif

/*
 * SHA1 update step that hands large inputs to the PadLock Hash Engine
 * and everything else to OpenSSL's SHA1_Update().
 *
 *   ctx - OpenSSL SHA1 context; stays a valid software context on return
 *   buf - input data; either Buffer itself or memory that does not
 *         overlap Buffer (NOTE(review): a pointer into the middle of
 *         Buffer is not supported by the staging copy -- confirm no
 *         caller does that)
 *   len - number of input bytes, any value
 *
 * Steps: top up a partially filled ctx->data block in software, fall
 * back to software for short inputs, otherwise move the whole 64-byte
 * blocks to the end of Buffer so that they finish right where Buffer
 * ends, run xsha1 with a SIGSEGV/SEH handler armed, copy the
 * intermediate hash back into ctx, advance the bit counters and feed the
 * sub-block tail to software.
 *
 * Compared with the straightforward version this one
 *  - clamps the top-up to the available input (len used to wrap around
 *    when fewer than SHA_CBLOCK - ctx->num bytes were supplied),
 *  - splits inputs larger than BufSiz into BufSiz-sized pieces instead
 *    of letting BufSiz - len wrap and the copy land outside Buffer,
 *  - updates Nl/Nh without assuming that size_t is 32 bits wide.
 */
static void padlock_sha1 (SHA_CTX *ctx,const void *buf,size_t len)
{ const unsigned char	*ptr=buf;
  unsigned char		abcde[16+128+SHA_CBLOCK],*align;
  size_t		res,n;
  unsigned int		lo;
#ifndef _WIN32
  struct sigaction	oact;
#endif

    /* complete the block that is pending in ctx->data, if any */
    if ((n = ctx->num))
    {	res = SHA_CBLOCK - n;
	if (res > len)	res = len;
	SHA1_Update (ctx,ptr,res);
	ptr += res;
	len -= res;
    }

    /* The value of 512 is experimentally found on Linux. On Windows
     * we should be able to switch a tad sooner, because exception
     * handler is installed without any system calls, but just a tad,
     * so 512 is close enough... */
    if (len < 512 || SoftSHA || BufSiz < 512)
    {	SHA1_Update (ctx,ptr,len);
	return;
    }

    /* more than the staging buffer can take: go piece by piece, every
     * piece re-enters this routine so alignment is re-checked */
    if (len > BufSiz)
    {	do
	{   padlock_sha1 (ctx,ptr,BufSiz);
	    ptr += BufSiz;
	    len -= BufSiz;
	} while (len > BufSiz);
	if (len)	padlock_sha1 (ctx,ptr,len);
	return;
    }

    /* hash values vector is required to be 16 bytes aligned */
    align = (unsigned char *)(((size_t)abcde+15)&-16);
    /* copy current hash values vector from SHA_CTX... */
    memcpy (align,&ctx->h0,sizeof(ctx->h0)*5);

    res = len % SHA_CBLOCK;
    len -= res;

    /* save the tail for later processing; this has to happen before
     * the staging copy, which may overwrite it when buf is Buffer */
    if (res)			memcpy (align+128,ptr+len,res);

    /* place the blocks so that they end exactly at Buffer+BufSiz */
    n = BufSiz - len;
    if (n>0 || buf!=Buffer)	chunkmove (Buffer+n,ptr,len);
    ptr = Buffer + n;

#ifndef _WIN32
    /* of course we could have set signal handler once, but the idea
     * behind this exercise is also to produce reusable code... */
    sigaction (SIGSEGV,&segv_action,&oact);
    if (sigsetjmp (xsha_jmp,1) == 0)
#endif
	/* the count is one block larger than the data, presumably so
	 * that the engine runs off the end of Buffer rather than
	 * finalizing the hash */
	xsha1 (len+64,(unsigned int *)align,ptr);
#ifndef _WIN32
    sigaction (SIGSEGV,&oact,NULL);
#endif

    /* copy hash values vector back to SHA_CTX... */
    memcpy (&ctx->h0,align,sizeof(ctx->h0)*5);
    /* ... and adjust the 64-bit bit counter kept in Nh:Nl */
    lo = (unsigned int)(len<<3);
    ctx->Nh += (unsigned int)(len>>29);
    ctx->Nl += lo;
    if (ctx->Nl < lo)	ctx->Nh++;

    /* ... and process the tail */
    if (res)			SHA1_Update (ctx,align+128,res);
}

/*
 * Compute the SHA1 digest of one file and print or verify it.
 *
 *   name - path of the file, "-" for standard input
 *   inp  - NULL to print the digest, otherwise the expected digest in
 *          hex (see tail_process)
 *
 * Returns 2 if the file cannot be opened or read, otherwise the result
 * of tail_process() (0 on success/match, 1 on mismatch).
 *
 * The descriptor obtained from open_wrapper() is closed again on every
 * path, except when it is standard input, so that hashing many files
 * does not run out of descriptors.
 */
static int process_file_sha1 (const char *name,const char *inp)
{ SHA_CTX	ctx;
  int		fd,failed;
  ssize_t	n;
  size_t	offset,bufsiz;
  unsigned char	md [SHA_DIGEST_LENGTH];

    fd = open_wrapper (name);
    if (fd<0) { perror (name); return 2; }

    SHA1_Init (&ctx);		/* pure software */

    /* fill Buffer completely before every update; only the very last
     * update may be partial */
    offset = 0;
    bufsiz = BufSiz;
    while ((n = read (fd,Buffer+offset,bufsiz-offset)) > 0)
    {	offset += n;
	if (offset == bufsiz)	padlock_sha1 (&ctx,Buffer,offset),
				offset=0;
    }
    failed = (n<0);
    if (failed)	perror ("read");	/* report before close() can touch errno */

    if (fd != fileno (stdin))	close (fd);

    if (failed)	return 2;

    if (offset)	padlock_sha1 (&ctx,Buffer,offset);

    SHA1_Final (md,&ctx);	/* pure software */

  return tail_process (name,md,sizeof(md),inp);
}

#ifdef SHA256_CBLOCK
#ifdef _MSC_VER
/*
 * MSVC wrapper around the PadLock "repz xsha256" instruction (emitted as
 * raw bytes f3 0f a6 d0).  edi is loaded from edx and esi from inp, eax
 * is zeroed; ecx is not touched by the code and presumably still holds
 * the first __fastcall argument, num -- TODO confirm against the calling
 * convention.  segv_handler is linked in as the innermost SEH frame via
 * fs:[0] for the duration of the instruction and unlinked afterwards.
 */
static void __fastcall xsha256 (size_t num,unsigned int *ctx,const void *inp)
{   _asm mov	edi,edx
    _asm mov	esi,inp
    _asm xor	eax,eax

    /* push an exception registration record {previous, handler} and
       make it the head of the chain */
    _asm push	OFFSET segv_handler
    _asm push	DWORD PTR fs:[0]
    _asm mov	DWORD PTR fs:[0],esp

    _asm _emit	0xf3 _asm _emit	0x0f _asm _emit	0xa6 _asm _emit	0xd0

    /* restore the previous chain head and drop the handler slot */
    _asm pop	DWORD PTR fs:[0]
    _asm add	esp,4
}
#else
static inline void xsha256 (size_t num,unsigned int *ctx,const void *inp)
{ unsigned int eax=0;

    asm volatile (
#ifdef _WIN32
	"pushl	%4\n"
	"pushl	%%fs:0\n"
	"movl	%%esp,%%fs:0\n"
#endif
	".byte	0xf3,0x0f,0xa6,0xd0\n"  
#ifdef _WIN32
	"popl	%%fs:0\n"
	"addl	$4,%%esp"
#endif
	: "+a"(eax),"+S"(inp),"+c"(num) : "D"(ctx),"p"(segv_handler));
}
#endif

/*
 * SHA256 update step that hands large inputs to the PadLock Hash Engine
 * and everything else to OpenSSL's SHA256_Update().
 *
 *   ctx - OpenSSL SHA256 context; stays a valid software context on
 *         return
 *   buf - input data; either Buffer itself or memory that does not
 *         overlap Buffer (NOTE(review): a pointer into the middle of
 *         Buffer is not supported by the staging copy -- confirm no
 *         caller does that)
 *   len - number of input bytes, any value
 *
 * Steps: top up a partially filled ctx->data block in software, fall
 * back to software for short inputs, otherwise move the whole 64-byte
 * blocks to the end of Buffer so that they finish right where Buffer
 * ends, run xsha256 with a SIGSEGV/SEH handler armed, copy the
 * intermediate hash back into ctx, advance the bit counters and feed the
 * sub-block tail to software.
 *
 * Compared with the straightforward version this one
 *  - clamps the top-up to the available input (len used to wrap around
 *    when fewer than SHA256_CBLOCK - ctx->num bytes were supplied),
 *  - splits inputs larger than BufSiz into BufSiz-sized pieces instead
 *    of letting BufSiz - len wrap and the copy land outside Buffer,
 *  - updates Nl/Nh without assuming that size_t is 32 bits wide.
 */
static void padlock_sha256 (SHA256_CTX *ctx,const void *buf,size_t len)
{ const unsigned char	*ptr=buf;
  unsigned char		abcdefgh[16+128+SHA256_CBLOCK],*align;
  size_t		res,n;
  unsigned int		lo;
#ifndef _WIN32
  struct sigaction	oact;
#endif

    /* complete the block that is pending in ctx->data, if any */
    if ((n = ctx->num))
    {	res = SHA256_CBLOCK - n;
	if (res > len)	res = len;
	SHA256_Update (ctx,ptr,res);
	ptr += res;
	len -= res;
    }

    /* C implementation is so slow that hardware beats it already
     * after single chunk operation... Even hand-coded assembler
     * is unlikely to be fast enough to elevate the limit much... */
    if (len < 128 || SoftSHA || BufSiz < 128)
    {	SHA256_Update (ctx,ptr,len);
	return;
    }

    /* more than the staging buffer can take: go piece by piece, every
     * piece re-enters this routine so alignment is re-checked */
    if (len > BufSiz)
    {	do
	{   padlock_sha256 (ctx,ptr,BufSiz);
	    ptr += BufSiz;
	    len -= BufSiz;
	} while (len > BufSiz);
	if (len)	padlock_sha256 (ctx,ptr,len);
	return;
    }

    /* hash values vector is required to be 16 bytes aligned */
    align = (unsigned char *)(((size_t)abcdefgh+15)&-16);
    /* copy current hash values vector from SHA_CTX... */
    memcpy (align,&ctx->h,sizeof(ctx->h));

    res = len % SHA256_CBLOCK;
    len -= res;

    /* save the tail for later processing; this has to happen before
     * the staging copy, which may overwrite it when buf is Buffer */
    if (res)			memcpy (align+128,ptr+len,res);

    /* place the blocks so that they end exactly at Buffer+BufSiz */
    n = BufSiz - len;
    if (n>0 || buf!=Buffer)	chunkmove (Buffer+n,ptr,len);
    ptr = Buffer + n;

#ifndef _WIN32
    /* of course we could have set signal handler once, but the idea
     * behind this exercise is also to produce reusable code... */
    sigaction (SIGSEGV,&segv_action,&oact);
    if (sigsetjmp (xsha_jmp,1) == 0)
#endif
	/* the count is one block larger than the data, presumably so
	 * that the engine runs off the end of Buffer rather than
	 * finalizing the hash */
	xsha256 (len+64,(unsigned int *)align,ptr);
#ifndef _WIN32
    sigaction (SIGSEGV,&oact,NULL);
#endif

    /* copy hash values vector back to SHA_CTX... */
    memcpy (&ctx->h,align,sizeof(ctx->h));
    /* ... and adjust the 64-bit bit counter kept in Nh:Nl */
    lo = (unsigned int)(len<<3);
    ctx->Nh += (unsigned int)(len>>29);
    ctx->Nl += lo;
    if (ctx->Nl < lo)	ctx->Nh++;

    /* ... and process the tail */
    if (res)			SHA256_Update (ctx,align+128,res);
}

/*
 * Compute the SHA256 digest of one file and print or verify it.
 *
 *   name - path of the file, "-" for standard input
 *   inp  - NULL to print the digest, otherwise the expected digest in
 *          hex (see tail_process)
 *
 * Returns 2 if the file cannot be opened or read, otherwise the result
 * of tail_process() (0 on success/match, 1 on mismatch).
 *
 * The descriptor obtained from open_wrapper() is closed again on every
 * path, except when it is standard input, so that hashing many files
 * does not run out of descriptors.
 */
static int process_file_sha256 (const char *name, const char *inp)
{ SHA256_CTX	ctx;
  int		fd,failed;
  ssize_t	n;
  size_t	offset,bufsiz;
  unsigned char	md [SHA256_DIGEST_LENGTH];

    fd = open_wrapper (name);
    if (fd<0) { perror (name); return 2; }

    SHA256_Init (&ctx);		/* pure software */

    /* fill Buffer completely before every update; only the very last
     * update may be partial */
    offset = 0;
    bufsiz = BufSiz;
    while ((n = read (fd,Buffer+offset,bufsiz-offset)) > 0)
    {	offset += n;
	if (offset == bufsiz)	padlock_sha256 (&ctx,Buffer,offset),
				offset=0;
    }
    failed = (n<0);
    if (failed)	perror ("read");	/* report before close() can touch errno */

    if (fd != fileno (stdin))	close (fd);

    if (failed)	return 2;

    if (offset)	padlock_sha256 (&ctx,Buffer,offset);

    SHA256_Final (md,&ctx);	/* pure software */

  return tail_process (name,md,sizeof(md),inp);
}
#endif

/*
 * Print the help text on stdout and terminate with exit status 0.
 * prog is inserted as the program name; the algorithm name, digest
 * width and FIPS-180 revision follow the global `bits' setting.
 */
void usage (const char *prog)
{
    static const char help[] =
"Usage: %s [OPTION] [FILE]...\n"
"   or: %s [OPTION] --check [FILE]\n"
"Print or check %s (%d-bit) checksums.\n"
"With no FILE, or when FILE is -, read standard input.\n"
"\n"
"  -b, --binary            read files in binary mode (default on DOS/Windows)\n"
"  -c, --check             check %s sums against given list\n"
"  -t, --text              read files in text mode (default)\n"
"\n"
"The following two options are useful only when verifying checksums:\n"
"      --status            don't output anything, status code shows success\n"
"  -w, --warn              warn about improperly formated checksum lines\n"
"\n"
"      --help     display this help and exit\n"
"      --version  output version information and exit\n"
"\n"
"The sums are computed as described in FIPS-180-%d.  When checking, the input\n"
"should be a former output of this program.  The default mode is to print\n"
"a line with checksum, a character indicating type (`*' for binary, ` ' for\n"
"text), and name for each FILE.\n";
    const char *algorithm;
    int         fips_rev;

    if (bits==160)
    {	algorithm = "SHA1";
	fips_rev  = 1;
    }
    else
    {	algorithm = "SHA256";
	fips_rev  = 2;
    }

    printf (help,prog,prog,algorithm,bits,algorithm,fips_rev);
    exit (0);
}

/*
 * Print the one-line version banner on stdout and terminate with exit
 * status 0.  The tool name in the banner follows the global `bits'
 * setting; prog is accepted for symmetry with usage() but not used.
 */
void version (const char *prog)
{
    const char *tool;

    if (bits==160)	tool = "sha1";
    else		tool = "sha256";

    printf ("PHE-enabled %ssum 1.0 by <[EMAIL PROTECTED]>\n",tool);
    exit (0);
}

/*
 * Report a command-line error on stderr and terminate with exit
 * status 1.  opt is either a complete long option ("--xyz") or points
 * at the offending character inside a cluster of short options.
 *
 *   "--check" or a leading 'c'  -> too many arguments for --check
 *   any other text starting '-' -> unrecognized (long) option
 *   anything else               -> invalid short option character
 */
void wrongopt (const char *prog,const char *opt)
{
    int too_many_args;

    if (opt[0]=='-')
	too_many_args = (strcmp (opt,"--check") == 0);
    else
	too_many_args = (opt[0]=='c');

    if (too_many_args)
	fprintf (stderr,"%s: only one argument may be specified "
			"when using --check\n",prog);
    else if (opt[0]=='-')
	fprintf (stderr,"%s: unrecognized option `%s'\n",prog,opt);
    else
	fprintf (stderr,"%s: invalid option -- %c\n",prog,opt[0]);

    fprintf (stderr,"Try `%s --help' for more information.\n",prog);
    exit (1);
}

/*
 * Verify the files listed in a checksum list.
 *
 *   checklist - path of the list, "-" for standard input
 *   proc      - digest routine, called as proc(file name, hex digest);
 *               expected to return 0 (match), 1 (mismatch) or 2 (file
 *               could not be opened or read)
 *
 * Every line must consist of bits/4 hex digits, a space, a type
 * character (`*' selects binary mode, anything else text mode) and a
 * non-empty file name.  Other lines are counted as improperly formatted
 * and reported when `warn' is set.  Unless `quiet' is set, a verdict is
 * printed per file and a summary goes to stderr.
 *
 * Returns the bitwise OR of all proc() results, with bit 2 (value 4)
 * set if any line was improperly formatted.  Exits with status 1 if the
 * list itself cannot be opened.
 *
 * Fixes relative to the straightforward version: characters are
 * converted to unsigned char before isxdigit() (negative char values
 * are undefined there); a line that ends right after the separator is
 * rejected instead of using whatever follows the terminator as a file
 * name; the list is closed again when it is not stdin.
 */
int check (const char *checklist,int (*proc)(const char *,const char *))
{ FILE *fp;
  char  line [4096],*s;
  int   i,num=0,openfails=0,formatfails=0,ret=0;
  size_t len;
  const char * const verdict[] = { "OK", "FAILED", "FAILED open or read" };

    if (checklist[0]=='-' && checklist[1]=='\0')
	fp = stdin;
    else
    {	fp = fopen (checklist,"r");
	if (fp == NULL)
	{   perror (checklist);
	    exit (1);
	}
    }

    while ((s = fgets (line,sizeof(line),fp)))
    {   num++;

	if (!(len = strlen (s))) break;
	/* strip trailing line terminators (never the first character) */
	while (--len && (s[len]=='\n' || s[len]=='\r'))
	    s[len]='\0';

	/* skip over the hex digest, two digits per byte */
	for (i=0;i<bits/8;i++,s+=2)
	    if (!isxdigit((unsigned char)s[0]) ||
		!isxdigit((unsigned char)s[1])) break;

	/* s[1] is the type character, s+2 the file name */
	if (i<bits/8 || s[0]!=' ' || s[1]=='\0' || s[2]=='\0')
	{   ret |= 4;
	    formatfails++;
	    if (warn)
		fprintf (stderr,"%s: %d: improperly formatted %s checksum line\n",
				checklist,num,bits==160?"SHA1":"SHA256");
	    continue;
	}

	if (s[1]=='*')	omode = O_BINARY;
	else		omode = O_TEXT;
	ret |= (i = (*proc) (s+2,line));
	if (i==2)	openfails++;
	if (!quiet)	printf ("%s: %s\n",s+2,verdict[i]);
    }

    if (fp != stdin)	fclose (fp);

    if (!quiet)
    {	num -= formatfails;
	if (num==0)
	    fprintf (stderr,"%s: no properly formatted %s checksum lines found\n", 
			    checklist,bits==160?"SHA1":"SHA256");
	else if (openfails)
	    fprintf (stderr,"%s: WARNING: %d of %d listed file%s could not be read\n",
			    checklist,openfails,num,num>1?"s":"");
    }

  return ret;
}

/*
 * Entry point: parse the options, set up the staging buffer with its
 * guard page, probe for the PadLock Hash Engine and then either verify
 * a checksum list (--check) or print the digest of every FILE operand
 * (standard input when there is none).
 *
 * The exit status is the OR of the per-file results, see check() and
 * the process_file_* routines.
 *
 * Fixes relative to the straightforward version: a lone "-" ends option
 * parsing and is hashed as standard input instead of being swallowed as
 * an empty option cluster; a failing sysconf() falls back to 4096;
 * errno is saved before perror() can change it; the Win32 error code is
 * printed with a matching conversion.
 */
int main(int argc,const char *argv[])
{ int  n,i,j,ret=0;
  int (*proc)(const char *,const char *) = process_file_sha1;
  const char *checklist = NULL;

#ifdef SHA256_CBLOCK
    /* if there is sha256 in program name, hash with SHA256:-) */
    if (strstr (argv[0],"sha256"))	proc = process_file_sha256, bits=256;
#endif

    for (i=1;i<argc;i++)
    { const char *s = argv[i];

	/* the first non-option, including a lone "-", starts the
	 * list of FILE operands */
	if (s[0]!='-' || s[1]=='\0') break;

	if (s[1]=='-')
	{   if      (!strcmp(s,"--help"))	usage (argv[0]);
	    else if (!strcmp(s,"--version"))	version (argv[0]);
	    else if (!strcmp(s,"--status"))	quiet=1;
	    else if (!strcmp(s,"--binary"))	omode=O_BINARY, cmode='*';
	    else if (!strcmp(s,"--text"))	omode=O_TEXT,   cmode=' ';
	    else if (!strcmp(s,"--warn"))	warn =1;
	    else if (!strcmp(s,"--check"))
	    {	/* takes at most one, final, argument */
		i++;
		if (i==argc)			checklist = "-";
		else				checklist = argv[i++];
		if (i<argc)			wrongopt (argv[0],s);
	    }
	    else				wrongopt (argv[0],s);
	}
	else for (j=1;s[j];j++)
	{   if      (s[j]=='b')			omode=O_BINARY, cmode='*';
	    else if (s[j]=='t')			omode=O_TEXT,   cmode=' ';
	    else if (s[j]=='w')			warn =1;
	    else if (s[j]=='c')
	    {	/* takes at most one, final, argument */
		i++;
		if (i==argc)			checklist = "-";
		else				checklist = argv[i++];
		if (i<argc)			wrongopt (argv[0],s+j);
	    }
	    else				wrongopt (argv[0],s+j);
	}
    }

#ifdef _SC_PAGE_SIZE
    n = sysconf(_SC_PAGE_SIZE);
    if (n <= 0)	n = 4096;	/* sysconf() failed */
#else
    n = 4096;
#endif
    BufSiz = NPAGES*n;

    /* arrange buffer followed by non-accessible page */
#ifndef _WIN32
    Buffer = mmap (0,BufSiz+n,PROT_READ|PROT_WRITE,
				MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
    if (Buffer == (void *)MAP_FAILED)
    { int err = errno;
	perror ("mmap");
	exit (err);
    }
    if (mprotect (Buffer+BufSiz,n,PROT_NONE) != 0)
    { int err = errno;
	perror ("mprotect");
	exit (err);
    }
#else
    { DWORD junk,err;
	Buffer = VirtualAlloc (NULL,BufSiz+n,MEM_COMMIT,PAGE_READWRITE);
	if (Buffer == NULL)
	{   err = GetLastError();
	    fprintf (stderr,"VirtualAlloc failed: %lu\n",(unsigned long)err);
	    exit (err);
	}
	if (!VirtualProtect (Buffer+BufSiz,n,PAGE_NOACCESS,&junk))
	{   err = GetLastError();
	    fprintf (stderr,"VirtualProtect failed: %lu\n",(unsigned long)err);
	    exit (err);
	}
    }
#endif

    if (phe_available ())
    {	/*
	 * SPECIAL NOTE ABOUT -= 64
	 * The catch is that SEGV is raised 64 bytes prior to the guard
	 * page, therefore BufSiz needs to be adjusted. This behaviour
	 * might be specific to particular CPU stepping...
	 */
	BufSiz -= 64;
	SoftSHA = 0;
    }

    if (checklist)
	ret = check (checklist,proc);
    else if (i==argc)
	ret = (*proc) ("-",NULL);
    else for (;i<argc;i++)
    	ret |= (*proc) (argv[i],NULL);

  return ret;
}

Reply via email to