Re: [OctDev] Patch for nan package on GNU/Hurd

schloegl Tue, 24 Apr 2012 01:03:32 -0700

Thanks for the GNU/Hurd fix. The version in the repository has already
advanced, I considered your patch in the following way.


  Alois


> Sorry, forgot to attach the patch…
> Sébastien Villemot <sebastien.ville...@ens.fr> writes:
>> I attach a patch which is applied against the (upcoming) Debian release
2.5.5-2 of octave-nan.
>> It fixes a compilation error on GNU/Hurd (see [1]).
>> Please consider it for inclusion in the next release of the nan
package.
>> [1] http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=670073
> --
> Sébastien Villemot
> Researcher in Economics & Debian Maintainer
> http://www.dynare.org/sebastien
> Phone: +33-1-40-77-84-04 - GPG Key: 4096R/381A7594
> ------------------------------------------------------------------------------
For Developers, A Lot Can Happen In A Second.
> Boundary is the first to Know...and Tell You.
> Monitor Your Applications in Ultra-Fine Resolution. Try it FREE!
http://p.sf.net/sfu/Boundary-d2dvs2
> _______________________________________________
> Octave-dev mailing list
> Octave-dev@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/octave-dev

//-------------------------------------------------------------------
//   C-MEX implementation of kth element - this function is part of the NaN-toolbox.
//
//   usage: x = xptopen(filename)
//   usage: x = xptopen(filename,'r')
//		read filename and return variables in struct x

//   usage: xptopen(filename,'w',x)
//		save fields of struct x in filename

//   usage: x = xptopen(filename,'a',x)
//		append fields of struct x to filename
//
//   References:
//
//   This program is free software; you can redistribute it and/or modify
//   it under the terms of the GNU General Public License as published by
//   the Free Software Foundation; either version 3 of the License, or
//   (at your option) any later version.
//
//   This program is distributed in the hope that it will be useful,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//   GNU General Public License for more details.
//
//   You should have received a copy of the GNU General Public License
//   along with this program; if not, see <http://www.gnu.org/licenses/>.
//
//    $Id: xptopen.cpp 10320 2012-04-24 07:58:12Z schloegl $
//    Copyright (C) 2010,2011,2012 Alois Schloegl <alois.schlo...@ist.ac.at>
//    This function is part of the NaN-toolbox
//    http://pub.ist.ac.at/~schloegl/matlab/NaN/
//
// References:
// [1]	TS-140 THE RECORD LAYOUT OF A DATA SET IN SAS TRANSPORT (XPORT) FORMAT
//	http://support.sas.com/techsup/technote/ts140.html
// [2] IBM floating point format
//	http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture
// [3] see http://old.nabble.com/Re%3A-IBM-integer-and-double-formats-p20428979.html
// [4] STATA File Format
//	http://www.stata.com/help.cgi?dta
//	http://www.stata.com/help.cgi?dta_113
//-------------------------------------------------------------------

/*
SPSS file format
// http://cvs.savannah.gnu.org/pspp/doc/data-file-format.texi?root=pspp&content-type=text%2Fplain
*/

#define TEST_CONVERSION 2  // 0: ieee754, 1: SAS converter (big endian bug), 2: experimental
#define DEBUG 0

#include <ctype.h>
#include <math.h>
//#include <sqlite.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include <time.h>
#include "mex.h"

#ifdef tmwtypes_h
  #if (MX_API_VER<=0x07020000)
    typedef int mwSize;
    typedef int mwIndex;
  #endif
#endif

#define NaN  		(0.0/0.0)
#define fix(m)     	(m<0 ? ceil(m) : floor(m))
#define max(a,b)	(((a) > (b)) ? (a) : (b))
#define min(a,b)	(((a) < (b)) ? (a) : (b))


#if 0

#elif defined(__linux__) 
#  include <endian.h>
#  include <byteswap.h>

#elif defined(__GLIBC__)	// for Hurd
#  include <endian.h>
#  include <byteswap.h>

#elif defined(__MINGW32__) 
   /* use local version because MINGW does not provide byteswap.h */
#  define __BIG_ENDIAN		4321
#  define __LITTLE_ENDIAN  	1234
#  define __BYTE_ORDER 		__LITTLE_ENDIAN

#elif defined(__NetBSD__)
#  include <sys/bswap.h>
#  define __BIG_ENDIAN _BIG_ENDIAN
#  define __LITTLE_ENDIAN _LITTLE_ENDIAN
#  define __BYTE_ORDER _BYTE_ORDER
#  define bswap_16(x) bswap16(x)
#  define bswap_32(x) bswap32(x)
#  define bswap_64(x) bswap64(x)

#elif defined(__APPLE__)
#  include <CoreFoundation/CFByteOrder.h>
#  define __BIG_ENDIAN       4321
#  define __LITTLE_ENDIAN  1234
#if (defined(__LITTLE_ENDIAN__) && (__LITTLE_ENDIAN__ == 1))
    #define __BYTE_ORDER __LITTLE_ENDIAN
#else
    #define __BYTE_ORDER __BIG_ENDIAN
#endif
#  define bswap_16(x) CFSwapInt16(x)
#  define bswap_32(x) CFSwapInt32(x)
#  define bswap_64(x) CFSwapInt64(x)

#elif (defined(BSD) && (BSD >= 199103)) && !defined(__GLIBC__)
#  include <machine/endian.h>
#  define __BIG_ENDIAN _BIG_ENDIAN
#  define __LITTLE_ENDIAN _LITTLE_ENDIAN
#  define __BYTE_ORDER _BYTE_ORDER
#  define bswap_16(x) __bswap16(x)
#  define bswap_32(x) __bswap32(x)
#  define bswap_64(x) __bswap64(x)

#elif defined(__GNUC__) 
   /* use byteswap macros from the host system, hopefully optimized ones ;-) */
#  include <endian.h>
#  include <byteswap.h>
#  define bswap_16(x) __bswap_16 (x)
#  define bswap_32(x) __bswap_32 (x)
#  define bswap_64(x) __bswap_64 (x)

#elif defined(__sparc__) 
#  define __BIG_ENDIAN  	4321
#  define __LITTLE_ENDIAN  	1234
#  define __BYTE_ORDER 	__BIG_ENDIAN

#else
#  error Unknown platform
#endif 

#if defined(__MINGW32__) || defined(__sparc__) 

# ifndef bswap_16
#  define bswap_16(x)   \
	((((x) & 0xff00) >> 8) | (((x) & 0x00ff) << 8))
# endif

# ifndef bswap_32
#  define bswap_32(x)   \
	 ((((x) & 0xff000000) >> 24) \
        | (((x) & 0x00ff0000) >> 8)  \
	| (((x) & 0x0000ff00) << 8)  \
	| (((x) & 0x000000ff) << 24))

# endif

# ifndef bswap_64
#  define bswap_64(x) \
      	 ((((x) & 0xff00000000000000ull) >> 56)	\
      	| (((x) & 0x00ff000000000000ull) >> 40)	\
      	| (((x) & 0x0000ff0000000000ull) >> 24)	\
      	| (((x) & 0x000000ff00000000ull) >> 8)	\
      	| (((x) & 0x00000000ff000000ull) << 8)	\
      	| (((x) & 0x0000000000ff0000ull) << 24)	\
      	| (((x) & 0x000000000000ff00ull) << 40)	\
      	| (((x) & 0x00000000000000ffull) << 56))
# endif

#endif


#if !defined(__BIG_ENDIAN) && !defined(__LITTLE_ENDIAN) 
#error  ENDIANITY is not known 
#endif 

#if !defined(bswap_16) || !defined(bswap_32) || !defined(bswap_64)
#error SWAP operation not available 
#endif 


#if __BYTE_ORDER == __BIG_ENDIAN
#define l_endian_u16(x) ((uint16_t)bswap_16((uint16_t)(x)))
#define l_endian_u32(x) ((uint32_t)bswap_32((uint32_t)(x)))
#define l_endian_u64(x) ((uint64_t)bswap_64((uint64_t)(x)))
#define l_endian_i16(x) ((int16_t)bswap_16((int16_t)(x)))
#define l_endian_i32(x) ((int32_t)bswap_32((int32_t)(x)))
#define l_endian_i64(x) ((int64_t)bswap_64((int64_t)(x)))

#define b_endian_u16(x) ((uint16_t)(x))
#define b_endian_u32(x) ((uint32_t)(x))
#define b_endian_u64(x) ((uint64_t)(x))
#define b_endian_i16(x) ((int16_t)(x))
#define b_endian_i32(x) ((int32_t)(x))
#define b_endian_i64(x) ((int64_t)(x))

#elif __BYTE_ORDER==__LITTLE_ENDIAN
#define l_endian_u16(x) ((uint16_t)(x))
#define l_endian_u32(x) ((uint32_t)(x))
#define l_endian_u64(x) ((uint64_t)(x))
#define l_endian_i16(x) ((int16_t)(x))
#define l_endian_i32(x) ((int32_t)(x))
#define l_endian_i64(x) ((int64_t)(x))

#define b_endian_u16(x) ((uint16_t)bswap_16((uint16_t)(x)))
#define b_endian_u32(x) ((uint32_t)bswap_32((uint32_t)(x)))
#define b_endian_u64(x) ((uint64_t)bswap_64((uint64_t)(x)))
#define b_endian_i16(x) ((int16_t)bswap_16((int16_t)(x)))
#define b_endian_i32(x) ((int32_t)bswap_32((int32_t)(x)))
#define b_endian_i64(x) ((int64_t)bswap_64((int64_t)(x)))

#endif /* __BYTE_ORDER */


/* 
	Including ZLIB enables reading gzipped files (they are decompressed on-the-fly)  
	The output files can be zipped, too. 
 */

#ifdef WITH_ZLIB
#include <zlib.h>
#endif


double xpt2d(uint64_t x);
uint64_t d2xpt(double x);
double tm_time2gdf_time(struct tm *t);

/*
	compare first n characters of two strings, ignore case
 */
int strncmpi(const char* str1, const char* str2, size_t n)
{
	unsigned int k=0;
	int r=0;
	while (!r && str1[k] && str2[k] && (k<n)) {
		r = tolower(str1[k]) - tolower(str2[k]);
		k++;
	}
	return(r);
}

void mexFunction(int POutputCount,  mxArray* POutput[], int PInputCount, const mxArray *PInputs[])
{
	const char L1[] = "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  ";
	const char L2[] = "SAS     SAS     SASLIB 6.06     bsd4.2                          13APR89:10:20:06";
	//const char L3[] = "";
	const char L4[] = "HEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000140  ";
	const char L5[] = "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  ";
	const char L6[] = "SAS     ABC     SASLIB 6.06     bsd4.2                          13APR89:10:20:06";
	//const char L7[] = "";
	const char L8[] = "HEADER RECORD*******NAMESTR HEADER RECORD!!!!!!!000000000200000000000000000000  ";
	const char LO[] = "HEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  ";

	const  char DATEFORMAT[] = "%d%b%y:%H:%M:%S";
	char   *fn = NULL;
	char   Mode[3] = "r";
	size_t count = 0, HeadLen0=80*8, HeadLen2=0, sz2 = 0;
	uint32_t NS = 0;
	char   H0[HeadLen0];
	char   *H2 = NULL;
	char   SWAP = 0;

#ifndef ZLIB_H
	FILE   *fid;
#else
	gzFile  fid;
	#define fopen 		gzopen	
	#define fread(a,b,c,d) 	(gzread(d,a,b*c)/b)	
	#define fwrite(a,b,c,d)	(gzwrite(d,a,b*c)/b)	
	#define feof 		gzeof	
	#define fseek 		gzseek	
	#define fclose 		gzclose	
	#define	rewind(fid) 	(gzseek(fid,0,SEEK_SET)) 
#endif

	// check for proper number of input and output arguments
	if ( PInputCount > 0 && mxGetClassID(PInputs[0])==mxCHAR_CLASS) {
		size_t buflen = (mxGetM(PInputs[0]) * mxGetN(PInputs[0]) * sizeof(mxChar)) + 1;
		fn = (char*)malloc(buflen);
		mxGetString(PInputs[0], fn, buflen);
	}
	else {
		mexPrintf("XPTOPEN read of several file formats and writing of the SAS Transport Format (*.xpt)\n");
		mexPrintf("\n\tX = xptopen(filename)\n");
		mexPrintf("\tX = xptopen(filename,'r')\n");
		mexPrintf("\t\tread filename and return variables in struct X\n");
#ifdef ZLIB_H		
		mexPrintf("\tSupported are ARFF, SAS-XPT and STATA files with or w/o zlib/gzip compression.\n");
#else
		mexPrintf("\tSupported are ARFF, SAS-XPT and STATA files.\n");
#endif
		mexPrintf("\n\tX = xptopen(filename,'w',X)\n");
		mexPrintf("\t\tsave fields of struct X in filename.\n\n");
		mexPrintf("\tThe fields of X must be column vectors of equal length.\n");
		mexPrintf("\tEach vector is either a numeric vector or a cell array of strings.\n");
		mexPrintf("\nThe SAS-XPT format stores Date/Time as numeric value counting the number of days since 1960-01-01.\n\n");
		return;
	}

	if (PInputCount > 1)
	if (mxGetClassID(PInputs[1])==mxCHAR_CLASS && mxGetNumberOfElements(PInputs[1])) {
		mxGetString(PInputs[1],Mode,3);
		Mode[2]=0;
	}

	fid = fopen(fn,Mode);
	if (fid < 0) {
	        mexWarnMsgTxt("Warning XPTOPEN: suppor for SPSS file format is very experimantal ( do not use it for production use)\n");
		}

	if (Mode[0]=='r' || Mode[0]=='a' ) {
		count += fread(H0,1,80*8,fid);
		enum FileFormat {
			noFile, unknown, ARFF, SASXPT, SPSS, SQLite, STATA
		};
		enum FileFormat TYPE; 		/* type of file format */
		uint8_t		LittleEndian;   /* 1 if file is LittleEndian data format and 0 for big endian data format*/

		TYPE = unknown;
		if (!memcmp(H0,"$FL2@(#) SPSS DATA FILE",23) || !memcmp(H0,"$FL2@(#) PASW STATISTICS DATA FILE",27)) {
		/*
			SPSS file format
		*/
                        uint32_t M=0; 

		        mexWarnMsgTxt("XPTOPEN: support of for SPSS file format is very experimental (do not use it for production use)\n");

			TYPE = SPSS;
			switch (*(uint32_t*)(H0+64)) {
			case 0x00000002:
			case 0x00000003:
		    		LittleEndian = 1;
		    		SWAP = __BYTE_ORDER==__BIG_ENDIAN;
		    		NS = l_endian_u32(*(uint32_t*)(H0+68));
		    		M  = l_endian_u32(*(uint32_t*)(H0+80));
			    	break;
			case 0x02000000:
			case 0x03000000:
		    		SWAP = __BYTE_ORDER==__LITTLE_ENDIAN;
		    		LittleEndian = 0;
		    		NS = b_endian_u32(*(uint32_t*)(H0+68));
		    		M  = b_endian_u32(*(uint32_t*)(H0+80));
			    	break;
			default:
				TYPE = unknown;
			}
			NS = *(int32_t*)(H0+80);
			M  = *(int32_t*)(H0+80);
			if (SWAP) {
				NS = bswap_32(NS);
				M  = bswap_32(M);
			}
			HeadLen0 = 184;
			char *H2 = (char*)malloc(NS*32);
			size_t c2 = 0;

			/*
				Read Variable SPSS header
			*/
			int ns = 0;
			const char **ListOfVarNames = (const char**)malloc((NS+1) * sizeof(char*));
			char *VarNames   = (char*)malloc((NS+1) * sizeof(char) * 9);
			double *MISSINGS = (double*)malloc((NS+1) * sizeof(double));
			for (uint32_t k=0; k<NS; k++) {
				int32_t rec_type, type, FlagHasLabel, FlagMissing;
				c2 += fread(&rec_type,1,4,fid);
				c2 += fread(&type,1,4,fid);
				c2 += fread(&FlagHasLabel,1,4,fid);
				c2 += fread(&FlagMissing,1,4,fid);
				fseek(fid,4,SEEK_CUR);
				if (SWAP) {
					rec_type     = bswap_32(rec_type);
					type         = bswap_32(type);
					FlagHasLabel = bswap_32(FlagHasLabel);
				}
				if (rec_type != 2) ;//error('invalid SPSS file');
				c2 += fread(VarNames+9*ns,1,8,fid);
				VarNames[9*ns+8] = 0;
				ListOfVarNames[ns] = VarNames+9*ns;
				if (FlagHasLabel==1) {
					int32_t LenLabel;
					c2 += fread(&LenLabel,1,4,fid);
					if (SWAP) LenLabel = bswap_32(LenLabel);
					if (LenLabel%4) LenLabel += 4 - LenLabel % 4;
					fseek(fid,LenLabel,SEEK_CUR);
				}
				if (FlagMissing)
					c2 += fread(MISSINGS+ns,1,8,fid);

				if (type != -1) ns++;
			}


			NS = ns;
			mxArray **R = (mxArray**) mxMalloc(NS * sizeof(mxArray*));
			/* ToDo:
				EXTRACT data
			*/

			/* convert into output */
			POutput[0] = mxCreateStructMatrix(1, 1, NS, ListOfVarNames);
			for (uint32_t k = 0; k < NS; k++) {
				 mxSetField(POutput[0], 0, ListOfVarNames[k], R[k]);
			}

			if (MISSINGS) 	    free(MISSINGS);
			if (VarNames) 	    free(VarNames);
			if (ListOfVarNames) free(ListOfVarNames);
			if (H2) 	    free(H2);
		}

		if (TYPE == SPSS) {
/*
The records must appear in the following order:
- File header record.
- Variable records.
- All pairs of value labels records and value label variables records,
if present.
- Document record, if present.
- Any of the following records, if present, in any order:
	Machine integer info record.
	Machine floating-point info record.
	Variable display parameter record.
	Long variable names record.
	Miscellaneous informational records.
- Dictionary termination record.
- Data record.

*/			;
		}

		else if (!memcmp(H0,"SQLite format 3\000",16) && H0[21]==64 && H0[22]==32 && H0[23]==32 ) {
			TYPE = SQLite;

			fclose(fid);
			mexErrMsgTxt("SQLite format not supported yet");
			return;
		}

		else if ((H0[0]>=0x6e || H0[0]<=114) && (H0[1]==1 || H0[1]==2) && H0[2]==1 && H0[3]==0) {
		/*
			STATA File Format
			http://www.stata.com/help.cgi?dta
			http://www.stata.com/help.cgi?dta_113
			Stata files written by R start with 0x6e
		*/
                        uint32_t M=0; 

			TYPE = STATA;
			// Header 119 bytes
	    		LittleEndian = H0[1]==2;
	    		if (LittleEndian) {
	    			NS = l_endian_u16(*(uint16_t*)(H0+4));
	    			M  = l_endian_u32(*(uint32_t*)(H0+6));
	    		}
	    		else {
	    			NS = b_endian_u16(*(uint16_t*)(H0+4));
	    			M  = b_endian_u32(*(uint32_t*)(H0+6));
	    		}

	    		// Descriptors
	    		int fmtlen = (H0[0]==113) ? 12 : 49;
	    		fseek(fid,109,SEEK_SET);
	    		size_t HeadLen2 = 2+NS*(1+33+2+fmtlen+33+81);
			char *H1 = (char*)malloc(HeadLen2);
			HeadLen2 = fread(H1,1,HeadLen2,fid);

			// expansion fields
			char typ; int32_t len;
			char flagSWAP = (((__BYTE_ORDER == __BIG_ENDIAN) && LittleEndian) || ((__BYTE_ORDER == __LITTLE_ENDIAN) && !LittleEndian));
			do {
				fread(&typ,1,1,fid);
				fread(&len,4,1,fid);
				if (flagSWAP) bswap_32(len);
				fseek(fid,len,SEEK_CUR);
			} while (len);
			uint8_t *typlist = (uint8_t*)H1;

/*
			char *varlist = H1+NS;
			char *srtlist;
			char *fmtlist = H1+NS*36+2;
			char *lbllist = H1+NS*(36+fmtlen)+2;
*/

			mxArray **R = (mxArray**) mxMalloc(NS*sizeof(mxArray*));
			size_t *bi = (size_t*) malloc((NS+1)*sizeof(size_t*));
			const char **ListOfVarNames = (const char**)malloc(NS * sizeof(char*));
			bi[0] = 0;
			for (size_t k = 0; k < NS; k++) {
				size_t sz;
				ListOfVarNames[k] = H1+NS+33*k;
				switch (typlist[k]) {
				case 0xfb: sz = 1; break;
				case 0xfc: sz = 2; break;
				case 0xfd: sz = 4; break;
				case 0xfe: sz = 4; break;
				case 0xff: sz = 8; break;
				default: sz = typlist[k];
				}
				bi[k+1] = bi[k]+sz;
			}

			// data
			uint8_t *data = (uint8_t *) malloc(bi[NS] * M);
			fread(data, bi[NS], M, fid);

			char *f = (char*)malloc(bi[NS]+1);
			for (size_t k = 0; k < NS; k++) {
				switch (typlist[k]) {
				case 0xfb:
					R[k] = mxCreateDoubleMatrix(M, 1, mxREAL);
					for (size_t m = 0; m < M; m++) {
						int8_t d = *(int8_t*)(data+bi[k]+m*bi[NS]);
						((double*)mxGetData(R[k]))[m] = (d>100) ? NaN : d;
					}
					break;
				case 0xfc:
					R[k] = mxCreateDoubleMatrix(M, 1, mxREAL);
					if (flagSWAP) for (size_t m = 0; m < M; m++) {
						int16_t d = (int16_t) bswap_16(*(uint16_t*)(data+bi[k]+m*bi[NS]));
						((double*)mxGetData(R[k]))[m] = (d>32740) ? NaN : d;
					}
					else for (size_t m = 0; m < M; m++) {
						int16_t d = *(int16_t*)(data+bi[k]+m*bi[NS]);
						((double*)mxGetData(R[k]))[m] = (d>32740) ? NaN : d;
					}
					break;
				case 0xfd:
					R[k] = mxCreateDoubleMatrix(M, 1, mxREAL);
					if (flagSWAP) for (size_t m = 0; m < M; m++) {
						int32_t d = (int32_t)bswap_32(*(uint32_t*)(data+bi[k]+m*bi[NS]));
						((double*)mxGetData(R[k]))[m] = (d>2147483620) ? NaN : d;
					}
					else for (size_t m = 0; m < M; m++) {
						int32_t d = *(int32_t*)(data+bi[k]+m*bi[NS]);
						((double*)mxGetData(R[k]))[m] = (d>2147483620) ? NaN : d;
					}
					break;
				case 0xfe:
					R[k] = mxCreateNumericMatrix(M, 1, mxSINGLE_CLASS, mxREAL);
					if (flagSWAP) for (size_t m = 0; m < M; m++) {
						((uint32_t*)mxGetData(R[k]))[m] = bswap_32(*(uint32_t*)(data+bi[k]+m*bi[NS]));;
					}
					else for (size_t m = 0; m < M; m++) {
						((uint32_t*)mxGetData(R[k]))[m] = *(uint32_t*)(data+bi[k]+m*bi[NS]);
					}
					break;
				case 0xff:
					R[k] = mxCreateDoubleMatrix(M, 1, mxREAL);
					if (flagSWAP) for (size_t m = 0; m < M; m++) {
						((uint64_t*)mxGetData(R[k]))[m] = bswap_64(*(uint64_t*)(data+bi[k]+m*bi[NS]));
					}
					else for (size_t m = 0; m < M; m++) {
						((uint64_t*)mxGetData(R[k]))[m] = *(uint64_t*)(data+bi[k]+m*bi[NS]);
					}
					break;
				default:
					R[k] =	mxCreateCellMatrix(M, 1);
					size_t sz = typlist[k];
					for (size_t m = 0; m < M; m++) {
						memcpy(f, data+bi[k]+m*bi[NS], sz);
						f[sz] = 0;
						mxSetCell(R[k], m, mxCreateString(f));
					}
				}
			}
			if (f)  free(f);
			if (H1) free(H1);
			if (bi) free(bi);

			/* convert into output */
			POutput[0] = mxCreateStructMatrix(1, 1, NS, ListOfVarNames);
			for (size_t k = 0; k < NS; k++) {
				 mxSetField(POutput[0], 0, ListOfVarNames[k], R[k]);
			}

			if (ListOfVarNames) free(ListOfVarNames);
		}

		else if (H0[0]=='%' || H0[0]=='@') {
		/*
			 ARFF
		*/
                        uint32_t M=0; 

			TYPE = ARFF;
			rewind(fid);

			char *H1 = NULL;
			count = 0;
			size_t ns = 0;
			char *vartyp = NULL;
			char **datestr = NULL;
			const char **ListOfVarNames = NULL;
			mxArray **R = NULL;
			size_t m = 0;

			while (!feof(fid)) {
				HeadLen0 = max(1024,HeadLen0*2);
				H1 = (char*)realloc(H1,HeadLen0);
				count += fread(H1+count,1,HeadLen0-count-1,fid);
			}
			H1[count]   = 0;

			switch (H1[count-1]) {
			case 0x0a:
			case 0x0d:
				H1[count]   = 0;
				break;
			default:
				H1[count]   = 0x0a;
			}
			H1[count+1] = 0;

			char *line = strtok(H1,"\x0a\0x0d");

			int status = 0;
			while (line) {

				if (!strncmpi(line,"@relation",9)) {
					status = 1;
				}

				else if (status == 1 && !strncmpi(line,"@attribute",10)) {
					if (ns<=NS) {
						ns = max(16, ns*2);
						ListOfVarNames = (const char**)realloc(ListOfVarNames,ns*sizeof(char*));
						vartyp         = (char*)realloc(vartyp,ns*sizeof(char));
						R              = (mxArray**) mxRealloc(R,ns*sizeof(mxArray*));
					}
					size_t k = 10;
					char *p1, *p2;
					while (isspace(line[k])) k++;
					p1 = line+k;
					while (!isspace(line[k])) k++;
					line[k++]=0;
					while (isspace(line[k])) k++;
					p2 = line+k;

					ListOfVarNames[NS] = p1;
					if      (!strncmpi(p2,"numeric",7)) {
						vartyp[NS] = 1;
					}
					else if (!strncmpi(p2,"integer",7)) {
						vartyp[NS] = 2;
					}
					else if (!strncmpi(p2,"real",4)) {
						vartyp[NS] = 3;
					}
					else if (!strncmpi(p2,"string",6)) {
						vartyp[NS] = 4;
					}
					else if (!strncmpi(p2,"{",1)) {
						vartyp[NS] = 5;
					}
					else if (!strncmpi(p2,"date",4)) {
						vartyp[NS] = 6;
						datestr = (char**)realloc(datestr,(NS+1)*sizeof(char*));
						p2+=4;
						while (isspace(*p2)) p2++;
						datestr[NS] = p2;
						if (p2[0]==34) {
							p2++;
							while (p2[0]!=34 && p2[0]) p2++;
							p2[1]=0;
						}
					}
					else if (!strncmpi(p2,"relational",10)) {
						vartyp[NS] = 7;
					}
					else vartyp[NS] = 99;

					NS++;
				}

				else if (status == 1 && !strncmpi(line,"@data",5)) {
					status = 2;
					char *p = line;
					while (*p) p++;  // goto end of current line
					p++; 		   // skip \x00
					M = 0;
					while (*p) {
						if (p[0]==0x0a || p[0]==0x0d) {
							// count number of <CR>
							M++;
							// skip next char (deals with <CR><NL>)
							p+=2;
						}
						else p++;
					}
					for (size_t k=0; k<NS; k++) {
						if (vartyp[k]==4 || vartyp[k]==5)
							R[k] = mxCreateCellMatrix(M, 1);
						else
							R[k] = mxCreateDoubleMatrix(M, 1, mxREAL);
					}
				}

				else if (status == 2) {

					size_t p = 0,k;
					for (ns = 0; ns<NS; ns++) {
						// read next token
						while (isspace(line[p])) p++;
						if (line[p]==39) {
							p++; k=p;
							while (line[k]!=39 && line[k]) k++;
							// if (!line[k]) ; // error
							line[k++] = 0;
						}
						else
							k=p;
						while (line[k] != ',' && line[k] != 0) k++;
						line[k] = 0;

						if (vartyp[ns] < 4) {
							double d = atof(line+p);
							*(mxGetPr(R[ns])+m) = d;
						}
						else if (vartyp[ns] < 6) {
							mxSetCell(R[ns], m, mxCreateString(line+p));
						}
						else if (vartyp[ns] == 6) {
							size_t kk[6],n=0, N=strlen(datestr[ns]);
							char T0[6][5];
							char ix = 0;
							struct tm t;

							for (n=0; n < N; n++) {
								switch (datestr[ns][n]) {
								case 'Y':
									ix = 0;
									break;
								case 'M':
									ix = 1;
									break;
								case 'd':
									ix = 2;
									break;
								case 'H':
									ix = 3;
									break;
								case 'm':
									ix = 4;
									break;
								case 's':
									ix = 5;
									break;
								default:
									ix = 99;
								}

								if (ix < 6) {
									T0[ix][kk[ix]++] = line[p+n];
								}
							}
							for (n=0; n<6; n++) {
								T0[n][kk[n]] = 0;
							}
							t.tm_year = atoi(T0[0]);
							t.tm_mon  = atoi(T0[1]);
							t.tm_mday = atoi(T0[2]);
							t.tm_hour = atoi(T0[3]);
							t.tm_min  = atoi(T0[4]);
							t.tm_sec  = atoi(T0[5]);

							*(mxGetPr(R[ns])+m) = tm_time2gdf_time(&t);
						}
						p = k+1;
					}
					m++;
				}
				line = strtok(NULL, "\x0a\x0d");
			}

			/* convert into output */
			POutput[0] = mxCreateStructMatrix(1, 1, NS, ListOfVarNames);
			for (size_t k = 0; k < NS; k++) {
				mxSetField(POutput[0], 0, ListOfVarNames[k], R[k]);
			}

			if (ListOfVarNames) free(ListOfVarNames);
			if (vartyp) free(vartyp);
			if (datestr) free(datestr);
			if (H1) free(H1);
		}

		else if (!memcmp(H0,"HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000",78)) {
		/*
			 SAS Transport file format (XPORT)
		*/
                        size_t M=0; 
			TYPE = SASXPT;

			/* TODO: sanity checks */

			char tmp[5];
			memcpy(tmp,H0+7*80+54,4);
			tmp[4] = 0;
			NS = atoi(tmp);

			char *tmp2;
			sz2 = strtoul(H0+4*80-6, &tmp2, 10);

			HeadLen2 = NS*sz2;
			if (HeadLen2 % 80) HeadLen2 = (HeadLen2 / 80 + 1) * 80;

			/* read namestr header, and header line "OBS" */
			H2 = (char*) realloc(H2, HeadLen2+81);
			count  += fread(H2,1,HeadLen2+80,fid);

			/* size of single record */
			size_t pos=0, recsize = 0, POS = 0;
			for (size_t k = 0; k < NS; k++)
				recsize += b_endian_u16(*(int16_t*)(H2+k*sz2+4));

			/* read data section */
			size_t szData = 0;
			uint8_t *Data = NULL;
			while (!feof(fid)) {
				size_t szNew = max(16,szData*2);
				Data         = (uint8_t*)realloc(Data,szNew);
				szData      += fread(Data+szData,1,szNew-szData,fid);
			}

			M = szData/recsize;

			mxArray **R = (mxArray**) mxMalloc(NS*sizeof(mxArray*));
			const char **ListOfVarNames = (const char**)malloc(NS * sizeof(char*));
			char *VarNames = (char*)malloc(NS * 9);

			for (size_t k = 0; k < NS; k++) {
				size_t maxlen = b_endian_u16(*(int16_t*)(H2+k*sz2+4));

				ListOfVarNames[k] = VarNames+pos;
				unsigned int n = k*sz2+8;
				// int flagDate = (!memcmp(H2+n+48,"DATE    ",8) || !memcmp(H2+n+48,"MONNAME ",8)); // not used
				do {
					VarNames[pos++] = H2[n];
				} while (isalnum(H2[++n]) && (n < k*sz2+16));


				VarNames[pos++] = 0;

				if ((*(int16_t*)(H2+k*sz2)) == b_endian_u16(1) && (*(int16_t*)(H2+k*sz2+4)) == b_endian_u16(8) ) {
					// numerical data
					R[k] = mxCreateDoubleMatrix(M, 1, mxREAL);
					for (size_t m=0; m<M; m++) {
						double d = xpt2d(b_endian_u64(*(uint64_t*)(Data+m*recsize+POS)));

//						if (flagDate) d += 715876;  // add number of days from 0000-Jan-01 to 1960-Jan-01

						*(mxGetPr(R[k])+m) = d;

					}
				}
				else if ((*(int16_t*)(H2+k*sz2)) == b_endian_u16(2)) {
					// character string
					R[k] = mxCreateCellMatrix(M, 1);
					char *f = (char*)malloc(maxlen+1);
					for (size_t m=0; m<M; m++) {
						memcpy(f, Data+m*recsize+POS, maxlen);
						f[maxlen] = 0;
						mxSetCell(R[k], m, mxCreateString(f));
					}
					if (f) free(f);
				}
				POS += maxlen;
			}

			POutput[0] = mxCreateStructMatrix(1, 1, NS, ListOfVarNames);
			for (size_t k = 0; k < NS; k++) {
				 mxSetField(POutput[0], 0, ListOfVarNames[k], R[k]);
			}

			if (VarNames) 	    free(VarNames);
			if (ListOfVarNames) free(ListOfVarNames);
			if (Data)	    free(Data);
			/* end of reading SAS format */
		}

		else {
			fclose(fid);
			mexErrMsgTxt("file format not supported");
			return;
		}



	}

//	if (Mode[0]=='w' || Mode[0]=='a' ) {
	if (Mode[0]=='w') {

		NS += mxGetNumberOfFields(PInputs[2]);

		// generate default (fixed) header
		if (Mode[0]=='w') {
			memset(H0,' ',80*8);
			memcpy(H0,L1,strlen(L1));
			memcpy(H0+80,L2,strlen(L2));

			memcpy(H0+80*3,L4,strlen(L4));
			memcpy(H0+80*4,L5,strlen(L5));
			memcpy(H0+80*5,L6,strlen(L6));

			memcpy(H0+80*7,L8,strlen(L8));
		}

		time_t t;
		time(&t);
		char tt[20];
		strftime(tt, 17, DATEFORMAT, localtime(&t));
		memcpy(H0+80*2-16,tt,16);
		memcpy(H0+80*2,tt,16);
		memcpy(H0+80*5+8,fn,min(8,strcspn(fn,".\x00")));
		memcpy(H0+80*5+32,"XPTOPEN.MEX (OCTAVE/MATLAB)",27);
		memcpy(H0+80*6-16,tt,16);
		memcpy(H0+80*6,tt,16);

		char tmp[17];
		sprintf(tmp,"%04i", NS);	// number of variables
		memcpy(H0+80*7+54, tmp, 4);

		if (sz2==0) sz2 = 140;
		if (sz2 < 136)
			mexErrMsgTxt("error XPTOPEN: incorrect length of namestr field");

		/* generate variable NAMESTR header */
		HeadLen2 = NS*sz2;
		if (HeadLen2 % 80) HeadLen2 = (HeadLen2 / 80 + 1) * 80;
		H2 = (char*) realloc(H2,HeadLen2);
		memset(H2,0,HeadLen2);

		mwIndex M = 0;
		mxArray **F = (mxArray**) mxMalloc(NS*sizeof(mxArray*));
		char **Fstr = (char**) malloc(NS*sizeof(char*));
		size_t *MAXLEN = (size_t*) malloc(NS*sizeof(size_t*));
		for (int16_t k = 0; k < NS; k++) {
			Fstr[k] = NULL;
			MAXLEN[k]=0;
			F[k] = mxGetFieldByNumber(PInputs[2],0,k);
			if (k==0) M = mxGetM(F[k]);
			else if (M != mxGetM(F[k])) {
				if (H2) free(H2);
				if (F)  free(F);
				mexErrMsgTxt("Error XPTOPEN: number of elements (rows) do not fit !!!");
			}

			if (mxIsChar(F[k])) {
				*(int16_t*)(H2+k*sz2) = b_endian_u16(2);
				*(int16_t*)(H2+k*sz2+4) = b_endian_u16(mxGetN(F[k]));
			}
			else if (mxIsCell(F[k])) {
				size_t maxlen = 0;
				for (mwIndex m = 0; m<M; m++) {
					mxArray *f = mxGetCell(F[k],m);
					if (mxIsChar(f) || mxIsEmpty(f)) {
						size_t len = mxGetNumberOfElements(f);
						if (maxlen<len) maxlen = len;
					}
				}
				Fstr[k] = (char*) malloc(maxlen+1);
				*(int16_t*)(H2+k*sz2) = b_endian_u16(2);
				*(int16_t*)(H2+k*sz2+4) = b_endian_u16(maxlen);
				MAXLEN[k] = maxlen;
			}
			else {
				*(int16_t*)(H2+k*sz2) = b_endian_u16(1);
				*(int16_t*)(H2+k*sz2+4) = b_endian_u16(8);
			}
			*(int16_t*)(H2+k*sz2+6) = b_endian_u16(k);
			strncpy(H2+k*sz2+8,mxGetFieldNameByNumber(PInputs[2],k),8);
		}

		count  = fwrite(H0, 1, HeadLen0, fid);
		count += fwrite(H2, 1, HeadLen2, fid);
		/* write OBS header line */
		count += fwrite(LO, 1, strlen(LO), fid);
		for (mwIndex m = 0; m < M; m++) {
			for (int16_t k = 0; k < NS; k++) {

				if (*(int16_t*)(H2+k*sz2) == b_endian_u16(1)) {
					// numeric
					uint64_t u64 = b_endian_u64(d2xpt(*(mxGetPr(F[k])+m)));
					count += fwrite(&u64, 1, 8, fid);
				}

/*				else if (mxIsChar(F[k])) {
					*(int16_t*)(H2+k*sz2) = b_endian_u16(2);
					*(int16_t*)(H2+k*sz2+4) = b_endian_u16(mxGetN(F[k]));
				}
*/
				else if (mxIsCell(F[k])) {
					size_t maxlen = MAXLEN[k];
					mxArray *f = mxGetCell(F[k],m);
					mxGetString(f, Fstr[k], maxlen+1);
					count += fwrite(Fstr[k], 1, maxlen, fid);
				}
			}
		}
/*
		// padding to full multiple of 80 byte:
		// FIXME: this might introduce spurios sample values
		char d = count%80;
		while (d--) fwrite("\x20",1,1,fid);
*/
		// free memory
		for (size_t k=0; k<NS; k++) if (Fstr[k]) free(Fstr[k]);
		if (Fstr)   free(Fstr);
		if (MAXLEN) free(MAXLEN);
		Fstr = NULL;
	}
	fclose(fid);

	if (H2) free(H2);
	H2 = NULL;

	return;
}


/*
	XPT2D converts from little-endian IBM to little-endian IEEE format
*/

double xpt2d(uint64_t x) {
	// x is little-endian 64bit IBM floating point format
	char c = *((char*)&x+7) & 0x7f;
	uint64_t u = x;
	*((char*)&u+7)=0;

#if __BYTE_ORDER == __BIG_ENDIAN

	mexErrMsgTxt("IEEE-to-IBM conversion on big-endian platform not supported, yet");

#elif __BYTE_ORDER==__LITTLE_ENDIAN

#if DEBUG
	mexPrintf("xpt2d(%016Lx): [0x%x]\n",x,c);
#endif

	// missing values
	if ((c==0x2e || c==0x5f || (c>0x40 && c<0x5b)) && !u )
		return(NaN);

	int s,e;
	s = *(((char*)&x) + 7) & 0x80;		// sign
	e = (*(((char*)&x) + 7) & 0x7f) - 64;	// exponent
	*(((char*)&x) + 7) = 0; 		// mantisse x

#if DEBUG
	mexPrintf("%x %x %016Lx\n",s,e,x);
#endif

	double y = ldexp((double)x, e*4-56);
	if (s) return(-y);
	else   return( y);

#endif
}


/*
	D2XPT converts from little-endian IEEE to little-endian IBM format
*/

uint64_t d2xpt(double x) {
	uint64_t s,m;
	int e;

#if __BYTE_ORDER == __BIG_ENDIAN

	mexErrMsgTxt("IEEE-to-IBM conversion on big-endian platform not supported, yet");


#elif __BYTE_ORDER==__LITTLE_ENDIAN

	if (x != x) return(0x2eLL << 56);	// NaN - not a number

	if (fabs(x) == 1.0/0.0) return(0x5fLL << 56); 	// +-infinity

	if (x == 0.0) return(0);

	if (x > 0.0) s=0;
	else s=1;

	x = frexp(x,&e);

#if DEBUG
	mexPrintf("d2xpt(%f)\n",x);
#endif
	// see http://old.nabble.com/Re%3A-IBM-integer-and-double-formats-p20428979.html
	m = *(uint64_t*) &x;
	*(((char*)&m) + 6) &= 0x0f; //
	if (e) *(((char*)&m) + 6) |= 0x10; // reconstruct implicit leading '1' for normalized numbers
	m <<= (3-(-e & 3));
	*(((uint8_t*)&m) + 7)  = s ? 0x80 : 0;
	e = (e + (-e & 3)) / 4 + 64;

	if (e >= 128) return(0x5f); // overflow
	if (e < 0) {
		uint64_t h = 1<<(4*-e - 1);
		m = m / (2*h) + (m & h && m & (3*h-1) ? 1 : 0);
		e = 0;
	}
	return (((uint64_t)e)<<56 | m);

#endif

}


double tm_time2gdf_time(struct tm *t) {
	/* based Octave's datevec.m
	it referes Peter Baum's algorithm at http://vsg.cape.com/~pbaum/date/date0.htm
	but the link is not working anymore as of 2008-12-03.

	Other links to Peter Baum's algorithm are
	http://www.rexswain.com/b2mmddyy.rex
	http://www.dpwr.net/forums/index.php?s=ecfa72e38be61327403126e23aeea7e5&showtopic=4309
	*/

	int Y,M,s; //h,m,
	double D;

	D = t->tm_mday;
	M = t->tm_mon+1;
	Y = t->tm_year+1900;

	// Set start of year to March by moving Jan. and Feb. to previous year.
  	// Correct for months > 12 by moving to subsequent years.
  	Y += fix ((M-14.0)/12);

  	const int monthstart[] = {306, 337, 0, 31, 61, 92, 122, 153, 184, 214, 245, 275};
	// Lookup number of days since start of the current year.
  	D += monthstart[t->tm_mon % 12] + 60;

	// Add number of days to the start of the current year. Correct
  	// for leap year every 4 years except centuries not divisible by 400.
  	D += 365*Y + floor (Y/4) - floor (Y/100) + floor (Y/400);

  	// Add fraction representing current second of the day.
  	s = t->tm_hour*3600 + t->tm_min*60 + t->tm_sec;

	// s -= timezone;
	return(D + s/86400.0);
}

------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/

_______________________________________________
Octave-dev mailing list
Octave-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/octave-dev

Re: [OctDev] Patch for nan package on GNU/Hurd

Reply via email to