Revision: 5239
http://matplotlib.svn.sourceforge.net/matplotlib/?rev=5239&view=rev
Author: sameerd
Date: 2008-05-23 12:27:23 -0700 (Fri, 23 May 2008)
Log Message:
-----------
Getting mlab.py in sync with the trunk
Modified Paths:
--------------
branches/v0_91_maint/lib/matplotlib/mlab.py
Modified: branches/v0_91_maint/lib/matplotlib/mlab.py
===================================================================
--- branches/v0_91_maint/lib/matplotlib/mlab.py 2008-05-23 19:05:51 UTC (rev 5238)
+++ branches/v0_91_maint/lib/matplotlib/mlab.py 2008-05-23 19:27:23 UTC (rev 5239)
@@ -45,7 +45,6 @@
this efficiently by caching the direct FFTs.
= record array helper functions =
-
* rec2txt : pretty print a record array
* rec2csv : store record array in CSV file
* csv2rec : import record array from CSV file with type inspection
@@ -55,7 +54,7 @@
* rec_groupby : summarize data by groups (similar to SQL GROUP BY)
* rec_summarize : helper code to filter rec array fields into new fields
-For the rec viewer clases (eg rec2csv), there are a bunch of Format
+For the rec viewer functions (e.g. rec2csv), there are a bunch of Format
objects you can pass into the functions that will do things like color
negative values red, set percent formatting and scaling, etc.
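
As a rough usage sketch of these Format objects (illustrative only; the
record array, column names and file name below are not part of this
commit), one might write:

    import numpy as np
    import matplotlib.mlab as mlab

    # illustrative record array with a fractional column
    r = np.rec.fromrecords([(1, 0.123), (2, 0.456)], names='id,frac')

    # write 'frac' as a percentage with one decimal place
    mlab.rec2csv(r, 'out.csv',
                 formatd={'frac': mlab.FormatPercent(precision=1)})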
@@ -84,10 +83,11 @@
"""
from __future__ import division
-import sys, datetime, csv, warnings, copy, os
+import csv, warnings, copy, os
-import numpy as npy
+import numpy as np
+
from matplotlib import nxutils
from matplotlib import cbook
@@ -100,28 +100,28 @@
def linspace(*args, **kw):
warnings.warn("use numpy.linspace", DeprecationWarning)
- return npy.linspace(*args, **kw)
+ return np.linspace(*args, **kw)
def meshgrid(x,y):
warnings.warn("use numpy.meshgrid", DeprecationWarning)
- return npy.meshgrid(x,y)
+ return np.meshgrid(x,y)
def mean(x, dim=None):
warnings.warn("Use numpy.mean(x) or x.mean()", DeprecationWarning)
if len(x)==0: return None
- return npy.mean(x, axis=dim)
+ return np.mean(x, axis=dim)
def logspace(xmin,xmax,N):
- return npy.exp(npy.linspace(npy.log(xmin), npy.log(xmax), N))
+ return np.exp(np.linspace(np.log(xmin), np.log(xmax), N))
def _norm(x):
"return sqrt(x dot x)"
- return npy.sqrt(npy.dot(x,x))
+ return np.sqrt(np.dot(x,x))
def window_hanning(x):
"return x times the hanning window of len(x)"
- return npy.hanning(len(x))*x
+ return np.hanning(len(x))*x
def window_none(x):
"No window function; simply return x"
@@ -131,7 +131,7 @@
def conv(x, y, mode=2):
'convolve x with y'
warnings.warn("Use numpy.convolve(x, y, mode='full')", DeprecationWarning)
- return npy.convolve(x,y,mode)
+ return np.convolve(x,y,mode)
def detrend(x, key=None):
if key is None or key=='constant':
@@ -141,10 +141,10 @@
def demean(x, axis=0):
"Return x minus its mean along the specified axis"
- x = npy.asarray(x)
+ x = np.asarray(x)
if axis:
ind = [slice(None)] * axis
- ind.append(npy.newaxis)
+ ind.append(np.newaxis)
return x - x.mean(axis)[ind]
return x - x.mean(axis)
@@ -159,8 +159,8 @@
def detrend_linear(y):
"Return y minus best fit line; 'linear' detrending "
# This is faster than an algorithm based on linalg.lstsq.
- x = npy.arange(len(y), dtype=npy.float_)
- C = npy.cov(x, y, bias=1)
+ x = np.arange(len(y), dtype=np.float_)
+ C = np.cov(x, y, bias=1)
b = C[0,1]/C[0,0]
a = y.mean() - b*x.mean()
return y - (b*x + a)
@@ -200,41 +200,41 @@
if NFFT % 2:
raise ValueError('NFFT must be even')
- x = npy.asarray(x) # make sure we're dealing with a numpy array
+ x = np.asarray(x) # make sure we're dealing with a numpy array
# zero pad x up to NFFT if it is shorter than NFFT
if len(x)<NFFT:
n = len(x)
- x = npy.resize(x, (NFFT,)) # Can't use resize method.
+ x = np.resize(x, (NFFT,)) # Can't use resize method.
x[n:] = 0
# for real x, ignore the negative frequencies
- if npy.iscomplexobj(x): numFreqs = NFFT
+ if np.iscomplexobj(x): numFreqs = NFFT
else: numFreqs = NFFT//2+1
if cbook.iterable(window):
assert(len(window) == NFFT)
windowVals = window
else:
- windowVals = window(npy.ones((NFFT,),x.dtype))
+ windowVals = window(np.ones((NFFT,),x.dtype))
step = NFFT-noverlap
ind = range(0,len(x)-NFFT+1,step)
n = len(ind)
- Pxx = npy.zeros((numFreqs,n), npy.float_)
+ Pxx = np.zeros((numFreqs,n), np.float_)
# do the ffts of the slices
for i in range(n):
thisX = x[ind[i]:ind[i]+NFFT]
thisX = windowVals * detrend(thisX)
- fx = npy.absolute(npy.fft.fft(thisX))**2
+ fx = np.absolute(np.fft.fft(thisX))**2
Pxx[:,i] = fx[:numFreqs]
if n>1:
Pxx = Pxx.mean(axis=1)
# Scale the spectrum by the norm of the window to compensate for
# windowing loss; see Bendat & Piersol Sec 11.5.2
- Pxx /= (npy.abs(windowVals)**2).sum()
+ Pxx /= (np.abs(windowVals)**2).sum()
- freqs = Fs/NFFT * npy.arange(numFreqs)
+ freqs = Fs/NFFT * np.arange(numFreqs)
return Pxx, freqs
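
For orientation, a hedged sketch of a psd call exercising the code patched
above (the signal and parameters are illustrative):

    import numpy as np
    import matplotlib.mlab as mlab

    Fs = 1000.0                                   # sampling frequency
    t = np.arange(0, 1, 1.0/Fs)
    x = np.sin(2*np.pi*10*t) + 0.1*np.random.randn(len(t))

    # Welch-style average of windowed, detrended NFFT-length segments
    Pxx, freqs = mlab.psd(x, NFFT=256, Fs=Fs, noverlap=128)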
@@ -267,32 +267,32 @@
if NFFT % 2:
raise ValueError, 'NFFT must be even'
- x = npy.asarray(x) # make sure we're dealing with a numpy array
- y = npy.asarray(y) # make sure we're dealing with a numpy array
+ x = np.asarray(x) # make sure we're dealing with a numpy array
+ y = np.asarray(y) # make sure we're dealing with a numpy array
# zero pad x and y up to NFFT if they are shorter than NFFT
if len(x)<NFFT:
n = len(x)
- x = npy.resize(x, (NFFT,))
+ x = np.resize(x, (NFFT,))
x[n:] = 0
if len(y)<NFFT:
n = len(y)
- y = npy.resize(y, (NFFT,))
+ y = np.resize(y, (NFFT,))
y[n:] = 0
# for real x, ignore the negative frequencies
- if npy.iscomplexobj(x): numFreqs = NFFT
+ if np.iscomplexobj(x): numFreqs = NFFT
else: numFreqs = NFFT//2+1
if cbook.iterable(window):
assert(len(window) == NFFT)
windowVals = window
else:
- windowVals = window(npy.ones((NFFT,), x.dtype))
+ windowVals = window(np.ones((NFFT,), x.dtype))
step = NFFT-noverlap
ind = range(0,len(x)-NFFT+1,step)
n = len(ind)
- Pxy = npy.zeros((numFreqs,n), npy.complex_)
+ Pxy = np.zeros((numFreqs,n), np.complex_)
# do the ffts of the slices
for i in range(n):
@@ -300,9 +300,9 @@
thisX = windowVals*detrend(thisX)
thisY = y[ind[i]:ind[i]+NFFT]
thisY = windowVals*detrend(thisY)
- fx = npy.fft.fft(thisX)
- fy = npy.fft.fft(thisY)
- Pxy[:,i] = npy.conjugate(fx[:numFreqs])*fy[:numFreqs]
+ fx = np.fft.fft(thisX)
+ fy = np.fft.fft(thisY)
+ Pxy[:,i] = np.conjugate(fx[:numFreqs])*fy[:numFreqs]
@@ -310,8 +310,8 @@
# windowing loss; see Bendat & Piersol Sec 11.5.2
if n>1:
Pxy = Pxy.mean(axis=1)
- Pxy /= (npy.abs(windowVals)**2).sum()
- freqs = Fs/NFFT*npy.arange(numFreqs)
+ Pxy /= (np.abs(windowVals)**2).sum()
+ freqs = Fs/NFFT*np.arange(numFreqs)
return Pxy, freqs
def specgram(x, NFFT=256, Fs=2, detrend=detrend_none,
@@ -342,9 +342,9 @@
segments.
"""
- x = npy.asarray(x)
+ x = np.asarray(x)
assert(NFFT>noverlap)
- #if npy.log(NFFT)/npy.log(2) != int(npy.log(NFFT)/npy.log(2)):
+ #if np.log(NFFT)/np.log(2) != int(np.log(NFFT)/np.log(2)):
# raise ValueError, 'NFFT must be a power of 2'
if NFFT % 2:
raise ValueError('NFFT must be even')
@@ -353,42 +353,42 @@
# zero pad x up to NFFT if it is shorter than NFFT
if len(x)<NFFT:
n = len(x)
- x = npy.resize(x, (NFFT,))
+ x = np.resize(x, (NFFT,))
x[n:] = 0
# for real x, ignore the negative frequencies
- if npy.iscomplexobj(x):
+ if np.iscomplexobj(x):
numFreqs=NFFT
else:
numFreqs = NFFT//2+1
if cbook.iterable(window):
assert(len(window) == NFFT)
- windowVals = npy.asarray(window)
+ windowVals = np.asarray(window)
else:
- windowVals = window(npy.ones((NFFT,),x.dtype))
+ windowVals = window(np.ones((NFFT,),x.dtype))
step = NFFT-noverlap
- ind = npy.arange(0,len(x)-NFFT+1,step)
+ ind = np.arange(0,len(x)-NFFT+1,step)
n = len(ind)
- Pxx = npy.zeros((numFreqs,n), npy.float_)
+ Pxx = np.zeros((numFreqs,n), np.float_)
# do the ffts of the slices
for i in range(n):
thisX = x[ind[i]:ind[i]+NFFT]
thisX = windowVals*detrend(thisX)
- fx = npy.absolute(npy.fft.fft(thisX))**2
+ fx = np.absolute(np.fft.fft(thisX))**2
Pxx[:,i] = fx[:numFreqs]
# Scale the spectrum by the norm of the window to compensate for
# windowing loss; see Bendat & Piersol Sec 11.5.2
- Pxx /= (npy.abs(windowVals)**2).sum()
+ Pxx /= (np.abs(windowVals)**2).sum()
t = 1/Fs*(ind+NFFT/2)
- freqs = Fs/NFFT*npy.arange(numFreqs)
+ freqs = Fs/NFFT*np.arange(numFreqs)
- if npy.iscomplexobj(x):
+ if np.iscomplexobj(x):
# center the frequency range at zero
- freqs = npy.concatenate((freqs[NFFT/2:]-Fs,freqs[:NFFT/2]))
- Pxx = npy.concatenate((Pxx[NFFT/2:,:],Pxx[:NFFT/2,:]),0)
+ freqs = np.concatenate((freqs[NFFT/2:]-Fs,freqs[:NFFT/2]))
+ Pxx = np.concatenate((Pxx[NFFT/2:,:],Pxx[:NFFT/2,:]),0)
return Pxx, freqs, t
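
Similarly, an illustrative specgram call (not part of the diff):

    import numpy as np
    import matplotlib.mlab as mlab

    x = np.random.randn(2048)                     # illustrative signal
    Pxx, freqs, t = mlab.specgram(x, NFFT=256, Fs=2, noverlap=128)
    # Pxx has shape (numFreqs, len(t)); t marks the center of each slice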
@@ -420,7 +420,7 @@
Pyy, f = psd(y, NFFT, Fs, detrend, window, noverlap)
Pxy, f = csd(x, y, NFFT, Fs, detrend, window, noverlap)
- Cxy = npy.divide(npy.absolute(Pxy)**2, Pxx*Pyy)
+ Cxy = np.divide(np.absolute(Pxy)**2, Pxx*Pyy)
Cxy.shape = (len(f),)
return Cxy, f
@@ -440,7 +440,7 @@
"""
warnings.warn("Use numpy.corrcoef", DeprecationWarning)
kw = dict(rowvar=False)
- return npy.corrcoef(*args, **kw)
+ return np.corrcoef(*args, **kw)
def polyfit(*args, **kwargs):
@@ -483,7 +483,7 @@
"""
warnings.warn("use numpy.poyfit", DeprecationWarning)
- return npy.polyfit(*args, **kwargs)
+ return np.polyfit(*args, **kwargs)
@@ -505,7 +505,7 @@
"""
warnings.warn("use numpy.polyval", DeprecationWarning)
- return npy.polyval(*args, **kwargs)
+ return np.polyval(*args, **kwargs)
def vander(*args, **kwargs):
"""
@@ -517,7 +517,7 @@
"""
warnings.warn("Use numpy.vander()", DeprecationWarning)
- return npy.vander(*args, **kwargs)
+ return np.vander(*args, **kwargs)
def donothing_callback(*args):
@@ -596,7 +596,7 @@
# zero pad if X is too short
if numRows < NFFT:
tmp = X
- X = npy.zeros( (NFFT, numCols), X.dtype)
+ X = np.zeros( (NFFT, numCols), X.dtype)
X[:numRows,:] = tmp
del tmp
@@ -611,7 +611,7 @@
del seen
# for real X, ignore the negative frequencies
- if npy.iscomplexobj(X): numFreqs = NFFT
+ if np.iscomplexobj(X): numFreqs = NFFT
else: numFreqs = NFFT//2+1
# cache the FFT of every windowed, detrended NFFT length segment
@@ -621,7 +621,7 @@
assert(len(window) == NFFT)
windowVals = window
else:
- windowVals = window(npy.ones((NFFT,), typecode(X)))
+ windowVals = window(np.ones((NFFT,), typecode(X)))
ind = range(0, numRows-NFFT+1, NFFT-noverlap)
numSlices = len(ind)
FFTSlices = {}
@@ -631,7 +631,7 @@
normVal = norm(windowVals)**2
for iCol in allColumns:
progressCallback(i/Ncols, 'Cacheing FFTs')
- Slices = npy.zeros( (numSlices,numFreqs), dtype=npy.complex_)
+ Slices = np.zeros( (numSlices,numFreqs), dtype=np.complex_)
for iSlice in slices:
thisSlice = X[ind[iSlice]:ind[iSlice]+NFFT, iCol]
thisSlice = windowVals*detrend(thisSlice)
@@ -640,7 +640,7 @@
FFTSlices[iCol] = Slices
if preferSpeedOverMemory:
FFTConjSlices[iCol] = conjugate(Slices)
- Pxx[iCol] = npy.divide(npy.mean(absolute(Slices)**2), normVal)
+ Pxx[iCol] = np.divide(np.mean(absolute(Slices)**2), normVal)
del Slices, ind, windowVals
# compute the coherences and phases for all pairs using the
@@ -657,13 +657,13 @@
if preferSpeedOverMemory:
Pxy = FFTSlices[i] * FFTConjSlices[j]
else:
- Pxy = FFTSlices[i] * npy.conjugate(FFTSlices[j])
- if numSlices>1: Pxy = npy.mean(Pxy)
- Pxy = npy.divide(Pxy, normVal)
- Cxy[(i,j)] = npy.divide(npy.absolute(Pxy)**2, Pxx[i]*Pxx[j])
- Phase[(i,j)] = npy.arctan2(Pxy.imag, Pxy.real)
+ Pxy = FFTSlices[i] * np.conjugate(FFTSlices[j])
+ if numSlices>1: Pxy = np.mean(Pxy)
+ Pxy = np.divide(Pxy, normVal)
+ Cxy[(i,j)] = np.divide(np.absolute(Pxy)**2, Pxx[i]*Pxx[j])
+ Phase[(i,j)] = np.arctan2(Pxy.imag, Pxy.real)
- freqs = Fs/NFFT*npy.arange(numFreqs)
+ freqs = Fs/NFFT*np.arange(numFreqs)
if returnPxx:
return Cxy, Phase, freqs, Pxx
else:
@@ -684,16 +684,16 @@
Sanalytic = 0.5 * ( 1.0 + log(2*pi*sigma**2.0) )
"""
- n,bins = npy.histogram(y, bins)
- n = n.astype(npy.float_)
+ n,bins = np.histogram(y, bins)
+ n = n.astype(np.float_)
- n = npy.take(n, npy.nonzero(n)[0]) # get the positive
+ n = np.take(n, np.nonzero(n)[0]) # get the positive
- p = npy.divide(n, len(y))
+ p = np.divide(n, len(y))
delta = bins[1]-bins[0]
- S = -1.0*npy.sum(p*log(p)) + log(delta)
- #S = -1.0*npy.sum(p*log(p))
+ S = -1.0*np.sum(p*log(p)) + log(delta)
+ #S = -1.0*np.sum(p*log(p))
return S
def hist(y, bins=10, normed=0):
@@ -711,12 +711,12 @@
Credits: the Numeric 22 documentation
"""
warnings.warn("Use numpy.histogram()", DeprecationWarning)
- return npy.histogram(y, bins=bins, range=None, normed=normed)
+ return np.histogram(y, bins=bins, range=None, normed=normed)
def normpdf(x, *args):
"Return the normal pdf evaluated at x; args provides mu, sigma"
mu, sigma = args
- return 1/(npy.sqrt(2*npy.pi)*sigma)*npy.exp(-0.5 * (1/sigma*(x - mu))**2)
+ return 1/(np.sqrt(2*np.pi)*sigma)*np.exp(-0.5 * (1/sigma*(x - mu))**2)
def levypdf(x, gamma, alpha):
@@ -726,26 +726,26 @@
if N%2 != 0:
raise ValueError, 'x must be an even length array; try\n' + \
- 'x = npy.linspace(minx, maxx, N), where N is even'
+ 'x = np.linspace(minx, maxx, N), where N is even'
dx = x[1]-x[0]
- f = 1/(N*dx)*npy.arange(-N/2, N/2, npy.float_)
+ f = 1/(N*dx)*np.arange(-N/2, N/2, np.float_)
- ind = npy.concatenate([npy.arange(N/2, N, int),
- npy.arange(0, N/2, int)])
+ ind = np.concatenate([np.arange(N/2, N, int),
+ np.arange(0, N/2, int)])
df = f[1]-f[0]
- cfl = exp(-gamma*npy.absolute(2*pi*f)**alpha)
+ cfl = exp(-gamma*np.absolute(2*pi*f)**alpha)
- px = npy.fft.fft(npy.take(cfl,ind)*df).astype(npy.float_)
- return npy.take(px, ind)
+ px = np.fft.fft(np.take(cfl,ind)*df).astype(np.float_)
+ return np.take(px, ind)
def find(condition):
"Return the indices where ravel(condition) is true"
- res, = npy.nonzero(npy.ravel(condition))
+ res, = np.nonzero(np.ravel(condition))
return res
def trapz(x, y):
@@ -753,12 +753,12 @@
Trapezoidal integral of y(x).
"""
warnings.warn("Use numpy.trapz(y,x) instead of trapz(x,y)",
DeprecationWarning)
- return npy.trapz(y, x)
+ return np.trapz(y, x)
#if len(x)!=len(y):
# raise ValueError, 'x and y must have the same length'
#if len(x)<2:
# raise ValueError, 'x and y must have > 1 element'
- #return npy.sum(0.5*npy.diff(x)*(y[1:]+y[:-1]))
+ #return np.sum(0.5*np.diff(x)*(y[1:]+y[:-1]))
@@ -769,23 +769,23 @@
If there are two equally long stretches, pick the first
"""
- x = npy.ravel(x)
+ x = np.ravel(x)
if len(x)==0:
- return npy.array([])
+ return np.array([])
ind = (x==0).nonzero()[0]
if len(ind)==0:
- return npy.arange(len(x))
+ return np.arange(len(x))
if len(ind)==len(x):
- return npy.array([])
+ return np.array([])
- y = npy.zeros( (len(x)+2,), x.dtype)
+ y = np.zeros( (len(x)+2,), x.dtype)
y[1:-1] = x
- dif = npy.diff(y)
+ dif = np.diff(y)
up = (dif == 1).nonzero()[0];
dn = (dif == -1).nonzero()[0];
i = (dn-up == max(dn - up)).nonzero()[0][0]
- ind = npy.arange(up[i], dn[i])
+ ind = np.arange(up[i], dn[i])
return ind
@@ -809,7 +809,7 @@
R13 Neural Network Toolbox but is not found in later versions;
its successor seems to be called "processpcs".
"""
- U,s,v = npy.linalg.svd(P)
+ U,s,v = np.linalg.svd(P)
varEach = s**2/P.shape[1]
totVar = varEach.sum()
fracVar = varEach/totVar
@@ -817,7 +817,7 @@
# select the components that are greater
Trans = U[:,ind].transpose()
# The transformed data
- Pcomponents = npy.dot(Trans,P)
+ Pcomponents = np.dot(Trans,P)
return Pcomponents, Trans, fracVar[ind]
def prctile(x, p = (0.0, 25.0, 50.0, 75.0, 100.0)):
@@ -830,16 +830,16 @@
"""
- x = npy.array(x).ravel() # we need a copy
+ x = np.array(x).ravel() # we need a copy
x.sort()
Nx = len(x)
if not cbook.iterable(p):
return x[int(p*Nx/100.0)]
- p = npy.asarray(p)* Nx/100.0
+ p = np.asarray(p)* Nx/100.0
ind = p.astype(int)
- ind = npy.where(ind>=Nx, Nx-1, ind)
+ ind = np.where(ind>=Nx, Nx-1, ind)
return x.take(ind)
def prctile_rank(x, p):
@@ -856,15 +856,15 @@
"""
if not cbook.iterable(p):
- p = npy.arange(100.0/p, 100.0, 100.0/p)
+ p = np.arange(100.0/p, 100.0, 100.0/p)
else:
- p = npy.asarray(p)
+ p = np.asarray(p)
if p.max()<=1 or p.min()<0 or p.max()>100:
raise ValueError('percentiles should be in range 0..100, not 0..1')
ptiles = prctile(x, p)
- return npy.searchsorted(ptiles, x)
+ return np.searchsorted(ptiles, x)
def center_matrix(M, dim=0):
"""
@@ -873,12 +873,12 @@
if dim=1 operate on columns instead of rows. (dim is opposite
to the numpy axis kwarg.)
"""
- M = npy.asarray(M, npy.float_)
+ M = np.asarray(M, np.float_)
if dim:
M = (M - M.mean(axis=0)) / M.std(axis=0)
else:
- M = (M - M.mean(axis=1)[:,npy.newaxis])
- M = M / M.std(axis=1)[:,npy.newaxis]
+ M = (M - M.mean(axis=1)[:,np.newaxis])
+ M = M / M.std(axis=1)[:,np.newaxis]
return M
@@ -922,25 +922,25 @@
try: Ny = len(y0)
except TypeError:
- yout = npy.zeros( (len(t),), npy.float_)
+ yout = np.zeros( (len(t),), np.float_)
else:
- yout = npy.zeros( (len(t), Ny), npy.float_)
+ yout = np.zeros( (len(t), Ny), np.float_)
yout[0] = y0
i = 0
- for i in npy.arange(len(t)-1):
+ for i in np.arange(len(t)-1):
thist = t[i]
dt = t[i+1] - thist
dt2 = dt/2.0
y0 = yout[i]
- k1 = npy.asarray(derivs(y0, thist))
- k2 = npy.asarray(derivs(y0 + dt2*k1, thist+dt2))
- k3 = npy.asarray(derivs(y0 + dt2*k2, thist+dt2))
- k4 = npy.asarray(derivs(y0 + dt*k3, thist+dt))
+ k1 = np.asarray(derivs(y0, thist))
+ k2 = np.asarray(derivs(y0 + dt2*k1, thist+dt2))
+ k3 = np.asarray(derivs(y0 + dt2*k2, thist+dt2))
+ k4 = np.asarray(derivs(y0 + dt*k3, thist+dt))
yout[i+1] = y0 + dt/6.0*(k1 + 2*k2 + 2*k3 + k4)
return yout
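
The loop above is a classical fourth-order Runge-Kutta step; a hedged
sketch of the surrounding rk4 function (the ODE is illustrative):

    import numpy as np
    import matplotlib.mlab as mlab

    def derivs(y, t):
        return -y                  # dy/dt = -y, exact solution exp(-t)

    t = np.arange(0.0, 5.0, 0.1)
    yout = mlab.rk4(derivs, 1.0, t)   # integrate from y(0) = 1.0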
@@ -957,8 +957,8 @@
rho = sigmaxy/(sigmax*sigmay)
z = Xmu**2/sigmax**2 + Ymu**2/sigmay**2 - 2*rho*Xmu*Ymu/(sigmax*sigmay)
- denom = 2*npy.pi*sigmax*sigmay*npy.sqrt(1-rho**2)
- return npy.exp( -z/(2*(1-rho**2))) / denom
+ denom = 2*np.pi*sigmax*sigmay*np.sqrt(1-rho**2)
+ return np.exp( -z/(2*(1-rho**2))) / denom
@@ -970,22 +970,22 @@
where x and y are the indices into Z and z are the values of Z at
those indices. x,y,z are 1D arrays
"""
- X,Y = npy.indices(Z.shape)
+ X,Y = np.indices(Z.shape)
return X[Cond], Y[Cond], Z[Cond]
def get_sparse_matrix(M,N,frac=0.1):
'return a MxN sparse matrix with frac elements randomly filled'
- data = npy.zeros((M,N))*0.
+ data = np.zeros((M,N))*0.
for i in range(int(M*N*frac)):
- x = npy.random.randint(0,M-1)
- y = npy.random.randint(0,N-1)
- data[x,y] = npy.random.rand()
+ x = np.random.randint(0,M-1)
+ y = np.random.randint(0,N-1)
+ data[x,y] = np.random.rand()
return data
def dist(x,y):
'return the distance between two points'
d = x-y
- return npy.sqrt(npy.dot(d,d))
+ return np.sqrt(np.dot(d,d))
def dist_point_to_segment(p, s0, s1):
"""
@@ -996,17 +996,17 @@
This algorithm from
http://softsurfer.com/Archive/algorithm_0102/algorithm_0102.htm#Distance%20to%20Ray%20or%20Segment
"""
- p = npy.asarray(p, npy.float_)
- s0 = npy.asarray(s0, npy.float_)
- s1 = npy.asarray(s1, npy.float_)
+ p = np.asarray(p, np.float_)
+ s0 = np.asarray(s0, np.float_)
+ s1 = np.asarray(s1, np.float_)
v = s1 - s0
w = p - s0
- c1 = npy.dot(w,v);
+ c1 = np.dot(w,v);
if ( c1 <= 0 ):
return dist(p, s0);
- c2 = npy.dot(v,v)
+ c2 = np.dot(v,v)
if ( c2 <= c1 ):
return dist(p, s1);
@@ -1049,11 +1049,11 @@
x=window*detrend(x)
else:
x = window(detrend(x))
- z = npy.fft.fft(x)
- a = 2.*npy.pi*1j
- phase = a * npy.random.rand(len(x))
- z = z*npy.exp(phase)
- return npy.fft.ifft(z).real
+ z = np.fft.fft(x)
+ a = 2.*np.pi*1j
+ phase = a * np.random.rand(len(x))
+ z = z*np.exp(phase)
+ return np.fft.ifft(z).real
def liaupunov(x, fprime):
@@ -1066,7 +1066,7 @@
caveat emptor.
It also seems that this function's name is badly misspelled.
"""
- return npy.mean(npy.log(npy.absolute(fprime(x))))
+ return np.mean(np.log(np.absolute(fprime(x))))
class FIFOBuffer:
"""
@@ -1088,10 +1088,10 @@
"""
def __init__(self, nmax):
'buffer up to nmax points'
- self._xa = npy.zeros((nmax,), npy.float_)
- self._ya = npy.zeros((nmax,), npy.float_)
- self._xs = npy.zeros((nmax,), npy.float_)
- self._ys = npy.zeros((nmax,), npy.float_)
+ self._xa = np.zeros((nmax,), np.float_)
+ self._ya = np.zeros((nmax,), np.float_)
+ self._xs = np.zeros((nmax,), np.float_)
+ self._ys = np.zeros((nmax,), np.float_)
self._ind = 0
self._nmax = nmax
self.dataLim = None
@@ -1149,9 +1149,9 @@
def movavg(x,n):
'compute the len(n) moving average of x'
- w = npy.empty((n,), dtype=npy.float_)
+ w = np.empty((n,), dtype=np.float_)
w[:] = 1.0/n
- return npy.convolve(x, w, mode='valid')
+ return np.convolve(x, w, mode='valid')
def save(fname, X, fmt='%.18e',delimiter=' '):
"""
@@ -1185,7 +1185,7 @@
raise ValueError('fname must be a string or file handle')
- X = npy.asarray(X)
+ X = np.asarray(X)
origShape = None
if X.ndim == 1:
origShape = X.shape
@@ -1278,7 +1278,7 @@
thisLen = len(row)
X.append(row)
- X = npy.array(X, npy.float_)
+ X = np.array(X, np.float_)
r,c = X.shape
if r==1 or c==1:
X.shape = max(r,c),
@@ -1313,10 +1313,10 @@
Icelandic Meteorological Office, March 2006 halldor at vedur.is)
"""
# Cast key variables as float.
- x=npy.asarray(x, npy.float_)
- y=npy.asarray(y, npy.float_)
+ x=np.asarray(x, np.float_)
+ y=np.asarray(y, np.float_)
- yp=npy.zeros(y.shape, npy.float_)
+ yp=np.zeros(y.shape, np.float_)
dx=x[1:] - x[:-1]
dy=y[1:] - y[:-1]
@@ -1371,18 +1371,18 @@
"""
# Cast key variables as float.
- x=npy.asarray(x, npy.float_)
- y=npy.asarray(y, npy.float_)
+ x=np.asarray(x, np.float_)
+ y=np.asarray(y, np.float_)
assert x.shape == y.shape
N=len(y)
if yp is None:
yp = slopes(x,y)
else:
- yp=npy.asarray(yp, npy.float_)
+ yp=np.asarray(yp, np.float_)
- xi=npy.asarray(xi, npy.float_)
- yi=npy.zeros(xi.shape, npy.float_)
+ xi=np.asarray(xi, np.float_)
+ yi=np.zeros(xi.shape, np.float_)
# calculate linear slopes
dx = x[1:] - x[:-1]
@@ -1391,7 +1391,7 @@
# find the segment each xi is in
# this line actually is the key to the efficiency of this implementation
- idx = npy.searchsorted(x[1:-1], xi)
+ idx = np.searchsorted(x[1:-1], xi)
# now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1]
# except at the boundaries, where it may be that xi[j] < x[0] or xi[j] > x[-1]
@@ -1412,7 +1412,7 @@
# does more calculations than necessary but exploiting the power
# of numpy, this is far more efficient than coding a loop by hand
# in Python
- yi = yo + dy1dy2 * npy.choose(npy.array(npy.sign(dy1dy2), npy.int32)+1,
+ yi = yo + dy1dy2 * np.choose(np.array(np.sign(dy1dy2), np.int32)+1,
((2*xi-xidx-xidxp1)/((dy1-dy2)*(xidxp1-xidx)),
0.0,
1/(dy1+dy2),))
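
A hedged sketch of calling stineman_interp with the slopes helper it
defaults to (the data below is illustrative):

    import numpy as np
    import matplotlib.mlab as mlab

    x = np.linspace(0, 2*np.pi, 20)
    y = np.sin(x)
    xi = np.linspace(0, 2*np.pi, 100)
    yi = mlab.stineman_interp(xi, x, y, yp=None)  # yp=None -> slopes(x, y)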
@@ -1426,7 +1426,7 @@
return value is a sequence of indices into points for the points
that are inside the polygon
"""
- res, = npy.nonzero(nxutils.points_inside_poly(points, verts))
+ res, = np.nonzero(nxutils.points_inside_poly(points, verts))
return res
def poly_below(xmin, xs, ys):
@@ -1439,13 +1439,13 @@
xv, yv = poly_below(0, x, y)
ax.fill(xv, yv)
"""
- xs = npy.asarray(xs)
- ys = npy.asarray(ys)
+ xs = np.asarray(xs)
+ ys = np.asarray(ys)
Nx = len(xs)
Ny = len(ys)
assert(Nx==Ny)
- x = xmin*npy.ones(2*Nx)
- y = npy.ones(2*Nx)
+ x = xmin*np.ones(2*Nx)
+ y = np.ones(2*Nx)
x[:Nx] = xs
y[:Nx] = ys
y[Nx:] = ys[::-1]
@@ -1462,13 +1462,13 @@
"""
Nx = len(x)
if not cbook.iterable(ylower):
- ylower = ylower*npy.ones(Nx)
+ ylower = ylower*np.ones(Nx)
if not cbook.iterable(yupper):
- yupper = yupper*npy.ones(Nx)
+ yupper = yupper*np.ones(Nx)
- x = npy.concatenate( (x, x[::-1]) )
- y = npy.concatenate( (yupper, ylower[::-1]) )
+ x = np.concatenate( (x, x[::-1]) )
+ y = np.concatenate( (yupper, ylower[::-1]) )
return x,y
### the following code was written and submitted by Fernando Perez
@@ -1532,8 +1532,8 @@
floating point exception handling with access to the underlying
hardware."""
- if type(x) is npy.ndarray:
- return exp(npy.clip(x,exp_safe_MIN,exp_safe_MAX))
+ if type(x) is np.ndarray:
+ return exp(np.clip(x,exp_safe_MIN,exp_safe_MAX))
else:
return math.exp(x)
@@ -1543,14 +1543,14 @@
Works like map(), but it returns an array. This is just a convenient
shorthand for numpy.array(map(...))
"""
- return npy.array(map(fn,*args))
+ return np.array(map(fn,*args))
#from numpy import zeros_like
def zeros_like(a):
"""Return an array of zeros of the shape and typecode of a."""
warnings.warn("Use numpy.zeros_like(a)", DeprecationWarning)
- return npy.zeros_like(a)
+ return np.zeros_like(a)
#from numpy import sum as sum_flat
def sum_flat(a):
@@ -1558,32 +1558,32 @@
It uses a.flat, and if a is not contiguous, a call to ravel(a) is made."""
warnings.warn("Use numpy.sum(a) or a.sum()", DeprecationWarning)
- return npy.sum(a)
+ return np.sum(a)
#from numpy import mean as mean_flat
def mean_flat(a):
"""Return the mean of all the elements of a, flattened out."""
warnings.warn("Use numpy.mean(a) or a.mean()", DeprecationWarning)
- return npy.mean(a)
+ return np.mean(a)
def rms_flat(a):
"""Return the root mean square of all the elements of a, flattened out."""
- return npy.sqrt(npy.mean(npy.absolute(a)**2))
+ return np.sqrt(np.mean(np.absolute(a)**2))
def l1norm(a):
"""Return the l1 norm of a, flattened out.
Implemented as a separate function (not a call to norm() for speed)."""
- return npy.sum(npy.absolute(a))
+ return np.sum(np.absolute(a))
def l2norm(a):
"""Return the l2 norm of a, flattened out.
Implemented as a separate function (not a call to norm() for speed)."""
- return npy.sqrt(npy.sum(npy.absolute(a)**2))
+ return np.sqrt(np.sum(np.absolute(a)**2))
def norm_flat(a,p=2):
"""norm(a,p=2) -> l-p norm of a.flat
@@ -1595,9 +1595,9 @@
# This function was being masked by a more general norm later in
# the file. We may want to simply delete it.
if p=='Infinity':
- return npy.amax(npy.absolute(a))
+ return np.amax(np.absolute(a))
else:
- return (npy.sum(npy.absolute(a)**p))**(1.0/p)
+ return (np.sum(np.absolute(a)**p))**(1.0/p)
def frange(xini,xfin=None,delta=None,**kw):
"""frange([start,] stop[, step, keywords]) -> array of floats
@@ -1657,7 +1657,7 @@
# round finds the nearest, so the endpoint can be up to
# delta/2 larger than xfin.
- return npy.arange(npts)*delta+xini
+ return np.arange(npts)*delta+xini
# end frange()
#import numpy.diag as diagonal_matrix
@@ -1665,7 +1665,7 @@
"""Return square diagonal matrix whose non-zero elements are given by the
input array."""
warnings.warn("Use numpy.diag(d)", DeprecationWarning)
- return npy.diag(diag)
+ return np.diag(diag)
def identity(n, rank=2, dtype='l', typecode=None):
"""identity(n,r) returns the identity matrix of shape (n,n,...,n) (rank r).
@@ -1686,7 +1686,7 @@
warnings.warn("Use dtype kwarg instead of typecode",
DeprecationWarning)
dtype = typecode
- iden = npy.zeros((n,)*rank, dtype)
+ iden = np.zeros((n,)*rank, dtype)
for i in range(n):
idx = (i,)*rank
iden[idx] = 1
@@ -1761,7 +1761,7 @@
The function MyFunction() is responsible for handling the dictionary of
keywords it will receive."""
warnings.warn("Use numpy.fromfunction()", DeprecationWarning)
- return npy.fromfunction(function, dimensions, **kwargs)
+ return np.fromfunction(function, dimensions, **kwargs)
### end fperez numutils code
@@ -1792,7 +1792,7 @@
For negative numbers is equivalent to ceil and for positive to floor.
"""
warnings.warn("Use numpy.fix()", DeprecationWarning)
- return npy.fix(x)
+ return np.fix(x)
def rem(x,y):
"""
@@ -1802,10 +1802,10 @@
This also differs from numpy.remainder, which uses floor instead of
fix.
"""
- x,y = npy.asarray(x), npy.asarray(y)
- if npy.any(y == 0):
+ x,y = np.asarray(x), np.asarray(y)
+ if np.any(y == 0):
return None
- return x - y * npy.fix(x/y)
+ return x - y * np.fix(x/y)
def norm(x,y=2):
@@ -1830,28 +1830,28 @@
NORM(V,-inf) = min(abs(V)).
"""
- x = npy.asarray(x)
+ x = np.asarray(x)
if x.ndim == 2:
if y==2:
- return npy.max(npy.linalg.svd(x)[1])
+ return np.max(np.linalg.svd(x)[1])
elif y==1:
- return npy.max(npy.sum(npy.absolute((x)), axis=0))
+ return np.max(np.sum(np.absolute((x)), axis=0))
elif y=='inf':
- return npy.max(npy.sum(npy.absolute((npy.transpose(x))), axis=0))
+ return np.max(np.sum(np.absolute((np.transpose(x))), axis=0))
elif y=='fro':
- xx = npy.dot(x.transpose(), x)
- return npy.sqrt(npy.sum(npy.diag(xx), axis=0))
+ xx = np.dot(x.transpose(), x)
+ return np.sqrt(np.sum(np.diag(xx), axis=0))
else:
raise ValueError('Second argument not permitted for matrices')
else:
- xa = npy.absolute(x)
+ xa = np.absolute(x)
if y == 'inf':
- return npy.max(xa)
+ return np.max(xa)
elif y == '-inf':
- return npy.min(xa)
+ return np.min(xa)
else:
- return npy.power(npy.sum(npy.power(xa,y)),1/float(y))
+ return np.power(np.sum(np.power(xa,y)),1/float(y))
def orth(A):
@@ -1865,8 +1865,8 @@
rank of A.
"""
- A = npy.asarray(A)
- U,S,V = npy.linalg.svd(A)
+ A = np.asarray(A)
+ U,S,V = np.linalg.svd(A)
m,n = A.shape
if m > 1:
@@ -1876,9 +1876,9 @@
else:
s = 0
- tol = max(m,n) * npy.max(s) * _eps_approx
- r = npy.sum(s > tol)
- Q = npy.take(U,range(r),1)
+ tol = max(m,n) * np.max(s) * _eps_approx
+ r = np.sum(s > tol)
+ Q = np.take(U,range(r),1)
return Q
@@ -1891,12 +1891,12 @@
Note that numerix.mlab.rank() is not equivalent to Matlab's rank.
This function is!
"""
- x = npy.asarray(x)
- s = npy.linalg.svd(x, compute_uv=False)
- maxabs = npy.max(npy.absolute(s))
+ x = np.asarray(x)
+ s = np.linalg.svd(x, compute_uv=False)
+ maxabs = np.max(np.absolute(s))
maxdim = max(x.shape)
tol = maxabs * maxdim * _eps_approx
- return npy.sum(s > tol)
+ return np.sum(s > tol)
def sqrtm(x):
"""
@@ -1904,8 +1904,9 @@
This means that s=sqrtm(x) implies dot(s,s) = x.
Note that s and x are matrices.
"""
- return mfuncC(npy.sqrt, x)
+ return mfuncC(np.sqrt, x)
+
def mfuncC(f, x):
"""
mfuncC(f, x) : matrix function with possibly complex eigenvalues.
@@ -1913,12 +1914,12 @@
This function is needed by sqrtm and allows further functions.
"""
- x = npy.asarray(x)
- (v,uT) = npy.linalg.eig(x)
- V = npy.diag(f(v+0j))
+ x = np.asarray(x)
+ (v,uT) = np.linalg.eig(x)
+ V = np.diag(f(v+0j))
# todo: warning: this is not exactly what matlab does
# MATLAB "B/A is roughly the same as B*inv(A)"
- y = npy.dot(uT, npy.dot(V, npy.linalg.inv(uT)))
+ y = np.dot(uT, np.dot(V, np.linalg.inv(uT)))
return approx_real(y)
def approx_real(x):
@@ -1927,9 +1928,9 @@
approx_real(x) : returns x.real if |x.imag| < |x.real| * _eps_approx.
This function is needed by sqrtm and allows further functions.
"""
- ai = npy.absolute(x.imag)
- ar = npy.absolute(x.real)
- if npy.max(ai) <= npy.max(ar) * _eps_approx:
+ ai = np.absolute(x.imag)
+ ar = np.absolute(x.real)
+ if np.max(ai) <= np.max(ar) * _eps_approx:
return x.real
else:
return x
@@ -1940,29 +1941,52 @@
def safe_isnan(x):
'isnan for arbitrary types'
- try: b = npy.isnan(x)
+ try: b = np.isnan(x)
except NotImplementedError: return False
else: return b
-
def safe_isinf(x):
'isinf for arbitrary types'
- try: b = npy.isinf(x)
+ try: b = np.isinf(x)
except NotImplementedError: return False
else: return b
-
def rec_append_field(rec, name, arr, dtype=None):
'return a new record array with field name populated with data from array arr'
- arr = npy.asarray(arr)
- if dtype is None:
- dtype = arr.dtype
- newdtype = npy.dtype(rec.dtype.descr + [(name, dtype)])
- newrec = npy.empty(rec.shape, dtype=newdtype)
+ warnings.warn("use rec_append_fields", DeprecationWarning)
+ return rec_append_fields(rec, name, arr, dtype)
+
+def rec_append_fields(rec, names, arrs, dtypes=None):
+ """
+ return a new record array with field names populated with data
+ from arrays in arrs. If appending a single field then names, arrs
+ and dtypes do not have to be lists. They can just be the values themselves.
+ """
+ if (not cbook.is_string_like(names) and cbook.iterable(names) \
+ and len(names) and cbook.is_string_like(names[0])):
+ if len(names) != len(arrs):
+ raise ValueError, "number of arrays do not match number of names"
+ else: # we have only 1 name and 1 array
+ names = [names]
+ arrs = [arrs]
+ arrs = map(np.asarray, arrs)
+ if dtypes is None:
+ dtypes = [a.dtype for a in arrs]
+ elif not cbook.iterable(dtypes):
+ dtypes = [dtypes]
+ if len(arrs) != len(dtypes):
+ if len(dtypes) == 1:
+ dtypes = dtypes * len(arrs)
+ else:
+ raise ValueError, "dtypes must be None, a single dtype or a list"
+
+ newdtype = np.dtype(rec.dtype.descr + zip(names, dtypes))
+ newrec = np.empty(rec.shape, dtype=newdtype)
for field in rec.dtype.fields:
newrec[field] = rec[field]
- newrec[name] = arr
- return newrec.view(npy.recarray)
+ for name, arr in zip(names, arrs):
+ newrec[name] = arr
+ return newrec.view(np.recarray)
def rec_drop_fields(rec, names):
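
A hedged sketch of the new rec_append_fields above (data is illustrative;
a single name/array pair may also be passed without wrapping in lists):

    import numpy as np
    import matplotlib.mlab as mlab

    r = np.rec.fromrecords([(1, 2.0), (3, 4.0)], names='a,b')
    r2 = mlab.rec_append_fields(r, ['c', 'd'],
                                [np.array([5, 6]), np.array([7.0, 8.0])])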
@@ -1971,88 +1995,17 @@
names = set(names)
Nr = len(rec)
- newdtype = npy.dtype([(name, rec.dtype[name]) for name in rec.dtype.names
+ newdtype = np.dtype([(name, rec.dtype[name]) for name in rec.dtype.names
if name not in names])
- newrec = npy.empty(Nr, dtype=newdtype)
+ newrec = np.empty(Nr, dtype=newdtype)
for field in newdtype.names:
newrec[field] = rec[field]
- return newrec.view(npy.recarray)
+ return newrec.view(np.recarray)
-def rec_join(key, r1, r2):
- """
- join record arrays r1 and r2 on key; key is a tuple of field
- names. if r1 and r2 have equal values on all the keys in the key
- tuple, then their fields will be merged into a new record array
- containing the intersection of the fields of r1 and r2
- """
- for name in key:
- if name not in r1.dtype.names:
- raise ValueError('r1 does not have key field %s'%name)
- if name not in r2.dtype.names:
- raise ValueError('r2 does not have key field %s'%name)
-
- def makekey(row):
- return tuple([row[name] for name in key])
-
-
- names = list(r1.dtype.names) + [name for name in r2.dtype.names if name not in set(r1.dtype.names)]
-
-
-
- r1d = dict([(makekey(row),i) for i,row in enumerate(r1)])
- r2d = dict([(makekey(row),i) for i,row in enumerate(r2)])
-
- r1keys = set(r1d.keys())
- r2keys = set(r2d.keys())
-
- keys = r1keys & r2keys
-
- r1ind = [r1d[k] for k in keys]
- r2ind = [r2d[k] for k in keys]
-
-
- r1 = r1[r1ind]
- r2 = r2[r2ind]
-
- r2 = rec_drop_fields(r2, r1.dtype.names)
-
-
- def key_desc(name):
- 'if name is a string key, use the larger size of r1 or r2 before merging'
- dt1 = r1.dtype[name]
- if dt1.type != npy.string_:
- return (name, dt1.descr[0][1])
-
- dt2 = r1.dtype[name]
- assert dt2==dt1
- if dt1.num>dt2.num:
- return (name, dt1.descr[0][1])
- else:
- return (name, dt2.descr[0][1])
-
-
-
- keydesc = [key_desc(name) for name in key]
-
- newdtype = npy.dtype(keydesc +
- [desc for desc in r1.dtype.descr if desc[0] not in key ] +
- [desc for desc in r2.dtype.descr if desc[0] not in key ] )
-
-
- newrec = npy.empty(len(r1), dtype=newdtype)
- for field in r1.dtype.names:
- newrec[field] = r1[field]
-
- for field in r2.dtype.names:
- newrec[field] = r2[field]
-
- return newrec.view(npy.recarray)
-
-
def rec_groupby(r, groupby, stats):
"""
r is a numpy record array
@@ -2063,7 +2016,7 @@
stats is a sequence of (attr, func, outname) which will call x =
func(attr) and assign x to the record array output with attribute
outname.
- Eg, stats = ( ('sales', len, 'numsales'), ('sales', npy.mean, 'avgsale') )
+ Eg, stats = ( ('sales', len, 'numsales'), ('sales', np.mean, 'avgsale') )
return record array has dtype names for each attribute name in
the 'groupby' argument, with the associated group values, and
@@ -2095,7 +2048,7 @@
attrs, funcs, outnames = zip(*stats)
names = list(groupby)
names.extend(outnames)
- return npy.rec.fromrecords(rows, names=names)
+ return np.rec.fromrecords(rows, names=names)
@@ -2114,16 +2067,23 @@
for attr, func, outname in summaryfuncs:
names.append(outname)
- arrays.append(npy.asarray(func(r[attr])))
+ arrays.append(np.asarray(func(r[attr])))
- return npy.rec.fromarrays(arrays, names=names)
+ return np.rec.fromarrays(arrays, names=names)
-def rec_join(key, r1, r2):
+
+def rec_join(key, r1, r2, jointype='inner', defaults=None):
"""
join record arrays r1 and r2 on key; key is a tuple of field
names. if r1 and r2 have equal values on all the keys in the key
tuple, then their fields will be merged into a new record array
containing the intersection of the fields of r1 and r2
+
+ The jointype keyword can be 'inner', 'outer', 'leftouter'.
+ To do a rightouter join just reverse r1 and r2.
+
+ The defaults keyword is a dictionary filled with
+ {column_name:default_value} pairs.
"""
for name in key:
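
A hedged sketch of the extended rec_join signature documented above (the
record arrays and default value are illustrative):

    import numpy as np
    import matplotlib.mlab as mlab

    r1 = np.rec.fromrecords([(1, 10.0), (2, 20.0)], names='key,v1')
    r2 = np.rec.fromrecords([(2, 200.0), (3, 300.0)], names='key,v2')

    # keep every row of r1; unmatched v2 cells get the supplied default
    j = mlab.rec_join(('key',), r1, r2, jointype='leftouter',
                      defaults={'v2': -1.0})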
@@ -2141,24 +2101,29 @@
r1keys = set(r1d.keys())
r2keys = set(r2d.keys())
- keys = r1keys & r2keys
+ common_keys = r1keys & r2keys
- r1ind = npy.array([r1d[k] for k in keys])
- r2ind = npy.array([r2d[k] for k in keys])
+ r1ind = np.array([r1d[k] for k in common_keys])
+ r2ind = np.array([r2d[k] for k in common_keys])
- # Make sure that the output rows have the same relative order as r1
- sortind = r1ind.argsort()
+ common_len = len(common_keys)
+ left_len = right_len = 0
+ if jointype == "outer" or jointype == "leftouter":
+ left_keys = r1keys.difference(r2keys)
+ left_ind = np.array([r1d[k] for k in left_keys])
+ left_len = len(left_ind)
+ if jointype == "outer":
+ right_keys = r2keys.difference(r1keys)
+ right_ind = np.array([r2d[k] for k in right_keys])
+ right_len = len(right_ind)
- r1 = r1[r1ind[sortind]]
- r2 = r2[r2ind[sortind]]
-
r2 = rec_drop_fields(r2, r1.dtype.names)
def key_desc(name):
'if name is a string key, use the larger size of r1 or r2 before merging'
dt1 = r1.dtype[name]
- if dt1.type != npy.string_:
+ if dt1.type != np.string_:
return (name, dt1.descr[0][1])
dt2 = r1.dtype[name]
@@ -2172,25 +2137,44 @@
keydesc = [key_desc(name) for name in key]
- newdtype = npy.dtype(keydesc +
+ newdtype = np.dtype(keydesc +
[desc for desc in r1.dtype.descr if desc[0] not in key ] +
[desc for desc in r2.dtype.descr if desc[0] not in key ] )
- newrec = npy.empty(len(r1), dtype=newdtype)
+ newrec = np.empty(common_len + left_len + right_len, dtype=newdtype)
+
+ if jointype != 'inner' and defaults is not None: # fill in the defaults en masse
+ newrec_fields = newrec.dtype.fields.keys()
+ for k, v in defaults.items():
+ if k in newrec_fields:
+ newrec[k] = v
+
for field in r1.dtype.names:
- newrec[field] = r1[field]
+ newrec[field][:common_len] = r1[field][r1ind]
+ if jointype == "outer" or jointype == "leftouter":
+ newrec[field][common_len:(common_len+left_len)] = r1[field][left_ind]
for field in r2.dtype.names:
- newrec[field] = r2[field]
+ newrec[field][:common_len] = r2[field][r2ind]
+ if jointype == "outer":
+ newrec[field][-right_len:] = r2[field][right_ind[right_ind.argsort()]]
- return newrec.view(npy.recarray)
+ # sort newrec using the same order as r1
+ sort_indices = r1ind.copy()
+ if jointype == "outer" or jointype == "leftouter":
+ sort_indices = np.append(sort_indices, left_ind)
+ newrec[:(common_len+left_len)] = newrec[sort_indices.argsort()]
+
+ return newrec.view(np.recarray)
+
+
def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
- converterd=None, names=None, missing=None):
+ converterd=None, names=None, missing='', missingd=None):
"""
Load data from comma/space/tab delimited file in fname into a
- numpy record array and return the record array.
+ numpy (m)record array and return the record array.
If names is None, a header row is required to automatically assign
the recarray names. The headers will be lower cased, spaces will
@@ -2211,18 +2195,29 @@
data type. When set to zero all rows are validated.
converterd, if not None, is a dictionary mapping column number or
- munged column name to a converter function
+ munged column name to a converter function.
names, if not None, is a list of header names. In this case, no
header will be read from the file
+ missingd - is a dictionary mapping munged column names to field values
+ which signify that the field does not contain actual data and should
+ be masked, e.g. '0000-00-00' or 'unused'
+
+ missing - a string whose value signals a missing field regardless of
+ the column it appears in, e.g. 'unused'
+
if no rows are found, None is returned -- see examples/loadrec.py
"""
if converterd is None:
converterd = dict()
+ if missingd is None:
+ missingd = {}
+
import dateutil.parser
+ import datetime
parsedate = dateutil.parser.parse
@@ -2270,14 +2265,35 @@
process_skiprows(reader)
- dateparser = dateutil.parser.parse
+ def ismissing(name, val):
+ "Should the value val in column name be masked?"
- def myfloat(x):
- if x==missing:
- return npy.nan
+ if val == missing or val == missingd.get(name) or val == '':
+ return True
else:
- return float(x)
+ return False
+ def with_default_value(func, default):
+ def newfunc(name, val):
+ if ismissing(name, val):
+ return default
+ else:
+ return func(val)
+ return newfunc
+
+
+ def mybool(x):
+ if x=='True': return True
+ elif x=='False': return False
+ else: raise ValueError('invalid bool')
+
+ dateparser = dateutil.parser.parse
+ mydateparser = with_default_value(dateparser, datetime.date(1,1,1))
+ myfloat = with_default_value(float, np.nan)
+ myint = with_default_value(int, -1)
+ mystr = with_default_value(str, '')
+ mybool = with_default_value(mybool, None)
+
def mydate(x):
# try and return a date object
d = dateparser(x)
@@ -2285,16 +2301,16 @@
if d.hour>0 or d.minute>0 or d.second>0:
raise ValueError('not a date')
return d.date()
+ mydate = with_default_value(mydate, datetime.date(1,1,1))
-
- def get_func(item, func):
+ def get_func(name, item, func):
# promote functions in this order
- funcmap = {int:myfloat, myfloat:mydate, mydate:dateparser, dateparser:str}
- try: func(item)
+ funcmap = {mybool:myint, myint:myfloat, myfloat:mydate, mydate:mydateparser, mydateparser:mystr}
+ try: func(name, item)
except:
- if func==str:
+ if func==mystr:
raise ValueError('Could not find a working conversion function')
- else: return get_func(item, funcmap[func]) # recurse
+ else: return get_func(name, item, funcmap[func]) # recurse
else: return func
@@ -2310,7 +2326,7 @@
converters = None
for i, row in enumerate(reader):
if i==0:
- converters = [int]*len(row)
+ converters = [mybool]*len(row)
if checkrows and i>checkrows:
break
#print i, len(names), len(row)
@@ -2320,17 +2336,25 @@
if func is None:
func = converterd.get(name)
if func is None:
- if not item.strip(): continue
+ #if not item.strip(): continue
func = converters[j]
if len(item.strip()):
- func = get_func(item, func)
+ func = get_func(name, item, func)
+ else:
+ # how should we handle custom converters and defaults?
+ func = with_default_value(func, None)
converters[j] = func
return converters
# Get header and remove invalid characters
needheader = names is None
if needheader:
- headers = reader.next()
+ for row in reader:
+ if len(row) and row[0].startswith(comments):
+ continue
+ headers = row
+ break
+
# remove these chars
delete = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
delete.add('"')
@@ -2346,7 +2370,7 @@
item = itemd.get(item, item)
cnt = seen.get(item, 0)
if cnt>0:
- names.append(item + '%d'%cnt)
+ names.append(item + '_%d'%cnt)
else:
names.append(item)
seen[item] = cnt+1
@@ -2366,15 +2390,24 @@
# iterate over the remaining rows and convert the data to date
# objects, ints, or floats as appropriate
rows = []
+ rowmasks = []
for i, row in enumerate(reader):
if not len(row): continue
if row[0].startswith(comments): continue
- rows.append([func(val) for func, val in zip(converters, row)])
+ rows.append([func(name, val) for func, name, val in zip(converters, names, row)])
+ rowmasks.append([ismissing(name, val) for name, val in zip(names, row)])
fh.close()
if not len(rows):
return None
- r = npy.rec.fromrecords(rows, names=names)
+ if np.any(rowmasks):
+ try: from numpy.ma import mrecords
+ except ImportError:
+ raise RuntimeError('numpy 1.05 or later is required for masked array support')
+ else:
+ r = mrecords.fromrecords(rows, names=names, mask=rowmasks)
+ else:
+ r = np.rec.fromrecords(rows, names=names)
return r
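
A hedged sketch of the extended csv2rec call (the file name and column
names are illustrative):

    import matplotlib.mlab as mlab

    # 'N/A' anywhere, or '0000-00-00' in the datecol column, is treated
    # as missing; with numpy 1.05+ the result is a masked record array
    r = mlab.csv2rec('data.csv', missing='N/A',
                     missingd={'datecol': '0000-00-00'})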
@@ -2386,6 +2419,8 @@
def toval(self, x):
return str(x)
+ def fromstr(self, s):
+ return s
class FormatString(FormatObj):
def tostr(self, x):
@@ -2404,6 +2439,7 @@
if x is None: return 'None'
return self.fmt%self.toval(x)
+
class FormatFloat(FormatFormatStr):
def __init__(self, precision=4, scale=1.):
FormatFormatStr.__init__(self, '%%1.%df'%precision)
@@ -2415,10 +2451,24 @@
x = x * self.scale
return x
+ def fromstr(self, s):
+ return float(s)/self.scale
+
+
class FormatInt(FormatObj):
def toval(self, x):
return x
+ def fromstr(self, s):
+ return int(s)
+
+class FormatBool(FormatObj):
+ def toval(self, x):
+ return x
+
+ def fromstr(self, s):
+ return bool(s)
+
class FormatPercent(FormatFloat):
def __init__(self, precision=4):
FormatFloat.__init__(self, precision, scale=100.)
@@ -2427,6 +2477,7 @@
def __init__(self, precision=4):
FormatFloat.__init__(self, precision, scale=1e-3)
+
class FormatMillions(FormatFloat):
def __init__(self, precision=4):
FormatFloat.__init__(self, precision, scale=1e-6)
@@ -2440,19 +2491,30 @@
if x is None: return 'None'
return x.strftime(self.fmt)
+ def fromstr(self, x):
+ import dateutil.parser
+ return dateutil.parser.parse(x).date()
+
class FormatDatetime(FormatDate):
def __init__(self, fmt='%Y-%m-%d %H:%M:%S'):
FormatDate.__init__(self, fmt)
+ def fromstr(self, x):
+ import dateutil.parser
+ return dateutil.parser.parse(x)
+
+
+
defaultformatd = {
- npy.int16 : FormatInt(),
- npy.int32 : FormatInt(),
- npy.int64 : FormatInt(),
- npy.float32 : FormatFloat(),
- npy.float64 : FormatFloat(),
- npy.object_ : FormatObj(),
- npy.string_ : FormatString(),
+ np.bool_ : FormatBool(),
+ np.int16 : FormatInt(),
+ np.int32 : FormatInt(),
+ np.int64 : FormatInt(),
+ np.float32 : FormatFloat(),
+ np.float64 : FormatFloat(),
+ np.object_ : FormatObj(),
+ np.string_ : FormatString(),
}
def get_formatd(r, formatd=None):
@@ -2510,20 +2572,20 @@
def get_justify(colname, column, precision):
ntype = type(column[0])
- if ntype==npy.str or ntype==npy.str_ or ntype==npy.string0 or ntype==npy.string_:
+ if ntype==np.str or ntype==np.str_ or ntype==np.string0 or ntype==np.string_:
length = max(len(colname),column.itemsize)
return 0, length+padding, "%s" # left justify
- if ntype==npy.int or ntype==npy.int16 or ntype==npy.int32 or ntype==npy.int64 or ntype==npy.int8 or ntype==npy.int_:
- length = max(len(colname),npy.max(map(len,map(str,column))))
+ if ntype==np.int or ntype==np.int16 or ntype==np.int32 or ntype==np.int64 or ntype==np.int8 or ntype==np.int_:
+ length = max(len(colname),np.max(map(len,map(str,column))))
return 1, length+padding, "%d" # right justify
- if ntype==npy.float or ntype==npy.float32 or ntype==npy.float64 or ntype==npy.float96 or ntype==npy.float_:
+ if ntype==np.float or ntype==np.float32 or ntype==np.float64 or ntype==np.float96 or ntype==np.float_:
fmt = "%." + str(precision) + "f"
- length = max(len(colname),npy.max(map(len,map(lambda x:fmt%x,column))))
+ length = max(len(colname),np.max(map(len,map(lambda x:fmt%x,column))))
return 1, length+padding, fmt # right justify
- return 0, max(len(colname),npy.max(map(len,map(str,column))))+padding, "%s"
+ return 0, max(len(colname),np.max(map(len,map(str,column))))+padding, "%s"
if header is None:
header = r.dtype.names
@@ -2566,29 +2628,61 @@
text = os.linesep.join(textl)
return text
-def rec2csv(r, fname, delimiter=',', formatd=None):
+
+
+def rec2csv(r, fname, delimiter=',', formatd=None, missing='',
+ missingd=None):
"""
- Save the data from numpy record array r into a comma/space/tab
+ Save the data from numpy (m)recarray r into a comma/space/tab
delimited file. The record array dtype names will be used for
column headers.
fname - can be a filename or a file handle. Support for gzipped
files is automatic, if the filename ends in .gz
+
+ See csv2rec and rec2csv for information about missing and
+ missingd, which can be used to fill in masked values into your CSV
+ file.
"""
+
+ if missingd is None:
+ missingd = dict()
+
+ def with_mask(func):
+ def newfunc(val, mask, mval):
+ if mask:
+ return mval
+ else:
+ return func(val)
+ return newfunc
+
formatd = get_formatd(r, formatd)
funcs = []
for i, name in enumerate(r.dtype.names):
- funcs.append(csvformat_factory(formatd[name]).tostr)
+ funcs.append(with_mask(csvformat_factory(formatd[name]).tostr))
fh, opened = cbook.to_filehandle(fname, 'w', return_opened=True)
writer = csv.writer(fh, delimiter=delimiter)
header = r.dtype.names
writer.writerow(header)
+
+ # Our list of specials for missing values
+ mvals = []
+ for name in header:
+ mvals.append(missingd.get(name, missing))
+
+ ismasked = False
+ if len(r):
+ row = r[0]
+ ismasked = hasattr(row, '_fieldmask')
+
for row in r:
- writer.writerow([func(val) for func, val in zip(funcs, row)])
+ if ismasked:
+ row, rowmask = row.item(), row._fieldmask.item()
+ else:
+ rowmask = [False] * len(row)
+ writer.writerow([func(val, mask, mval) for func, val, mask, mval
+ in zip(funcs, row, rowmask, mvals)])
if opened:
fh.close()
-
-
-
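
Finally, a hedged sketch of the patched rec2csv/csv2rec pair (file name
and data are illustrative; only masked recarrays exercise the new
missing-value path):

    import numpy as np
    import matplotlib.mlab as mlab

    r = np.rec.fromrecords([(1, 2.5), (2, 3.5)], names='id,val')
    mlab.rec2csv(r, 'out.csv')      # plain recarray: no cells are masked
    r2 = mlab.csv2rec('out.csv')    # round-trip back to a record array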