# ---------------------------------------------------------------------------
# climatology/clim/util/stats.py  (new file, mode 100755)
#
# Robust-statistics helpers (median absolute deviation, robust standard
# deviation, outlier filtering) plus a one-pass Stats accumulator that
# computes n, mean, stddev, min, max, rms, skewness & kurtosis using a
# numerically stable online update of running central moments.
#
# Author: Brian Wilson  (Stats class 2003; higher-moment rewrite M. Butala 2010)
#
# Methods of Stats:
#   add      -- add a data point to the accumulating stats
#   calc     -- compute the statistics: n, mean, std dev, min, max, rms, skew, kurtosis
#   label    -- set the label for printing
#   format   -- set the float format for printing
#   __repr__ -- generates one-line string version of statistics for easy printing
#   reset    -- zero the accumulators
#   addm     -- add an array of data points to the accumulators (add multiple)
# ---------------------------------------------------------------------------

import sys
from math import sqrt
from collections import namedtuple

import numpy as NP
import scipy.stats


def mad(l):
    """Compute the median absolute deviation (a robust measure of spread) of the
    list of values *l*."""
    median = NP.median(l)
    return NP.median([abs(x - median) for x in l])


def robust_std(l, alpha=1/scipy.stats.norm.ppf(0.75)):
    """Compute a robust estimate of the standard deviation of *l*.

    The default scale factor alpha = 1/Phi^-1(0.75) ~= 1.4826 makes the MAD a
    consistent estimator of sigma for normally distributed samples.
    """
    return alpha * mad(l)


def filter_outliers(time_series, n_std=6, indices=False):
    """Filter outliers (those samples a distance of *n_std* robust standard
    deviations from the median) and return the kept samples.

    Returns a tuple of kept samples (an ndarray if the input was an ndarray);
    if *indices* is true, also returns the kept samples' original indices.
    If NO sample survives the cut (e.g. a constant series, whose robust std
    is 0), the original series is returned unchanged.
    """
    med = NP.median(time_series)
    std = robust_std(time_series)
    # Collect (index, value) pairs for the inliers only.
    # Fix: the original did len(zip(...)), which raises TypeError on Python 3
    # where zip() is lazy; build the list explicitly instead.
    kept = [(i, x) for i, x in enumerate(time_series) if abs(x - med) < (std * n_std)]
    if not kept:
        # Nothing survived the cut; fall back to the full series.
        return time_series
    I, out = zip(*kept)
    if isinstance(time_series, NP.ndarray):
        out = NP.array(out)
    if indices:
        return out, I
    else:
        return out


####################################################################################################

# Result record returned by Stats.calc().
StatsCalc = namedtuple('StatsCalc', 'n mean stddev min max rms skewness kurtosis')


class Stats(object):
    """Simple one-pass statistics accumulator: mean, std dev, min, max, rms,
    skewness and kurtosis.

    Implemented by saving running accumulators (count, mean, running central
    moments M2..M4, mean of squares, min, max) so values can be streamed
    through add()/addm() without storing them.  See tests for example usage.
    """
    __slots__ = ('count', 'mean', 'stddev', 'min', 'max', 'rms', 'skewness', 'kurtosis',
                 'rms2', 'M2', 'M3', 'M4', 'labelStr', 'formatStr', 'missingValue')

    def __init__(self, missingValue=-9999., label=None, format=None):
        """Create Stats object, optionally set print label and float format string."""
        self.reset(missingValue)
        self.missingValue = missingValue
        self.labelStr = label
        self.formatStr = format

    def add(self, val):
        """Add one data point to the accumulators."""
        self.count += 1
        n = self.count
        if n == 1:
            self.mean = 0.
            self.M2 = 0.
            self.rms2 = 0.
            self.M3 = 0.
            self.M4 = 0.
            self.min = val
            self.max = val
        else:
            self.min = min(self.min, val)
            self.max = max(self.max, val)

        delta = val - self.mean    # use deviation from mean to prevent roundoff/overflow problems
        delta_n = delta / float(n)
        delta_n2 = delta_n * delta_n
        self.mean += delta_n
        self.rms2 += (val**2 - self.rms2) / float(n)   # running mean of squares
        term = delta * delta_n * (n-1)
        # Update 4th, then 3rd, then 2nd central moment: each update must use
        # the PREVIOUS values of the lower moments, so order matters here.
        self.M4 += term * delta_n2 * (n*n - 3*n + 3) + 6 * delta_n2 * self.M2 - 4 * delta_n * self.M3
        self.M3 += term * delta_n * (n - 2) - 3 * delta_n * self.M2
        self.M2 += term
        return self

    def calc(self):
        """Calculate the statistics for the data added so far.

        Returns a StatsCalc namedtuple (n, mean, stddev, min, max, rms,
        skewness, kurtosis).  Fields that cannot be computed yet (n < 2; or
        zero variance, for skewness/kurtosis) retain the missingValue sentinel.
        """
        n = self.count
        if n >= 2:
            M2 = self.M2
            self.stddev = sqrt(M2 / float(n - 1))
            self.rms = sqrt(self.rms2)
            if M2 > 0.:
                # Fix: guard against ZeroDivisionError for constant-valued data.
                self.skewness = sqrt(n) * self.M3 / (M2 * sqrt(M2))
                self.kurtosis = (n * self.M4) / (M2 * M2) - 3   # excess kurtosis
        return StatsCalc(self.count, self.mean, self.stddev, self.min, self.max,
                         self.rms, self.skewness, self.kurtosis)

    def label(self, str):
        """Label the statistics for printing."""
        self.labelStr = str
        return self

    def format(self, str):
        """Set the float format to be used in printing stats."""
        self.formatStr = str
        return self

    def __repr__(self):
        """One-line stats representation for simple printing."""
        if self.labelStr is None or self.labelStr == "":
            self.labelStr = "Stats"
        line = self.labelStr + ": "
        if self.formatStr:
            # One '%d' for n plus seven float fields.
            a = [self.formatStr for i in range(7)]   # fix: xrange is Python 2 only
            a.insert(0, '%d')
            format = ' '.join(a)
            line += format % self.calc()
        else:
            line += "N=%d mean=%f stddev=%f min=%f max=%f rms=%f skewness=%f kurtosis=%f" % self.calc()
        return line

    def reset(self, missingValue):
        """Reset the accumulators to start over."""
        self.count = 0
        self.mean = missingValue
        self.stddev = missingValue
        self.min = missingValue
        self.max = missingValue
        self.rms = missingValue
        self.skewness = missingValue
        self.kurtosis = missingValue
        self.M2 = 0.
        self.rms2 = 0.
        self.M3 = 0.
        self.M4 = 0.
        self.labelStr = None
        self.formatStr = None
        return self

    def addm(self, seq):
        """Add multiple - add a sequence of data points all at once."""
        for val in seq:
            self.add(val)
        return self


####################################################################################################


def main(args):
    """Read one float per line from a file (or stdin for '-') and print its stats."""
    fn = args[0]
    # Fix: open the file BEFORE entering try/finally, so that a failed open()
    # cannot cause a NameError on 'fid' inside the finally clause.
    fid = sys.stdin if fn == '-' else open(fn, 'r')
    try:
        stats = Stats()
        stats.addm(float(x) for x in fid)
        print(stats)
    finally:
        if fid is not sys.stdin:
            fid.close()


if __name__ == '__main__':
    main(sys.argv[1:])
# ---------------------------------------------------------------------------
# climatology/clim/util/timeJ2000.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
#!/bin/env python

"""
timeJ2000.py -- Date & Time class based on native Python datetime, time, and calendar
     libraries.  Represents a Date/Time as seconds past J2000 epoch
     and provides various format conversions and date delta arithmetic.
     Also includes some new smart functions that perform desired
     transformations on a Do The Right Thing basis.
"""

# NOTE(review): this module is Python 2 only as written -- it relies on
# `types.IntType`-style constants, `print` statements, `cmp`, `__cmp__`
# and `__coerce__`, none of which exist in Python 3.

import sys, datetime, calendar, time, types

##CONSTANTS
# J2000 epoch = 2000/01/01 12:00:00 UTC, expressed in seconds past the Unix epoch.
J2000_1970_EPOCH = 946684800 + 12*60*60 #2000/01/01,12:00:00 in seconds past 1970
LATEST_TIME = 9999999999 #Highest (latest) time in J2000 to care about... useful for initializations
EARLIEST_TIME = -9999999999 #Lowest (earlist) time in J2000 to care about... useful for initializations

# Tiny message helpers; die() writes to stderr and exits with the given status.
def echo (str ): sys.stdout.write(str + "\n")
def err (str ): sys.stderr.write(str + "\n")
def warn (str ): err("---WARNING, IONOTIME: "+str)
def die (str, status=1): err("***ERROR: "+str); sys.exit(status)

##BASE TRANSFORMATIONS
def ensureYYYY(y):
    # Widen a 2-digit year to 4 digits; pivot at 50 (51-99 -> 19xx, 0-50 -> 20xx).
    if y>99: return y
    if y>50: return 1900+y
    return 2000+y

def ensureYY(y):
    # Truncate a year to its 2-digit form.
    return y%100

#transforms an hms string to a float hours
def hms_to_hours(str):
    # NOTE(review): the seconds field is divided by 360.0 here; 3600.0 is the
    # usual seconds-per-hour factor -- confirm whether this is intentional.
    return float(str[0:2])+float(str[2:4])/60.0+float(str[4:6])/360.0

def J2000_to_list(sec=0.0):
    """Convert seconds past J2000 to a [y, m, d, hh, mm, ss] list (UTC);
    fractional seconds, if any, are preserved in the seconds slot."""
    #check for fractional seconds
    frac=0.0
    if sec > int(sec):
        frac=sec-int(sec)
        sec =int(sec)
    callist=list(time.gmtime(sec+J2000_1970_EPOCH))
    #add back in fractional seconds if present
    if frac > 0.0:
        callist[5]=callist[5]+frac
    return callist[0:6]

def list_to_J2000(inlist):
    """Convert a (y, m, d[, hh, mm, ss]) sequence (UTC) to seconds past J2000;
    missing trailing fields default to zero."""
    #check for fractional seconds and remove
    clist=[0,0,0,0,0,0.0] #default to zeros everywhere
    clist[:len(inlist)]=inlist
    ss=clist[5]
    frac=0.0
    if ss > int(ss):
        frac=ss-int(ss)
        clist[5]=int(ss)
    #transform, adding fractional seconds afterwards
    return calendar.timegm(clist)-J2000_1970_EPOCH+frac

##INTELLIGENT FUNCTIONS
def valid_formats():
    """Return the tuple of format names understood by to_J2000/from_J2000."""
    return ('J2000', #int or float bare number
            'HHMMSS', #string
            'YYMMDD', #string
            'YYYYMMDD', #string
            'YYMMDDHHMMSS', #string .
            'YYYYMMDDHHMMSS', #string .
            'YYYYMMDD_HHMMSS', #string .
            'YYMMDD_HHMMSS', #string .
            'DOY', #string
            'HOD',"HOURSINDAY", #string hours of day
            'MOD',"MINUTESINDAY", #string minutes of day
            'SOD',"SECONDSINDAY", #string seconds of day
            'YYDOY', #string
            'LIST', #list(y,m,d,h,m,s)
            'HMS', #string
            'YMD', #string
            'YMDHMS', #string
            'GAIMSTRING', #string yyyy/mm/dd,hh:mm:ss.frac
            'TENETHOURLY', #string siteDOYlmm.yy.tenet
            'LOCALHMS', #string HHMMSS.F adjusted for local time (requires longitude in deg)
            'HOURLETTER'#, #string a where a(a,x) for each hour of day
#           'RINEX' #string
            )

def to_J2000(input,format=None):
    """Convert *input* (number, list/tuple, or string in one of valid_formats())
    to float seconds past J2000.  When *format* is omitted, the string format
    is guessed from the string's length."""
    sec=0 #internal representation
    if format: format=format.upper()

    #assume J2000 seconds for any bare number
    if isinstance(input,types.IntType) or isinstance(input,types.FloatType) or isinstance(input,types.LongType) or format=='J2000': return float(input)
    #if it's a list, simple... will be interpretted as y,m,d,hh,mm,ss with 0's in any unspecified slot
    elif isinstance(input,types.ListType) or isinstance(input,types.TupleType): return list_to_J2000(input)
    #if it's a string, could be many things
    elif isinstance(input,types.StringType):
        #strip off any fractional second information first
        p=input.find('.')
        frac=0.0
        if p>=0:
            if input.find('tenet') < 0:
                frac=float(input[p:])
                input =input[:p]
        #Autoguess format based on length or user-specified request
        if len(input)==len('siteDOYlmm.yy.tenet') and format=="TENETHOURLY":
            # site(4) + DOY(3) + hour-letter(1) + minute(2) + ".yy.tenet"
            (doy,hl,mm,y)=(int(input[4:7]),input[7:8],int(input[8:10]),int(input[11:13]))
            (yyyy,m,d)=J2000_to_list(list_to_J2000((ensureYYYY(int(y)),1,doy)))[0:3]
            return list_to_J2000((yyyy,m,d,ord(hl)-ord('a'),mm,0))

        if format=="DOY":
            return list_to_J2000((2000,1,int(input)))

        if format in ("HOD","HOURSINDAY"):
            return list_to_J2000((2000,1,1,int(input),0,0))

        if format in ("MOD","MINUTESINDAY"):
            return list_to_J2000((2000,1,1,0,int(input),0))

        if format in ("SOD","SECONDSINDAY"):
            return list_to_J2000((2000,1,1,0,0,int(input)))

        if format=="YYDOY":
            return list_to_J2000((ensureYYYY(int(input[0:2])),1,int(input[2:])))

        if len(input)==len('a') or format=='HOURLETTER':
            return list_to_J2000((2000,1,1,ord(input)-ord('a'),0,0))
        if len(input)==len('YYYY/MM/DD,HH:MM:SS') or format=='GAIMSTRING' or format=='ISO':
            return list_to_J2000((int(input[0:4]),
                                  int(input[5:7]),
                                  int(input[8:10]),
                                  int(input[11:13]),
                                  int(input[14:16]),
                                  int(input[17:19])+frac))
        if len(input)==len('YYYYMMDD_HHMMSS') or format=='YYYYMMDD_HHMMSS':
            return list_to_J2000((int(input[0:4]),
                                  int(input[4:6]),
                                  int(input[6:8]),
                                  int(input[9:11]),
                                  int(input[11:13]),
                                  int(input[13:15])+frac))

        if len(input)==len('YYMMDD_HHMMSS') or format=='YYMMDD_HHMMSS':
            return list_to_J2000((ensureYYYY(int(input[0:2])),
                                  int(input[2:4]),
                                  int(input[4:6]),
                                  int(input[7:9]),
                                  int(input[9:11]),
                                  int(input[11:13])+frac))

        if len(input)==len('YYYYMMDDHHMMSS') or format=='YYYYMMDDHHMMSS':
            return list_to_J2000((int(input[0:4]),
                                  int(input[4:6]),
                                  int(input[6:8]),
                                  int(input[8:10]),
                                  int(input[10:12]),
                                  int(input[12:14])+frac))

        if len(input)==len('YYMMDDHHMMSS') or format=='YYMMDDHHMMSS' or format=="YMDHMS":
            return list_to_J2000((ensureYYYY(int(input[0:2])),
                                  int(input[2:4]),
                                  int(input[4:6]),
                                  int(input[6:8]),
                                  int(input[8:10]),
                                  int(input[10:12])+frac))

        if len(input)==len('YYYYMMDD') or format=='YYYYMMDD':
            return list_to_J2000((int(input[0:4]),
                                  int(input[4:6]),
                                  int(input[6:8])))

        # Note: 'HHMMSS' needs the explicit format since it is ambiguous with 'YYMMDD'.
        if len(input)==len('HHMMSS') and format in ('HHMMSS','HMS'):
            return list_to_J2000((2000,1,1,
                                  int(input[0:2]),
                                  int(input[2:4]),
                                  int(input[4:6])+frac))

        if len(input)==len('YYMMDD') or format in ('YYMMDD','YMD'):
            return list_to_J2000((ensureYYYY(int(input[0:2])),
                                  int(input[2:4]),
                                  int(input[4:6])))

        # NOTE(review): die() takes (message, status); passing input as the
        # second argument makes it the process exit status -- confirm intent.
        die("Unknown string format",input)
    die("Unknown input type to to_J2000:",input)

def from_J2000(sec=0,format="YYYYMMDD_HHMMSS",aux=None):
    """Convert seconds past J2000 to the requested *format* (see valid_formats()).
    Fractional seconds are appended as a '.frac' suffix on string outputs."""
    #aux contains spare information, thusfar only used for site id's for filenames or longitude for localtime
    format=format.upper()
    if format == "J2000" : return sec
    (y,m,d,hh,mm,ss)=J2000_to_list(sec)
    f=""
    if ss > int(ss): f=("%f"%(ss-int(ss))).strip('0') #remove leading and trailing 0
    if format == "LIST" : return [y,m,d,hh,mm,ss]
    if format == "HOURLETTER" : return chr(hh+ord('a'))
    if format in("HOURSINDAY","HOD") : return hh+mm/60.0+ss/60.0/60.0
    if format in("MINUTESINDAY","MOD") : return hh*60+mm+ss/60.0
    if format in("SECONDSINDAY","SOD") : return (hh*60+mm)*60+ss
    if format in("HHMMSS","HMS") : return "%02d%02d%02d"%(hh,mm,ss)+f
    if format in("YYMMDD","YMD") : return "%02d%02d%02d"%(ensureYY(y),m,d)
    if format == "YYYYMMDD" : return "%04d%02d%02d"%(y,m,d)
    if format in("YYMMDDHHMMSS","YMDHMS"): return "%02d%02d%02d%02d%02d%02d"%(ensureYY(y),m,d,hh,mm,ss)+f
    if format == "YYYYMMDDHHMMSS" : return "%04d%02d%02d%02d%02d%02d"%(y,m,d,hh,mm,ss)+f
    if format == "YYMMDD_HHMMSS" : return "%02d%02d%02d_%02d%02d%02d"%(ensureYY(y),m,d,hh,mm,ss)+f
    if format == "YYYYMMDD_HHMMSS" : return "%04d%02d%02d_%02d%02d%02d"%(y,m,d,hh,mm,ss)+f
    if format == "GAIMSTRING" : return "%04d/%02d/%02d,%02d:%02d:%02d"%(y,m,d,hh,mm,ss)+f
    if format == "ISO" : return "%04d-%02d-%02dT%02d:%02d:%02dZ"%(y,m,d,hh,mm,ss)+f
    doy = time.gmtime(sec+J2000_1970_EPOCH)[7] #fetch doy
    if format == "DOY" : return "%03d"%(doy)
    if format == "YYDOY" : return "%02d%03d"%(ensureYY(y),doy)
    if format == "TENETHOURLY" :
        if not aux: aux="site"
        return "%4s%03d%1s%02d.%02d.tenet"%(aux,doy,chr(ord('a')+hh),mm,ensureYY(y))
    if format == "LOCALHMS" :
        if not aux: aux=0
        # NOTE(review): longitude is scaled by /360*24 hours here (cf. the
        # /360.0 in hms_to_hours) -- confirm degrees-to-hours factor.
        localtime = hh + aux/360.0*24.0 #in this case, aux is longitude in deg
        while (localtime < 0): localtime+=+24
        while (localtime >= 24): localtime-= 24
        return "%02d%02d%02d"%(localtime,mm,ss)+f
    die("Unrecognized format string in from_J2000 "+format)

class IonoTime:
    "Handles conversions between times and dates for all variety of ionospheric time interests"
    #internal representation is seconds past J2000
    def __init__(self,input=None):
        self.sec = 0
        self.set(input)
    def set(self,input=None,format=None):
        # NOTE(review): `if not input` treats 0/''/empty-list as "no input";
        # IonoTime(0) only works because self.sec already defaults to 0.
        if not input: return self
        if isinstance(input,IonoTime):
            self.sec=input.sec
        else:
            self.sec = to_J2000(input,format)
        return self
    def to(self,format=None,aux=None):
        if not format: return self.sec
        return from_J2000(self.sec,format,aux)
    def now(self):
        # Local wall-clock time interpreted as if UTC (via timegm in list_to_J2000).
        self.sec = to_J2000(time.localtime()[0:6])
        return self
    def nowUTC(self):
        self.sec = to_J2000(time.gmtime()[0:6])
        return self
    def addSeconds(self,s):
        self.sec+=s
        return self
    def addMinutes(self,m):
        self.sec+=m*60.0
        return self
    def addHours (self,h):
        self.sec+=h*60.0*60.0
        return self
    def addDays (self,d):
        self.sec+=d*60.0*60.0*24.0
        return self
    def addMonths (self,mi):
        # Calendar-aware month arithmetic: normalize month into 1..12, carrying years.
        (y,m,d,hh,mm,ss)=from_J2000(self.sec,"LIST")
        m+=mi
        while m > 12:
            y=y+1
            m-=12
        while m < 1:
            y=y-1
            m+=12
        self.sec=to_J2000((y,m,d,hh,mm,ss))
        return self
    def addYears (self,yi):
        (y,m,d,hh,mm,ss)=from_J2000(self.sec,"LIST")
        self.sec=to_J2000((y+yi,m,d,hh,mm,ss))
        return self
    def copy (self):
        n=IonoTime(self.sec)
        return n
    def makemidnight(self):
        # Drop the time-of-day fields (unspecified slots default to 0 in list_to_J2000).
        (y,m,d,hh,mm,ss)=from_J2000(self.sec,"LIST")
        self.sec=to_J2000((y,m,d))
        return self
    def floor(self,interval): #round current object to a specified accuracy
        (y,m,d,hh,mm,ss)=from_J2000(self.sec,"LIST")
        interval=interval.lower()
        # NOTE(review): the 'year' case uses day 0, which timegm treats as the
        # last day of the previous month -- confirm (y, 1, 1, ...) was intended.
        if interval.find('year' )>=0: self.sec=to_J2000((y, 1, 0, 0, 0, 0))
        elif interval.find('month' )>=0: self.sec=to_J2000((y, m, 1, 0, 0, 0))
        elif interval.find('day' )>=0: self.sec=to_J2000((y, m, d, 0, 0, 0))
        elif interval.find('hour' )>=0: self.sec=to_J2000((y, m, d, hh, 0, 0))
        elif interval.find('minute')>=0: self.sec=to_J2000((y, m, d, hh, mm, 0))
        elif interval.find('second')>=0: self.sec=to_J2000((y, m, d, hh, mm,int(ss)))
        else : die("IonoTime: Floor: Malformed interval: "+interval)
        return self
    # Arithmetic relies on Python 2 __coerce__ below to turn the operand into
    # a bare number, string, or list matching self's representation.
    def __sub__(self,other):
        return IonoTime(self.sec-other)
    def __add__(self,other):
        return IonoTime(self.sec+other)
#    def __iadd__(self,other):
#        return IonoTime(self.sec+other)
#    def __isub__(self,other):
#        return IonoTime(self.sec-other)
    def __cmp__(self,other):
        return cmp(self.sec,other.sec)
    def __coerce__(self,other):
        if isinstance(other,types.FloatType) or isinstance(other,types.IntType) or isinstance(other,types.LongType):
            return (self.sec,other)
        if isinstance(other,types.StringType):
            return (from_J2000(self.sec,"YYYYMMDD_HHMMSS"),other)
        if isinstance(other,types.ListType) or isinstance(other,types.TupleType):
            return (from_J2000(self.sec,"LIST"),other)
    def __repr__(self):
        return from_J2000(self.sec,"YYYYMMDD_HHMMSS")

def test():
    """Self-test: dies with a message on the first failing check."""
    print "Testing timeJ2000 routines:"
    print "Checking to_J2000"
    if not to_J2000("20040606" )==139752000 : die("FAILED YYYYMMDD test")
    if not to_J2000("040606" )==139752000 : die("FAILED YYMMDD test")
    if not to_J2000("20040606010101" )==139755661 : die("FAILED YYYYMMDDHHMMSS test")
    if not to_J2000("c" )==-36000.0 : die("FAILED HOURLETTER test")
    if not to_J2000("20040606010101.1" )==139755661.1 : die("FAILED YYYYMMDDHHMMSS.F test")
    if not to_J2000("20040606_010101" )==139755661 : die("FAILED YYYYMMDD_HHMMSS test")
    if not to_J2000("20040606_010101.1")==139755661.1 : die("FAILED YYYYMMDD_HHMMSS.F test")
    if not to_J2000("040606_010101" )==139755661 : die("FAILED YYMMDD_HHMMSS test")
    if not to_J2000("040606_010101.1" )==139755661.1 : die("FAILED YYMMDD_HHMMSS.F test")
    if not to_J2000("040606010101" )==139755661 : die("FAILED YYMMDDHHMMSS test")
    if not to_J2000("040606010101.1" )==139755661.1 : die("FAILED YYMMDDHHMMSS.F test")
    if not to_J2000("121212.1",'HHMMSS')==732.1 : die("FAILED HHMMSS test")
    if not to_J2000(10244201.1 )==10244201.1 : die("FAILED J2000 test")
    if not to_J2000((2004,6,6,1,1,1.1) )==139755661.1 : die("FAILED list test")
    if not to_J2000("103",'DOY' )==8769600 : die("FAILED DOY test")
    if not to_J2000("00103",'YYDOY' )==8769600 : die("FAILED YYDOY test")
    if not to_J2000("2004/06/06,01:01:01.1")==139755661.1: die("FAILED GAIMSTRING test")
    if not to_J2000("help158b01.04.tenet",'TENETHOURLY')==139755660.0 : die("FAILED TENETHOURLY test")
    print "Passed to_J2000"

    print "Checking from_J2000"
    if not from_J2000(139752000 ,"YYYYMMDD" )=="20040606" : die("FAILED YYYYMMDD test")
    if not from_J2000(139752000.1,"YYYYMMDD" )=="20040606" : die("FAILED YYYYMMDD test")
    if not from_J2000(139752000 ,"YYMMDD" )=="040606" : die("FAILED YYMMDD test")
    if not from_J2000(139752000.1,"YYMMDD" )=="040606" : die("FAILED YYMMDD test")
    if not from_J2000(139755661 ,"HOURLETTER" )=="b" : die("FAILED HOURLETTER test")
    if not from_J2000(139755661 ,"YYYYMMDDHHMMSS" )=="20040606010101" : die("FAILED YYYYMMDDHHMMSS test")
    if not from_J2000(139755661.1,"YYYYMMDDHHMMSS" )=="20040606010101.1" : die("FAILED YYYYMMDDHHMMSS.F test")
    if not from_J2000(139755661 ,"YYYYMMDD_HHMMSS")=="20040606_010101" : die("FAILED YYYYMMDD_HHMMSS test")
    if not from_J2000(139755661.1,"YYYYMMDD_HHMMSS")=="20040606_010101.1" : die("FAILED YYYYMMDD_HHMMSS.F test")
    if not from_J2000(139755661 ,"YYMMDD_HHMMSS" )=="040606_010101" : die("FAILED YYMMDD_HHMMSS test")
    if not from_J2000(139755661.1,"YYMMDD_HHMMSS" )=="040606_010101.1" : die("FAILED YYMMDD_HHMMSS.F test")
    if not from_J2000(139755661 ,"YYMMDDHHMMSS" )=="040606010101" : die("FAILED YYMMDDHHMMSS test")
    if not from_J2000(139755661.1,"YYMMDDHHMMSS" )=="040606010101.1" : die("FAILED YYMMDDHHMMSS.F test")
    if not from_J2000(732.1 ,"HHMMSS" )=="121212.1" : die("FAILED HHMMSS.F test")
    if not from_J2000(139752000.1,"J2000" )==139752000.1 : die("FAILED J2000 test")
#    (1,1.1) == (1,1.1000000001) ?!
#    if not from_J2000(139755661.1,"LIST" )==(2004,6,6,1,1,1.1) : die("FAILED LIST test")
    if not from_J2000(8769600 ,"DOY" )=="103" : die("FAILED DOY test")
    if not from_J2000(8769600 ,"YYDOY" )=="00103" : die("FAILED YYDOY test")
    if not from_J2000(139755661.1,"GAIMSTRING" )=="2004/06/06,01:01:01.1": die("FAILED GAIMSTRING test")
    if not from_J2000(139755661.1,"TENETHOURLY",'help')=="help158b01.04.tenet": die("FAILED TENETHOURLY test")
    print "Passed from_J2000"

    print "Testing IonoTime"
    if not IonoTime(0)+"a" =="20000101_120000a" : die("FAILED string coersion test")
    if not IonoTime(0)+1.0 ==1 : die("FAILED integer coersion test")
    if not IonoTime(0)+[1,2]==[2000,1,1,12,0,0,1,2] : die("FAILED list coersion test")
    if not IonoTime(0).addDays(2).addHours(2).addMinutes(2).addSeconds(2) == ((2*24+2)*60+2)*60+2: die("FAILED deltatime test")
    if not IonoTime(10) == IonoTime(10) : die("FAILED equivalence test")
    if not IonoTime(12) - IonoTime(10) == 2 : die("FAILED subtraction test")
    if not IonoTime(12) + IonoTime(10) == 22 : die("FAILED addition test")
    if not IonoTime(12).makemidnight().to('LOCALHMS',140) == "090000" : die("FAILED Midnight or LOCALHMS test")
    if not IonoTime(6576).floor('day').to('YYYYMMDDHHMMSS') == "20000101000000": die("FAILED floor test")
    print "Passed IonoTime"


def main(args):
    test()

if __name__ == "__main__":
    main(sys.argv[1:])

# ---------------------------------------------------------------------------
# climatology/clim/util/warn.py  (new file, mode 100644)
#
# warn.py -- Utility routines to print warning & error messages like --
#               "module: error message"
#
# NOTE(review): bare except is broad here; NameError is the expected case.
try: __file__
except: __file__ = 'warn.py' # ensure __file__ is set for warning messages
                             # each module file will execute
import sys, os
from inspect import getmodule, currentframe


def echo(*s):
    """Stringify & join any number of args and print resulting string to stdout."""
    sys.stdout.write(' '.join(map(str, s)) + '\n')

def echon(*s):
    """Same as echo() except join with newlines."""
    sys.stdout.write('\n'.join(map(str, s)) + '\n')

def echo2(*s):
    """Stringify & join any number of args and print resulting string to stderr."""
    sys.stderr.write(' '.join(map(str, s)) + '\n')

def echo2n(*s):
    """Same as echo2() except join with newlines."""
    sys.stderr.write('\n'.join(map(str, s)) + '\n')

def moduleName(file):
    """Extract a module name from the python source file name, with appended ':'."""
    return os.path.splitext(os.path.split(file)[1])[0] + ":"


# Each module must define these functions so that the module name is the proper file.

def warn(*s):
    """Print a warning message to stderr, identifying the module it came from."""
    echo2(moduleName(__file__)+':', *s)

def die(ss, status=1):
    """Print a warning message to stderr, and die with a non-zero status value.

    *ss* may be a single message string or a sequence of message parts.
    """
    # Fix: isinstance instead of `type(ss) == str`, which fails for str subclasses.
    if isinstance(ss, str):
        ss = [ss]
    warn(*ss)
    sys.exit(status)


# ---------------------------------------------------------------------------
# climatology/clim/util/wls.py  (new file, mode 100755)
#!/usr/bin/env python
#-----------------------------------------------------------------------------
# Name:        filelist.py
# Purpose:     File listing class/functions.
#
# Author:      Brian Wilson
#
# Created:     Mon Apr 10 11:01:06 2006
# Copyright:   (c) 2006, California Institute of Technology.
#              U.S. Government Sponsorship acknowledged.
#-----------------------------------------------------------------------------
#
USAGE = """
filelist.py [--help] [--bottomUp] [--directory] [--delete]
            [--fetchDir <outputDir>] [--fetchWitSubDirs]
            [--list] [--matchUrl] --quiet] [--regex '.*\.[cC]']
            [--size] [--topOnly] [--url]
            [--wildcard '*.txt.*'] [--xml] <topPaths ...>

Recursively traverse and print (with full paths or URL's) all files
under the topPath(s) that match ANY of one or more regular expressions
and/or wildcard glob) strings.  By default, it simply prints the matches,
but one can also get their sizes, fetch them, or delete them.

The topPaths can be a mixture of local and remote (ftp or http)
paths, in which case a list of URL's is returned.  If xml mode is
turned on, then the output is an XML list.

If no regex or wildcard patterns are specified, then ALL files
are returned.  If files are fetched, then the URL's are
REWRITTEN to point to the local copies.

"""
# See the bottom of the file for exact switches and example of use.

# NOTE(review): Python 2 only as written (urllib2/urlparse imports,
# `print >>stream`, raw_input, types constants).
import sys, os, re, string, getopt, types, getpass
import urllib, urllib2, urlparse, time, shutil, socket, stat
from fnmatch import fnmatchcase
from ftplib import FTP
#import dataenc

# NOTE(review): remoteUrl, makeFileUrl, parse_re_with_subst, walk, warn and
# die are referenced below but not defined in this portion of the file --
# presumably defined later in wls.py or imported; confirm.

def matchAnyThenConstrain(root, name, haveRegs, regs, haveWilds, wildCards,
                          constraintFunction):
    """Return True if the file name matches any of the compiled regular
    expressions or any of the wildcard (glob) specs, and (if present) the
    constraintFunction returns True.  The regex can be a pair of match &
    substitution patterns.  The 'name' of the file might be altered by a
    regex substitution and/or the constraintFunction.
    """
    if not haveRegs and not haveWilds:
        # No patterns at all: everything matches, subject only to the constraint.
        if constraintFunction is not None:
            return constraintFunction(root, name)
        else:
            return (True, name)
    else:
        match = False
        if haveRegs:
            for reg in regs:
                pattern, subst = reg
                if pattern.search(name):
                    match = True
                    if subst:
                        # Rewrite the matched name with the substitution pattern.
                        name = pattern.sub(subst, name)
                    break
        if haveWilds and not match:
            for wild in wildCards:
                if fnmatchcase(name, wild):
                    match = True
                    break
        if match and constraintFunction is not None:
            match, name = constraintFunction(root, name)
        return (match, name)


# Users call this function
def filelist(urlPaths, regSpecs=[], wildCards=[], needCredentials=False, userCredentials=None,
             matchFunction=matchAnyThenConstrain, constraintFunction=None,
             matchUrl=False, walkDirectories=True,
             urlMode=True, xmlMode=True, quietMode=False, verboseMode=False, getFileInfo=False,
             fetchDir=None, fetchIfNewer=False, fetchWithSubDirs=False,
             directoryMode=False, listMode=False, deleteMode=False, topDown=True,
             stream=sys.stdout):
    """Recursively traverse and print (with full paths or URL's) all files
    under the topPath(s) that match one or more regular expressions and/or
    wildcard (glob) strings, and an optional constraint (T/F) function to
    further winnow the candidate matches.  (The matchFunction can also be
    entirely replaced with custom logic.)

    By default, it simply generates the matches, but one can also fetch them,
    get their sizes, or delete them (if they are local files).
    Handles local directory paths and ftp/http URL's.

    Returns three file lists: matched, actually fetched, & destination names.
    """
    # NOTE(review): mutable default arguments (regSpecs=[], wildCards=[]) --
    # harmless here since they are only read, but fragile.
    try:
        matchedFiles = []       # source files that match criteria
        fetchedFiles = []       # files that were actually fetched this run
        destinationFiles = []   # destination (local) file names (rewritten URL)

        # Normalize the top paths: drop empties, absolutize local paths,
        # strip a trailing slash.
        topPaths = []
        for url in urlPaths:
            if url == '' or url == None: continue
            remote, protocol, netloc, path = remoteUrl(url)
            if not remote: url = os.path.abspath(url)
            if url[-1] == '/': url = url[:-1]
            topPaths.append(url)

        if needCredentials and userCredentials is None:
            userCredentials = promptForCredentials(topPaths)

        if fetchDir:
            workDir = os.path.join(fetchDir, '.tmp')
            # fetch into tmp directory & then rename so fetching is atomic
            try: os.mkdir(workDir)
            except: pass
            if not os.path.exists(workDir):
                die("filelist: Cannot write to fetch directory %s" % fetchDir)

        if isinstance(topPaths, types.StringType): topPaths = [topPaths]
        regSpecs = [s for s in regSpecs if s != '' and s != None]
        wildCards = [s for s in wildCards if s != '' and s != None]

        # Pre-compile the regex (pattern, substitution) pairs once.
        haveRegs = False; regs = []; haveWilds = False; haveMatchFunction = False
        if len(regSpecs) > 0:
            haveRegs = True
            regs = []
            for reg in regSpecs:
                (pattern, subst) = parse_re_with_subst(reg)
                regs.append( (re.compile(pattern), subst) )
        if len(wildCards) > 0:
            haveWilds = True

        prefix = ''
        extra = ''
        suffix = ''
        if deleteMode:
            suffix += ' deleted.'
            # NOTE(review): topPaths entries were abspath'd above, so '.' can
            # no longer appear in the list and this guard never fires -- confirm.
            if '.' in topPaths:
                die("filelist: Recursively deleting from the dot (.) path is not safe.  Shame.")

        if directoryMode: listMode = False
        if listMode: getFileInfo = True
        if quietMode: stream = None
        sumSizes = 0
        if xmlMode:
            matchedFiles.append('<files>')
            fetchedFiles.append('<files>')
            _output('<files>', destinationFiles, stream)
            prefix += ' <file>'
            suffix += '</file>'

        for top in topPaths:
            if verboseMode: warn('filelist: searching', top)
            topMatchCount = 0; topFetchCount = 0

            for root, dirs, files, infos in walk(top, userCredentials, walkDirectories, topDown):
                if verboseMode: warn('filelist: found files in', root)
                remote, protocol, netloc, path = remoteUrl(root)
                # In directory mode we match/emit directory names, not files.
                if directoryMode:
                    contents = dirs
                else:
                    contents = files

                for i in range(len(contents)):
                    line = ''
                    file = contents[i]
                    try:
                        info = infos[i]
                    except:
                        info = None
                    # Match against full path/URL or bare name, per matchUrl.
                    if matchUrl:
                        name = os.path.join(root, file)
                    else:
                        name = file

                    match, newname = matchFunction(root, name, haveRegs, regs,
                                                   haveWilds, wildCards, constraintFunction)
                    if match:
                        line = ''
                        topMatchCount += 1
                        fn = os.path.join(root, file)

                        # Gather size/mtime info when requested, or when local
                        # mtimes are needed for fetchIfNewer comparison.
                        if getFileInfo or (fetchIfNewer and not remote):
                            if remote:
                                if info and getFileInfo:
                                    if listMode: line = info.line
                                    extra = ' ' + str(info.size) + ' ' + str(info.modTime)
                                    sumSizes += info.size
                            else:
                                st = os.stat(fn)
                                line = ' '.join( map(str, \
                                    (st.st_mode, st.st_uid, st.st_gid, st.st_size, st.st_mtime, fn)))
                                info = FileInfo(line, st.st_size, st.st_mtime, st.st_uid, st.st_gid, st.st_mode)
                                if getFileInfo:
                                    extra = ' ' + str(info.size) + ' ' + str(info.modTime)
                                    sumSizes += info.size

                        if not remote and urlMode: fn = makeFileUrl(fn)
                        matchedFiles.append(prefix + fn + extra + suffix)

                        # The (possibly regex-rewritten) name determines the
                        # destination file name for fetching.
                        if matchUrl:
                            newfn = newname
                        else:
                            newfn = os.path.join(root, newname)
                        newr, newp, newloc, newpath = remoteUrl(newfn)
                        newfile = os.path.split(newpath)[1]

                        if fetchDir:
                            if fetchDir == '.': fetchDir = os.getcwd()
                            if fetchWithSubDirs:
                                destDir = os.path.join(fetchDir, newpath[1:])
                            else:
                                destDir = fetchDir
                            destFile = os.path.join(destDir, newfile)
                            tmpFile = os.path.join(workDir, newfile)

                            if shouldFetch(remote, destFile, fetchIfNewer, info):
                                if not quietMode:
                                    warn('filelist: Fetching ', fn)
                                    warn('filelist: Writing ', destFile)
                                try:
                                    os.makedirs(destDir)
                                except:
                                    # kludge, makedirs throws exception if any part of path exists
                                    pass
                                if remote:
                                    urllib.urlretrieve(fn, tmpFile)
                                else:
                                    shutil.copyfile(fn, tmpFile)
                                os.rename(tmpFile, destFile)  # atomic rename of file into destDir

                                topFetchCount += 1
                                fetchedFiles.append(prefix + fn + suffix)
                                if getFileInfo: line = line + ' ' + destFile

                            # now rewrite URL to point to local copy of file
                            fn = destFile
                            if not remote and urlMode: fn = makeFileUrl(fn)

                        if not listMode:
                            line = prefix + fn + extra + suffix
                        _output(line, destinationFiles, stream)
                        if deleteMode:
                            if remote:
                                die('filelist: Cannot delete remote files (yet)')
                            else:
                                os.unlink(fn)

            if verboseMode and fetchDir:
                warn('filelist: Matched %d files from %s' % (topMatchCount, top))
                warn('filelist: Fetched %d files from %s' % (topFetchCount, top))
        if fetchDir:
            # Clean up the temporary work directory.
            for f in os.listdir(workDir): os.remove(os.path.join(workDir, f))
            os.rmdir(workDir)

        if xmlMode:
            matchedFiles.append('</files>')
            fetchedFiles.append('</files>')
            # NOTE(review): an OPENING '<files>' tag is emitted here where the
            # closing '</files>' seems intended -- confirm before changing.
            _output('<files>', destinationFiles, stream)

        if getFileInfo:
            if xmlMode:
                line = '<totalSize>%s</totalSize>' % sumSizes
            else:
                line = '#filelist: total size %s' % sumSizes
            matchedFiles.append(line)
            _output(line, destinationFiles, stream)

    except KeyboardInterrupt:
        if fetchDir:
            for f in os.listdir(workDir): os.remove(os.path.join(workDir, f))
            os.rmdir(workDir)
        die('filelist: Keyboard Interrupt')

    return (matchedFiles, fetchedFiles, destinationFiles)


def shouldFetch(remote, destFile, fetchIfNewer, srcFileInfo):
    """Decide whether a source file should be copied to destFile.

    Remote files are fetched only if destFile does not exist yet; local
    sources additionally honor fetchIfNewer by comparing modification times.
    """
    if remote:
        if os.path.exists(destFile):
            doFetch = False
        else:
            doFetch = True
    else:
        if os.path.exists(destFile):
            if fetchIfNewer:
                destModTime = os.path.getmtime(destFile)
                if destModTime < srcFileInfo.modTime:
                    doFetch = True
                else:
                    doFetch = False
            else:
                doFetch = False
        else:
            doFetch = True
    return doFetch

def _output(line, lines, stream=None):
    """Internal function: Add line to output lines and optionally print to stream."""
    lines.append(line)
    if stream: print >>stream, line

class FileInfo:
    """Holder class for those file info. elements that are consistent among local
    files (output of stat), ftp directories, http, etc.  Minimum useful fields are
    modification time and size.  Line contains usual string output of ls -l.
    """
    def __init__(self, line, size, modTime, userId=None, groupId=None, protectMode=None):
        self.line=line; self.size=size; self.modTime=modTime
        self.userId=userId; self.groupId=groupId; self.protectMode=protectMode

class UserCredential(object):
    """Container for user credential info. like username, password, certificate, etc.
    """
    def __init__(self, username=None, password=None, validInterval=None, certificate=None):
        self.username = username
        self.password = password           # goes through the password property below
        self.validInterval = validInterval # tuple of Ints (days, hours, minutes)
        if password is not None and validInterval is None:
            die('UserCredential: If password is present, validInterval is also required.')
        self.certificate = certificate

    def getPassword(self):
        # NOTE(review): dataenc is only referenced via the commented-out
        # import at the top of the file, so this de/encryption path would
        # raise NameError if exercised -- confirm dataenc availability.
        pw = self._password
        if pw:
            pw, daynumber, timestamp = dataenc.pass_dec(pw)
            if dataenc.unexpired(daynumber, timestamp, self.validInterval):
                return pw
            else:
                # Credential has expired.
                return None
        else:
            return None
    def setPassword(self, pw):
        if pw and pw != '':
            self._password = dataenc.pass_enc(pw, daynumber=True, timestamp=True)
        else:
            self._password = pw
    password = property(getPassword, setPassword)

class UserCredentials:
    """Contains dictionary of (url, credential) pairs and optionally an httpProxy.
    """
    def __init__(self, httpProxy=None, credentials={}):
        # NOTE(review): mutable default argument -- instances created without
        # an explicit credentials dict SHARE one dictionary; confirm intent.
        self.httpProxy = httpProxy
        self.credentials = credentials
    def add(self, url, credential):
        self.credentials[url] = credential; return self
    def forUrl(self, url):
        # Returns the credential of the first registered key that is a prefix
        # of the url (dict iteration order, not longest-prefix).
        for key in self.credentials:
            if url.startswith(key):
                return self.credentials[key]
        return None

def promptForCredentials(urls, httpProxy=None):
    """Interactively prompt (stdin/tty) for a proxy and one credential per
    remote url; local paths are skipped."""
    if httpProxy == None:
        httpProxy = raw_input('Enter HTTP proxy [none]: ')
        if httpProxy == '': httpProxy = None
    credentials = UserCredentials(httpProxy)
    localUserName = getpass.getuser()
    for url in urls:
        remote, protocol, netloc, path = remoteUrl(url)
        if remote:
            username, password, validInterval = promptForCredential(url, localUserName)
            credential = UserCredential(username, password, validInterval)
            credentials.add(url, credential)
    return credentials

def promptForCredential(url, localUserName):
    """Prompt for (username, password, validInterval) for one url; ftp
    defaults to anonymous, otherwise to the local user name."""
    remote, protocol, netloc, path = remoteUrl(url)
    if protocol == 'ftp':
        defaultUserName = 'anonymous'
    else:
        defaultUserName = localUserName
    username = raw_input('Need credentials for URL %s\nUsername [%s]: ' \
                         % (url, defaultUserName))
    if username == '': username = defaultUserName
    password = ''
    while password == '':
        password = getpass.getpass()
    validInterval = [0, 1, 0]
    if password != '':
        response = raw_input('Enter valid time period for credential [(days, hours, minutes) = 0 1 0]: ')
        if response != '':
            validInterval = response.split()
    return (username, password, validInterval)

class DirectoryWalker:
    """Recursively walk directories using the protocol specified in a URL.
    Sublclasses handle ftp, http, sftp, local file system, etc.
    """
    def __init__(self, userCredentials=None, retries=3, sleepTime=5):
        self.userCredentials = userCredentials
        self.retries = retries       # retry count for transient remote failures
        self.sleepTime = sleepTime   # seconds to sleep between retries

    def walk(self, top, walkDirectories=True):
        """Recursively walk directories on a remote site to retrieve file lists.
+ """ + remote, protocol, netloc, path = remoteUrl(top) + status, dir_listing = self.retrieveDirList(top) + if status: + if len(dir_listing) == 0: + yield (top, [], [], []) + else: + (dirs, files, infos) = self.parseDirList(dir_listing, path) + yield (top, dirs, files, infos) + + if walkDirectories: + for dir in dirs: + # Depth-first recursion + for root, dirs, files, infos in self.walk(top + '/' + dir, walkDirectories): + yield (root, dirs, files, infos) + else: + warn('DirectoryWalker: error, unable to retrieve directory listing at', top) + yield (top, [], [], []) + + def retrieveDirList(self, url): + """Retrieve directory listing as a list of text lines. Returns (status, dirList).""" + pass + def parseDirList(self, dirList, path=None): + """Parse directory listing (text) and return three lists (dirs, files, fileInfos).""" + pass + +class FtpDirectoryWalker(DirectoryWalker): + """Recursively walk directories on an ftp site.""" + def __init__(self, userCredentials=None, retries=3, sleepTime=5): + DirectoryWalker.__init__(self, userCredentials, retries, sleepTime) + + def retrieveDirList(self, url): + """Retrieve a directory listing via ftp with retries. + """ + remote, protocol, netloc, path = remoteUrl(url) + credential = None + if self.userCredentials: + credential = self.userCredentials.forUrl(url) + dir = ''; dir_list = [] + ftp = FTP() + for i in range(self.retries): + try: + ftp.connect(netloc) + if credential is None or \ + credential.username == 'anonymous' or \ + credential.username == '': + ftp.login() + else: + ftp.login(credential.username, credential.password) + ftp.cwd(path) + ftp.retrlines('LIST', dir_list.append) + ftp.quit() + dir = '\n'.join(dir_list) + return (True, dir) + except: + pass + time.sleep(self.sleepTime) + warn('FtpDirectoryWalker: connect retry to ', netloc, path) + return (False, dir) + + def parseDirList(self, dir, path=None): + """Parse long directory listing returned by ftp or (ls -l). 
+ Separate entries into directories and files. + """ + dirs = []; files = []; infos = [] + for entry in dir.split('\n'): + fields = entry.split() + if len(fields) < 7: continue + fn = fields[-1] + if fn == '.' or fn == '..': continue + if re.match('^d', fields[0])and fields[0][7] == 'r': + dirs.append(fn) + else: + files.append(fn) + info = FileInfo(entry, int(fields[4]), '-'.join(fields[5:8]), \ + fields[2], fields[3], fields[0]) + infos.append(info) + return (dirs, files, infos) + +class DirListingParser(object): + """Base class for directory listing parsers.""" + def __init__(self, regex): + self.regex = regex + self.compiledRegex = re.compile(self.regex) + + def parse(self, dir, listingHtml): + """Return (dirs, files, infos).""" + dirs = []; files = []; infos = [] + raise NotImplementedError, "Override this method in sub class." + +class ApacheDirListingParser(DirListingParser): + """Parser class for apache.""" + def parse(self, dir, listingHtml): + dirs = []; files = []; infos = [] + items = self.compiledRegex.findall(listingHtml) + for item, itemName in items: + if itemName.strip() == 'Parent Directory': continue + if isinstance(item, str): + name = item + else: + name, dateTime, size = item[:] + + if name.endswith('/'): + type = 'd' + dirs.append(name[:-1]) + else: + type = '-' + files.append(name) + #not doing file info + ''' + size = size.lower() + if size.endswith('k'): + size = int(size[:-1]) * 1024 + elif size.endswith('m'): + size = int(size[:-1]) * 1024 * 1024 + else: + size = -1 + line = '%s--------- 1 ? ? 
%15d %s %s' % (type, size, dateTime, name) + info = FileInfo(line, size, dateTime) + ''' + infos.append(None) + return (dirs, files, infos) + +class CDAACDirListingParser(DirListingParser): + """Parser class for CDAAC data server.""" + def parse(self, dir, listingHtml): + dirs = []; files = []; infos = [] + items = self.compiledRegex.findall(listingHtml) + for item, itemName in items: + if itemName.strip() == 'Parent Directory': continue + if isinstance(item, str): + name = item + else: + name, dateTime, size = item[:] + if name.endswith('/'): + type = 'd' + dirs.append(name) + else: + type = '-' + files.append(name) + #not doing file info + ''' + size = size.lower() + if size.endswith('k'): + size = int(size[:-1]) * 1024 + elif size.endswith('m'): + size = int(size[:-1]) * 1024 * 1024 + else: + size = -1 + line = '%s--------- 1 ? ? %15d %s %s' % (type, size, dateTime, name) + info = FileInfo(line, size, dateTime) + ''' + infos.append(None) + return (dirs, files, infos) + +class HttpDirectoryWalker(DirectoryWalker): + """Recursively walk directories on an http (web) site to retrieve file lists. + Handles many styles of HTML directory listings, but still very FRAGILE. 
+ """ + + #list of directory listing parser plugins + DIR_LIST_REGEX_PLUGINS = [ + #apache 2.0.55 directory listing + ApacheDirListingParser(r'(?i)alt="\[.*?\]">\s*<A HREF="(?P<name>.*?)">(.*?)</A>'), + #CDAAC (COSMIC Data) + CDAACDirListingParser(r'(?i)<LI><A HREF="(?P<name>.*?)">(.*?)</A>'), + ] + + def __init__(self, userCredentials=None, retries=3, sleepTime=5): + DirectoryWalker.__init__(self, userCredentials, retries, sleepTime) + if self.userCredentials: + if self.userCredentials.httpProxy: + os.environ['http_proxy'] = self.userCredentials.httpProxy + # global kludge, default proxyHandler looks up proxy there + passwordMgr = urllib2.HTTPPasswordMgrWithDefaultRealm() + for url, cred in self.userCredentials.credentials.iteritems(): + passwordMgr.add_password(None, url, cred.username, cred.password) + authHandler = urllib2.HTTPBasicAuthHandler(passwordMgr) + opener = urllib2.build_opener(authHandler) + else: +# opener = urllib2.build_opener() + opener = None +# opener.add_headers = [('User-agent', 'Mozilla/5.0')] + self.opener = opener + + def retrieveDirList(self, url): + """Retrieve an HTML directory listing via http with retries. 
+ """ +### url = os.path.join(url, 'contents.html') ### hack for DAP servers at GES-DISC + dir_listing = '' + proxies = {} + for i in range(self.retries): + try: + if self.opener: + response = self.opener.open(url) + else: + response = urllib.urlopen(url) + except IOError, e: + if hasattr(e, 'reason'): + warn('HttpDirectoryWalker: Error, failed to reach server because: %s' % e.reason) + elif hasattr(e, 'code'): + warn('HttpDirectoryWalker: Server could not fulfill request, error code %s' % e.code) + else: + dir_listing = response.read() + return (True, dir_listing) + time.sleep(self.sleepTime) + warn('HttpDirectoryWalker: retrying ', url) + return (False, dir_listing) + + reDirPath = re.compile(r'(?i)<H1>.*?Index of\s*?(\S+?)\s*?</H1>') + + def parseDirList(self, dir, path): + """Parse fragile HTML directory listings returned by various HTTP servers, + including Apache and OpenDAP. Separate entries into directories and files. + """ + dirs = []; files = []; infos = [] + if path: + match = HttpDirectoryWalker.reDirPath.search(dir) + if not match: + die('HttpDirectoryWalker: Cannot find directory name %s in HTML listing:\n%s' % (path, dir)) + dirName = match.group(1) + if dirName not in path: + warn('HttpDirectoryWalker: Directory name %s in HTML listing does not agree with path %s:\n%s' % (dirName, path, dir)) + + # Try to find directory lines that contain file info + reDirListWithStat = re.compile( \ + r'(?i)<A HREF=[\'"]*?(?P<name>[^\?].*?' + dirName + r'.*?)[\'"]*?>.*?</A>\s*(?P<dateTime>\S+ \S+)\s+?(?P<size>\S+)\s*?$') + items = reDirListWithStat.findall(dir) + # If not, then try to find simple directory lines + if len(items) == 0: + reDirList = re.compile( \ + r'(?i)<A HREF=[\'"]*?(?P<name>[^\?].*?' + dirName + r'.*?)[\'"]*?>.*?</A>') + items = reDirList.findall(dir) + + if len(items) != 0: + dateTime = '? 
?'; size = '' + for item in items: + if isinstance(item, str): + name = item + else: + name, dateTime, size = item[:] + if dirName not in name: continue + + if name.endswith('/'): + type = 'd' + dirs.append(name) + else: + type = '-' + files.append(name) + size = size.lower() + if size.endswith('k'): + size = int(size[:-1]) * 1024 + elif size.endswith('m'): + size = int(size[:-1]) * 1024 * 1024 + else: + size = -1 + line = '%s--------- 1 ? ? %15d %s %s' % (type, size, dateTime, name) + info = FileInfo(line, size, dateTime) + infos.append(info) + print line + + #try plugins + else: + for plugin in self.DIR_LIST_REGEX_PLUGINS: + pluginResults = plugin.parse(dirName, dir) + if len(pluginResults[0]) != 0 or len(pluginResults[1]) != 0 or \ + len(pluginResults[2]) != 0: return pluginResults + + return (dirs, files, infos) + + +def walk(top, userCredentials=None, walkDirectories=True, topDown=True): + """Recursively walk directories to retrieve file lists. + Returns the topPath, contained subdirectories and files, and + optionally FileInfo objects (if info is included in protocol results). + Handles local directory paths and ftp/http protocols (URL's). 
+ """ + remote, protocol, netloc, path = remoteUrl(top) + if remote: + if protocol == 'ftp': + ftpWalker = FtpDirectoryWalker(userCredentials) + for root, dirs, files, infos in ftpWalker.walk(top, walkDirectories): + yield (root, dirs, files, infos) + elif protocol == 'http': +# import pdb; pdb.set_trace() + httpWalker = HttpDirectoryWalker(userCredentials) + for root, dirs, files,infos in httpWalker.walk(top, walkDirectories): + yield (root, dirs, files, infos) + elif protocol == 'sftp': + sftpWalker = SftpDirectoryWalker(userCredentials) + for root, dirs, files,infos in sftpWalker.walk(top, walkDirectories): + yield (root, dirs, files, infos) + else: + die('filelist: Cannot handle protocol ', protocol) + else: + if walkDirectories: + for root, dirs, files in os.walk(top, topDown): + yield (root, dirs, files, []) + else: + files = os.listdir(top) + yield (top, [], files, []) + +def remoteUrl(url): + """Returns True if the URL is remote; also returns protocol, + net location (host:port), and path.""" + protocol, netloc, path, params, query, fragment = urlparse.urlparse(url) + if protocol == '': + return (False, protocol, netloc, path) + else: + return (True, protocol, netloc, path) + + +# utils +RE_WITH_SUBST_PATTERN = re.compile(r'^s/(.+)/(.+)/$') +def parse_re_with_subst(str): + match = RE_WITH_SUBST_PATTERN.match(str) + if match: + return (match.group(1), match.group(2)) + else: + return (str, None) + +def hostName(): + return socket.gethostbyaddr(socket.gethostname())[0] + +FILE_URL_PREFIX = 'file://' + hostName() +def makeFileUrl(file): + return FILE_URL_PREFIX + file + +def warn(*str): sys.stderr.write(' '.join(str) + '\n') +def die(str, status=1): warn(str); sys.exit(status) + +def main(): + """Main function for outside scripts to call.""" + + from sys import argv + + if len(argv) < 2: die(USAGE) + try: + opts, argv = getopt.getopt(argv[1:], 'hbcdf:ilqr:stuvw:x', + ['help', 'bottomUp', 'credentials', 'delete', 'directory', + 'fetchDir=', 'fetchIfNewer', 
'fetchWithSubDirs', 'info', + 'list', 'quiet', 'regex=', 'size', 'topOnly', + 'url', 'verbose', 'wildcard=', 'xml']) + except getopt.GetoptError, (msg, bad_opt): + die("%s error: Bad option: %s, %s" % (argv[0], bad_opt, msg)) + + regSpecs = []; wildCards = []; matchUrl=False; walkDirectories = True + needCredentials = False; userCredentials = None + urlMode=False; xmlMode=False; quietMode=False; verboseMode=False; getFileInfo=False + fetchDir = None; fetchIfNewer=False; fetchWithSubDirs=False + directoryMode = False; deleteMode = False; topDown = True; listMode = False + + for opt, val in opts: + if opt in ('-h', '--help'): die(USAGE) + elif opt in ('-b', '--bottomUp'): topDown = False + elif opt in ('-c', '--credentials'): needCredentials = True + elif opt in ('-d', '--directory'): directoryMode=True + elif opt in ('--delete'): deleteMode=True + elif opt in ('-f', '--fetchDir'): fetchDir = val + # retrieve remote files to this dir + elif opt in ('--fetchIfNewer'): fetchIfNewer=True + # only fetch if src file is newer than existing dest file + elif opt in ('--fetchWithSubDirs'): fetchWithSubDirs=True + # mirror subdirectories when fetching + elif opt in ('-i', '--info'): getFileInfo=True + elif opt in ('-l', '--list'): listMode=True + elif opt in ('-m', '--matchUrl'): matchUrl=True + # regexs match entire URL/path, not just file name + elif opt in ('-q', '--quiet'): quietMode=True + # don't print files during walk + elif opt in ('-r', '--regex'): regSpecs.append(val) + elif opt in ('-s', '--size'): sizeMode=True + elif opt in ('-t', '--topOnly'): walkDirectories=False + elif opt in ('-u', '--url'): urlMode=True + # return URL's (file:, ftp:, http:, etc.) 
+ elif opt in ('-v', '--verbose'): verboseMode=True + elif opt in ('-w', '--wildcard'): wildCards.append(val) + elif opt in ('-x', '--xml'): xmlMode=True # return list in XML format + else: die(USAGE) + +# import pdb; pdb.set_trace() + + matchedFiles, fetchedFiles, destinationFiles = \ + filelist(argv, regSpecs, wildCards, needCredentials, userCredentials, + matchAnyThenConstrain, None, matchUrl, walkDirectories, + urlMode, xmlMode, quietMode, verboseMode, getFileInfo, + fetchDir, fetchIfNewer, fetchWithSubDirs, + directoryMode, listMode, deleteMode, topDown) + + if quietMode: + if listMode == 'match': + print matchedFiles + elif listMode == 'fetch': + print fetchedFiles + elif listMode == 'destination': + print destinationFiles + else: + pass + + +if __name__ == '__main__': main() http://git-wip-us.apache.org/repos/asf/incubator-sdap-nexus/blob/ff98fa34/climatology/clim/variables.py ---------------------------------------------------------------------- diff --git a/climatology/clim/variables.py b/climatology/clim/variables.py new file mode 100755 index 0000000..c5136f0 --- /dev/null +++ b/climatology/clim/variables.py @@ -0,0 +1,140 @@ +""" + variables.py + +Interface to Get Variables out of EOS HDF4/5 and netCDF3/4 files, with +smart dataset discovery and variable caching behind it. + +""" + +import sys, os, urlparse, time +#from pyhdf.SD import SD, SDC +import netCDF4 +#from pydap.client import open_url +import numpy as N + + +def getVariables(url, varNames=None, vars={}, kind=None, arrayOnly=False, order='C', retries=2, sleep=1, set_auto_scale=True, set_auto_mask=True): + """Interface function to get variables from many file formats or via DAP. 
Here kludge for special case.""" + urlStr = url + url = urlparse.urlparse(url) + path = url.path + + if varNames is None: + varNames = url.query.split(',') + else: + if isinstance(varNames, tuple): + vars = [] + if url.scheme == 'http': + if 'dap' in urlStr.lower(): + if kind is None: kind = 'dap' + if url.query == '': + urlStr = urlStr + '?' + ','.join(varNames) + else: + if kind is None: kind = 'w10n' + + if url.scheme == '': + if kind is None: + kind = fileKind(path) + else: + kind = kind.lower() + + if kind == 'h5' or kind == 'hdf5': + pass + + elif kind == 'hdf' or kind == 'hdf4': + d = SD(path, SDC.READ) + if varNames == 'ALL': + varNames = d.datasets().keys() + for varName in varNames: + var = d.select(varName) + if arrayOnly: + if order == 'F': + var = N.array(var[:], order='F') + else: + var = var[:] + if isinstance(vars, list): + vars.append(var) + else: + vars[varName] = var + if not isinstance(vars, list): + vars['_fileHandle'] = d + + elif kind == 'nc': + d = netCDF4.Dataset(path) + d.set_auto_scale(set_auto_scale) + d.set_auto_mask(set_auto_mask) + if varNames == 'ALL': + varNames = d.variables.keys() + for varName in varNames: + var = d.variables[varName] + if arrayOnly: + if order == 'F': + var = N.array(var[:], order='F') + else: + var = var[:] + if isinstance(vars, list): + vars.append(var) + else: + vars[varName] = var + if not isinstance(vars, list): + vars['_fileHandle'] = d + + else: + if kind == 'dap': + print >>sys.stderr, 'DAP get of: %s' % urlStr + retries += 1 + retriesSave = retries + while retries > 0: + try: + d = open_url(urlStr) + retries = 0 + except: + retries -= 1 + if retries == 0: + print >>sys.stderr, 'getVariables: Error, DAP cannot open: %s' % urlStr + return (vars, d) + time.sleep(sleep) + + if varNames == 'ALL': + varNames = d.keys() + + for varName in varNames: + var = d[varName] + retries = retriesSave + while retries > 0: + try: + if arrayOnly: + if order == 'F': + var = N.array(var[:], order='F') + else: + var = var[:] 
# actually does DAP call to read array + retries = 0 + except: + retries -= 1 + if retries == 0: + print >>sys.stderr, 'getVariables: Error, DAP cannot get variable: %s' % varName + else: + time.sleep(sleep) + + if isinstance(vars, list): + vars.append(var) + else: + vars[varName] = var + if not isinstance(vars, list): + vars['_fileHandle'] = d + + + elif kind == 'w10n': + vars = None + return (vars, d) + + +def close(fh): + if hasattr(fh, 'end'): + fh.end() + elif hasattr(fh, 'close'): + fh.close() + +def fileKind(path): + return os.path.splitext(path)[1][1:].lower() +
