Re: What is "self"?

2005-09-23 Thread Michael Spencer
Ron Adam wrote:
> Erik Max Francis wrote:
> 
>>Ron Adam wrote:
>>
>>
>>>When you call a method of an instance, Python translates it to...
>>>
>>> leader.set_name(leader, "John")
>>
>>
>>It actually translates it to
>>
>>Person.set_name(leader, "John")
>>
> 
> 
> I thought that I might have missed something there.
> 
> Is there a paper on how python accesses and stores instance data and 
> methods?  I googled but couldn't find anything that addressed this 
> particular question.
> 
>  >>> class a(object):
>  ...     def x(self):
>  ...         print 'x'
>  ...
>  >>> b = a()
>  >>> b
>  <__main__.a object at 0x009D1890>
>  >>> b.x
>  <bound method a.x of <__main__.a object at 0x009D1890>>
> 
> So what exactly is a bound method object?  Does it possibly translate 
> to something like the following?
> 
>  def x(*args, **kwds):
>      self = ?
>      return __class__.self(self, *args, **kwds)
> 
> Cheers,
> Ron

All is explained at:
http://users.rcn.com/python/download/Descriptor.htm#functions-and-methods
and further at:
http://www.python.org/pycon/2005/papers/36/pyc05_bla_dp.pdf

"For objects, the machinery is in object.__getattribute__ which transforms b.x 
into type(b).__dict__['x'].__get__(b, type(b))."

What follows is my interpretation - hope it's correct:

# what exactly is a bound method object?
# Illustrate b.f => type(b).__dict__['x'].__get__(b, type(b))

  >>> class B(object):
  ...     def f(self, x):
  ...         return x or 42
  ...
  >>> b = B()
  >>> type(b).__dict__['f']   # a plain old function
  <function f at 0x...>
  >>> _.__get__(b, type(b))   # invoke the descriptor protocol to make a bound method
  <bound method B.f of <__main__.B object at 0x...>>
  >>>

You don't have to use object.__getattribute__ to get a bound method.  Nor does 
the function have to be in the class dictionary.  You can just call any 
function descriptor yourself:

  >>> def g(self, y):
  ...     return self.f(y)
  ...
  >>> boundg = g.__get__(b)  # bind to B instance
  >>> boundg
  <bound method ?.g of <__main__.B object at 0x...>>
  >>> boundg(0)
  42
  >>>

Looked at this way, function.__get__ just does partial function application 
(aka currying).

  >>> def f(x, y):
  ...     return x + y
  ...
  >>> add42 = f.__get__(42)
  >>> add42
  <bound method ?.f of 42>
  >>> add42(1)
  43


Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Wrapping classes

2005-09-23 Thread Michael Spencer
Jeremy Sanders wrote:
> Colin J. Williams wrote:
> 
> 
>>Could you not have functions a and b each of which returns a NumArray
>>instance?
>>
>>Your expression would then be something like a(..)+2*b(..).
> 
> 
> The user enters the expression (yes - I'm aware of the possible security
> issues), as it is a scientific application. I don't think they'd like to
> put () after each variable name.
> 
> I could always munge the expression after the user enters it, of course.
> 
> Jeremy
> 
Alternatively, you could build your own expression calculator, and initialize 
the objects if necessary as they are evaluated.  If you are happy with Python 
syntax for your expressions, then the stdlib compiler package is helpful.  The 
example below is not tested beyond what you see.  It's a bit verbose, but most 
of the code is boilerplate.

  >>> a = 3
  >>> b = 4
  >>> calc('a * b')
  using a
  using b
  12
  >>> calc('a * b ** (b - a) * "a"')
  using a
  using b
  using b
  using a
  'aaaaaaaaaaaa'
  >>> calc("0 and a or b")
  using b
  4
  >>> calc("1 and a or b")
  using a
  3
  >>> calc("1 and a or c")
  using a
  3
  >>> calc("0 and a or c")
  Undefined symbol: c
  >>>


HTH, Michael

-

import compiler
import operator


class CalcError(Exception):
    def __init__(self, error, descr=None, node=None):
        self.error = error
        self.descr = descr
        self.node = node

    def __repr__(self):
        return "%s: %s" % (self.error, self.descr)
    __str__ = __repr__


class LazyCalc(object):

    def __init__(self, namespace):
        self._cache = {}  # dispatch table
        self.context = namespace

    def visit(self, node, **kw):
        cls = node.__class__
        meth = self._cache.setdefault(cls,
            getattr(self, 'visit' + cls.__name__, self.default))
        return meth(node, **kw)

    def visitExpression(self, node, **kw):
        return self.visit(node.node)

    # Binary Ops
    def visitAdd(self, node, **kw):
        return self.visit(node.left) + self.visit(node.right)
    def visitDiv(self, node, **kw):
        return self.visit(node.left) / self.visit(node.right)
    def visitFloorDiv(self, node, **kw):
        return self.visit(node.left) // self.visit(node.right)
    def visitLeftShift(self, node, **kw):
        return self.visit(node.left) << self.visit(node.right)
    def visitMod(self, node, **kw):
        return self.visit(node.left) % self.visit(node.right)
    def visitMul(self, node, **kw):
        return self.visit(node.left) * self.visit(node.right)
    def visitPower(self, node, **kw):
        return self.visit(node.left) ** self.visit(node.right)
    def visitRightShift(self, node, **kw):
        return self.visit(node.left) >> self.visit(node.right)
    def visitSub(self, node, **kw):
        return self.visit(node.left) - self.visit(node.right)

    # Unary ops
    def visitNot(self, node, **kw):
        return not self.visit(node.expr)
    def visitUnarySub(self, node, **kw):
        return -self.visit(node.expr)
    def visitInvert(self, node, **kw):
        return ~self.visit(node.expr)
    def visitUnaryAdd(self, node, **kw):
        return +self.visit(node.expr)

    # Flow Control
    def visitAnd(self, node, **kw):
        for arg in node.nodes:
            val = self.visit(arg)
            if not val:
                return val
        return val
    def visitOr(self, node, **kw):
        for arg in node.nodes:
            val = self.visit(arg)
            if val:
                return val
        return val

    # Logical Ops
    def visitBitand(self, node, **kw):
        return reduce(lambda a, b: a & b, [self.visit(arg) for arg in node.nodes])
    def visitBitor(self, node, **kw):
        return reduce(lambda a, b: a | b, [self.visit(arg) for arg in node.nodes])
    def visitBitxor(self, node, **kw):
        return reduce(lambda a, b: a ^ b, [self.visit(arg) for arg in node.nodes])
    def visitCompare(self, node, **kw):
        comparisons = {
            "<": operator.lt,          # strictly less than
            "<=": operator.le,         # less than or equal
            ">": operator.gt,          # strictly greater than
            ">=": operator.ge,         # greater than or equal
            "==": operator.eq,         # equal
            "!=": operator.ne,         # not equal
            "<>": operator.ne,         # not equal
            "is": operator.is_,        # object identity
            "is not": operator.is_not  # negated object identity
            }
        obj = self.visit(node.expr)
        for op, compnode in node.ops:
            compobj = self.visit(compnode)
            if not comparisons[op](obj, compobj):
                return False
            obj = compobj
        return True

    # Values
    def visitCallFunc(self, node, **kw):
        raise CalcError("Functions not supported", node.node)

    def visitName(self, node, **kw):
        """Lazy evaluation: resolve a name only when it is needed"""
        name = node.name
        try:
            val = eval(name, self.context)
        except NameError:
            raise CalcError("Undefined symbol", name, node)
        print "using", name
        return val
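
A minimal driver for the calc() examples above might look like this (a sketch 
only: the archived code is cut off here, and the frame-globals lookup is my 
assumption):

import sys

def calc(expr):
    # Evaluate expr against the caller's global namespace,
    # resolving names lazily in LazyCalc.visitName
    namespace = sys._getframe(1).f_globals
    return LazyCalc(namespace).visit(compiler.parse(expr, "eval"))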

Re: What is "self"?

2005-09-27 Thread Michael Spencer
Ron Adam wrote:
> What I've noticed is you can block the visibility of a class attribute, 
> which includes methods, by inserting an object in the instance with the 
> same name.
> 
[snip example of this behavior]

Yes, that's true for "non-data descriptors" (see last two bullets below)

Raymond Hettinger [http://users.rcn.com/python/download/Descriptor.htm]
 >
 > The important points to remember are:
 >
 > * descriptors are invoked by the __getattribute__ method
 > * overriding __getattribute__ prevents automatic descriptor calls
 > * __getattribute__ is only available with new style classes and objects
 > * object.__getattribute__ and type.__getattribute__ make different calls 
to __get__.
 > * data descriptors always override instance dictionaries.
 > * non-data descriptors may be overridden by instance dictionaries.
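
For example, a quick sketch of the last two bullets (a plain method is a 
non-data descriptor; a property is a data descriptor):

  >>> class C(object):
  ...     def m(self):
  ...         return "method"
  ...     p = property(lambda self: "property")
  ...
  >>> c = C()
  >>> c.__dict__["m"] = "shadowed"   # non-data descriptor: instance dict wins
  >>> c.m
  'shadowed'
  >>> c.__dict__["p"] = "ignored"    # data descriptor: class wins
  >>> c.p
  'property'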

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Silly function call lookup stuff?

2005-09-27 Thread Michael Spencer
Lucas Lemmens wrote:
> Dear pythonians,
> 
> I've been reading/thinking about the famous function call speedup 
> trick where you use a function in the local context to represent 
> a "remoter" function to speed up the 'function lookup'.
> 
> "This is especially usefull in a loop where you call the function a 
> zillion time" they say.
> 
> I think this is very odd behavior. 
> 
> Why isn't the result of the first function-lookup cached so that following
> function calls don't need to do the function-lookup at all?
> 
I guess because the function name may be re-bound between loop iterations.  Are 
there good applications of this?  I don't know.
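
A contrived sketch of why that caching would change behavior:

  >>> def f(): return "one"
  ...
  >>> def g():
  ...     global f
  ...     out = []
  ...     for i in range(2):
  ...         out.append(f())          # global lookup on every pass
  ...         def f(): return "two"    # re-binds the name mid-loop
  ...     return out
  ...
  >>> g()
  ['one', 'two']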

> And if the context changes (an import-statement say) reset the
> cached 'function-lookups'.

In general an object doesn't know what names are bound to it and there are many 
ways besides an import statement of binding/re-binding, so "if the context 
changes" is easier said than done.

> 
> This way any function would only need to be looked up once.
> 
> L.
> 
Would you apply this optimization to all lookups in outer scopes, or just 
callables?  Why? ;-)

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: grouping array

2005-09-29 Thread Michael Spencer
[EMAIL PROTECTED] wrote:
> hi if I have an array
> 
> say x = [[2,2,0,0,1,1],
>  [1,1,0,0,1,1],
>  [1,1,0,0,1,1]]
> I basically want to group regions that are non zero like I want to get
> the coordinates of non zero regions..as (x1,y1,x2,y2)
> [(0,0,2,1),(0,4,2,5)] which show the top left(x1,y1) and bottom
> right(x2,y2) corners of each group.hope i am clear.
> 
> Thanks
> 
How about this:


def getregions(grid):
    """Yield lists of adjacent points, not necessarily rectangular"""
    adj = [(-1,0),(+1,0),(0,-1),(0,+1)]  # horizontal and vertical adjacencies
    # could add diagonals

    points = set((y,x) for y, row in enumerate(grid)
                       for x, cell in enumerate(row)
                       if cell)

    while points:                 # set of (y,x) non-zero points
        region = [points.pop()]   # start a new region with any remaining point
        ptr = 0
        while ptr < len(region):
            y, x = region[ptr]
            adjpoints = set((y + j, x + i) for j, i in adj)
            adjpoints &= points       # keep only the non-zero, unseen points
            points -= adjpoints       # remove these adjacencies from points
            region.extend(adjpoints)  # add them to the region
            ptr += 1
        yield region

def getregioncoords(grid):
    """Get top left and bottom right of *rectangular* regions"""
    regions = getregions(grid)
    return [(reg[0], reg[-1]) for reg in regions if reg.sort() or True]


  >>> x = [[2,2,0,0,1,1],
  ...      [1,1,0,0,1,1],
  ...      [1,1,0,0,1,1]]
  >>> getregioncoords(x)
  [((0, 0), (2, 1)), ((0, 4), (2, 5))]
  >>> x = [[1,0,1,0,1]]
  >>> getregioncoords(x)
  [((0, 0), (0, 0)), ((0, 2), (0, 2)), ((0, 4), (0, 4))]
  >>> import random, pprint
  >>> x = [[random.choice([0,1,2]) for x in range(6)] for y in range(6)]
  >>> pprint.pprint(x)
  [[1, 1, 2, 1, 2, 0],
   [2, 0, 0, 2, 0, 1],
   [1, 2, 2, 0, 2, 0],
   [0, 1, 0, 0, 0, 0],
   [2, 0, 0, 1, 1, 0],
   [2, 2, 2, 0, 1, 0]]
  >>> print "\n".join(str(reg) for reg in getregions(x))
  [(0, 1), (0, 0), (0, 2), (1, 0), (0, 3), (2, 0), (1, 3), (0, 4), (2, 1), (3, 1), (2, 2)]
  [(5, 4), (4, 4), (4, 3)]
  [(5, 0), (5, 1), (4, 0), (5, 2)]
  [(1, 5)]
  [(2, 4)]
  >>>

Unfortunately, it's rather slow.  This one is much faster, using just one data 
structure

def getregions2(grid):
    """Yield lists of adjacent points, not necessarily rectangular"""
    adj = [(-1,0),(+1,0),(0,-1),(0,+1)]  # horizontal and vertical adjacencies
    # could add diagonals
    rows = len(grid)
    cols = len(grid[0])
    griddata = []
    for row in grid:
        griddata.extend(row)
    for y in xrange(rows):
        ybase = y * cols
        for x in xrange(cols):
            if griddata[ybase + x]:
                griddata[ybase + x] = 0
                region = [(y, x)]
                append = region.append
                ptr = 0
                while ptr < len(region):
                    y1, x1 = region[ptr]
                    for j, i in adj:
                        y2, x2 = y1 + j, x1 + i
                        if y2 < 0 or y2 == rows: continue
                        if x2 < 0 or x2 == cols: continue
                        if griddata[y2 * cols + x2]:
                            append((y2, x2))
                            griddata[y2 * cols + x2] = 0
                    ptr += 1
                yield region



Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: grouping array

2005-09-30 Thread Michael Spencer
[EMAIL PROTECTED] wrote:
> fredrick's solutions seems to be more closer to what I was looking
> for.But I am still not sure if that could be done without the use of
> Image module.

What do you mean by "closer to what I was looking
for"?  For the single test case you provided:

 > say x = [[2,2,0,0,1,1],
 >  [1,1,0,0,1,1],
 >  [1,1,0,0,1,1]]
 > I basically want to group regions that are non zero like I want to get
 > the coordinates of non zero regions..as (x1,y1,x2,y2)
 > [(0,0,2,1),(0,4,2,5)] which show the top left(x1,y1) and bottom
 > right(x2,y2) corners of each group.hope i am clear.
 >


my solution provides the correct output:

   >>> x = [[2,2,0,0,1,1],
   ...      [1,1,0,0,1,1],
   ...      [1,1,0,0,1,1]]
   >>> getregioncoords(x)
   [((0, 0), (2, 1)), ((0, 4), (2, 5))]

* except that the points aren't flattened.  If that's important to you, rewrite 
getregioncoords as follows:

def getregioncoords(grid):
    """Get top left and bottom right of *rectangular* regions"""
    regions = getregions(grid)
    return [reg[0]+reg[-1] for reg in regions if reg.sort() or True]

  >>> getregioncoords(x)
  [(0, 0, 2, 1), (0, 4, 2, 5)]
  >>>


> Also in your solution I cannot follow this

I broke the solution into two parts:

1) the getregions generator yields a list of all the contiguous regions.  The 
output below is the lists of coordinates that are contiguous non-zero cells in 
the grid.

 > [[1, 1, 2, 1, 2, 0],
 >[2, 0, 0, 2, 0, 1],
 >[1, 2, 2, 0, 2, 0],
 >[0, 1, 0, 0, 0, 0],
 >[2, 0, 0, 1, 1, 0],
 >[2, 2, 2, 0, 1, 0]]
 >   >>> print "\n".join(str(reg) for reg in getregions(x))
 >   [(0, 1), (0, 0), (0, 2), (1, 0), (0, 3), (2, 0), (1, 3), (0, 4), (2, 1), (3, 1), (2, 2)]
 >   [(5, 4), (4, 4), (4, 3)]
 >   [(5, 0), (5, 1), (4, 0), (5, 2)]
 >   [(1, 5)]
 >   [(2, 4)]


2) If the regions are rectangular, the getregioncoords function returns the 
coordinates of the top-left and bottom-right points.  You did not answer the 
previous post, which asked what to do if the regions were not rectangular.



HTH

Michael



-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Feature Proposal: Sequence .join method

2005-09-30 Thread Michael Spencer
Terry Reedy wrote:
> "David Murmann" <[EMAIL PROTECTED]> wrote in message 
> news:[EMAIL PROTECTED]
> 
>>>def join(sep, seq):
>>>    return reduce(lambda x, y: x + sep + y, seq, type(sep)())
>>
>>damn, i wanted too much. Proper implementation:
>>
>>def join(sep, seq):
>>    if len(seq):
>>        return reduce(lambda x, y: x + sep + y, seq)
>>    return type(sep)()
>>
>>but still short enough
> 
> 
> For general use, this is both too general and not general enough.
> 
> If len(seq) exists then seq is probably reiterable, in which case it may be 
> possible to determine the output length and preallocate to make the process 
> O(n) instead of O(n**2).  I believe str.join does this.  A user written 
> join for lists could also.  A tuple function could make a list first and 
> then tuple(it) at the end.
> 
> If seq is a general (non-empty) iterable, len(seq) may raise an exception 
> even though the reduce would work fine.
> 
> Terry J. Reedy
For the general iterable case, you could have something like this:

  >>> def interleave(sep, iterable):
  ...     it = iter(iterable)
  ...     next = it.next()
  ...     try:
  ...         while 1:
  ...             item = next
  ...             next = it.next()
  ...             yield item
  ...             yield sep
  ...     except StopIteration:
  ...         yield item
  ...
  >>> list(interleave(100, range(10)))
  [0, 100, 1, 100, 2, 100, 3, 100, 4, 100, 5, 100, 6, 100, 7, 100, 8, 100, 9]
  >>>

but I can't think of a use for it ;-)

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Need advice on subclassing code

2005-11-15 Thread Michael Spencer
Kent Johnson wrote:
> Rusty Shackleford wrote:
>> ...
>> C_1_1 and C_1_2 share a common C ancestor, and in practice may be
>> identical, but theoretically, could have the same function name with two
>> different implementations underneath.
>>
>> ...
> 
> How are you instantiating the correct class? You should be able to provide a 
> default behaviour. For example if the classes are all defined in module C you 
> could have a factory like this:
> 
> import C
> def makeC(x, y):
>   subtype = 'C_%d_%d' % (x, y)
>   cls = getattr(C, subtype, C.C)
>   return cls(x, y)
> 
> Then in module C just define the subtypes you need to specialize; all other 
> values of x and y will get the base class C.C.
> 
> Kent

Or, if you actually want different classes for each set of parameters (say for 
debugging or introspection), you could compose the default ones on the fly:


import C
def makeC(x, y):
    subtype = 'C_%d_%d' % (x, y)
    cls = getattr(C, subtype, None)
    if not cls:
        # No specialized class found, so compose a default
        # This requires C.C to be a new-style class
        cls = type(subtype, (C.C,), {"__autogenerated__": True})
    return cls(x, y)
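
For illustration (a sketch, assuming module C defines just a new-style base 
class C.C whose __init__ accepts (x, y)):

  >>> c = makeC(5, 7)       # no specialized C_5_7 exists in module C
  >>> type(c).__name__
  'C_5_7'
  >>> type(c).__autogenerated__
  True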

Michael



-- 
http://mail.python.org/mailman/listinfo/python-list


Re: best cumulative sum

2005-11-22 Thread Michael Spencer
David Isaac wrote:
> ... for a solution when these are available.
> Something like:
> def cumreduce(func, seq, init = None):
>     """Return list of cumulative reductions."""
>
This can be written more concisely as a generator:

  >>> import operator
  >>> def ireduce(func, iterable, init):
  ...     for i in iterable:
  ...         init = func(init, i)
  ...         yield init
  ...
  >>> list(ireduce(operator.mul, range(1,5), init=1))
  [1, 2, 6, 24]
  >>>
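
and, for the cumulative sums of the thread title:

  >>> list(ireduce(operator.add, range(1,5), init=0))
  [1, 3, 6, 10]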

Michael


-- 
http://mail.python.org/mailman/listinfo/python-list


Re: aligning a set of word substrings to sentence

2005-12-01 Thread Michael Spencer
Steven Bethard wrote:
> I've got a list of word substrings (the "tokens") which I need to align 
> to a string of text (the "sentence").  The sentence is basically the 
> concatenation of the token list, with spaces sometimes inserted beetween 
> tokens.  I need to determine the start and end offsets of each token in 
> the sentence.  For example::
> 
> py> tokens = ['She', "'s", 'gon', 'na', 'write', 'a', 'book', '?']
> py> text = '''\
> ... She's gonna write
> ... a book?'''
> py> list(offsets(tokens, text))
> [(0, 3), (3, 5), (6, 9), (9, 11), (12, 17), (18, 19), (20, 24), (24, 25)]
> 
> Here's my current definition of the offsets function::
> 
> py> def offsets(tokens, text):
> ...     start = 0
> ...     for token in tokens:
> ...         while text[start].isspace():
> ...             start += 1
> ...         text_token = text[start:start+len(token)]
> ...         assert text_token == token, (text_token, token)
> ...         yield start, start + len(token)
> ...         start += len(token)
> ...
> 
> I feel like there should be a simpler solution (maybe with the re 
> module?) but I can't figure one out.  Any suggestions?
> 
> STeVe

Hi Steve:

Any reason you can't simply use str.find in your offsets function?

  >>> def offsets(tokens, text):
  ...     ptr = 0
  ...     for token in tokens:
  ...         fpos = text.find(token, ptr)
  ...         if fpos != -1:
  ...             end = fpos + len(token)
  ...             yield (fpos, end)
  ...             ptr = end
  ...
  >>> list(offsets(tokens, text))
  [(0, 3), (3, 5), (6, 9), (9, 11), (12, 17), (18, 19), (20, 24), (24, 25)]
  >>>

and then, for an entry in the wacky category, a difflib solution:

  >>> def offsets(tokens, text):
  ...     from difflib import SequenceMatcher
  ...     s = SequenceMatcher(None, text, "\t".join(tokens))
  ...     for start, _, length in s.get_matching_blocks():
  ...         if length:
  ...             yield start, start + length
  ...
  >>> list(offsets(tokens, text))
  [(0, 3), (3, 5), (6, 9), (9, 11), (12, 17), (18, 19), (20, 24), (24, 25)]
  >>>

cheers
Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Checking length of each argument - seems like I'm fighting Python

2005-12-03 Thread Michael Spencer
Brendan wrote:
...
> 
> class Things(object):
>     def __init__(self, x, y, z):
>         #assert that x, y, and z have the same length
> 
> But I can't figure out a _simple_ way to check the arguments have the
> same length, since len(scalar) throws an exception.  The only ways
> around this I've found so far are
> 
...
> 
> b) use a separate 'Thing' object, and make the 'Things' initializer
> work only with Thing objects.  This seems like way too much structure
> to me.
> 

Yes, but depending on what you want to do with Things, it might indeed make 
sense to convert its arguments to a common sequence type, say a list.  safelist 
is barely more complex than sLen, and may simplify downstream steps.

def safelist(obj):
    """Construct a list from any object."""
    if obj is None:
        return []
    if isinstance(obj, (basestring, int)):
        return [obj]
    if isinstance(obj, list):
        return obj
    try:
        return list(obj)
    except TypeError:
        return [obj]

class Things(object):
    def __init__(self, *args):
        self.args = map(safelist, args)
        assert len(set(len(obj) for obj in self.args)) == 1
    def __repr__(self):
        return "Things%s" % self.args

  >>> Things(0,1,2)
  Things[[0], [1], [2]]
  >>> Things(range(2),xrange(2),(0,1))
  Things[[0, 1], [0, 1], [0, 1]]
  >>> Things(None, 0,1)
  Traceback (most recent call last):
    File "<stdin>", line 1, in ?
    File "C:\Documents and Settings\Michael\My Documents\PyDev\Junk\safelist.py", line 32, in __init__
      assert len(set(len(obj) for obj in self.args)) == 1
  AssertionError


Michael



-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Documentation suggestions

2005-12-06 Thread Michael Spencer
A.M. Kuchling wrote:
> Here are some thoughts on reorganizing Python's documentation, with
> one big suggestion.
> 

Thanks for raising this topic, and for your on-going efforts in this field.

I use the compiled html help file provided by PythonWin, which includes all the 
core documentation.  I usually use the index interface, not the table of 
contents (the main exception is the LibRef, see below).  In this form, the 
structure of the documentation is less important than how good the index is. 
Unfortunately, the "additional documentation", including, in particular, your re 
HowTo, is linked but not indexed, and is therefore less accessible.

> The tutorial seems to be in pretty good shape because Raymond
...
Agreed, but as you say below, there may be friendlier forms available for the 
first-timer.

...
> There's another struggle within the LibRef: is it a reference or a
> tutorial?

I want it to help answer questions of the form "What's in the the library that 
might help me do x?"  For this case, some of the current section structure is 
not that helpful.  "Miscellaneous Services", in particular, gives no clue to 
treasures it contains.  I would prefer, for example, to see the data structure 
modules: collections, heapq, array etc... given their own section. 
Documentation/testing, cmd/options might be other candidates to draw together 
currently related material more meaningfully.

> Does it list methods in alphabetical order so you can look
> them up, or does it list them in a pedagogically useful order?  I
> think it has to be a reference;

A reference, yes, but not necessarily alphabetical if another organization is 
more communicative.  itertools is a good example where alphabetic presentation 
makes perfect sense, since the functions are more-or-less peers; the math 
functions are usefully classified by topic; textwrap presents most 
commonly-used 
functions first; several modules document classes before convenience functions. 
   Each of these has its merits, and I don't see a lot of mileage in trying to 
standardize them, given how varied modules are.  However, whatever the 
reference 
structure, examples add significantly to the value to me.

...

> I suspect the Achilles' heel of the docs is the Language Reference.
> Put aside the fact that it's not up to date with new-style classes and
> other stuff; that would be fixable with some effort.
> 

> To some degree, the guide is trying to be very formal; it's written
> like a specification for an implementor, not a document that people
> would read through.  But there's no other way for people to learn
> about all the special object methods like __add__; the tutorial can't
> cover them all, and the LibRef doesn't describe them.  So the newbie
> is stuck.

I find very little of value to me in the Language Ref.  Special methods are the 
crucial exception.  Perhaps they, together with a description of class 
semantics 
(including metaclasses and descriptors) could be moved to the Built-in types 
section of the LibRef, where some related material is already.

I don't know whether the rest of the Language reference is of use to 
implementers, but given the proliferation of implementations beyond Cpython 
(Jython, IronPython, pypy) I would speculate that a formal specification is now 
more important rather than less.  However, perhaps it would be possible to 
express the specification more succinctly via tests instead of a manual.

...
> 
> Perhaps we need a friendlier counterpart to the RefGuide, something
> like the 20-page introduction to Python at the beginning of Beazley's 
> Essential Reference:

I didn't know this source, but I just skimmed it at 
http://www.amazon.com/gp/reader/0735709017/ref=sib_dp_pt/103-1276064-0751851#reader-page
(not sure if this is a session link), and I agree it's a very clear 
introduction.   Probably better first reading than the existing tutorial.

...


Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: i=2; lst=[i**=2 while i<1000]

2005-12-06 Thread Michael Spencer
Daniel Schüle wrote:
> Hello NG,
> 
> I am wondering if there were proposals or previous disscussions in this 
> NG considering using 'while' in comprehension lists
> 
> # pseudo code
> i=2
> lst=[i**=2 while i<1000]
> 

You are actually describing two features that list comps don't natively support 
- while-based termination, and calculating based on prior values of output.  Of 
course there are work-arounds for both, which others have shown.  Here's 
another approach:

The while-based termination can be easily achieved using itertools.takewhile, 
e.g.,:

  >>> list(itertools.takewhile(lambda x: x < 10, range(100)))
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  >>>

the harder piece is to access the prior value.  One way is like this:

def chasetail(start, func):
    from itertools import tee
    def mygen():
        yield start
        for i in (func(i) for i in iterators[0]):
            yield i
    iterators = tee(mygen())
    return iterators[1]

the trick is to create two independent iterators, using itertools.tee: one is 
consumed internally in the (func(i) for i in iterators[0]) generator 
expression; the other is returned to user code.

  >>> it = chasetail(2, lambda x: x*x) #careful - this won't terminate
  >>> it.next()
  2
  >>> it.next()
  4
  >>> it.next()
  16
  >>> it.next()
  256
  >>> it.next()
  65536
  >>>

Then you can combine these two approaches to get something semantically like 
what you wanted in the first place (although not as pretty ;-)

  >>> list(itertools.takewhile(lambda x: x < 1000, chasetail(2, lambda x: x*x)))
  [2, 4, 16, 256]
  >>>



If you like this sort of thing, you might want to generalize the concept with a 
Stream class.  Here's a minimal implementation:

import itertools as it

class Stream(object):
    """An extendable stream, that provides a separate iterator
    (using itertools.tee) on every iteration request"""

    def __init__(self, *iterables):
        self.queue = list(iterables)
        self.itertee = it.tee(self._chain(self.queue))[0]

    def _chain(self, queue):
        while queue:
            for i in self.queue.pop(0):
                self.head = i
                yield i

    def extend(self, other):
        self.queue.append(other)

    def __iter__(self):
        """Normal iteration over the iterables in self.queue in turn"""
        return self.itertee.__copy__()


then, you can write your squaring algorithm as:

  >>> s = Stream([2])
  >>> s.extend(it.takewhile(lambda x: x < 1000, (i**2 for i in s)))
  >>> list(s)
  [2, 4, 16, 256]


Michael




-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Documentation suggestions

2005-12-07 Thread Michael Spencer
A.M. Kuchling wrote:
> On Tue, 06 Dec 2005 10:29:33 -0800, 
>   Michael Spencer <[EMAIL PROTECTED]> wrote:
>> not that helpful.  "Miscellaneous Services", in particular, gives no clue to 
>> treasures it contains.  I would prefer, for example, to see the data 
>> structure modules: collections, heapq, array etc... given their own section. 
>> Documentation/testing, cmd/options might be other candidates to draw together
>> currently related material more meaningfully.
> 
> You're right; "Miscellaneous Services" is a grab-bag of stuff, and so
> are 'Generic OS Services' and 'Optional OS Services'.  These chapters
> should be rearranged into more, smaller chapters.  
> 
> A patch for a draft reorganization is at http://www.python.org/sf/1375417
> 
> --amk
Thanks!  That looks like a good start.

I experimented with some more re-organization, but I don't see a way to attach 
the resulting file in the SF comments, so I'll post it here instead.

Michael



% experimental re-organization of lib.tex,
% from http://www.python.org/sf/1375417

\tableofcontents

 % Chapter title:

\input{libintro}% Introduction


% =
% BUILT-INs
% =

\input{libobjs} % Built-in Types, Exceptions and Functions
\input{libfuncs}
\input{libstdtypes}
\input{libexcs}
\input{libconsts}



% =
% BASIC/GENERAL-PURPOSE OBJECTS
% =

% General object services
\input{libtypes}
\input{libnew}
\input{libweakref}
\input{libcopy}
\input{libpprint}
\input{librepr}

% Strings
\input{libstrings}  % String Services
\input{libstring}
\input{libre}
\input{libreconvert}
\input{libstruct}   % also/better in File Formats?
\input{libdifflib}
\input{libfpformat}
\input{libstringio}
\input{libtextwrap}
\input{libcodecs}
\input{libunicodedata}
\input{libstringprep}

% Data types and structures
%\input{libdata}% Data types and structures
\input{libdatetime}
\input{libcalendar}
\input{libcollections}
\input{libheapq}
\input{libarray}
\input{libsets}
\input{libsched}
\input{libmutex}
\input{libqueue}
\input{libuserdict}   % From runtime.  What happened to UserList and UserString?

% Numeric/Mathematical modules
\input{libdecimal}
\input{libmath}
\input{libcmath}
\input{librandom}
\input{libbisect} % is this needed here - more useful in Data types, like heapq?

% Functions, Functional, Generators and Iterators
\input{libitertools}
\input{libfunctional}
\input{liboperator}   % from runtime - better with itertools and functional


%\input{libmisc} % Miscellaneous Services


% =
% DATA FORMATS
% =

%% File formats
\input{libcfgparser}
\input{libnetrc}
\input{librobotparser}
\input{libcsv}
\input{libstruct}   % and in string?

% Big move - include all the markup and internet formats here

% MIME & email stuff
\input{email}
\input{libmailcap}
\input{libmailbox}
\input{libmhlib}
\input{libmimetools}
\input{libmimetypes}
\input{libmimewriter}
\input{libmimify}
\input{libmultifile}
\input{librfc822}

% encoding stuff
\input{libbase64}
\input{libbinascii}
\input{libbinhex}
\input{libquopri}
\input{libuu}
\input{libxdrlib}

\input{markup}  % Structured Markup Processing Tools
\input{libhtmlparser}
\input{libsgmllib}
\input{libhtmllib}
\input{libpyexpat}
\input{xmldom}
\input{xmldomminidom}
\input{xmldompulldom}
\input{xmlsax}
\input{xmlsaxhandler}
\input{xmlsaxutils}
\input{xmlsaxreader}
% \input{libxmllib}

\input{libcrypto}   % Cryptographic Services
\input{libhmac}
\input{libhashlib}
\input{libmd5}
\input{libsha}

% =
% FILE & DATABASE STORAGE
% =

\input{liballos}% File-system services (XXX change header)
\input{libos}
\input{libposixpath}% os.path
\input{libfileinput}
\input{libstat}
\input{libstatvfs}
\input{libfilecmp}
\input{libtempfile}
\input{libglob}
\input{libfnmatch}
\input{liblinecache}
\input{libshutil}
\input{libdircache}

%% Data compression and archiving
\input{libzlib}
\input{libgzip}
\input{libbz2}
\input{libzipfile}
\input{libtarfile}

%\input{libpersistence}  % Persistent storage
\input{libpickle}
\input{libcopyreg}  % really copy_reg % from runtime...
\input{libshelve}
\input{libmarshal}
\input{libanydbm}
\input{libwhichdb}
\input{libdbm}
\input{libgdbm}
\input{libdbhash}
\input{libbsddb}
\input{libdumbdbm}


% =
% OS
% =


\input{liballos}% Generic Operating System Services
\input{libtime}
\input{libgetpass}
\input{libcurses}
\input{libascii}% curses.ascii
\input{libcursespanel}
\input{libplatform}
\input{liberrno}

%% Interprocess communication/networking
\input{libsubprocess}
\input{l

Re: Bitching about the documentation...

2005-12-07 Thread Michael Spencer
Fredrik Lundh wrote:
> Rocco Moretti wrote:
> 
>> Insert punctuation & capitalization to make the following a correct and
>> coherent (if not a little tortured) sentence.
>>
>> fred where guido had had had had had had had had had had had a better
>> effect on the reader
> 
> punctuation, including quote marks, I presume?
> 
> it's not time to bring out "d'ä ä e å, å i åa ä e ö" yet, I hope?
> 
> 
> 
> 
> 
Allowing quotation, almost anything is possible, e.g.,


Fred! Where Guido had had "had", Had had had "had had".  "Had had" had a better 
effect on the reader

or simply

"fred", where Guido had "had had had had had had had had had", had a better
effect on the reader

M

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: newby question: Splitting a string - separator

2005-12-08 Thread Michael Spencer
Thomas Liesner wrote:
> Hi all,
> 
> i am having a textfile which contains a single string with names.
> I want to split this string into its records and put them into a list.
> In "normal" cases i would do something like:
> 
>> #!/usr/bin/python
>> inp = open("file")
>> data = inp.read()
>> names = data.split()
>> inp.close()
> 
> The problem is that the names contain spaces and the records are also
> just separated by spaces. The only thing I can rely on is that the
> record separator is always more than a single whitespace.
> 
> I thought of something like defining the separator for split() by using
>  a regex for "more than one whitespace". RegEx for whitespace is \s, but
> what would i use for "more than one"? \s+?
> 
> TIA,
> Tom
\s+ gives one or more, you need \s{2,} for two or more:

  >>> import re
  >>> re.split("\s{2,}","Guido van Rossum  Tim Peters Thomas Liesner")
  ['Guido van Rossum', 'Tim Peters', 'Thomas Liesner']
  >>>

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Dynamically add Class to Modules

2005-12-08 Thread Michael Spencer
[EMAIL PROTECTED] wrote:
> I'm trying to add a class to a module at runtime.  I've seen examples
> of adding a method to a class, but I haven't been able to suit it to my
> needs.
> 
> As part of a testsuite, I have a main process X that searches
> recursively for python test files.  Those files typically have a global
> "isSupported" method, in which the file tells the test searcher "do or
> do not run me", as well as the typical TestName_TestCase class, with a
> testMyTest method.
> 
> For numerous and reasonable reasons, the TestName_TestCase class must
> be generated at runtime (i cannot use any pre-processing scripts to
> generate the testcase files).  So the external runner has to look into
> each testcase file, determine if it is supported, expand out the
> test-class code, and add that new class to that testcase in memory.
> 
> I hope this picture helps:
> 
> 
> # atestcase.py 
> def isSupported():
> """ do a real check"""
> return True
> 
> 
> ThisTestName = "foo"
> TestCode = \
> """
> class %s_TestCase:
> def __init__( self ):
> """ do some stuff"""
> 
> def test_%s( self ):
>   """ run the test """
> """
> #
> 
> 
> #--- The external runner 
> 
> (essentially)
> import atestcase.py
> if atestcase.isSupported():
> # Run this test
> 
> (here's what i'm trying to figure out)
> #--> expand atestcase.TestCode out to include "foo"
> #--> make the testcode a class
> #--> add the new foo_TestCase class to
> #the atestcase module
> 
> #-
> 
> 
> So:  Does anyone know how dynamically generate a class, and add it to a
> "module" that is already in memory?
> 
> Thanks so much in advance.  My flu is heating up my brain pretty badly,
> so please ask me if I have to clarify anything above.
> 
Bill,
I think this should do it:

import atestcase as T
exec T.TestCode % T.ThisTestName in T.__dict__

If you want to substitute ThisTestName more than once, you might be better off 
using the %(name)s form, supplied with a dictionary {"name": "foo"}, or you could 
look at the new string.Template class for easier string substitution.
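
For example, a quick sketch with string.Template (new in Python 2.4):

  >>> import string
  >>> t = string.Template("class ${name}_TestCase:\n    def test_${name}(self): pass")
  >>> print t.substitute(name="foo")
  class foo_TestCase:
      def test_foo(self): pass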

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: Dynamically add Class to Modules

2005-12-08 Thread Michael Spencer
[EMAIL PROTECTED] wrote:
...
> exec testModule.TheTestCode %(testModule.TheTestName, testModule.TheTestName )

...

Try changing that to exec ~ in testModule.__dict__

otherwise, your class statement gets executed in the current scope

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: getting host and path from url

2005-12-09 Thread Michael Spencer
Steve Young wrote:
> Hi, this is probably an easy question but is there a way to get the host and 
> path seperatly out of an url? 
>   
>   Example:
>   
>   url = http://news.yahoo.com/fc/world/iraq
>   
>   and i want some way of getting:
>   
>   host = http://news.yahoo.com
>   and
>   path = /fc/world/iraq
>   
>   thanks.
>   
>   -Steve
check out urlparse in the stdlib
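
For example (a quick sketch using urlparse.urlsplit):

  >>> import urlparse
  >>> scheme, netloc, path, query, fragment = urlparse.urlsplit(
  ...     "http://news.yahoo.com/fc/world/iraq")
  >>> "%s://%s" % (scheme, netloc)
  'http://news.yahoo.com'
  >>> path
  '/fc/world/iraq'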

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: newby question: Splitting a string - separator

2005-12-09 Thread Michael Spencer
[EMAIL PROTECTED] wrote:
> Thomas Liesner wrote:
>> Hi all,
>>
>> i am having a textfile which contains a single string with names.
>> I want to split this string into its records and put them into a list.
>> In "normal" cases i would do something like:
>>
>>> #!/usr/bin/python
>>> inp = open("file")
>>> data = inp.read()
>>> names = data.split()
>>> inp.close()
>> The problem is that the names contain spaces and the records are also
>> just separated by spaces. The only thing I can rely on is that the
>> record separator is always more than a single whitespace.
>>
>> I thought of something like defining the separator for split() by using
>>  a regex for "more than one whitespace". RegEx for whitespace is \s, but
>> what would i use for "more than one"? \s+?
>>
> Can I just use "two space" as the seperator ?
> 
> [ x.strip() for x in data.split("  ") ]
> 
If you like, but it will create dummy entries if there are more than two spaces:

  >>> data = "Guido van Rossum  Tim PetersThomas Liesner"
  >>> [ x.strip() for x in data.split("  ") ]
  ['Guido van Rossum', 'Tim Peters', '', 'Thomas Liesner']

You could add a condition to the listcomp:

  >>> [name.strip() for name in data.split("  ") if name]
  ['Guido van Rossum', 'Tim Peters', 'Thomas Liesner']

but what if there is some other whitespace character?

  >>> data = "Guido van Rossum  Tim Peters  \t  Thomas Liesner"
  >>> [name.strip() for name in data.split("  ") if name]
  ['Guido van Rossum', 'Tim Peters', '', 'Thomas Liesner']
  >>>

perhaps a smarter condition?

  >>> [name.strip() for name in data.split("  ") if name.strip(" \t")]
  ['Guido van Rossum', 'Tim Peters', 'Thomas Liesner']

but this is beginning to feel like hard work.


I think this is a case where it's not worth the effort to try to avoid the 
regexp

  >>> import re
  >>> re.split("\s{2,}",data)
  ['Guido van Rossum', 'Tim Peters', 'Thomas Liesner']
  >>>

Michael


-- 
http://mail.python.org/mailman/listinfo/python-list


args (was Re: Lambda as declarative idiom (was RE: what is lambda used for in real code?))

2005-01-04 Thread Michael Spencer
Roman Suzi wrote:
Maybe this is too outlandish, but I see lambdas as a "quote" mechanism,
which presents a possibility to postpone (precisely control, delegate)
evaluation. That is, an ovehead for lambda must be much lower but at the
same time visible to the programmer:
 d = a + (lambda x, y: x+ y)(3, 4)
[...]
I believe that this "possibility to postpone" divides into two related but 
separate concepts: controlling the moment of evaluation, and assembling the 
arguments required at that moment.  They are both species of 'eval', but 
managing arguments is more specialized, because it includes possibly renaming 
parameters, assigning default values, processing positional and keyword 
arguments, and, perhaps in the future dealing with argument types.

Meanwhile, GvR wrote (about defining Interfaces in the context of Optional 
Static Type Checking)
Method declarations can be inspected to find out their signature. I propose a
__signature__ attribute (also for methods defined in classes!) which might be an
object whose attributes make the signature easily inspectable. This might take 
the form of a list of argument declaration objects giving the name, type and default
(if any) for each argument, and a separate argument for the return type. For 
signatures that include *args and/or **kwds, the type of the additional arguments 
should also be given (so you can write for example a varargs method whose arguments
are all strings).
GvR's method.__signature__ object might be related to the args object I proposed 
 as part of the syntax for anonymous functions without 'lambda'. i.e.,

args(a,*b,**kw) --> an object that specifies but does not evaluate its 
parameters until it is supplied to a callable, possibly with calling parameters

This object would contain the default values, and could contain type 
annotations, explicit, or inferred, as well as more complex assertions used in 
several contexts.

* Current function syntax:
def func(a,*b,**c) : pass
creates func with func.__signature__ = args(a,*b,**c)
and when func is called, the args are evaluated using a mechanism in
args.__call__
so, roughly, eval(func.__signature__) --> func.locals
 * Anonymous functions
	Syntax alternatives at http://www.python.org/moin/AlternateLambdaSyntax
	e.g., (f(a) + o(b) - o(c) for args(a, b, c))
	
	args would be evaluated with the calling parameters and made available in
	the local scope defined by ()
	
 * A stricter alternative to keyword arguments:
 	argspec = args(arg1, arg2, arg3)
	def func(**argspec): pass
	
	is equivalent to def func(arg1, arg2, arg3): pass


args["arg1"]

(i.e., only args defined in argspec are accepted)
 * Useful infrastructure for user-supplied type-based dispatch/lightweight 
multimethods:
	
	argspec = args([(a:int, b:int),(a:str,b:str)])
	
	then a framework can provide a custom args.__call__ method that does
	conformance-checking, adaptation or whatever


Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: pure python code to do modular-arithmetic unit conversions?

2005-01-21 Thread Michael Spencer
Dan Stromberg wrote:
Is there already a pure python module that can do modular-arithmetic unit
conversions, like converting a huge number of seconds into months,
weeks... or a bandwidth measure into megabits/s or gigabits/s or
megabytes/s or gigabytes/s, whatever's the most useful (ala df -h)?
Thanks!
Take a look at:
http://home.tiscali.be/be052320/Unum_tutorial.html
From the intro:
"Unum stands for 'unit-numbers'. It is a Python module that allows to define and 
manipulate true quantities, i.e. numbers with units such as 60 seconds, 500 
watts, 42 miles-per-hour, 100 kg per square meter, 14400 bits per second, 30 
dollars etc. The module validates unit consistency in arithmetic expressions; it 
provides also automatic conversion and output formatting."

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Reload Tricks

2005-01-21 Thread Michael Spencer
Kamilche wrote:
I want my program to be able to reload its code dynamically. I have a
large hierarchy of objects in memory. The inheritance hierarchy of
these objects are scattered over several files.
I find that after reloading the appropriate files, and overwriting the
__class__ of object instances, one more thing is necessary: reloading
the __bases__ of each reloaded class. If I don't do this, the modules
reloaded first point to old versions of the classes from later modules,
and when the later module is reloaded, it doesn't update the
inheritance hierarchy of classes already loaded.
This appears to be working... but now I'm wondering, what else did it
not change? Can I expect more toes to be blown off?
--Kamilche
There are some cases when re-assigning __class__ isn't possible, for 
example:
 >>> class A(object):
 ...     pass
 ...
 >>> class B(dict):
 ...     pass
 ...
 >>> class C:
 ...     pass
 ...
 >>> a = A()
 >>> a.__class__ = B
 Traceback (most recent call last):
   File "<stdin>", line 1, in ?
 TypeError: __class__ assignment: 'A' object layout differs from 'B'
 >>> a.__class__ = C
 Traceback (most recent call last):
   File "<stdin>", line 1, in ?
 TypeError: __class__ must be set to new-style class, not 'classobj' object
 >>>
An alternative approach (with some pros and cons) is to modify the class in 
place, using something like:

 >>> import types
 >>> def reclass(cls, to_cls):
 ...     """Updates attributes of cls to match those of to_cls"""
 ...
 ...     DONOTCOPY = ("__name__", "__bases__", "__base__",
 ...                  "__dict__", "__doc__", "__weakref__")
 ...
 ...     fromdict = cls.__dict__
 ...     todict = to_cls.__dict__
 ...
 ...     # Delete any attribute absent from the new class
 ...     [delattr(cls, attr) for attr in fromdict.keys()
 ...         if not ((attr in todict) or (attr in DONOTCOPY))]
 ...
 ...     for to_attr, to_obj in todict.iteritems():
 ...
 ...         if to_attr in DONOTCOPY:
 ...             continue
 ...
 ...         # This overwrites all functions, even if they haven't changed.
 ...         if type(to_obj) is types.MethodType:
 ...             func = to_obj.im_func
 ...             to_obj = types.MethodType(func, None, cls)
 ...
 ...         setattr(cls, to_attr, to_obj)
 ...
 >>> class A(object):
 ...     attr = "A"
 ...
 >>> class B(object):
 ...     attr = "B"
 ...
 >>> a = A()
 >>> reclass(A, B)
 >>> a.attr
'B'
 >>>
This copies attributes of old and new-style classes (in fact anything with a 
__dict__ so probably a module would work too)

You still run into problems trying to re-assigning __bases__ to incompatible 
objects, but this one-attribute-at-a-time approach gives you the potential to 
intercept problem cases.  In the example above, problems are avoided by not 
copying __bases__.

An additional advantage of this aprpoach is that you don't need to keep track of 
class instances, in order to change their __class__.  Instances automatically 
acquire the new behavior

One wart is that class docstrings are not writeable, so cannot be copied.  Why?
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Reload Tricks

2005-01-22 Thread Michael Spencer
Kamilche wrote:
I want my program to be able to reload its code dynamically. I have a
large hierarchy of objects in memory. The inheritance hierarchy of
these objects are scattered over several files.
Michael Spencer wrote:
An alternative approach (with some pros and cons) is to modify the class in 
place, using something like:
 >>> def reclass(cls, to_cls):
 ... """Updates attributes of cls to match those of to_cls"""
 ...
 ... DONOTCOPY = ("__name__","__bases__","__base__",
 ... "__dict__", "__doc__","__weakref__") 
etc...
Kamilche wrote:
Would it be possible to just not copy any attribute that starts and
ends with '__'? Or are there some important attributes being copied?

Possible?  of course, it's Python ;-)
But there are many 'magic' attributes for behavior that you probably do want to 
copy:

e.g., __getitem__, __setitem__ etc...
See: http://docs.python.org/ref/specialnames.html
Michael Hudson's recipe:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/160164
does auto-reloading "automatically", at the price of changing the type of the 
classes you want to manage.  It's a very convenient approach for interactive 
development (which is the recipe's stated purpose).  It works by tracking 
instances and automatically updating their class.  If your program relies on 
class identity, you may run into problems.

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: default value in a list

2005-01-22 Thread Michael Spencer
Alex Martelli wrote:
[explanation and the following code:]
 >>> a, b, c = it.islice(
 ...     it.chain(
 ...         line.split(':'),
 ...         it.repeat(some_default),
 ...     ),
 ...     3)

 >>> def pad_with_default(N, iterable, default=None):
 ...     it = iter(iterable)
 ...     for x in it:
 ...         if N <= 0: break
 ...         yield x
 ...         N -= 1
 ...     while N > 0:
 ...         yield default
 ...         N -= 1
Why not put these together and put it in itertools, since the requirement seems 
to crop up every other week?

 >>> line = "A:B:C".split(":")
 >>> def ipad(N, iterable, default=None):
 ...     return it.islice(it.chain(iterable, it.repeat(default)), N)
 ...
 >>> a, b, c, d = ipad(4, line)
 >>> a, b, c, d
('A', 'B', 'C', None)
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: What YAML engine do you use?

2005-01-22 Thread Michael Spencer
Paul Rubin wrote:
YAML looks to me to be completely insane, even compared to Python
lists.  I think it would be great if the Python library exposed an
interface for parsing constant list and dict expressions, e.g.:
   [1, 2, 'Joe Smith', 8237972883334L,   # comment
  {'Favorite fruits': ['apple', 'banana', 'pear']},  # another comment
  'xyzzy', [3, 5, [3.14159, 2.71828, [
I don't see what YAML accomplishes that something like the above wouldn't.
Note that all the values in the above have to be constant literals.
Don't suggest using eval.  That would be a huge security hole.
Not hard at all, thanks to compiler.ast:
 >>> import compiler
 >>> class AbstractVisitor(object):
 ...     def __init__(self):
 ...         self._cache = {}  # dispatch table
 ...
 ...     def visit(self, node, **kw):
 ...         cls = node.__class__
 ...         meth = self._cache.setdefault(cls,
 ...             getattr(self, 'visit' + cls.__name__, self.default))
 ...         return meth(node, **kw)
 ...
 ...     def default(self, node, **kw):
 ...         for child in node.getChildNodes():
 ...             return self.visit(child, **kw)
 ...
 >>> class ConstEval(AbstractVisitor):
 ...     def visitConst(self, node, **kw):
 ...         return node.value
 ...
 ...     def visitName(self, node, **kw):
 ...         raise NameError, "Names are not resolved"
 ...
 ...     def visitDict(self, node, **kw):
 ...         return dict([(self.visit(k), self.visit(v)) for k, v in node.items])
 ...
 ...     def visitTuple(self, node, **kw):
 ...         return tuple(self.visit(i) for i in node.nodes)
 ...
 ...     def visitList(self, node, **kw):
 ...         return [self.visit(i) for i in node.nodes]
 ...
 >>> ast = compiler.parse(source, "eval")   # source = the list literal quoted above
 >>> walker = ConstEval()
 >>> walker.visit(ast)
[1, 2, 'Joe Smith', 8237972883334L, {'Favorite fruits': ['apple', 'banana', 
'pear']}, 'xyzzy', [3, 5, [3.14158999, 2.71828, [

Add sugar to taste
Regards
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: What YAML engine do you use?

2005-01-24 Thread Michael Spencer
Fredrik Lundh wrote:
Sion Arrowsmith wrote:
I'm probably not thinking deviously enough here, but how are you
going to exploit an eval() which has very tightly controlled
globals and locals (eg. eval(x, {"__builtins__": None}, {}) ?
try this:
eval("'*'*100*2*2*2*2*2*2*2*2*2")
I updated the safe eval recipe I posted yesterday to add the option of reporting 
unsafe source, rather than silently ignoring it.  Is this completely safe?  I'm 
interested in feedback.

Michael
Some source to try:
 >>> good_source = """[1, 2, 'Joe Smith', 8237972883334L,   # comment
 ...   {'Favorite fruits': ['apple', 'banana', 'pear']},  # another comment
 ...   'xyzzy', [3, 5, [3.14159, 2.71828, ["""
 ...
Unquoted string literal
 >>> bad_source = """[1, 2, JoeSmith, 8237972883334L,   # comment
 ...   {'Favorite fruits': ['apple', 'banana', 'pear']},  # another comment
 ...   'xyzzy', [3, 5, [3.14159, 2.71828, ["""
 ...
Non-constant expression
 >>> effbot = "'*'*100*2*2*2*2*2*2*2*2*2"
 >>> safe_eval(good_source)
[1, 2, 'Joe Smith', 8237972883334L, {'Favorite fruits': ['apple', 'banana', 
'pear']}, 'xyzzy', [3, 5, [3.14158999, 2.71828, [
 >>> assert _ == eval(good_source)

 >>> safe_eval(bad_source)
Traceback (most recent call last):
  [...]
Unsafe_Source_Error: Line 1.  Strings must be quoted: JoeSmith
 >>> safe_eval(bad_source, fail_on_error = False)
[1, 2, None, 8237972883334L, {'Favorite fruits': ['apple', 'banana', 'pear']}, 
'xyzzy', [3, 5, [3.14158999, 2.71828, [

 >>> safe_eval(effbot)
Traceback (most recent call last):
  [...]
Unsafe_Source_Error: Line 1.  Unsupported source construct: compiler.ast.Mul
 >>> safe_eval(effbot, fail_on_error = False)
 ...
'*'
 >>>
Source:
import compiler

class Unsafe_Source_Error(Exception):
    def __init__(self, error, descr=None, node=None):
        self.error = error
        self.descr = descr
        self.node = node
        self.lineno = getattr(node, "lineno", None)

    def __repr__(self):
        return "Line %d.  %s: %s" % (self.lineno, self.error, self.descr)
    __str__ = __repr__

class AbstractVisitor(object):
    def __init__(self):
        self._cache = {}  # dispatch table

    def visit(self, node, **kw):
        cls = node.__class__
        meth = self._cache.setdefault(cls,
            getattr(self, 'visit' + cls.__name__, self.default))
        return meth(node, **kw)

    def default(self, node, **kw):
        for child in node.getChildNodes():
            return self.visit(child, **kw)

    visitExpression = default

class SafeEval(AbstractVisitor):
    def visitConst(self, node, **kw):
        return node.value

    def visitDict(self, node, **kw):
        return dict([(self.visit(k), self.visit(v)) for k, v in node.items])

    def visitTuple(self, node, **kw):
        return tuple(self.visit(i) for i in node.nodes)

    def visitList(self, node, **kw):
        return [self.visit(i) for i in node.nodes]

class SafeEvalWithErrors(SafeEval):
    def default(self, node, **kw):
        raise Unsafe_Source_Error("Unsupported source construct",
                                  node.__class__, node)

    def visitName(self, node, **kw):
        raise Unsafe_Source_Error("Strings must be quoted",
                                  node.name, node)

    # Add more specific errors if desired

def safe_eval(source, fail_on_error=True):
    walker = fail_on_error and SafeEvalWithErrors() or SafeEval()
    try:
        ast = compiler.parse(source, "eval")
    except SyntaxError, err:
        raise
    try:
        return walker.visit(ast)
    except Unsafe_Source_Error, err:
        raise
--
http://mail.python.org/mailman/listinfo/python-list


Re: Classical FP problem in python : Hamming problem

2005-01-25 Thread Michael Spencer
Francis Girard wrote:
The following implementation is even more speaking as it makes self-evident 
and almost mechanical how to translate algorithms that run after their tail 
from recursion to "tee" usage :

Thanks, Francis and Jeff for raising a fascinating topic.  I've enjoyed trying 
to get my head around both the algorithm and your non-recursive implementation.

Here's a version of your implementation that uses a helper class to make the 
algorithm itself prettier.

from itertools import tee, imap

def hamming():
    def _hamming():
        yield 1
        for n in imerge(2 * hamming, imerge(3 * hamming, 5 * hamming)):
            yield n
    hamming = Tee(_hamming())
    return iter(hamming)

class Tee(object):
    """Provides an independent iterator (using tee) on every iteration request
    Also implements lazy iterator arithmetic"""
    def __init__(self, iterator):
        self.iter = tee(iterator, 1)[0]
    def __iter__(self):
        return self.iter.__copy__()
    def __mul__(self, number):
        return imap(lambda x: x * number, self.__iter__())

def imerge(xs, ys):
    x = xs.next()
    y = ys.next()
    while True:
        if x == y:
            yield x
            x = xs.next()
            y = ys.next()
        elif x < y:
            yield x
            x = xs.next()
        else:  # y < x
            yield y
            y = ys.next()
>>> hg = hamming()
>>> for i in range(10000):
...     n = hg.next()
...     if i % 1000 == 0: print i, n
...
0 1
1000 5184
2000 81
3000 27993600
4000 4707158941350
5000 5096079360
6000 4096000
7000 2638827906662400
8000 143327232
9000 680244480
Regards
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Classical FP problem in python : Hamming problem

2005-01-25 Thread Michael Spencer
Nick Craig-Wood wrote:
Steven Bethard <[EMAIL PROTECTED]> wrote:
Nick Craig-Wood wrote:
Thinking about this some more leads me to believe a general purpose
imerge taking any number of arguments will look neater, eg
def imerge(*generators):
   values = [ g.next() for g in generators ]
   while True:
   x = min(values)
   yield x
   for i in range(len(values)):
   if values[i] == x:
   values[i] = generators[i].next()
This kinda looks like it dies after the first generator is exhausted, 
but I'm not certain.

Yes it will stop iterating then (rather like zip() on lists of unequal
size). Not sure what the specification should be!  It works for the
hamming problem though.

list(imerge(iter([1, 2]), iter([1, 2, 3]), iter([1, 2, 3, 4])))
[1, 2]

An alternate version that doesn't search for 'i':
py> def imerge(*iterables):
...     iters = [iter(i) for i in iterables]
...     values = [i.next() for i in iters]
...     while iters:
...         x, i = min((val, i) for i, val in enumerate(values))
...         yield x
...         try:
...             values[i] = iters[i].next()
...         except StopIteration:
...             del iters[i]
...             del values[i]
...
py> list(imerge([1, 4, 7], [2, 5, 8], [3, 6, 9]))
[1, 2, 3, 4, 5, 6, 7, 8, 9]
py> list(imerge([3, 6, 9], [1, 4, 7], [2, 5, 8]))
[1, 2, 3, 4, 5, 6, 7, 8, 9]
py> list(imerge([1, 4, 7], [3, 6, 9], [2, 5, 8]))
[1, 2, 3, 4, 5, 6, 7, 8, 9]

This isn't quite right...

list(imerge([1, 2, 3], [1, 2, 3], [1, 2, 3]))
[1, 1, 1, 2, 2, 2, 3, 3, 3]
This should produce
[1, 2, 3]
So I'm afraid the searching *is* necessary - you've got to find all
the generators with the min value and move them on.
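
A minimal fix along those lines (my sketch, not from the thread) keeps
Steven's structure but advances every iterator whose head equals the minimum:

py> def imerge(*iterables):
...     iters = [iter(i) for i in iterables]
...     values = [i.next() for i in iters]
...     while iters:
...         x = min(values)
...         yield x
...         # advance *all* iterators that produced the minimum;
...         # iterate backwards because we may delete entries
...         for i in range(len(iters) - 1, -1, -1):
...             if values[i] == x:
...                 try:
...                     values[i] = iters[i].next()
...                 except StopIteration:
...                     del iters[i]
...                     del values[i]
...
py> list(imerge([1, 2, 3], [1, 2, 3], [1, 2, 3]))
[1, 2, 3]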
Here's a dict-based implementation: cute, but slow, at least for a small number 
of iterators

 >>> def imerge(*iterables):
 ...     cache = {}
 ...     iterators = map(iter, iterables)
 ...     number = len(iterables)
 ...     exhausted = 0
 ...     while 1:
 ...         for it in iterators:
 ...             try:
 ...                 cache.setdefault(it.next(), []).append(it)
 ...             except StopIteration:
 ...                 exhausted += 1
 ...                 if exhausted == number:
 ...                     raise StopIteration
 ...         val = min(cache)
 ...         iterators = cache.pop(val)
 ...         yield val
 >>> list(imerge([1, 2, 3, 6], [1, 2, 3, 7], [1, 2, 3, 4, 5]))
[1, 2, 3, 4, 5, 6, 7]
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: limited python virtual machine (WAS: Another scripting language implemented into Python itself?)

2005-01-25 Thread Michael Spencer
Steven Bethard wrote:
>
> I wish there was a way to, say, exec something with no builtins and
> with import disabled, so you would have to specify all the available
> bindings, e.g.:
>
> exec user_code in dict(ClassA=ClassA, ClassB=ClassB)
>
> but I suspect that even this wouldn't really solve the problem,
> because you can do things like:
>
> py> class ClassA(object):
> ... pass
> ...
> py> object, = ClassA.__bases__
> py> object
> <type 'object'>
> py> int = object.__subclasses__()[2]
> py> int
> <type 'int'>
>
> so you can retrieve a lot of the builtins.  I don't know how to
> retrieve  __import__ this way, but as soon as you figure that out, you
> can then do pretty much anything you want to.
>
> Steve
Steve
Safe eval recipe posted to cookbook: 
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/364469

Couldn't safe exec be programmed similarly?
'import' and 'from' are syntax, so trivially avoided
Likewise, function calls are easily intercepted
As you say, attribute access to core functions appears to present the challenge.
It is easy to intercept attribute access, harder to know what's safe.  If there
were a known set of 'dangerous' objects, e.g. sys, file, os etc., then these
could be checked by identity against any attribute returned.
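
For illustration only (the names here are hypothetical, and self.visit is
the walker's usual dispatch), an AST-walker's attribute handler could apply
such an identity check like this:

import sys, os

UNSAFE_OBJECTS = [sys, os, open, file, eval, compile, __import__]

class BlacklistingWalker:               # hypothetical AST walker fragment
    def visitGetattr(self, node, **kw):
        obj = self.visit(node.expr)
        value = getattr(obj, node.attrname)
        for unsafe in UNSAFE_OBJECTS:
            if value is unsafe:         # identity test, not equality
                raise ValueError("access to %r is blocked" % node.attrname)
        return value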

Of course, execution would be painfully slow, due to double interpretation.
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: limited python virtual machine (WAS: Another scripting language implemented into Python itself?)

2005-01-25 Thread Michael Spencer
Steven Bethard wrote:
Michael Spencer wrote:
Safe eval recipe posted to cookbook:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/364469

This recipe only evaluates constant expressions:
"Description:
Evaluate constant expressions, including list, dict and tuple using the 
abstract syntax tree created by compiler.parse"

It means you can't eval arbitrary Python code -- it's basically just a 
data parser.  Handy in some situations, but not the equivalent of a 
limited Python virtual machine.
Indeed.  But it's easy to extend this to arbitrary constructs.  You just need to 
decide what code to emit for the other 50 or so ast node types.  Many of those 
are boiler-plate binops.

Likewise, function calls are easily intercepted
I'm not sure I follow this...  How do you intend to intercept all 
function calls?
Sorry, should have been more precise.  In the AST, Function calls have their own 
 node type, so it is easy to 'intercept' them and execute them conditionally

[snip]
It sounds like you're suggesting overriding the global attribute access 
mechanism.  Is that right?  So that every time Python encountered an 
attribute access, you would verify that the attribute being accessed is 
not on the 'dangerous' list?
Just in the context of the AST-walker, yes
  I don't know how to do that without
basically rewriting some of Python's C code, though certainly I'm no 
expert in the area...
Not messing with the CPython interpreter
Also, I'm not sure identity is sufficient:
py> import sys
py> import new
py> new.module('newsys')
<module 'newsys' (built-in)>
py> newsys = new.module('newsys')
py> newsys.__dict__.update(sys.__dict__)
py> newsys is sys
False
py> newsys == sys
False
Right - the crux of the problem is how to identify dangerous objects.  My point
is that if such a test is possible, then safe exec is very easily implemented
within current Python. If it is not, then it is essentially impossible.

Let's assume that it is indeed not possible to know in general whether an object 
is safe, either by inspecting its attributes, or by matching its identity 
against a black list.

It might still be possible to have a reliable test within a problem-specific 
domain i.e., white-listing.  This, I think, is what you meant when you said:

I wish there was a way to, say, exec something with no builtins and with import 
disabled, so you would have to specify all the available bindings, e.g.:
exec user_code in dict(ClassA=ClassA, ClassB=ClassB) 
I believe that if you can come up with a white-list, then the rest of the 
problem is easy.
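
For example (my sketch, assuming the Eval walker from the safe-eval recipe
cited above; the names are illustrative), white-listing reduces to resolving
names only through an explicit mapping:

SAFE_NAMES = {'len': len, 'min': min, 'max': max, 'abs': abs}

class WhitelistEval(Eval):              # Eval as in the recipe
    def lookup(self, objname):
        """Resolve names only through the white list"""
        try:
            return SAFE_NAMES[objname]
        except KeyError:
            raise NameError("name %r is not on the white list" % objname)
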

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: limited python virtual machine (WAS: Another scripting language implemented into Python itself?)

2005-01-25 Thread Michael Spencer
Cameron Laird wrote:
In article <[EMAIL PROTECTED]>,
Michael Spencer  <[EMAIL PROTECTED]> wrote:
.
.
.
Right - the crux of the problem is how to identify dangerous objects.  My point 
is that if such as test is possible, then safe exec is very easily implemented 
within current Python. If it is not, then it is essentially impossible.


I'll suggest yet another perspective:  add another indirection.
As the virtual machine becomes more available to introspection,
it might become natural to define a *very* restricted interpreter
which we can all agree is safe, PLUS a means to extend that 
specific instance of the VM with, say, new definitions of bindings
for particular AST nodes.  Then the developer has the means to
"build out" his own VM in a way he can judge useful and safe for
his own situation.  Rather than the Java there-is-one-"safe"-for-
all approach, Pythoneers would have the tools to create safety.
That does sound good.  And evolutionary, because the very restricted VM could be 
implemented today (in Python), and subsequently PyPy (or whatever) could 
optimize it.

The safe eval recipe I referred to earlier in the thread is IMO a trivial 
example of of this approach. Of course, its restrictions are extreme - only 
constant expressions, but it is straightforwardly extensible to any subset of 
the language.

The limitation that I see with this approach is that it is not, in general,
syntax that is safe or unsafe (with the notable exception of 'import' and its
relatives).  Rather, it is the library objects, especially the built-ins, that
present the main source of risk.

So, if I understand your suggestion, it would require assessing the safety of 
the built-in objects, as well as providing an interpreter that could control 
access to them, possibly with fine-grain control at the attribute level.

M

--
http://mail.python.org/mailman/listinfo/python-list


Re: python without OO

2005-01-25 Thread Michael Spencer
Davor wrote:
Thanks,
I do not hate OO - I just do not need it for the project size I'm
dealing with - and the project will eventually become open-source and
have additional developers - so I would prefer that we all stick to
"simple procedural" stuff rather than having to deal with a developer
that will be convincing me that his 50 layers inheritance hierarchy is
good since it exists in some weird pattern that he saw somewhere on
some Java design patterns discussion board :-) and other "proper" OO
design issues...  Once I opted for C++ in a very small project and
believed everyone will stick with C subset + better type checking
offered through C++ - but I simply could not manage to keep them off
using OO stuff which was just making program more complicated than it
should have been. (note, I am not an experienced developer, nor the
others I'll be working with (even though some think they are:-)), so I
prefer preemptively dealing with issue of everyone showing off their OO
design skills)
Davor
Perhaps pylint (http://www.logilab.org/projects/pylint) or its ilk can help you 
enforce a coding style

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Classical FP problem in python : Hamming problem

2005-01-27 Thread Michael Spencer
Paul Rubin wrote:
Francis Girard <[EMAIL PROTECTED]> writes:
Thank you Nick and Steven for the idea of a more generic imerge.

If you want to get fancy, the merge should use a priority queue (like
in the heapsort algorithm) instead of a linear scan through the
incoming iters, to find the next item to output.  That lowers the
running time to O(n log k) instead of O(n*k), where k is the number of
iterators and n is the length.
I looked at a heapq solution but didn't see any clean way of dealing with 
multiple iterators having equal values.  The dict solution below deals cleanly 
with that, since one key can be shared by any number of iterators.  Extracting 
the minimum, and the associated iterables is fast, but the overall solution is 
still slower than the brute force approach for the 3 hamming iterators.
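
For reference, a heap-based merge along the lines Paul suggests might look
like this (my sketch, not from the thread; it collapses equal values by
advancing every iterator whose head matches the minimum):

import heapq

def imerge_heap(*iterables):
    # build the initial heap of (head value, iterator) pairs
    heap = []
    for it in map(iter, iterables):
        try:
            heap.append((it.next(), it))
        except StopIteration:
            pass
    heapq.heapify(heap)
    while heap:
        val = heap[0][0]
        yield val
        # advance every iterator whose head equals val
        while heap and heap[0][0] == val:
            it = heap[0][1]
            try:
                heapq.heapreplace(heap, (it.next(), it))
            except StopIteration:
                heapq.heappop(heap)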

 >>> def imerge(*iterables):
 ...     cache = {}
 ...     iterators = map(iter, iterables)
 ...     number = len(iterables)
 ...     exhausted = 0
 ...     while 1:
 ...         # First time through, looked at all of them
 ...         # Subsequently, update only the minimum iterators
 ...         for it in iterators:
 ...             try:
 ...                 # Key each iterator by its next() value
 ...                 # Multiple iterators may share the same key
 ...                 cache.setdefault(it.next(), []).append(it)
 ...             except StopIteration:
 ...                 exhausted += 1
 ...                 if exhausted == number:
 ...                     raise StopIteration
 ...         # Get the lowest value
 ...         val = min(cache)
 ...         # and all the iterators that have that value
 ...         iterators = cache.pop(val)
 ...         yield val
 >>> list(imerge([1, 2, 3, 6], [1, 2, 3, 7], [1, 2, 3, 4, 5]))
[1, 2, 3, 4, 5, 6, 7]
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: remove duplicates from list *preserving order*

2005-02-03 Thread Michael Spencer
Steven Bethard wrote:
I'm sorry, I assume this has been discussed somewhere already, but I 
found only a few hits in Google Groups...  If you know where there's a 
good summary, please feel free to direct me there.

I have a list[1] of objects from which I need to remove duplicates.  I 
have to maintain the list order though, so solutions like set(lst), etc. 
will not work for me.  What are my options?  So far, I can see:

def filterdups(iterable):
result = []
for item in iterable:
if item not in result:
result.append(item)
return result
def filterdups(iterable):
result = []
seen = set()
for item in iterable:
if item not in seen:
result.append(item)
seen.add(item)
return result
def filterdups(iterable):
seen = set()
for item in iterable:
if item not in seen:
seen.add(item)
yield item
Does anyone have a better[2] solution?
STeve
[1] Well, actually it's an iterable of objects, but I can convert it to 
a list if that's helpful.

[2] Yes I know, "better" is ambiguous.  If it helps any, for my 
particular situation, speed is probably more important than memory, so 
I'm leaning towards the second or third implementation.
How about:
>>> import itertools
>>> def filterdups3(iterable):
...     seen = set()
...     def _seen(item):
...         return item in seen or seen.add(item)
...     return itertools.ifilterfalse(_seen, iterable)
...
>>> list(filterdups3([1,2,2,3,3,3,4,4,4,2,2,5]))
[1, 2, 3, 4, 5]
>>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: List mapping question

2005-02-03 Thread Michael Spencer
Marc Huffnagle wrote:
I have a number of variables that I want to modify (a bunch of strings 
that I need to convert into ints).  Is there an easy way to do that 
other than saying:

 > a = int(a)
 > b = int(b)
 > c = int(c)
It may not matter to you, at the moment, but a = int(a) is not strictly 
'modifying a variable'.  Instead int(a) creates a new int object, if possible, 
from the object that a is currently bound to.  Then a is rebound to the new object.

I tried
 > [i = int(i) for i in [a, b, c]]
You can't make an assignment in a list comprehension.  If your 'variables' are
object attributes, you could do:
[setattr(obj, name, int(getattr(obj, name))) for name in [list of attribute names]]


but that didn't work because it was creating a list with the values of 
a, b and c instead of the actual variables themselves, then trying to 
set a string equal to an integer, which it really didn't like.

 Marc
For your problem as stated:
>>> a=b=c="1"
>>> for var in ["a","b","c"]:
... exec "%s = int(%s)" % (var,var)
...
>>> a,b,c
(1, 1, 1)
>>>
But don't do this, except as a "one-off" data crunching exercise
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: how to generate SQL SELECT pivot table string

2005-02-03 Thread Michael Spencer
McBooCzech wrote:
Hallo all,
I am trying to generate SQL SELECT command which will return pivot
table. The number of column in the pivot table depends on the data
stored in the database. It means I do not know in advance how many
columns the pivot table will have.
For example I will test the database as following:
SELECT DISTINCT T1.YEAR FROM T1
The SELECT command will return:
2002
2003
2004
2005
So I would like to construct following select:
select T1.WEEK,
SUM (case T1.YEAR when '2002' then T1.PRICE else 0 END) Y_02,
SUM (case T1.YEAR when '2003' then T1.PRICE else 0 END) Y_03,
SUM (case T1.YEAR when '2004' then T1.PRICE else 0 END) Y_04,
SUM (case T1.YEAR when '2005' then T1.PRICE else 0 END) Y_05
from T1
group by T1.week
which will return pivot table with 5 columns:
WEEK, Y_02, Y_03, Y_04, Y_05,
but if the command "SELECT DISTINCT T1.YEAR FROM T1" returns:
2003
2004
I have to construct only following string:
select T1.WEEK,
SUM (case T1.YEAR when '2003' then T1.PRICE else 0 END) Y_03,
SUM (case T1.YEAR when '2004' then T1.PRICE else 0 END) Y_04,
from T1
group by T1.week
which will return pivot table with 3 columns:
WEEK, Y_03, Y_04
Can anyone help and give me a hand or just direct me, how to write a
code which will generate SELECT string depending on the data stored in
the database as I described?
Thanks
Petr McBooCzech
>>> step1result = """2000
... 2001
... 2002
... 2003""".splitlines()
>>> step1result
['2000', '2001', '2002', '2003']
>>> step2query = "Prefix " + ",".join(["Case %s" % year for year in 
step1result]) + " Postfix"
>>> step2query
'Prefix Case 2000,Case 2001,Case 2002,Case 2003 Postfix'
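
Mapped onto your actual query, that becomes (a sketch, using the table and
column names from your post):

>>> years = ['2003', '2004']   # result of SELECT DISTINCT T1.YEAR FROM T1
>>> cases = ",\n".join(
...     "SUM (case T1.YEAR when '%s' then T1.PRICE else 0 END) Y_%s"
...     % (year, year[-2:]) for year in years)
>>> print "select T1.WEEK,\n%s\nfrom T1\ngroup by T1.week" % cases
select T1.WEEK,
SUM (case T1.YEAR when '2003' then T1.PRICE else 0 END) Y_03,
SUM (case T1.YEAR when '2004' then T1.PRICE else 0 END) Y_04
from T1
group by T1.week

(Joining with "," also avoids the stray trailing comma in the hand-written
example above.)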

HTH
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Popularizing SimpleHTTPServer and CGIHTTPServer

2005-02-04 Thread Michael Spencer
Jorey Bump wrote:
> ... Is there a NotSoSimpleHTTPServer? ...
Steve Holden wrote:
> ... You want ExtremelyBloodyComplicatedHTTPServer  :-)
Lee Harr wrote:
... I think I would point to twisted for that.

Michael :-)

--
http://mail.python.org/mailman/listinfo/python-list


Re: returning True, False or None

2005-02-04 Thread Michael Spencer
Steven Bethard wrote:
I have lists containing values that are all either True, False or None, 
e.g.:

[True,  None,  None,  False]
[None,  False, False, None ]
[False, True,  True,  True ]
etc.
For a given list:
* If all values are None, the function should return None.
* If at least one value is True, the function should return True.
* Otherwise, the function should return False.
Right now, my code looks like:
if True in lst:
    return True
elif False in lst:
    return False
else:
    return None
This has a light code smell for me though -- can anyone see a simpler 
way of writing this?

STeVe
max(lst)  ;-)
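
This works because CPython 2.x happens to order these values as
None < False < True:

>>> None < False < True
True
>>> print max([None, None])
None
>>> max([None, False, None])
False
>>> max([None, False, True])
True
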
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: changing local namespace of a function

2005-02-04 Thread Michael Spencer
Bo Peng wrote:
Dear list,
I have many dictionaries with the same set of keys and I would like to 
write a function to calculate something based on these values. For 
example, I have

a = {'x':1, 'y':2}
b = {'x':3, 'y':3}
def fun(dict):
    dict['z'] = dict['x'] + dict['y']
fun(a) and fun(b) will set z in each dictionary as the sum of x and y.
My function and dictionaries are a lot more complicated than these so I 
would like to set dict as the default namespace of fun. Is this 
possible? The ideal code would be:

def fun(dict):
    # set dict as local namespace
    # locals() = dict?
    z = x + y
As you no doubt have discovered from the docs and this group, that isn't doable 
with CPython.

If you must write your functions as real functions, then you might do something 
like this:

 >>> a = {'x':1, 'y':2}
 >>> b = {'x':3, 'y':3}
 ...
 >>> def funa(x, y, **kw):
 ...     del kw    # Careful of unwanted names in locals with this approach
 ...     z = x + y
 ...     return locals()
 ...
 >>> a.update(funa(**a))
 >>> b.update(funa(**b))
 >>> a
{'y': 2, 'x': 1, 'z': 3}
 >>> b
{'y': 3, 'x': 3, 'z': 6}
 >>>
Alternatively, you could use exec:
 >>> a = {'x':1, 'y':2}
 >>> b = {'x':3, 'y':3}
 >>> exec "z = x + y" in globals(), a
 >>> a
{'y': 2, 'x': 1, 'z': 3}
 >>> exec "z = x + y" in globals(), b
 >>> b
{'y': 3, 'x': 3, 'z': 6}
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: changing local namespace of a function

2005-02-04 Thread Michael Spencer
Bo Peng wrote:
Michael Spencer wrote:
 >
There are hundreds of items in the dictionary (that will be needed in 
the calculation) so passing the whole dictionary is a lot better than 
passing individual items.
...
def fun(d):
    exec 'z = x + y' in globals(), d

seems to be more readable than

def fun(d):
    d['z'] = d['x'] + d['y']
But how severe will the performance penalty be?
Try it and see.
Bo
Compare it with Jeff Shannon's suggestion, and with a lazy dict-wrapper 
like this:
 >>> class wrapbigdict(object):
 ...     """Lazy attribute access to dictionary keys.  Will not access
 ...     keys that are not valid attribute names!"""
 ...     def __init__(self, mydict):
 ...         object.__setattr__(self, "mydict", mydict)
 ...     def __getattr__(self, attrname):
 ...         return self.mydict[attrname]
 ...     def __setattr__(self, attrname, value):
 ...         self.mydict[attrname] = value
 ...
 >>> a = {'x':1, 'y':2}
 >>> b = {'x':3, 'y':3}
 ...
 >>> w_a = wrapbigdict(a)
 >>> w_b = wrapbigdict(b)
 ...
 >>> def fun(d):
 ...     d.z = d.x + d.y
 ...
 >>> fun(w_a)
 >>> fun(w_b)
 ...
 >>> w_a.mydict
{'y': 2, 'x': 1, 'z': 3}
 >>> w_b.mydict
{'y': 3, 'x': 3, 'z': 6}
 >>>
--
http://mail.python.org/mailman/listinfo/python-list


Re: returning True, False or None

2005-02-04 Thread Michael Spencer
Fahri Basegmez wrote:
reduce(lambda x, y: x or y, lst)
works but when I tried
import operator
reduce(operator.or_, lst)
this did not work.  It pukes
Traceback (most recent call last):
  File "", line 1, in ?
TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool'
Any comments?
Fahri

TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool'
operator.or_ is "|" i.e., bitwise, not logical or
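
A quick demonstration:

  >>> import operator
  >>> operator.or_(True, False)   # same as True | False
  True
  >>> operator.or_(None, True)    # same as None | True
  Traceback (most recent call last):
  ...
  TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool'
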
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: changing local namespace of a function

2005-02-04 Thread Michael Spencer
Nick Coghlan wrote:
Michael Spencer wrote:
def fun(dict):
  # set dict as local namespace
  # locals() = dict?
  z = x + y

As you no doubt have discovered from the docs and this group, that 
isn't doable with CPython.

Not entirely impossible:
Py> def f(d):
...   exec "locals().update(d)"
...   return x + y
...
Py> f(dict(x=1, y=2))
3
Due to the way 'exec' is implemented, modifications to locals() inside 
an exec statement actually take effect (basically, they're freeloading 
on the code which allows 'exec "x = 1"' to work properly).

This is an evil, evil hack and almost certainly not what anyone should 
be doing. Also, variables created this way will be slower than normal 
variables due to the way the associated code works.

Cheers,
Nick.
Oooh - evil indeed, but thanks for the pointer.
I debated including a link to one of the 'writable locals' threads, when I 
settled on not 'doable', but gambled on being probably useful rather than 
certainly accurate.  Just goes to show you can't get away with anything in this 
NG ;-)

Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: returning True, False or None

2005-02-04 Thread Michael Spencer
Fahri Basegmez wrote:
"Michael Spencer" <[EMAIL PROTECTED]> wrote in message 
news:[EMAIL PROTECTED]

Fahri Basegmez wrote:
reduce(lambda x, y: x or y, lst)
works but when I tried
import operator
reduce(operator.or_, lst)
this did not work.  It pukes
Traceback (most recent call last):
 File "", line 1, in ?
TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool'
Any comments?
Fahri

TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool'
operator.or_ is "|" i.e., bitwise, not logical or
Michael

That explains it.  Is there a logical or we can use with reduce?
Fahri 


Yes, but it's not quite the same as the 'or' operator
 >>> bool.__or__(True, False)
True
 >>> bool.__or__(False, False)
False
 >>> bool.__or__(False, None)
NotImplemented
 >>>
this may not be intentional...
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: changing local namespace of a function

2005-02-05 Thread Michael Spencer
Alex Martelli wrote:
Hmmm, you do realize that wrapdict uses a lot of indirection while my
equivalent approach, just posted, is very direct, right?  To reiterate
the latter, and dress it up nicely too, it's
class wrapwell(object):
def __init__(self, somedict):
self.__dict__ = somedict
Bad mistake on my part, sorry!
Nick Coghlan wrote:
... a class that combined property access with the above...
 
In a similar vein to Nick's solution:
class AutoProperty(object):
    def __init__(self, meth):
        self.meth = meth
        self.name = meth.__name__
        self.__doc__ = meth.__doc__
    def __get__(self, obj, cls):
        if isinstance(obj, cls):
            return obj.__dict__.setdefault(self.name, self.meth(obj))
        else:
            return self.__doc__
    # You could define __set__ and __del__ but they don't seem
    # necessary based on what you've said so far

class DataManipulator(object):
    def __init__(self, data):
        self.__dict__ = data

class Model(DataManipulator):
    def z(self):
        """x+y"""
        return self.x+self.y
    z = AutoProperty(z)
    def z1(self):
        """Or any other useful information"""
        return self.z + self.x
    z1 = AutoProperty(z1)

# You could automate these calls to AutoProperty in a metaclass
 >>> a = {'x':1, 'y':2}
 >>> b = {'x':3, 'y':3}
 >>> d = Model(a)
 >>> d.z
3
 >>> d.z1
4
 >>> a
{'y': 2, 'x': 1, 'z': 3, 'z1': 4}
 >>> d=  Model(b)
 >>> d.z1
9
 >>> b
{'y': 3, 'x': 3, 'z': 6, 'z1': 9}
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: empty classes as c structs?

2005-02-05 Thread Michael Spencer
Alex Martelli wrote:
Nick Coghlan <[EMAIL PROTECTED]> wrote:
   ...
Michael Spencer also posted ...
Wasted indirection, IMHO.  A better implementation:
class attr_view(object):
def __init__(self, data):
self.__dict__ = data
Alex
Indeed!  A complete brain-blip
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: empty classes as c structs?

2005-02-05 Thread Michael Spencer
Steven Bethard wrote:
Nick Coghlan wrote:

class attr_view(object):
def __init__(self, data):
self.__dict__ = data

I think the idea definitely deserves mention as a possible 
implementation strategy in the generic objects PEP, with the data 
argument made optional:

That's basically what the current implementation does (although I use 
'update' instead of '=').  The code is complicated because the 
implementation also takes all the argument types that dicts take.

STeVe
Have you noted the similarity of bunch and types.ModuleType?
Perhaps module.__init__ could take an additional keyword argument to set its
__dict__.
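
For instance, a module object already behaves much like a bunch:

  >>> import types
  >>> bunch = types.ModuleType('bunch')
  >>> bunch.__dict__.update({'a': 1, 'b': 2})
  >>> bunch.a, bunch.b
  (1, 2)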

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Read-only class properties

2005-07-10 Thread Michael Spencer
Bengt Richter wrote:
...
> 
> class Foo(object):
>     class __metaclass__(type):
>         def __setattr__(cls, name, value):
>             if type(cls.__dict__.get(name)).__name__ == 'Descriptor':
>                 raise AttributeError, ('setting Foo.%s to %r is not allowed'
>                                        % (name, value))
>             type.__setattr__(cls, name, value)
>     @classproperty
>     def TheAnswer(cls):
>         return "The Answer according to %s is 42" % cls.__name__
>     @classproperty
>     def AnotherAnswer(cls):
>         return "Another Answer according to %s is 43" % cls.__name__
> 

or, simply put the read-only descriptor in the metaclass:

  Python 2.4 (#60, Nov 30 2004, 11:49:19) [MSC v.1310 32 bit (Intel)] on win32
  Type "help", "copyright", "credits" or "license" for more information.
  >>> def classproperty(function):
  ...     class Descriptor(object):
  ...         def __get__(self, obj, objtype):
  ...             return function(objtype)
  ...         def __set__(self, obj, value):
  ...             raise AttributeError, "can't set class attribute"
  ...     return Descriptor()
  ...
  >>> class A(object):
  ...     class __metaclass__(type):
  ...         @classproperty
  ...         def TheAnswer(cls):
  ...             return "The Answer according to %s is 42" % cls.__name__
  ...
  >>> A.TheAnswer
  'The Answer according to __metaclass__ is 42'
  >>> A.TheAnswer = 3
  Traceback (most recent call last):
File "", line 1, in ?
File "", line 6, in __set__
  AttributeError: can't set class attribute
  >>> class B(A): pass
  ...
  >>> B.TheAnswer
  'The Answer according to __metaclass__ is 42'
  >>>


this means that the getter doesn't automatically get a reference to the class 
(since it is a method of metaclass), which may or may not matter, depending on 
the application

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: redefining a function through assignment

2005-09-08 Thread Michael Spencer
Daniel Britt wrote:
> Hello All,
> I am new to Python so if there is an obvious answer to my question please 
> forgive me. Lets say I have the following code in mod1.py
> 
> class test:
> def func1(self):
> print 'hello'
> 
> 
> Now lets say I have another file called main.py:
> 
> import mod1
> 
> inst = mod1.test()
> inst.func1()
> 
> 
> This will print out hello. Now if I added the following to main:
> def newFunc(var):
> print 'new method'
> 
> mod1.test.func1 = newFunc
> 
> inst.func1()
> 
> 
> This will print out 'new method'. If any other instance of mod1.test is 
> created calling func1, func1 will always reference the newFunc function. 
> This is less than desirable to say the least. Is there any way of preventing 
> this from ever happening? I searched around for quite a while and I haven't 
> been able to find anyone who has a solution. The reason I am asking this is 
> b/c I want to build an application in python that has plugins. I want to 
> make sure that a programmer could not accidently or intentionally clobber 
> over another plugins code, which they could easily do. Any help would be 
> appreciated. Thanks
> 
> ~DJ
> 
> 
The obvious answer is not to give a programmer access to an object that you 
don't want to be messed with.  However, you've probably thought of that...

You could deter (not completely prevent) modification of the class by 
intercepting the __setattr__ of its metaclass:

  >>> class meta_writeonce(type):
  ...     def __setattr__(cls, attrname, val):
  ...         raise TypeError
  ...
  >>> class A(object):
  ...     __metaclass__ = meta_writeonce
  ...     def func(self):
  ...         print "hello from the unmodified class A"
  ...
  >>> A.func = None
  Traceback (most recent call last):
    File "<stdin>", line 1, in ?
    File "<stdin>", line 3, in __setattr__
  TypeError
  >>> a = A()
  >>> a.func()
  hello from the unmodified class A
  >>>

If you want only to deter overwriting existing class attributes, you could do:

  >>> class meta_writeonlyattributes(type):
  ...     def __setattr__(cls, attrname, val):
  ...         if hasattr(cls, attrname):
  ...             raise TypeError
  ...         else:
  ...             type.__setattr__(cls, attrname, val)
  ...
  >>> class B(object):
  ...     __metaclass__ = meta_writeonlyattributes
  ...     def func(self):
  ...         print "hello from the unmodified class B"
  ...
  >>> B.func = None
  Traceback (most recent call last):
    File "<stdin>", line 1, in ?
    File "<stdin>", line 4, in __setattr__
  TypeError
  >>> B.func2 = lambda self: "hello from func2"
  >>> b = B()
  >>> b.func()
  hello from the unmodified class B
  >>> b.func2()
  'hello from func2'
  >>>

This is good enough to prevent accidental 'clobbering', but would not prevent a 
programmer rebinding an attribute deliberately:

  >>> type.__setattr__(B,"func",lambda self: "I've got you now")
  >>> b = B()
  >>> b.func()
  "I've got you now"
  >>>

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: unusual exponential formatting puzzle

2005-09-21 Thread Michael Spencer
Neal Becker wrote:
> Like a puzzle?  I need to interface python output to some strange old
> program.  It wants to see numbers formatted as:
> 
> e.g.: 0.23456789E01
> 
> That is, the leading digit is always 0, instead of the first significant
> digit.  It is fixed width.  I can almost get it with '% 16.9E', but not
> quite.
> 
> My solution is to print to a string with the '% 16.9E' format, then parse it
> with re to pick off the pieces and fix it up.  Pretty ugly.  Any better
> ideas?
> 
> 
Does this do what you want?

  >>> from math import log10, modf, fabs
  >>> def format(n, mantplaces = 9, expplaces = 2):
  ...     sign, n = n/fabs(n), fabs(n)    # preserve the sign
  ...     c, m = modf(log10(n))
  ...     c, m = c - (c>0), m + (c>0)     # redistribute mantissa to exponent
  ...     return "%.*fE%0*d" % (mantplaces, sign * 10**c, expplaces, m)
  ...
  >>>
  >>> def test_format(n):
  ...     for exp in range(-5, 5):
  ...         N = n*(10**exp)
  ...         print format(n*(10**exp))
  ...
  >>> test_format(234.56789)
  0.234567890E-2
  0.234567890E-1
  0.234567890E00
  0.234567890E01
  0.234567890E02
  0.234567890E03
  0.234567890E04
  0.234567890E05
  0.234567890E06
  0.234567890E07
  >>>

Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: unusual exponential formatting puzzle

2005-09-21 Thread Michael Spencer
Michael Spencer wrote:
> Neal Becker wrote:
> 
>>Like a puzzle?  I need to interface python output to some strange old
>>program.  It wants to see numbers formatted as:
>>
>>e.g.: 0.23456789E01
>>
>>That is, the leading digit is always 0, instead of the first significant
>>digit.  It is fixed width.  I can almost get it with '% 16.9E', but not
>>quite.
>>
>>My solution is to print to a string with the '% 16.9E' format, then parse it
>>with re to pick off the pieces and fix it up.  Pretty ugly.  Any better
>>ideas?
>>
>>
> 
> Does this do what you want?

Not if the input is 0 or 1.  Here's a correction, with a more comprehensive
test:

from math import log10, modf, fabs

def format(n, mantplaces = 9, expplaces = 2):
    """Formats n as '0.mEee'"""
    if n:
        sign, absn = n/fabs(n), fabs(n)
        f, i = modf(log10(absn))
        mant, exp = sign * 10 ** (f - (f >= 0)), i + (f >= 0)
    else:
        mant, exp = 0, 0
    return "%.*fE%0*d" % (mantplaces, mant, expplaces, exp)

def test_format(N = 1, step = 1):
    """Verifies format(n) and format(1/n) for -N < n < N"""
    assert format(0, 9) == '0.000000000E00'
    assert format(0, 7, 3) == '0.0000000E000'

    def verify(n):
        DIGITS = '123456789'
        try:
            f = format(n)
            assert round(float(format(n)), 6) == round(n, 6)
            assert f[0] == "-" and f[3] in DIGITS or f[2] in DIGITS
        except AssertionError:
            raise AssertionError("Failed on: %f, formatted as %s" % (n, f))

    for n in xrange(-N, N, step):
        if n:
            verify(n)
            verify(1.0/n)


Michael

-- 
http://mail.python.org/mailman/listinfo/python-list


Re: How do I convert arithemtic string (like "2+2") to a number?

2005-02-06 Thread Michael Spencer
John J. Lee wrote:
"Adomas" <[EMAIL PROTECTED]> writes:

Well, a bit more secure would be
eval(expression, {'__builtins__': {}}, {})
or alike.

Don't believe this without (or even with ;-) very careful thought,
anyone.  Google for rexec.
John
This module provides a more systematic way to set up restricted evaluation:
"""Restricted evaluation
Main entry point: r_eval()
For usage see class tests or run them using testall()"""
import types
import compiler
import operator
import sys, os # used only for testing
ast = compiler.ast
class Eval_Error(Exception):
    def __init__(self, error, descr = None, node = None):
        self.error = error
        self.descr = descr
    def __repr__(self):
        return "%s: %s" % (self.error, self.descr)
    __str__ = __repr__

class AbstractVisitor(object):
    """Standard depth-first AST walker - dispatches to methods
    based on Node class name"""
    def __init__(self):
        self._cache = {}   # dispatch table
    def visit(self, node, **kw):
        if node is None: return None
        cls = node.__class__
        meth = self._cache.setdefault(cls,
            getattr(self, 'visit' + cls.__name__, self.default))
        return meth(node, **kw)
    def default(self, node, **kw):
        for child in node.getChildNodes():
            return self.visit(child, **kw)
    visitExpression = default

class Eval(AbstractVisitor):
    """An AST walker that implements a replacement to built-in eval.
    See r_eval for entry point/usage.
    Provides hooks for managing name resolution, proxying objects,
    and controlling attribute access.
    Does not implement:
        List Comprehensions, Generator Expressions, Lambda
        Ellipsis (can this be used without numpy?)
    """
    def __init__(self, context = globals()):
        super(Eval, self).__init__()
        self.context = context

    # Namespace interface.  Safe implementations should override these methods
    # to implement restricted evaluation.  This implementation simply
    # evals the name in self.context and provides no proxying or
    # attribute lookup restrictions
    def lookup(self, objname):
        """Called only by visitName.  Raise an exception here
        to prevent any direct name resolution, but note that
        objects may be returned by callables or attribute lookups"""
        return eval(objname, self.context)
    def getObject(self, obj):
        """Called by all name resolvers and by CallFunc.  Provides
        a hook for proxying unsafe objects"""
        return obj
    def getAttribute(self, obj, attrname):
        """Called by visitGetattr"""
        return getattr(obj, attrname)
    # End Namespace interface

    # Syntax nodes follow by topic group.  Delete methods to disallow
    # certain syntax.

    # Object constructor nodes
    def visitConst(self, node, **kw):
        return node.value
    def visitDict(self, node, **kw):
        return dict([(self.visit(k), self.visit(v)) for k, v in node.items])
    def visitTuple(self, node, **kw):
        return tuple(self.visit(i) for i in node.nodes)
    def visitList(self, node, **kw):
        return [self.visit(i) for i in node.nodes]
    def visitSliceobj(self, node, **kw):
        return slice(*[self.visit(i) for i in node.nodes])
    def visitEllipsis(self, node, **kw):
        raise NotImplementedError, "Ellipsis"

    # Binary Ops
    def visitAdd(self, node, **kw):
        return self.visit(node.left) + self.visit(node.right)
    def visitDiv(self, node, **kw):
        return self.visit(node.left) / self.visit(node.right)
    def visitFloorDiv(self, node, **kw):
        return self.visit(node.left) // self.visit(node.right)
    def visitLeftShift(self, node, **kw):
        return self.visit(node.left) << self.visit(node.right)
    def visitMod(self, node, **kw):
        return self.visit(node.left) % self.visit(node.right)
    def visitMul(self, node, **kw):
        return self.visit(node.left) * self.visit(node.right)
    def visitPower(self, node, **kw):
        return self.visit(node.left) ** self.visit(node.right)
    def visitRightShift(self, node, **kw):
        return self.visit(node.left) >> self.visit(node.right)
    def visitSub(self, node, **kw):
        return self.visit(node.left) - self.visit(node.right)

    # Unary ops
    def visitNot(self, node, *kw):
        return not self.visit(node.expr)
    def visitUnarySub(self, node, *kw):
        return -self.visit(node.expr)
    def visitInvert(self, node, *kw):
        return ~self.visit(node.expr)
    def visitUnaryAdd(self, node, *kw):
        return +self.visit(node.expr)

    # Logical Ops
    def visitAnd(self, node, **kw):
        return reduce(lambda a, b: a and b, [self.visit(arg) for arg in node.nodes])
    def visitBitand(self, node, **kw):
        return reduce(lambda a, b: a & b, [self.visit(arg) for arg in node.nodes])
    def visitBitor(self, node, **kw):
        return reduce(lambda a, b: a | b, [self.visit(arg) for arg in node.nodes])
    def visitBitxor(self, node, **kw):
        return reduce(lambda a, b: a ^ b, [self.visit(arg) for arg in node.nodes])

Re: empty classes as c structs?

2005-02-06 Thread Michael Spencer
Alex Martelli wrote:
Steven Bethard <[EMAIL PROTECTED]> wrote:

Hmm... interesting.  This isn't the main intended use of 
Bunch/Struct/whatever, but it does seem like a useful thing to have...
I wonder if it would be worth having, say, a staticmethod of Bunch that
produced such a view, e.g.:

class Bunch(object):
...
@staticmethod
def view(data):
result = Bunch()
result.__dict__ = data
return result
Then you could write your code as something like:
gbls = Bunch.view(globals())
I'm probably gonna need more feedback though from people though to know
if this is a commonly desired use case...

Reasonably so, is my guess.  Witness the dict.fromkeys classmethod -- it
gives you, on dict creation, the same kind of nice syntax sugar that
wrapping a dict in a bunch gives you for further getting and setting
(and with similar constraints: all keys must be identifiers and not
happen to clash with reserved words).
I think this ``view'' or however you call it should be a classmethod
too, for the same reason -- let someone handily subclass Bunch and still
get this creational pattern w/o extra work.  Maybe a good factoring
could be something like:
class Bunch(object):
    def __init__(self, *a, **k):
        self.__dict__.update(*a, **k)

    def getDict(self):
        return self.__dict__
    def setDict(self, adict):
        self.__dict__ = adict
    theDict = property(getDict, setDict, None,
                       "direct access to the instance dictionary")

    @classmethod
    def wrapDict(cls, adict, *a, **k):
        result = cls.__new__(cls, *a, **k)
        result.setDict(adict)
        cls.__init__(result, *a, **k)
        return result
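
If I read that right, usage would go something like this (my example, not
Alex's):

  >>> d = {'x': 1}
  >>> b = Bunch.wrapDict(d)
  >>> b.x
  1
  >>> b.y = 2          # writes through to the wrapped dict
  >>> d['y']
  2
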
I'm thinking of use cases where a subclass of Bunch might override
setDict (to do something else in addition to Bunch.setDict, e.g.
maintain some auxiliary data structure for example) -- structuring
wrapDict as a classmethod in a ``Template Method'' DP might minimize the
amount of work, and the intrusiveness, needed for the purpose.  (I don't
have a real-life use case for such a subclass, but it seems to cost but
little to provide for it as a possibility anyway).
[[given the way property works, one would need extra indirectness in
getDict and setDict -- structuring THEM as Template Methods, too -- to
fully support such a subclass; but that's a well-known general issue
with property, and the cost of the extra indirection -- mostly in terms
of complication -- should probably not be borne here, it seems to me]]
Alex
Steven et al
I like the idea of making the 'bunch' concept a little more standard.
I also like the suggestion Nick Coghlan cited (not sure who suggested the term 
in this context) of calling this 'namespace' in part because it can lead to 
easily-explained semantics.

ISTM that 'bunch' or 'namespace' is in effect the complement of vars i.e., while 
vars(object) => object.__dict__, namespace(somedict) gives an object whose 
__dict__ is somedict.

Looked at this way, namespace (or bunch) is a minimal implementation of an 
object that implements the hasattr(object,__dict__) protocol.  The effect of the 
class is to make operations on __dict__ simpler.  namespace instances can be 
compared with any other object that has a __dict__.  This differs from the PEP 
reference implementation which compares only with other bunch instances.  In 
practice, comparisons with module and class may be useful.

The class interface implements the protocol and little else.
For 'bunch' applications, namespace can be initialized or updated with keyword 
args (just like a dict)
i.e.,
 >>> bunch = namespace({"a":1,"b":2})
can also be written as
 >>> bunch = namespace(a=1,b=2)

For dict-wrapping applications:
 >>> wrappeddict = namespace(bigdict)
but, unlike the PEP implmentation, this sets wrappeddict.__dict__ = bigdict
I think that this interface allows for either use case, without introducing 
'fromdict' classmethod.

Some dict-operations e.g., __copy__ might carry over to the namespace class
Michael
An implementation follows:
# An alternative implementation of Steven Bethard's PEP XXX 'bunch' with
# slightly different interface and semantics:
class namespace(object):
    """
    namespace(somedict) => object (with object.__dict__ = somedict)
    NB, complement of vars:  vars(object) => object.__dict__
    namespace objects provide attribute access to their __dict__
    In general, operations on namespace equate to the operations
    on namespace.__dict__
    """
    def __init__(self, E = None, **F):
        """__init__([namespace|dict], **kwds) -> None"""
        if isinstance(E, dict):
            self.__dict__ = E
        elif hasattr(E, "__dict__"):
            self.__dict__ = E.__dict__
        self.__dict__.update(**F)
    # define only magic methods to limit pollution
    def __update__(self, E = None, **F):
        """update([namespace|dict], **kwds) -> None
        equivalent to self.__dict__.update
        with the addition of 

Re: empty classes as c structs?

2005-02-07 Thread Michael Spencer
Nick Coghlan wrote:
Steven Bethard wrote:
It was because these seem like two separate cases that I wanted two 
different functions for them (__init__ and, say, dictview)...
I see this, but I think it weakens the case for a single implementation, given 
that each implementation is essentially one method.
The other issue is that a namespace *is* a mutable object, so the 
default behaviour should be to make a copy

I don't follow this argument.  Why does mutability demand copy?  Given that 
somedict here is either a throwaway (in the classic bunch application ) or a 
dict that must be updated (the wrap-dict case), copying doesn't make much sense 
to me.

OTOH, dict.__init__(dict) copies.  Hmmm.

I think Michael's implementation also fell into a trap whereby 'E' 
couldn't be used as an attribute name. The version below tries to avoid 
this (using magic-style naming for the other args in the methods which 
accept keyword dictionaries).
You're right - I hadn't considered that.  In case it wasn't obvious, I was 
matching the argspec of dict.  Your solution avoids the problem.
To limit the special casing in update, I've switched to only using 
__dict__ for the specific case of instances of namespace
That seems a pity to me.
 (otherwise the
semantics are too hard to explain). This is to allow easy copying of an 
existing namespace - 
Can't this be spelled namespace(somenamespace).__copy__()?
> for anything else, invoking vars() is easy enough.
If there is potential for confusion, I'd be tempted to disallow namespaces as an 
argument to update/__update__

We could use __add__, instead for combining namespaces
And I was reading Carlos's page on MetaTemplate, so I threw in an extra 
class "record" which inherits from namespace and allows complex data 
structures to be defined via class syntax (see the record.__init__ 
docstring for details). That bit's entirely optional, but I thought it 
was neat.
Good idea.  The implementation ought to be tested against several plausible 
specializations.
Finally, I've just used normal names for the functions. I think the 
issue of function shadowing is best handled by recommending that all of 
the functions be called using the class explicitly - this works just as 
well for instance methods as it does for class or static methods.
I don't like the sound of that.  The whole point here - whether as Steven's nice 
straightforward bunch, as originally conceived, or the other cases you and I and 
others have been 'cluttering' the discussion with ;-)  is convenience, and 
readability.  If there are hoops to jump through to use it, then the benefit is 
quickly reduced to zero.

Regards
Michael
Cheers,
Nick.
from types import ClassType

class namespace(object):
    """
    namespace([namespace|dict]) => object
    namespace objects provide attribute access to their __dict__
    Complement of vars:  vars(object) => object.__dict__
    Non-magic methods should generally be invoked via the class to
    avoid inadvertent shadowing by instance attributes
    Using attribute names that look like magic attributes is not
    prohibited but can lead to surprising behaviour.
    In general, operations on namespace equate to the operations
    on namespace.__dict__
    """
    def __init__(__self__, __orig__ = None, **__kwds__):
        """__init__([namespace|dict], **kwds) -> None"""
        type(__self__).update(__self__, __orig__, **__kwds__)

    @classmethod
    def view(cls, orig):
        """namespace.view(dict) -> namespace

        Creates a namespace that is a view of the original
        dictionary. Allows modification of an existing
        dictionary via namespace syntax"""
        new = cls()
        new.__dict__ = orig
        return new

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.__dict__))

    # Recommend calling non-magic methods via class form to
    # avoid problems with inadvertent attribute shadowing
    def _checked_update(self, other):
        try:
            self.__dict__.update(other)
        except (TypeError):
            raise TypeError("Namespace update requires mapping "
                            "keyed with valid Python identifiers")

    def update(__self__, __other__ = None, **__kwds__):
        """type(self).update(self, [namespace|dict], **kwds) -> None
        equivalent to self.__dict__.update"""
        # Handle direct updates
        if __other__ is not None:
            if isinstance(__other__, namespace):
                type(__self__)._checked_update(__self__, __other__.__dict__)
            else:
                type(__self__)._checked_update(__self__, __other__)
        # Handle keyword updates
        if __kwds__ is not None:
            type(__self__)._checked_update(__self__, __kwds__)

class record(namespace):
    def __init__(self, definition=None):
        """record([definition]) -> record
        Constructs a namespace based on the given class definition

Re: empty classes as c structs?

2005-02-07 Thread Michael Spencer
Steven Bethard wrote:
Do you mean there should be a separate Namespace and Bunch class?  Or do 
you mean that an implementation with only a single method is less useful?
The former.
If the former, then you either have to repeat the methods __repr__, 
__eq__ and update for both Namespace and Bunch, or one of Namespace and 
Bunch can't be __repr__'d, __eq__'d or updated.
I see no problem in repeating the methods, or inheriting the implementation. 
However, if namespace and bunch are actually different concepts (one with 
reference semantics, the other with copy), then __repr__ at least would need to 
be specialized, to highlight the difference.

So, on balance, if copy semantics are important to bunch uses, and references 
for namespace (though Nick changed his mind on this, and I don't yet know why) I 
think they would be better as two small implementations.  I remain unsure about 
why you need or want copying, aside from matching the behavior of the builtins.

If the latter (setting aside the fact that the implementation provides 4 
methods, not 1), I would argue that even if an implementation is only 
one method, if enough users are currently writing their own version, 
adding such an implementation to the stdlib is still a net benefit.
Yes, I agree with this: I was not picking on the class size ;-)
...
Another way to avoid the problem is to use *args, like the current Bunch 
implementation does:

def update(*args, **kwargs):
"""bunch.update([bunch|dict|seq,] **kwargs) -> None
Sure - nice trick to avoid shadowing self too
...
Is it that much worse to require the following code:
Namespace.update(namespace, obj.__dict__) 
or:
namespace.udpate(obj.__dict__)

if you really want to update a Namespace object with the attributes of a 
non-Namespace object?
No problem at all - just a question of what the class is optimized for, and 
making the interface as convenient as possible, given the use case.  I agree 
that for straight attribute access to a dictionary, your update interface is 
clearly superior.

For that matter, do you have a use-case for where this would be useful? 
 I definitely see the view-of-a-dict example, but I don't see the 
view-of-an-object example since an object already has dotted-attribute 
style access...

Yes, I have various cases in mind relating to argument-passing, dispatching, 
interface-checking and class composition.  Here the class becomes useful if it 
grows some namespace-specific semantics.

For example, I could write something like:
namespace(obj1) >= namespace(obj2) to mean obj1 has at least the attributes of 
obj2
implemented like:
def __ge__(self, other):
    for attrname in other.__dict__.keys():
        if not attrname in self.__dict__:
            return False
    return True
I realize that interfaces may be addressed formally by a current PEP, but, even 
if they are, this "cheap and cheerful" approach appeals to me for duck-typing.

However, as I think more about this, I realize that I am stretching your concept 
past its breaking point, and that whatever the merits of this approach, it's not 
helping you with bunch.  Thanks for knocking the ideas around with me.

Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: turing machine in an LC

2005-02-08 Thread Michael Spencer
Jeremy Bowers wrote:

I can't figure out how to write a TM in a Python List Comprehension
without one of either "variable binding" (so we can store the last symbol
list and manipulate it in the next iteration) or "recursive function" (to
express the whole tape as a recursive function), both of which require
statements. I can figure out how to write a single state transition, but
in a single LC I can't figure out how to feed the new state into the next
iteration; the previous values generated in the LC are, to my knowledge,
not accessible to the LC as it is running. (If they are, I *think* that
would indeed be enough.)
How about:
>>> import itertools as it
>>> def fact_ge(n):
...     f = [1]
...     f.extend(i*j for i,j in it.izip(xrange(1,1+n), f))
...     return f
...
>>> fact_ge(10)
[1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]
>>>
as a "stateful" genexp?
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: turing machine in an LC

2005-02-08 Thread Michael Spencer
Jeremy Bowers wrote:
On Tue, 08 Feb 2005 17:36:19 +0100, Bernhard Herzog wrote:

Now you *can* get at the previous state and write a state-transition
expression in perfectly legal Python.
What do you know, generator comprehensions are Turing Complete and list
comprehensions aren't. I wouldn't have expected that.
I see no difference between LCs and GEs in this respect:
 >>> import itertools as it
 >>>
 >>> def fact_ge(n):
 ...     f = [1]
 ...     f.extend(i*j for i,j in it.izip(xrange(1,1+n), f))
 ...     return f
 ...
 >>> def fact_lc(n):
 ...     f = [1]
 ...     [f.append(i*j) for i,j in it.izip(xrange(1,1+n), f)]
 ...     return f
 ...
 >>> fact_ge(10)
[1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]
 >>> fact_lc(10)
[1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]
Michael

--
http://mail.python.org/mailman/listinfo/python-list


Re: turing machine in an LC

2005-02-08 Thread Michael Spencer
Jeremy Bowers wrote:
That's not a generator expression, that's a generator function. Nobody
contests they can reference earlier states, that's most of their point :-)

Are you sure?
I just wrote my examples in functions to label them
Here's your example with this method:
 >>> import itertools as it
 >>> results = [0]
 >>> magicGenerator = (i+1 for i,lastresult in it.izip(xrange(5),results))
 >>> results.extend(magicGenerator)
 >>> results
[0, 1, 2, 3, 4, 5]
 >>>
> For context, we're trying to build Turing Completeness into Python without
> indentation. I bailed out of a Xah Lee thread because people have
> probably killed it :-)

Didn't see it, but this looked interesting - presumably your point, and this
is entirely unrelated by now, except in the vague sense he started with an
(I'm sure entirely accidentally) thought-provoking question.
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: turing machine in an LC

2005-02-08 Thread Michael Spencer
Jeremy Bowers wrote:
OK then, I still don't quite see how you can build a Turing Machine in one
LC, but an LC and one preceding list assignment should be possible,
although the resulting list from the LC is garbage; 
Not necessarily garbage - could be anything, say a copy of the results:
 >>> import itertools as it
 >>> results = [0]
 >>> [(results.append(lastresult+1) or lastresult)
 ...  for i, lastresult in it.izip(xrange(5), results)]
[0, 1, 2, 3, 4]
 >>> # ok, missing the 5, but close!

I don't think the assignment is avoidable though.
I should clarify a point I made earlier
I see no difference between LCs and GEs in this respect:
What I meant was that both LCs and GEs can reference their prior state in the 
same way.  Of course, there is an important difference in that the LC returns 
its list as soon as it is executed whereas the executing the genexp returns an 
iterator that can delay the evaluation of all but the outer loop until its 
next() is called.  This makes a genexp equivalent to (at least some) functions, 
and perhaps that was part of your point that I missed.

Michael
--
http://mail.python.org/mailman/listinfo/python-list


A ListComp that maintains its own state (Was: Re: turing machine in an LC)

2005-02-08 Thread Michael Spencer

Jeremy Bowers <[EMAIL PROTECTED]> writes:

On Tue, 08 Feb 2005 17:36:19 +0100, Bernhard Herzog wrote:
Nick Vargish <[EMAIL PROTECTED]> writes:
"Xah Lee" <[EMAIL PROTECTED]> writes:
is it possible to write python code without any indentation?
Not if Turing-completeness is something you desire.

Bernhard Herzog wrote:

a Turing Machine in one line plus assignments - nice!  Turns out that pypy is 
more
verbose than strictly necessary ;-)
...
BTW, I realized that it is indeed possible for a LC to maintain its own state 
without being passed an external mutable.  The trick is to use itertools.repeat 
to return the same mutable object on each iteration.

So, here's factorial in one line:
# state refers to list of state history - it is initialized to [1]
# on any iteration, the previous state is in state[-1]
# the expression also uses the trick of list.append() => None
# to both update the state, and return the last state
>>> import itertools as it
>>> [state.append(state[-1] * symbol) or state[-1]
... for symbol, state in it.izip(range(1,10),it.repeat([1]))
... ]
[1, 2, 6, 24, 120, 720, 5040, 40320, 362880]
>>>
Now, who was claiming that 'reduce' was opaque?
Michael ;-)
--
http://mail.python.org/mailman/listinfo/python-list


Re: A ListComp that maintains its own state (Was: Re: turing machine in an LC)

2005-02-08 Thread Michael Spencer
Carl Banks wrote:
Pay attention, chief.  I suggested this days ago to remove duplicates
from a list.
from itertools import *
[ x for (x,s) in izip(iterable, repeat(set()))
  if (x not in s, s.add(x))[0] ]
;)
Sorry,  I gave up on that thread after the first 10 Million* posts.  Who knows 
what other pearls I may have missed?

Anyway, the good news is that you appear to have identified a new design 
pattern, and will soon become very famous:

According to:
http://www.cmcrossroads.com/bradapp/docs/patterns-nutshell.html#Patterns_What
A "pattern" is ...
* An abstraction from a concrete form which keeps recurring in specific, 
non-arbitrary contexts. [twice in one week]

* A recurring solution to a common problem [perl-python spam] in a given 
context and system of forces.

* A named "nugget" of instructive insight, conveying the essence of a 
proven solution to a recurring problem in a given context amidst competing 
concerns. [who could doubt it?]

* A successfully recurring "best practice" that has proven itself in the 
"trenches". [of this list anyway]

* A literary format for capturing the wisdom and experience of expert 
designers, and communicating it to novices [I think we're 5 for 5]

So, I would get the book out without further delay, before some other 
Johnny-come-lately lays claim.

BTW, Do you have a 1-line-LC-wiki yet?
Michael
* with due respect to Marvin
--
http://mail.python.org/mailman/listinfo/python-list


Re: A ListComp that maintains its own state

2005-02-09 Thread Michael Spencer
Bernhard Herzog wrote:
Michael Spencer <[EMAIL PROTECTED]> writes:

So, here's factorial in one line:
# state refers to list of state history - it is initialized to [1]
# on any iteration, the previous state is in state[-1]
# the expression also uses the trick of list.append() => None
# to both update the state, and return the last state
>>> [state.append(state[-1] * symbol) or state[-1]
... for symbol, state in it.izip(range(1,10),it.repeat([1]))
... ]
[1, 2, 6, 24, 120, 720, 5040, 40320, 362880]
>>>

There's no need for repeat:

[state.append(state[-1] * symbol) or state[-1] 
for state in [[1]]
for symbol in range(1, 10)]
[1, 2, 6, 24, 120, 720, 5040, 40320, 362880]
While we're at it, a while back I posted a list comprehension that
implements a 'recursive' flatten:
http://groups.google.de/groups?selm=s9zy8eyzcnl.fsf%40salmakis.intevation.de
   Bernhard
Much better - that also cleanly extends to any number of initializers.  I also 
like the approach you take in flatten (and as suggested by Carl Banks) of 
putting the update mechanism in the if clause

So that gives:
def factorial(n):
return [state[-1]
for state in [[1]]
for count in xrange(1,n+1)
if state.append(state[-1] * count) or True
]
Probably of limited practical value, but fun to explore the language.
Thanks
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: listerator clonage

2005-02-12 Thread Michael Spencer
Cyril BAZIN wrote:
Hello, 

I want to build a function which returns the values which appear two or
more times in a list:
This is very similar to removing duplicate items from a list which was the 
subject of a long recent thread, full of suggested approaches.
Here's one way to do what you want:

>>> l = [1, 7, 3, 4, 3, 2, 1]
>>> seen = set()
>>> set(x for x in l if x in seen or seen.add(x))
set([1, 3])
>>>
This is a 'generator expression' applied as an argument to the set constructor. 
 It relies on the fact that seen.add returns None, and is therefore always false.

this is equivalent to:
 >>> def _generate_duplicates(iterable):
 ...     seen = set()
 ...     for x in iterable:
 ...         if x in seen: # it's a duplicate
 ...             yield x
 ...         else:
 ...             seen.add(x)
 ...
>>> generator = _generate_duplicates(l)
 >>> generator
 <generator object at 0x...>
>>> set(generator)
set([1, 3])
>>> # In case you want to preserve the order and number of the duplicates, you
>>> # would use a list
>>> generator = _generate_duplicates(l)
>>> list(generator)
[3, 1]
>>>
So, I decided to write a little example which doesn't work:
#l = [1, 7, 3, 4, 3, 2, 1]
#i = iter(l)
#for x in i:
#    j = iter(i)
#    for y in j:
#        if x == y:
#            print x
I thought that the instruction 'j = iter(i)' creates a new iterator 'j'
based on 'i' (some kind of clone). I wrote this little test which shows
that 'j = iter(i)' is the same as 'j = i' (that makes me sad):
I don't think your algorithm would work even if iter(iterator) did return a copy 
or separate iterator.  If, however, you do have an algorithm that needs that 
capability, you can use itertools.tee

Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Hack with os.walk()

2005-02-12 Thread Michael Spencer
Frans Englich wrote:
Hello,
Have a look at this recursive function:
def walkDirectory( directory, element ):
    element = element.newChild( None, "directory", None ) # automatically
                                                          # appends to parent
    element.setProp( "name", os.path.basename(directory))

    for root, dirs, files in os.walk( directory ):
        for fileName in files:
            element.addChild( parseFile( os.path.join( root, fileName )))
        for dirName in filter( acceptDirectory, dirs):
            walkDirectory( os.path.join( root, dirName ), element )
        return ### Note, this is inside for loop
What it does, is it recurses through all directories, and, with libxml2's 
bindings, builds an XML document which maps directly to the file hierarchy. 
For every file is parseFile() called, which returns a "file element" which is 
appended; the resulting structure looks the way one expect -- like a GUI tree 
view.

The current code works, but I find it hackish, and it probably is inefficient, 
considering that os.walk() is not called once(as it usually is), but for 
every directory level.

My problem, AFAICT, with using os.walk() the usual way, is that in order to 
construct the /hierarchial/ XML document, I need to be aware of the directory 
depth, and a recursive function handles that nicely; os.walk() simply 
concentrates on figuring out paths to all files in a directory, AFAICT.

I guess I could solve it with using os.walk() in a traditional way, by somehow 
pushing libxml2 nodes on a stack, after keeping track of the directory levels 
etc(string parsing..). Or, one could write ones own recursive directory 
parser..

My question is: what is the least ugly? What is the /proper/ solution for my 
problem? How would you write it in the cleanest way?

Cheers,
Frans
The path module by Jorendorff: http://www.jorendorff.com/articles/python/path/
wraps various os functions into an interface that can make this sort of thing 
cleaner
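
For instance, a rough sketch of the recursive walk using path objects (untested,
and assuming the OP's parseFile function and libxml2 node API):

from path import path

def walkDirectory(directory, element):
    node = element.newChild(None, "directory", None)
    node.setProp("name", directory.basename())
    for f in directory.files():   # path.files() lists the plain files
        node.addChild(parseFile(f))
    for d in directory.dirs():    # path.dirs() lists the subdirectories
        walkDirectory(d, node)

# walkDirectory(path("/some/root"), element)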

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: check if object is number

2005-02-12 Thread Michael Spencer
Steven Bethard wrote:
Peter Hansen wrote:
Of course, most of the other definitions of "is a number" that
have been posted may likewise fail (defined as not doing what the
OP would have wanted, in this case) with a numarray arange.
Or maybe not.  (Pretty much all of them will call an arange a
number... would the OP's function work properly with that?)

No, but it will fail properly since my code basically looks like:
def f(max=None):
    ...
    while max is None or n <= max:
        ...
        # complicated incrementing of n
So if max is an array, though all of the proposed isnumber checks will 
call it a number, my code will (rightly) fail when the array (n <= max) 
gets __nonzero__ called in the while condition.  I guess I'd prefer it 
to fail in the isnumber check, but overall, I'm more concerned that 
_some_ error is produced, not necessarily which one.  (I'm also not 
thrilled that bool(array) raises a RuntimeError instead of a TypeError...)

Steve
Steve,
How about explicitly calling an adapter in your function, e.g.?
> def f(max=None):
>     max = number(max)
>     while max is None or n <= max:
>         ...
>         # complicated incrementing of n
then you can define number to document the required behavior and return more 
useful exceptions if the object fails.  At the same time, anyone who wants to 
use a custom number class with your function has a ready-made unittest.

 >>> def number(obj):
 ...     """Adapts obj to be numeric, or fails helpfully"""
 ...     if isinstance(obj, (int, float, long, )): # these types conform
 ...         return obj
 ...     elif isinstance(obj, basestring): # these types have a known adaptation
 ...         return int(obj)
 ...     else: # check the object exhibits the required behavior
 ...         try:
 ...             assert obj+1 >= 1
 ...         except Exception, err:
 ...             raise TypeError, "obj does not support addition and comparisons with numbers (%s)" % err
 ...         return obj
 ...
 >>> class MyNumber(object):
 ...     def __init__(self, data):
 ...         self.data = data
 ...     def __add__(self, other):
 ...         return MyNumber(self.data + other)
 ...     def __cmp__(self, other):
 ...         return self.data.__cmp__(other)
 ...
 >>> a = MyNumber(42)
 >>> a is number(a)
True
 >>>
 >>> number(1+2j)
Traceback (most recent call last):
  File "<stdin>", line 1, in ?
  File "<stdin>", line 11, in number
TypeError: obj does not support addition and comparisons with numbers (cannot
compare complex numbers using <, <=, >, >=)
 >>> number(array.array("i",[1]))
Traceback (most recent call last):
  File "<stdin>", line 1, in ?
  File "<stdin>", line 11, in number
TypeError: obj does not support addition and comparisons with numbers (can only
append array (not "int") to array)
 >>>

Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: check if object is number

2005-02-12 Thread Michael Spencer
Steven Bethard wrote:
Michael Spencer wrote:
Steven Bethard wrote:
Peter Hansen wrote:
Of course, most of the other definitions of "is a number" that
have been posted may likewise fail (defined as not doing what the
OP would have wanted, in this case) with a numarray arange.

How about explicitly calling an adapter in your function, e.g.?

Yup, that's basically what I'm doing right now.  The question was really 
how to define that adapter function. =)

Steve
OK - then my entry is:
assert obj+1 >= 1
:-)
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Hack with os.walk()

2005-02-12 Thread Michael Spencer
Tim Peters wrote:
[Frans Englich]
...
[snip]
class HasPath:
    def __init__(self, path):
        self.path = path
    def __lt__(self, other):
        return self.path < other.path

class Directory(HasPath):
    def __init__(self, path):
        HasPath.__init__(self, path)
        self.files = []    # list of File objects
        self.subdirs = []  # list of sub-Directory objects

class File(HasPath):
    pass

[snip]

def build_tree(path, Directory=Directory, File=File):
    top = Directory(path)
    path2dir = {path: top}
    for root, dirs, files in os.walk(path):
        dirobj = path2dir[root]
        for name in dirs:
            subdirobj = Directory(os.path.join(root, name))
            path2dir[subdirobj.path] = subdirobj
            dirobj.subdirs.append(subdirobj)
        for name in files:
            dirobj.files.append(File(os.path.join(root, name)))
    return top
That looks short and sweet to me.  It could be made shorter, but not
without losing clarity to my eyes.
The aforementioned path class makes this even easier.  No need to build the tree
- that is done automatically by the path constructor:

ListingDirectory can then inherit from path.path with few changes:

from path import path
import os

libpath = r"C:\Python24\Lib"

class ListingDirectory(path):
    # Display directory tree as a tree, with 4-space indents.
    # Files listed before subdirectories, both in alphabetical order.
    # Full path displayed for topmost directory, base names for all
    # other entries.  Directories listed with trailing os.sep.
    def display(self, level=0):
        name = self.abspath()          # path method
        if level:
            name = self.basename()     # path method
        print "%s%s%s" % (' ' * level, name, os.sep)
        for f in self.files():
            print "%s%s" % (' ' * (level + 4), f.basename())  # path method
        for d in self.dirs():  # path.dirs returns an iterator over path objects
            ListingDirectory(d).display(level + 4)

mytree = ListingDirectory(libpath)
mytree.display()
[snip about 15000 lines...]
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Iterator / Iteratable confusion

2005-02-13 Thread Michael Spencer
Francis Girard wrote:
"""

Example 8

Running after your tail with itertools.tee
The beauty of it is that recursive running after their tail FP algorithms
are quite straightforwardly expressed with this Python idiom.
"""
# (assumes: from itertools import izip, tee, islice)
def Ex8_Fibonacci():
    print "Entering Ex8_Fibonacci"
    def _Ex8_Fibonacci():
        print "Entering _Ex8_Fibonacci"
        yield 1
        yield 1
        fibTail.next() # Skip the first one
        for n in (head + tail for (head, tail) in izip(fibHead, fibTail)):
            yield n
    fibHead, fibTail, fibRes = tee(_Ex8_Fibonacci(), 3)
    return fibRes
  
print
print sEx8Doc
print
print list(islice(Ex8_Fibonacci(), 5))

Absolutely: ever since you brought up the Hamming sequence I've been interested 
in this approach.  However, if iterators could be extended in place, these 
solutions would be even more attractive.

Here are some examples for infinite series constructed with an extendable 
iterator.  This iterator is returned by an iterable class 'Stream', shown below 
the examples:

def factorial():
    """
    >>> f = factorial()
    >>> f.tolist(10)
    [1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880]
    """
    factorial = Stream([1])
    factorial.extend(factorial * it.count(1))
    return factorial

def fib():
    """Example:
    >>> f = fib()
    >>> f.tolist(10)
    [1, 1, 2, 3, 5, 8, 13, 21, 34, 55]"""
    fib = Stream([1,1])
    fib.extend(x+y for x, y in it.izip(fib, fib[1:]))
    return fib

def multimerge(*iterables):
    """Yields the items in iterables in order, without duplicates"""
    cache = {}
    iterators = map(iter, iterables)
    number = len(iterables)
    exhausted = 0
    while 1:
        for it in iterators:
            try:
                cache.setdefault(it.next(), []).append(it)
            except StopIteration:
                exhausted += 1
                if exhausted == number:
                    raise StopIteration
        val = min(cache)
        iterators = cache.pop(val)
        yield val
def hamming():
    """
    Example:
    >>> h = hamming()
    >>> list(h[20:40])
    [40, 45, 48, 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125,
    128, 135, 144]
    >>> h[10000]
    288555831593533440L
    """
    hamming = Stream([1])
    hamming.extend(i for i in multimerge(2 * hamming, 3 * hamming, 5 * hamming))
    return hamming

def compounds():
    """Extension of Hamming series to compounds of primes(2..13)
    Example:
    >>> c = compounds()
    >>> list(c[20:30])
    [24, 25, 26, 27, 28, 30, 32, 33, 35, 36]"""
    compounds = Stream([1])
    compounds.extend(i for i in multimerge(2 * compounds, 3 * compounds,
        5 * compounds, 7 * compounds, 9 * compounds, 11 * compounds,
        13 * compounds))
    return compounds

# Stream class for the above examples:
import itertools as it
import operator as op

class Stream(object):
    """Provides an independent iterator (using tee) on every iteration request
    Also implements lazy iterator arithmetic"""

    def __init__(self, *iterables, **kw):
        """iterables: tuple of iterables (including iterators).  A sequence of
        iterables will be chained
        kw: not used in this base class"""
        self.queue = list(iterables)
        self.itertee = it.tee(self._chain(self.queue))[0] # We may not need
                                                          # this in every case

    def extend(self, other):
        """extend(other: iterable) => None
        appends iterable to the end of the Stream instance
        """
        self.queue.append(other)

    def _chain(self, queue):
        while queue:
            for i in self.queue.pop(0):
                self.head = i
                yield i

    # Iterator methods:
    def __iter__(self):
        """Normal iteration over the iterables in self.queue in turn"""
        return self.itertee.__copy__()

    def _binop(self, other, op):
        """See injected methods - __add__, __mul__ etc.."""
        if hasattr(other, "__iter__"):
            return (op(i, j) for i, j in it.izip(self, other))
        else:
            return (op(i, other) for i in self)

    def __getitem__(self, index):
        """__getitem__(index: integer | slice)
        index: integer => element at position index
        index: slice
        if slice.stop is given => Stream(it.islice(iter(self),
        index.start, index.stop, index.step or 1)))
        else: consumes self up to start then => Stream(iter(self))
        Note slice.step is ignored in this case
        """
        if isinstance(index, slice):
            if index.stop:
                return (it.islice(iter(self),
                        index.start or 0, index.stop, index.step or 1))
            else:
                iterator = iter(self)
                for i in range(index.start):
                    iterator.next()
                return iterator
        else:
            return it.islice(iter(self), index, index + 1).next()
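
The arithmetic special methods that _binop's docstring refers to (and the tolist
used in the doctests) are not shown above - the post appears truncated here.  A
sketch of one way they might be supplied (an assumption, not the original code):

def tolist(self, n):
    """Return the first n elements as a list (assumed helper)"""
    return list(it.islice(iter(self), n))
Stream.tolist = tolist

# Inject the lazy arithmetic; add and mul are commutative, so the same
# implementation serves the reflected versions too (e.g. 2 * hamming)
for _name in ("add", "mul"):
    def _method(self, other, _op=getattr(op, _name)):
        return self._binop(other, _op)
    setattr(Stream, "__%s__" % _name, _method)
    setattr(Stream, "__r%s__" % _name, _method)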

Re: builtin functions for and and or?

2005-02-13 Thread Michael Spencer
Roose wrote:
Yeah, as we can see there are a million ways to do it.  But none of them are
as desirable as just having a library function to do the same thing.  I'd
argue that since there are so many different ways, we should just collapse
them into one: any() and all().  That is more in keeping with the python
philosophy I suppose -- having one canonical way to do things.  Otherwise
you could see any of these several ways of doing it in any program, and each
time you have to make sure it's doing what you think.  Each of them requies
more examination than is justified for such a trivial operation.  And this
definitely hurts the readability of the program.

Previous discussion on this topic:
http://groups-beta.google.com/group/comp.lang.python/msg/a76b4c2caf6c435c
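
For reference, minimal sketches of the two functions as usually proposed:

def any(iterable):
    """True if at least one element of iterable is true"""
    for element in iterable:
        if element:
            return True
    return False

def all(iterable):
    """True if every element of iterable is true"""
    for element in iterable:
        if not element:
            return False
    return True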
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Iterator / Iteratable confusion

2005-02-13 Thread Michael Spencer

"Francis Girard" <[EMAIL PROTECTED]> wrote in message 
an "iterator" doesn't have to support the "__iter__" method

Terry Reedy wrote:
Yes it does.  iter(iterator) is iterator is part of the iterater protocol 
for the very reason you noticed...

But, notwithstanding the docs, it is not essential that
iter(iterator) is iterator
 >>> class A(object):
 ...     def __iter__(self):
 ...         return AnIterator()
 ...
 >>> class AnIterator(object): # an iterator that copies itself
 ...     def next(self):
 ...         return "Something"
 ...     def __iter__(self):
 ...         return AnIterator()
 ...
 >>> a = A()
 >>> i = iter(a)
 >>> i.next()
 'Something'
 >>> j = iter(i)
 >>> j.next()
 'Something'
 >>> i is j
 False
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: builtin functions for and and or?

2005-02-13 Thread Michael Spencer
Roose wrote:
Previous discussion on this topic:
http://groups-beta.google.com/group/comp.lang.python/msg/a76b4c2caf6c435c
Michael

OK, well then.  That's really the exact same thing, down to the names of the
functions.  So what ever happened to that?  

I don't recall: probably 
http://www.google.com/search?sourceid=mozclient&ie=utf-8&oe=utf-8&q=alltrue+site%3Amail.python.org+python-dev

would lead you to the answer
> That was over a year ago!  I don't see any mention of it in PEP 289?
No, PEP289 was for generator expressions - the any/all discussion arose as one 
application of those/itertools

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: nested lists as arrays

2005-02-14 Thread Michael Spencer
naturalborncyborg wrote:
Hi, I'm using nested lists as arrays and having some problems with
that approach. In my puzzle class there is a swapelement method which
doesn't work out.
What "doesn't work out"?  On casual inspection that method seems to "work":
 >>> p = Puzzle(2)
 >>> p.elements[0][0] = 1
 >>> p.elements[1][1] = 2
 >>> p.elements
[[1, 0], [0, 2]]
 >>> p.swapElements(0,0,1,1)
 >>> p.elements
[[2, 0], [0, 1]]
 >>>
What should it do instead?
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: nested lists as arrays

2005-02-14 Thread Michael Spencer
Terry Reedy wrote:
<[EMAIL PROTECTED]> wrote in message 
news:[EMAIL PROTECTED]

    def setRandomState(self):
        # container for the elements to pick from
        container = [1,2,3,4,5,6,7,8,-1]
        # create elements of puzzle randomly
        i = 0
        j = 0
        while i <= self.dim-1:
            while j <= self.dim-1:
                if len(container) > 0:
                    randomindex = random.randint(0,len(container)-1)
                    self.elements[j][i] = container[randomindex]
                    del container[randomindex]
                    j=j+1
                else:
                    break
            j=0
            i=i+1

Without reading closely, I believe that the above can generate any possible 
position.  Are you aware that half are unsolvable?  If that matters, you 
need to either find a book or site that explains the parity test for 
solvability or generate the start position from the goal position by a 
series of random moves.

Terry J. Reedy
This covers the test for solvability - enjoy ;-): 
http://www.cs.tcd.ie/publications/tech-reports/reports.01/TCD-CS-2001-24.pdf
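
A sketch of that parity test, phrased as an invariant (for the 4x4 case: a
horizontal move changes nothing, while a vertical move flips both the blank's
row parity and the inversion parity, so their sum is conserved, and positions
split into exactly two reachability classes):

def same_parity(cells_a, cells_b, dim=4):
    """True if position a can reach position b (flat lists, blank encoded as 0)"""
    def parity(cells):
        tiles = [c for c in cells if c]      # drop the blank
        inversions = sum(1 for i, a in enumerate(tiles)
                           for b in tiles[i+1:] if a > b)
        return (inversions + cells.index(0) // dim) % 2
    return parity(cells_a) == parity(cells_b)

e.g. same_parity(p.cells, range(16)) tests a shuffled grid (as in the n2grid
class below) against its home position.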

BTW, just because your puzzle looks like a grid doesn't neceesarily mean that 
representing the data as nested arrays is easiest.  A flat list might be just as 
good here.  It simplifies some of the operations (creating a random ordering 
becomes a one-liner), at the expense of a little more complexity in some others:

import random

class n2grid(object):
    """A grid for the n squared puzzle"""
    def __init__(self, dim=4):
        self.cells = range(dim*dim)
        self.dim = dim
        self.pos = (0,0)
    def shuffle(self):
        random.shuffle(self.cells)
        self.pos = divmod(self.cells.index(0), self.dim)
    def show(self):
        for row in self._asarray():
            print "".join("[%2s]" % (cell or "") for cell in row)
    def _move(self, dy, dx):
        dim = self.dim
        cells = self.cells
        oldy, oldx = self.pos
        newy, newx = oldy + dy, oldx + dx
        if 0 <= newx < dim and 0 <= newy < dim:
            ix = newy * dim + newx
            ox = oldy * dim + oldx
            cells[ix], cells[ox] = cells[ox], cells[ix]
            self.pos = newy, newx
        else:
            raise Exception, "Illegal move to: (%s,%s)" % (newy, newx)
    def move(self, dy, dx):   # note: row offset first, matching _move
        try:
            self._move(dy, dx)
            self.show()
        except:
            pass
    def _asarray(self):  # create the array representation when needed
        cells = iter(self.cells)
        dim = self.dim
        return [[cells.next() for j in range(dim)] for i in range(dim)]
    def __repr__(self):
        return repr(self._asarray())

 >>> p = n2grid()
 >>> p.show()
 ...
[  ][ 1][ 2][ 3]
[ 4][ 5][ 6][ 7]
[ 8][ 9][10][11]
[12][13][14][15]
 >>> p.shuffle()
 >>> p.show()
[ 3][15][ 6][ 7]
[10][  ][12][ 5]
[ 4][ 1][14][ 8]
[ 2][11][13][ 9]
 >>> p.move(1,1)
[ 3][15][ 6][ 7]
[10][14][12][ 5]
[ 4][ 1][  ][ 8]
[ 2][11][13][ 9]
 >>> p.move(1,0)
[ 3][15][ 6][ 7]
[10][14][12][ 5]
[ 4][ 1][13][ 8]
[ 2][11][  ][ 9]
 >>> p.move(1,0) # illegal (does nothing)
 >>>
Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Iterator / Iteratable confusion

2005-02-15 Thread Michael Spencer
Michael Spencer wrote:
But, notwithstanding the docs, it is not essential that
iter(iterator) is iterator
Terry Reedy wrote:
> iter(iterator) is iterator is part of the iterater protocol
>
[...]I interpret [your post] as saying three things:
1. "There is more than one possible definition of 'iterator'."
Terry, thanks for responding in depth.

2. "It is not essential to not do something wasteful as long as it is 
otherwise inconsequential."
Not that "iter(iterator) is iterator" is somehow wasteful (actually it seems
conservative), but rather that alternative behavior is readily implmented.  You 
point out, reasonably, that if I do that, then what I get is not then an 
iterator, because it fails to conform with the protocol.

However, I suggest that there may be cases where "iter(iterator) is not 
iterator" is useful behavior.  What to call such an object is another matter.

For example, consider:
import itertools as it

def tee2(iterable):
    class itertee(object):
        def __init__(self, iterator):
            self.iterator = iterator
        def __iter__(self):
            return itertee(self.iterator.__copy__())
        def next(self):
            return self.iterator.next()
    return itertee(it.tee(iterable, 1)[0])
This returns an itertee instance which simply wraps the tee iterator returned by 
 itertools.  However iter(itertee instance) returns a copy of its iterator.  So 
this object creates as many independent iterators over iterable as are required.

In an earlier post in this thread, I included several examples of generating 
infinite series using iterator-copying like this.  I implemented the copying as 
a method of a containing iterable 'Stream', rather than of the iterators 
themselves, partly to respect the 'iterator protocol'.


3. "You can substitute a copy of an object that is never mutated for the 
object itself."

This was not my intended point, although I accept that my example was too 
abstract.
Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Can __new__ prevent __init__ from being called?

2005-02-15 Thread Michael Spencer
Peter Hansen wrote:
Felix Wiemann wrote:
Sometimes (but not always) the __new__ method of one of my classes
returns an *existing* instance of the class.  However, when it does
that, the __init__ method of the existing instance is called
nonetheless, so that the instance is initialized a second time.  For
example, please consider the following class (a singleton in this case):
[snip]
How can I prevent __init__ from being called on the already-initialized
object?

Is this an acceptable kludge?
 >>> class C(object):
 ...     instance = None
 ...     def __new__(cls):
 ...         if C.instance is None:
 ...             print 'creating'
 ...             C.instance = object.__new__(cls)
 ...         else:
 ...             cls.__init__ = lambda self: None
 ...         return cls.instance
 ...     def __init__(self):
 ...         print 'in init'
 ...
 >>> a = C()
creating
in init
 >>> b = C()
 >>>
(Translation: dynamically override now-useless __init__ method.
But if that works, why do you need __init__ in the first place?)
-Peter
Or this one: use an alternative constructor:
class C(object):
    instance = None
    @classmethod
    def new(cls, *args, **kw):
        if cls.instance is None:
            print 'Creating instance.'
            cls.instance = object.__new__(cls)
            print 'Created.'
            cls.instance.__init__(*args, **kw)
        return cls.instance
    def __init__(self):
        print 'In init.'
 >>> c = C.new()
Creating instance.
Created.
In init.
 >>> c = C.new()
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: renaming 'references' to functions can give recursive problems

2005-02-16 Thread Michael Spencer
peter wrote:
Hello, nice solution:
but it puzzles me :)
can anyone tell me why
---correct solution
def fA(input):
  return input
def newFA(input, f= fA):
   return f(input)
fA = newFA
is correct and:
-infinite loop-
def fA(input):
  return input
def newFA(input):
   return fA(input)
In newFA, fA is not bound until you call newFA.  By which time you've re-bound
fA to newFA, causing the recursion.  In the 'correct' solution above, f is bound
to the original fA function at the time the def newFA statement is executed,
which is what you want.

fA = newFA
gives an infinite recursive loop?
kind regards
Peter
Regards
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: sampling items from a nested list

2005-02-16 Thread Michael Spencer
Steven Bethard wrote:
So, I have a list of lists, where the items in each sublist are of 
basically the same form.  It looks something like:

...
Can anyone see a simpler way of doing this?
Steve
You just make these up to keep us amused, don't you? ;-)
If you don't need to preserve the ordering, would the following work?:
 >>> data = [[('a', 0),
 ...  ('b', 1),
 ...  ('c', 2)],
 ...
 ... [('d', 2),
 ...  ('e', 0)],
 ...
 ... [('f', 0),
 ...  ('g', 2),
 ...  ('h', 1),
 ...  ('i', 0),
 ...  ('j', 0)]]
 ...
 >>> def resample2(data):
 ...     bag = {}
 ...     random.shuffle(data)
 ...     return [[(item, label)
 ...              for item, label in group
 ...              if bag.setdefault(label,[]).append(item)
 ...                 or len(bag[label]) < 3]
 ...             for group in data if not random.shuffle(group)]
 ...
 >>> resample2(data)
[[('a', 0), ('c', 2), ('b', 1)], [('h', 1), ('g', 2), ('i', 0)], []]
 >>> resample2(data)
[[('h', 1), ('f', 0), ('j', 0), ('g', 2)], [('b', 1), ('c', 2)], []]
 >>> resample2(data)
[[('e', 0), ('d', 2)], [('i', 0), ('h', 1), ('g', 2)], [('b', 1)]]
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Iterator / Iteratable confusion

2005-02-16 Thread Michael Spencer
Terry Reedy wrote:
"Michael Spencer" <[EMAIL PROTECTED]> wrote in message 

We are both interested in the murky edges at and beyond conventional usage.
...
I am quite aware that multiple iterators for the same iterable (actual or 
conceptual) can be useful (cross products, for example).  But I am dubious 
that initialized clones of 'iterators' are *more* useful, especially for 
Python, than multiple iterators derived from repeated calling of the 
callable that produced the first iterator.
I'm not sure they are.  In the one 'real' example I posted on infinite series, I 
implemented the approach you advocate here.  But I'm keeping copyable iterators 
in mind.


Here are some related reasons why I think it useful if not essential to 
restrict the notion of iterator by restricting  iterator.__iter__ to 
returning self unmodified.

Leaving Python aside, one can think of iterable as something that 
represents a collection and that can produce an iterator that produces the 
items of the collection one at a time.  In this general conceptioning, 
iterables and iterators seem distinct (if one ignores self-iterables).
The separation is appealing, but blurrier in practice, I believe.  Neither 
itertools.cycle nor itertools.tee fits cleanly into this model.  Neither do the 
self-iterables, as you point out.

... giving iterators an __iter__ method, while quite useful, erases 
(confuses) the (seeming) distinction, but giving them a minimal __iter__ 
does so minimally, keeping iterators a distinct subcategory of iterable. 
Iterators that could not be presented to other functions for filtering or 
whatnot would be pretty limited. Unless every iterator is to be derived from 
some special-cased object, how could they not have an __iter__ method?
I accept your point that keeping the functionality of iterator.__iter__ minimal 
and predicatable limits the confusion between iterators and iterables.  But 
since that distinction is already blurred in several places, I don't find that 
argument alone decisive.

> ...
Taking Python as it is, a useful subcategory of iterable is 'reiterable'. 
This is distinct from iterator strictly defined.
What about itertools.cycle?  Not strictly an iterator?
Thus we have iterables
divided into iterators, reiterables, and other.  I think this is 
didactically useful.  Spencerators are reiterables.
They may be: they are no more and no less than a thought experiment in which 
iterator.__iter__ does not return self unmodified.

Iter(iterator) returning iterator unchanged makes iterator a fixed point of 
iter.  It ends any chain of objects returned by repeated iter calls. 
Spencerators prolong any iter chain, making it infinite instead of finite. 
Essential?  Repeat the paragraph above with 'a fixed point' substituted for 
'minimal'.

I don't understand this point except in the loosest sense that deviating from
the iterator protocol makes it harder to reason about the code.  Do you mean 
something more specific?

I have been thinking about iterator.__iter__ rather like object.__new__.  Not 
returning a new instance may be surprising and inadvisable in most cases.  But 
still there are accepted uses for the technique.  Do you think these cases are 
comparable?

Do you see the iterator protocol as the vanguard of a new set of python 
protocols that are more semantically restictive than the "mapping, container, 
file-like object etc..." interfaces?  Defining iterator method semantics 
strictly seems like a departure from the existing situation.

Cheers
Michael


--
http://mail.python.org/mailman/listinfo/python-list


Re: Iterator / Iteratable confusion

2005-02-16 Thread Michael Spencer
Adam DePrince wrote:

How is a spencerator [an iterator that doesn't return itself unmodified on iter]
> different than itertools.tee?

Taking your question literally, it changes the behavior of an itertools.tee 
object 'tee', so that iter(tee) returns tee.__copy__(), rather than tee itself.

It was created for rhetorical purposes and has no known practical application.
Depending on your point of view it is evidence either for (a) why the iterator 
protocol must be strictly adhered to, or (b) that iterators and iterables cannot 
be disjoint sets.

Michael

--
http://mail.python.org/mailman/listinfo/python-list


Re: renaming 'references' to functions can give recursive problems

2005-02-16 Thread Michael Spencer
peter wrote:
brain reset and understood 

thx a lot for all your answers
Peter
Now that you've got reset, you might want to consider an alternative solution:
def fA(input):
  return input
oldfA = fA   # Hold a reference to the the old function
def newFA(input):
   "Do something new"
   return oldfA(input)
fA = newFA
The advantage of this is that you don't need to change the function newFA at all
when you're ready to rename it.

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: sampling items from a nested list

2005-02-16 Thread Michael Spencer
Michael Spencer wrote:
 >>> def resample2(data):
 ... bag = {}
 ... random.shuffle(data)
 ... return [[(item, label)
 ... for item, label in group
 ... if bag.setdefault(label,[]).append(item)
 ... or len(bag[label]) < 3]
 ...for group in data if not 
...which failed to calculate the minimum count of labels.  Try this instead
(while I was at it, I removed the insane LC):

 >>> def resample3(data):
 ...     bag = {}
 ...     sample = []
 ...     labels = [label for group in data for item, label in group]
 ...     min_count = min(labels.count(label) for label in set(labels))
 ...     random.shuffle(data)
 ...     for subgroup in data:
 ...         random.shuffle(subgroup)
 ...         subgroupsample = []
 ...         for item, label in subgroup:
 ...             bag.setdefault(label,[]).append(item)
 ...             if len(bag[label]) <= min_count:
 ...                 subgroupsample.append((item,label))
 ...         sample.append(subgroupsample)
 ...     return sample
 ...
 >>>
Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: sampling items from a nested list

2005-02-16 Thread Michael Spencer
Steven Bethard wrote:
Michael Spencer wrote:
Steven Bethard wrote:
So, I have a list of lists, where the items in each sublist are of 
basically the same form.  It looks something like:

...
Can anyone see a simpler way of doing this?
Steve

You just make these up to keep us amused, don't you? ;-)

Heh heh.  I wish.  It's actually about resampling data read in the 
Yamcha data format:

http://chasen.org/~taku/software/yamcha/
So each sublist is a "sentence" and each tuple is the feature vector for 
a "word".  The point is to even out the number of positive and negative 
examples because support vector machines typically work better with 
balanced data sets.

If you don't need to preserve the ordering, would the following work?:
[snip]
 >>> def resample2(data):
 ... bag = {}
 ... random.shuffle(data)
 ... return [[(item, label)
 ... for item, label in group
 ... if bag.setdefault(label,[]).append(item)
 ... or len(bag[label]) < 3]
 ...for group in data if not 
random.shuffle(group)]

It would be preferable to preserve ordering, but it's not absolutely 
crucial.  Thanks for the suggestion!

STeVe
Maybe combine this with a DSU pattern?  Not sure whether the result would be 
better than what you started with

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: check if object is number

2005-02-17 Thread Michael Spencer
Christos TZOTZIOY Georgiou wrote:
On Sat, 12 Feb 2005 16:01:26 -0800, rumours say that Michael Spencer
<[EMAIL PROTECTED]> might have written:

Yup, that's basically what I'm doing right now.  The question was really 
how to define that adapter function. =)

Steve
OK - then my entry is:
assert obj+1 >= 1
:-)

So -1 is not a number.
At least not a legal one for Steven's function as I understood it
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Alternative to standard C "for"

2005-02-17 Thread Michael Spencer
James Stroud wrote:

It seems I need constructs like this all of the time
i = 0
while i < len(somelist):
  if oughta_pop_it(somelist[i]):
somelist.pop(i)
  else:
i += 1
There has to be a better way...
Do you have to modify your list in place?
If not, just create a copy with the filtered items:
somelist = [item for item in somelist if not oughta_pop_it(item)]
or you could use filter or itertools.ifilter to do much the same thing
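
If the list really must be modified in place (say, because other code holds a
reference to it), a slice assignment combines the two - a sketch, with a
stand-in predicate:

 >>> def oughta_pop_it(item):
 ...     return item % 2      # hypothetical predicate: pop the odd ones
 ...
 >>> somelist = range(10)
 >>> somelist[:] = [item for item in somelist if not oughta_pop_it(item)]
 >>> somelist
 [0, 2, 4, 6, 8]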
Michael

--
http://mail.python.org/mailman/listinfo/python-list


Re: How to wrap a class's methods?

2005-02-17 Thread Michael Spencer
Grant Edwards wrote:
On 2005-02-17, Steven Bethard <[EMAIL PROTECTED]> wrote:

py> class C(object):
...     def f(self, *args):
...         print "f:", args
...     def g(self, *args):
...         print "g:", args
...
py> class D(C):
...     pass
...
py> class Wrapper(object):
...     def __init__(self, func):
...         self.func = func
...     def __call__(self, *args):
...         print "wrapped"
...         return self.func(*args)
...
py> for name in ['f', 'g']:
...     wrapper = Wrapper(getattr(C, name))
...     setattr(D, name, new.instancemethod(wrapper, None, D))

Thanks.  The stuff provided by the "new" module is what I was
missing.
No magic in the 'new' module - new.instancemethod is just a synonym for the 
method type:

 >>> import new, types
 >>> new.instancemethod is types.MethodType
True
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: How to wrap a class's methods?

2005-02-17 Thread Michael Spencer
John Lenton wrote:
On Thu, Feb 17, 2005 at 07:32:55PM +, Grant Edwards wrote:

I'd usually put big fat warnings around this code, and explain exaclty
why I need to do things this way...

As a low-tech alternative, what about sourcecode generation, since you are 
targetting a python module?  This gives two advantages vs the wrapping function: 
1) the magic all occurs at coding time 2) the method signatures are documented.

Michael
import imaplib
import inspect
import types
instancemethod = types.MethodType
# The function template
funcwrapper = \
"""
def %(name)s%(argspec)s:
s,r = imaplib.IMAP4_SSL.%(name)s%(callspec)s
if s!='OK':
raise NotOK((s,r))
return r"""
# A helper function to get the template parameters
def getargs(method):
argspec = inspect.getargspec(method)
callspec = tuple(argspec[:3] + (None,))# No default
return {"name": method.__name__,
"argspec": inspect.formatargspec(*argspec),
"callspec": inspect.formatargspec(*callspec)}
# Do the stuff manually:
 >>> obj = imaplib.IMAP4_SSL
 >>> attrnames = [meth for meth in dir(imaplib.IMAP4_SSL) if not 
meth.startswith("_")]
 >>> attributes = [getattr(obj, attrname) for attrname in attrnames]
 >>> methods = [attribute for attribute in attributes if 
inspect.ismethod(attribute)]
 >>> print  "\n".join(funcwrapper % getargs(method) for method in methods)

def append(self, mailbox, flags, date_time, message):
    s,r = imaplib.IMAP4_SSL.append(self, mailbox, flags, date_time, message)
    if s!='OK':
        raise NotOK((s,r))
    return r
def authenticate(self, mechanism, authobject):
    s,r = imaplib.IMAP4_SSL.authenticate(self, mechanism, authobject)
    if s!='OK':
        raise NotOK((s,r))
    return r
def check(self):
    s,r = imaplib.IMAP4_SSL.check(self)
    if s!='OK':
        raise NotOK((s,r))
    return r
def close(self):
    s,r = imaplib.IMAP4_SSL.close(self)
    if s!='OK':
        raise NotOK((s,r))
    return r
def copy(self, message_set, new_mailbox):
    s,r = imaplib.IMAP4_SSL.copy(self, message_set, new_mailbox)
    if s!='OK':
        raise NotOK((s,r))
    return r
def create(self, mailbox):
    s,r = imaplib.IMAP4_SSL.create(self, mailbox)
    if s!='OK':
        raise NotOK((s,r))
    return r
def delete(self, mailbox):
    s,r = imaplib.IMAP4_SSL.delete(self, mailbox)
    if s!='OK':
        raise NotOK((s,r))
    return r
def deleteacl(self, mailbox, who):
    s,r = imaplib.IMAP4_SSL.deleteacl(self, mailbox, who)
    if s!='OK':
        raise NotOK((s,r))
    return r
def expunge(self):
    s,r = imaplib.IMAP4_SSL.expunge(self)
    if s!='OK':
        raise NotOK((s,r))
    return r
def fetch(self, message_set, message_parts):
    s,r = imaplib.IMAP4_SSL.fetch(self, message_set, message_parts)
    if s!='OK':
        raise NotOK((s,r))
    return r
def getacl(self, mailbox):
    s,r = imaplib.IMAP4_SSL.getacl(self, mailbox)
    if s!='OK':
        raise NotOK((s,r))
    return r
def getquota(self, root):
    s,r = imaplib.IMAP4_SSL.getquota(self, root)
    if s!='OK':
        raise NotOK((s,r))
    return r
def getquotaroot(self, mailbox):
    s,r = imaplib.IMAP4_SSL.getquotaroot(self, mailbox)
    if s!='OK':
        raise NotOK((s,r))
    return r
def list(self, directory='""', pattern='*'):
    s,r = imaplib.IMAP4_SSL.list(self, directory, pattern)
    if s!='OK':
        raise NotOK((s,r))
    return r
def login(self, user, password):
    s,r = imaplib.IMAP4_SSL.login(self, user, password)
    if s!='OK':
        raise NotOK((s,r))
    return r
def login_cram_md5(self, user, password):
    s,r = imaplib.IMAP4_SSL.login_cram_md5(self, user, password)
    if s!='OK':
        raise NotOK((s,r))
    return r
def logout(self):
    s,r = imaplib.IMAP4_SSL.logout(self)
    if s!='OK':
        raise NotOK((s,r))
    return r
def lsub(self, directory='""', pattern='*'):
    s,r = imaplib.IMAP4_SSL.lsub(self, directory, pattern)
    if s!='OK':
        raise NotOK((s,r))
    return r
def myrights(self, mailbox):
    s,r = imaplib.IMAP4_SSL.myrights(self, mailbox)
    if s!='OK':
        raise NotOK((s,r))
    return r
def namespace(self):
    s,r = imaplib.IMAP4_SSL.namespace(self)
    if s!='OK':
        raise NotOK((s,r))
    return r
def noop(self):
    s,r = imaplib.IMAP4_SSL.noop(self)
    if s!='OK':
        raise NotOK((s,r))
    return r
def open(self, host='', port=993):
    s,r = imaplib.IMAP4_SSL.open(self, host, port)
    if s!='OK':
        raise NotOK((s,r))
    return r
def partial(self, message_num, messa

Re: Solution for architecure dependence in Numeric ?

2005-02-18 Thread Michael Spencer
"Johannes Nix|Johannes.Nix"@uni-oldenburg.de wrote:
Hi,
I have a tricky problem with Numeric. Some time ago, I have generated
a huge and complex data structure, and stored it using the cPickle
module. Now I want to evaluate it quickly again on a workstation
cluster with 64-Bit Opteron CPUs - I have no more than three days to
do this. Compiling Python and running Numeric has been no problem at
all. However, I get an error message when accessing the data pickled
before. (I can load it regularly on 32 bit computers, but it is a
quite complex data object, so I really don't want to store every
element as ASCII data).  The problem seems to be with 64 Bit integers
(with 32-bit-floats, no problem was observed).
This looks like that (from the Unix command shell):
[EMAIL PROTECTED]:~> python ~/python/test_npickle.py  -dump test.pck
[EMAIL PROTECTED]:~> python ~/python/test_npickle.py  test.pck
[0 1 2 3 4 5 6 7 8 9]
[EMAIL PROTECTED]:~> ssh 64bithost python ~/python/test_npickle.py  test.pck
Traceback (most recent call last):
  File "/home/jnix/python/test_npickle.py", line 16, in ?
a = cPickle.load(file(filename))
  File "/home/jnix/lib/python2.4/SuSE-9.0/x86_64/Numeric/Numeric.py", line 520, 
in array_constructor
x.shape = shape
ValueError: ('total size of new array must be unchanged', , ((10,), 'l', 
'\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x07\x00\x00\x00\x08\x00\x00\x00\t\x00\x00\x00',
 1))
also I get:
[EMAIL PROTECTED]:~> python -c "import Numeric; print 
Numeric.arange(0).itemsize()"
4
[EMAIL PROTECTED]:~>  python -c "import Numeric; print 
Numeric.arange(0).itemsize()"
8
The script used to produce the example above is:
-
#!/usr/bin/python
# -*- coding: latin1 -*-
import Numeric
import cPickle
import sys

if len(sys.argv) > 1 and sys.argv[1] == '-dump':
    filename = sys.argv[2]
    binary = 1
    a = Numeric.arange(10)
    cPickle.dump(a, file(filename,'w',binary))
else:
    filename = sys.argv[1]
    a = cPickle.load(file(filename))
    print a


-
So what would you suggest ? Can I hack Numeric to assume non-native
32 bit integer numbers ?
Many thanks for any help,
Johannes
It might be worth posting to the Numeric mailing list, mirrored at 
http://news.gmane.org/gmane.comp.python.numeric.general

--
http://mail.python.org/mailman/listinfo/python-list


Re: Style guide for subclassing built-in types?

2005-02-23 Thread Michael Spencer
[EMAIL PROTECTED] wrote:
Kent Johnson wrote:
[EMAIL PROTECTED] wrote:
p.s. the reason I'm not sticking to reversed or even reverse :
suppose
the size of the list is huge.
reversed() returns an iterator so list size shouldn't be an issue.
What problem are you actually trying to solve?
Kent

Oh, you are right.
Actually, it's more complicated than simple reversion. The list order
should be somewhat "twisted" and the list is big.
For example,
[1,2,3,4,5,6,7,8,9,10]
--> [10,9,8,7,6,1,2,3,4,5]
so __getitem__(self,i) => __getitem__(self,-i-1) if i < n/2
I'd like to have TwistedList class that takes in an original list and
pretends as if it is twisted actually. However, I have to have
duplicate codes here and there to make it act like a "list", say assert
twisted_list == [10,9,...] and for each in twisted_list and etc.
If you want a twisted 'view' of an existing list, then a wrapper makes most 
sense.
If, however, you only need the twisted version, why not simply override 
list.__init__ (and extend, append etc... as required):

 >>> class rev_list(list):
 ...def __init__(self, iterable):
 ...list.__init__(self, iterable[::-1])
 ...
 >>> l = rev_list([1,2,3])
 >>> l
 [3, 2, 1]
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: [perl-python] generic equivalence partition

2005-02-24 Thread Michael Spencer
David Eppstein wrote:
In article <[EMAIL PROTECTED]>,
 "Xah Lee" <[EMAIL PROTECTED]> wrote:
given a list aList of n elements, we want to return a list that is a
range of numbers from 1 to n, partition by the predicate function of
equivalence equalFunc. 

In the worst case, this is going to have to take quadratic time 
(consider an equalFunc that always returns false) so we might as well do 
something really simple rather than trying to be clever.

def parti(aList,equalFunc):
    eqv = []
    for i in range(len(aList)):
        print i,eqv
        for L in eqv:
            if equalFunc(aList[i],aList[L[0]]):
                L.append(i)
                break;
        else:
            eqv.append([i])

Unless we can inspect the predicate function and derive a hash function such
that hash(a) == hash(b) <=> predicate(a,b) is True (hash equality must coincide
with equivalence).  Then the partition can take linear time
i.e.,
 >>> def equal(a,b):
 ...     return a[-1] == b[-1]
 ...
 >>> def hashFunc(obj):
 ...     return hash(obj[-1])
 ...
 >>> def parti(aList, hashFunc):
 ...     eqv = {}
 ...     for i,obj in enumerate(aList):
 ...         eqv.setdefault(hashFunc(obj),[]).append(i)
 ...     return eqv.values()
 ...

In the case where the predicate is a "black box", would a logistic regression 
over a sample of inputs enable a hash function to be derived experimentally?

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: accessor/mutator functions

2005-02-25 Thread Michael Spencer
[EMAIL PROTECTED] wrote:
When I look at how classes are set up in other languages (e.g. C++), I
often observe the following patterns:
1) for each data member, the class will have an accessor member
function (a Get function)
2) for each data member, the class will have a mutator member function
(a Set function)
3) data members are never referenced directly; they are always
referenced with the accessor and mutator functions
My questions are:
a) Are the three things above considered pythonic?
No
b) What are the tradeoffs of using getattr() and setattr() rather than
creating accessor and mutator functions for each data member?
Use property descriptors instead:
http://www.python.org/2.2.1/descrintro.html#property
http://users.rcn.com/python/download/Descriptor.htm#properties
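
A minimal sketch of the idiom - client code keeps plain attribute access, and
get/set logic appears only where it earns its keep:

 >>> class C(object):
 ...     def __init__(self):
 ...         self._x = 0
 ...     def _getx(self):
 ...         return self._x
 ...     def _setx(self, value):
 ...         if value < 0:          # validation goes here, if wanted
 ...             raise ValueError(value)
 ...         self._x = value
 ...     x = property(_getx, _setx)
 ...
 >>> c = C()
 >>> c.x = 42    # routed through _setx
 >>> c.x         # routed through _getx
 42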
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: accessor/mutator functions

2005-02-25 Thread Michael Spencer
[EMAIL PROTECTED] wrote:
If the class had two attributes--x and y--would the code look like
something lik this:
class C(object):
    def __init__(self):
        self.__x = 0
        self.__y = 0
    def getx(self):
        return self.__x
    def setx(self, x):
        if x < 0: x = 0
        self.__x = x
    def gety(self):
        return self.__y
    def sety(self, y):
        if y < 0: y = 0
        self.__y = y
    x = property(getx, setx)
    y = property(gety, sety)
It could do - that works.  One feature of this solution is that it leaves the 
accessor/mutator functions in the namespace.  That may be a good or a bad thing. 
 If bad, you could simply delete them after the property call (which is 
probably better written as close as possible to the functions)

i.e.,

class C(object):
    def __init__(self):
        self.__x = 0
        self.__y = 0
    def getx(self):
        return self.__x
    def setx(self, x):
        if x < 0: x = 0
        self.__x = x
    x = property(getx, setx)
    del getx, setx
    def gety(self):
        return self.__y
    def sety(self, y):
        if y < 0: y = 0
        self.__y = y
    y = property(gety, sety)
    del gety, sety
There are also recipes in the cookbook for defining property "suites" more 
elegantly
Note, that it is also easy to "roll your own" descriptor, which may be 
worthwhile if you have a lot of similar properties, for example (not tested 
beyond what you see):

from weakref import WeakKeyDictionary
class Property(object):
def __init__(self, adapter):
"""adapter is a single argument function that will be
applied to the value before setting it"""
self.objdict = WeakKeyDictionary()
self.adapter = adapter
def __get__(self, obj, cls):
if isinstance(obj, cls):
return self.objdict[obj]
else:
return self
def __set__(self, obj, value):
self.objdict[obj] = self.adapter(value)
class C(object):
x = Property(lambda val: max(val, 0))
y = Property(lambda val: val%2)
z = Property(abs)
 >>> c= C()
 >>> c.x = -3
 >>> c.x
0
 >>> c.y = -3
 >>> c.y
1
 >>> c.z = -3
 >>> c.z
3
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Converting HTML to ASCII

2005-02-25 Thread Michael Spencer
gf gf wrote:
[wants to extract ASCII from badly-formed HTML and thinks BeautifulSoup is too complex]
You haven't specified what you mean by "extracting" ASCII, but I'll assume that 
you want to start by eliminating html tags and comments, which is easy enough 
with a couple of regular expressions:

 >>> import re
 >>> comments = re.compile('<!--.*?-->', re.DOTALL)
 >>> tags = re.compile('<.*?>', re.DOTALL)
 ...
 >>> def striptags(text):
 ...     text = re.sub(comments,'', text)
 ...     text = re.sub(tags,'', text)
 ...     return text
 ...
 >>> def collapsenewlines(text):
 ...     return "\n".join(line for line in text.splitlines() if line)
 ...
 >>> import urllib2
 >>> f = urllib2.urlopen('http://www.python.org/')
 >>> source = f.read()
 >>> text = collapsenewlines(striptags(source))
 >>>
This will of course fail if there is a "<" without a ">", probably in other 
cases too.  But it is indifferent to whether the html is well-formed.

This leaves you with the additional task of substituting the html escaped
characters e.g., "&nbsp;", not all of which will have ASCII representations.
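
A partial sketch for the named entities, using the stdlib htmlentitydefs module
(numeric references like &#160; would still need separate handling):

 >>> import htmlentitydefs
 >>> entity = re.compile(r'&(\w+);')
 >>> def unescape(text):
 ...     # map named entities to their Latin-1 characters where possible;
 ...     # anything unrecognized is left alone
 ...     return entity.sub(lambda m: htmlentitydefs.entitydefs.get(m.group(1),
 ...                       m.group(0)), text)
 ...
 >>> unescape('fish &amp; chips')
 'fish & chips'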

HTH
Michael

--
http://mail.python.org/mailman/listinfo/python-list


Re: Converting HTML to ASCII

2005-02-25 Thread Michael Spencer
Mike Meyer wrote:
It also fails on tags with a ">" in a string in the tag. That's
well-formed but ill-used HTML.
True enough...however, it doesn't fail too horribly:
 >>> striptags("""the text""")
 "'>the text"
 >>>
and I think that case could be rectified rather easily, by stripping any content 
up to '>' in the result without breaking anything else.

BTW, I took a first look at BeautifulSoup.  As far as I could tell, there is no
built-in way to extract text from its parse tree, however adding one is trivial:
 >>> from bsoup import BeautifulSoup, Tag
 ...
 >>> def extracttext(obj):
 ...     if isinstance(obj,Tag):
 ...         return "".join(extracttext(c) for c in obj.contents)
 ...     else:
 ...         return str(obj)
 ...
 >>> def bsouptext(text):
 ...     souptree = BeautifulSoup(text)
 ...     bodytext = extracttext(souptree.first())
 ...     text = re.sub(comments,'', bodytext)
 ...     text = collapsenewlines(text)
 ...     return text
 ...
 >>> bsouptext("""the text""")
 "'>the text"
On one 'real world test' (nytimes.com), I find the regexp approach to be more 
accurate, but I won't load up this message with the output to prove it ;-)

Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: ListMixin (WAS: How do you control _all_ items added to a list?)

2005-03-01 Thread Michael Spencer
Steven Bethard wrote:
Nick Coghlan wrote:
 > Hmm, it might be nice if there was a UserList.ListMixin that was the
 > counterpart to UserDict.DictMixin
I've thought this occasionally too.  One of the tricky issues though is 
that often you'd like to define __getitem__ for single items and have 
ListMixin add the code for slices.  I haven't figured out how to do this 
cleanly yet...

STeVe
I agree that would be useful.  One solution would be to ask users to implement 
__getsingleitem__ (and not __getitem__) if they want the mixin to handle slice 
logic.  The following illustrates that, and also falls back to slicing the 
iterator if it is provided:

class ProtoListMixin(object):
"""Prototype ListMixin, exploring slice interface and semantics"""
def __getitem__(self, index):
if isinstance(index, slice):
start, stop, step = index.start or 0, index.stop, index.step or 1
if start < 0 or stop < 0 or not stop:
try:
start, stop, step = index.indices(len(self))
except TypeError:
raise TypeError, "unsized object"
try:
getter = self.__getsingleitem__
return [getter(i) for i in range(start, stop, step)]
except AttributeError:
pass
else:
if index < 0:
try:
index = len(self) + index
except TypeError:
raise TypeError, "unsized object"
try:
return self.__getsingleitem__(index)
except AttributeError:
pass
start, stop, step = index, index + 1, None
# Alternatively, try to use the iterator, if available
import itertools
try:
args = [iter(self)]
except AttributeError:
raise TypeError, "Must implement __getsingleitem__ or __iter__"
if start:
args.append(start)
args.append(stop)
if step:
if step < 1:
raise ValueError, "slicing an iterable requires step >=1"
args.append(step)
iterator = itertools.islice(*args)
if isinstance(index, slice):
return list(iterator)
else:
try:
return iterator.next()
except StopIteration:
raise IndexError, "index out of range"
# Users should implement __getsingleitem__ for positive indices
class Index(ProtoListMixin):
def __init__(self, data):
"""For testing, provide a list"""
self._data = data
def __getsingleitem__(self, index):
return self._data[index]
# If __len__ is implemented, negative indices are supported
class IndexLen(Index):
def __len__(self):
return len(self._data)
# If __getsingleitem__ is not implemented, positive slices are returned
# from an iterator
class Iter(ProtoListMixin):
def __init__(self, data):
"""For testing, provide an iterable"""
self._data = data
def __iter__(self):
return iter(self._data)
 >>> a = Index(range(10))
 >>> a[4]
 4
 >>> a[4:8]
 [4, 5, 6, 7]
 >>> a[-4]
 Traceback (most recent call last):
   File "", line 1, in ?
   File "ListMixin", line 22, in __getitem__
 TypeError: unsized object
 >>> b = IndexLen(range(10))
 >>> b[-4]
 6
 >>> c = Iter(xrange(10))
 >>> c[3]
 3
 >>> c[3:6]
 [3, 4, 5]
 >>> c[-3]
 Traceback (most recent call last):
   File "", line 1, in ?
   File "ListMixin", line 22, in __getitem__
 TypeError: unsized object
 >>>
--
http://mail.python.org/mailman/listinfo/python-list


Re: reuse validation logic with descriptors

2005-03-01 Thread Michael Spencer
David S. wrote:
This still fails to work for instances variables of the class.  That is 
if I use your property in the following:
py> class Flags(object):
...     def __init__(self):
...         a = singlechar
...

you should write that as:

class Flags(object):
    a = singlechar
    def __init__(self):
        a = "a"

py> f = Flags()
py> f.a = "a"
Now f.a.__class__.__name__ returns 'str'.  So the property was not 
used at all.

Also, it seems that using a property, I can not do the other useful 
things I can do with a proper class, like provide an __init__, __str__, 
or __repr__.  

If you want "other useful things" then you can write a custom descriptor, like:
from weakref import WeakKeyDictionary
class SingleChar(object):
def __init__(self):
"""raises ValueError if attribute is set to something
other than a single char"""
self.objdict = WeakKeyDictionary()
def __get__(self, obj, cls):
if isinstance(obj, cls):
try:
return self.objdict[obj]
except KeyError:
raise AttributeError, "property not set"
else:
return self
def __set__(self, obj, value):
if isinstance(value, str) and len(value) == 1:
self.objdict[obj] = value
else:
raise ValueError, value
class Flags(object):
a = SingleChar()
b = SingleChar()
See also: 
http://groups-beta.google.com/group/comp.lang.python/msg/30c61a30a90133d2
for another example of this approach
Michael
Again, thanks,
David S.

--
http://mail.python.org/mailman/listinfo/python-list


Re: Is it possible to specify the size of list at construction?

2005-03-01 Thread Michael Spencer
Anthony Liu wrote:
I cannot figure out how to specify a list of a
particular size.
For example, I want to construct a list of size 10,
how do I do this?
A list does not have a fixed size (as you probably know)
But you can initialize it with 10 somethings
 >>> [None]*10
 [None, None, None, None, None, None, None, None, None, None]
 >>> range(10)
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Is it possible to specify the size of list at construction?

2005-03-01 Thread Michael Spencer
Anthony Liu wrote:
Yes, that's helpful.  Thanks a lot.
But what if I wanna construct an array of arrays like
we do in C++ or Java:
myArray [][]
Basically, I want to do the following in Python:
myArray[0][1] = list1
myArray[1][2] = list2
myArray[2][3] = list3

here you have to be careful to create N different lists.  A list comprehension 
provides a convenient way to do it:

myArray = [[] for i in range(N)]
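
The obvious-looking alternative creates N references to one and the same list,
which is usually not what you want:

 >>> shared = [[]] * 3          # three references to a single list!
 >>> shared[0].append('x')
 >>> shared
 [['x'], ['x'], ['x']]
 >>> distinct = [[] for i in range(3)]
 >>> distinct[0].append('x')
 >>> distinct
 [['x'], [], []]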
--
http://mail.python.org/mailman/listinfo/python-list


Re: Best way to make a list unique?

2005-03-08 Thread Michael Spencer
Delaney, Timothy C (Timothy) wrote:
Michael Hoffman wrote:

For those who don't know, these implement a hash set/map which
iterates in the order that the keys were first added to the set/map.
I would love to see such a thing.

I've proposed this on python-dev, but the general feeling so far is
against it. So far the only use case is to remove duplicates without
changing order, and there are iterator-based solutions which would
normally be preferable.
It's pretty simple to roll your own, and I'll probably put together a
Cookbook recipe for it.
Tim Delaney
Here's something to work with:
class OrdSet(object):
def __init__(self, iterable):
"""Build an ordered, unique collection of hashable items"""
        self._data = {None:[None, None]}  # None is the pointer to the first
                                          # element.  This is unsatisfactory
                                          # because it cannot then be a member
                                          # of the collection
self.update(iterable)

def add(self, obj):
"""Add an element to the collection"""
data = self._data
if not obj in data:
last = self._last
data[last][1] = obj
data[obj] = [last, None]
self._last = obj
def update(self, iterable):
"""Update the collection with the union of itself and another"""
obj = self._last
data = self._data
last = data[obj][0]
for item in iterable:
if item not in data:
data[obj] = [last, item]
last, obj = obj, item
data[obj] = [last, None]
self._last = obj
    def remove(self, item):
        """Remove an element from a set; it must be a member.
        If the element is not a member, raise a KeyError."""
        data = self._data
        prev, next = data[item]
        data[prev][1] = next
        data[next][0] = prev
        del data[item]  # drop the item itself, so __contains__ and __len__
                        # stay consistent with the linked order
def discard(self, item):
"""Remove an element from a set if it is a member.
If the element is not a member, do nothing."""
try:
self.remove(item)
except KeyError:
pass
def __contains__(self, item):
return item in self._data
    def pop(self):
        """Remove and return the oldest element"""
        data = self._data
        first = data[None][1]
        next = data.pop(first)[1]   # unlink and delete the oldest element
        data[None] = [None, next]
        if next is not None:
            data[next][0] = None    # repoint the new first element's back-link
        return first
def clear(self):
self.__init__([])
def __iter__(self):
"""Iterate over the collection in order"""
data = self._data
prev, next = data[None]
while next is not None:
yield next
prev, next = data[next]
def __len__(self):
return len(self._data)-1
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__,list(self))
 >>> a= OrdSet(range(10))
 >>> a
 OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
 >>> a.update(range(5,15))
 >>> a
 OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
 >>> a.discard(8)
 >>> a
 OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14])
 >>>
Michael
--
http://mail.python.org/mailman/listinfo/python-list


Re: Best way to make a list unique?

2005-03-09 Thread Michael Spencer
Marc Christiansen wrote:
Michael Spencer <[EMAIL PROTECTED]> wrote:
Nice. When you replace None by an object(), you have no restriction on
the elements any more:

Thanks for the suggestion, Marc.
Note that if there is no need to access the middle of the collection, then the 
implementation is simpler, and less resource-intensive, since the items can be 
singly-linked

class UniqueQueue(object):
def __init__(self, iterable):
self._data = _data = {}
self._last = self._root = object() # An object the user is unlikely to
   # reference - thanks Marc
self.update(iterable)
def push(self, obj):
if not obj in self._data:
self._data[self._last] = obj
self._last = obj
def pop(self):
data = self._data
first = data.pop(self._root)
self._root = first
return first
def update(self, iterable):
last = self._last
data = self._data
for item in iterable:
if item not in data:
data[last] = item
last = item
self._last = last
def __iter__(self):
data = self._data
next = self._root
try:
while 1:
next = data[next]
yield next
except KeyError:
raise StopIteration
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__,list(self))
 >>> q = UniqueQueue(range(5))
 >>> q.update(range(3,8))
 >>> q
 UniqueQueue([0, 1, 2, 3, 4, 5, 6, 7])
 >>> q.pop()
 0
 >>> q
 UniqueQueue([1, 2, 3, 4, 5, 6, 7])
 >>>
 >>> q.push(None)
 >>> q
 UniqueQueue([1, 2, 3, 4, 5, 6, 7, None])
 >>>
Michael

--
http://mail.python.org/mailman/listinfo/python-list

