Re: What is "self"?
Ron Adam wrote:
> Erik Max Francis wrote:
>
>> Ron Adam wrote:
>>
>>> When you call a method of an instance, Python translates it to...
>>>
>>>     leader.set_name(leader, "John")
>>
>> It actually translates it to
>>
>>     Person.set_name(leader, "John")
>
> I thought that I might have missed something there.
>
> Is there a paper on how python accesses and stores instance data and
> methods?  I googled but couldn't find anything that addressed this
> particular question.
>
> >>> class a(object):
> ...     def x(self):
> ...         print 'x'
> ...
> >>> b = a()
> >>> b
> <__main__.a object at 0x009D1890>
> >>> b.x
> <bound method a.x of <__main__.a object at 0x009D1890>>
>
> So what exactly is a bound method object?  Does it possibly translate
> to something like the following?
>
>     def x(*args, **kwds):
>         self = ?
>         return __class__.self(self, *args, **kwds)
>
> Cheers,
>     Ron

All is explained at:
http://users.rcn.com/python/download/Descriptor.htm#functions-and-methods
and further at:
http://www.python.org/pycon/2005/papers/36/pyc05_bla_dp.pdf

"For objects, the machinery is in object.__getattribute__ which
transforms b.x into type(b).__dict__['x'].__get__(b, type(b))."

What follows is my interpretation - hope it's correct:

# what exactly is a bound method object?
# Illustrate b.f => type(b).__dict__['f'].__get__(b, type(b))

 >>> class B(object):
 ...     def f(self, x):
 ...         return x or 42
 ...
 >>> b = B()
 >>> type(b).__dict__['f']          # a plain old function
 <function f at 0x...>
 >>> _.__get__(b, type(b))          # invoke the descriptor protocol
 ...                                # to make a bound method
 <bound method B.f of <__main__.B object at 0x...>>
 >>>

You don't have to use object.__getattribute__ to get a bound method.
Nor does the function have to be in the class dictionary.  You can just
call any function descriptor yourself:

 >>> def g(self, y):
 ...     return self.f(y)
 ...
 >>> boundg = g.__get__(b)          # bind to a B instance
 >>> boundg
 <bound method ?.g of <__main__.B object at 0x...>>
 >>> boundg(0)
 42
 >>>

Looked at this way, function.__get__ just does partial function
application (aka currying).

 >>> def f(x, y):
 ...     return x + y
 ...
 >>> add42 = f.__get__(42)
 >>> add42
 <bound method ?.f of 42>
 >>> add42(1)
 43

Michael

--
http://mail.python.org/mailman/listinfo/python-list
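The currying parallel can be made explicit with a hand-rolled helper (a
sketch, not from the original post; functools.partial plays this role in
later Python versions):

    def partial(func, first):
        """Freeze 'first' as func's first argument."""
        def bound(*args, **kwds):
            return func(first, *args, **kwds)
        return bound

    def f(x, y):
        return x + y

    add42 = partial(f, 42)
    print add42(1)        # -> 43, just like f.__get__(42)(1)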
Re: Wrapping classes
Jeremy Sanders wrote:
> Colin J. Williams wrote:
>
>> Could you not have functions a and b each of which returns a NumArray
>> instance?
>>
>> Your expression would then be something like a(..)+2*b(..).
>
> The user enters the expression (yes - I'm aware of the possible security
> issues), as it is a scientific application. I don't think they'd like to
> put () after each variable name.
>
> I could always munge the expression after the user enters it, of course.
>
> Jeremy

Alternatively, you could build your own expression calculator, and
initialize the objects if necessary as they are evaluated.  If you are
happy with Python syntax for your expressions then the stdlib compiler
package is helpful.

The example below is not tested beyond what you see.  It's a bit
verbose, but most of the code is boilerplate.

 >>> a = 3
 >>> b = 4
 >>> calc('a * b')
 using a
 using b
 12
 >>> calc('a * b ** (b - a) * "a"')
 using a
 using b
 using b
 using a
 'aaaaaaaaaaaa'
 >>> calc("0 and a or b")
 using b
 4
 >>> calc("1 and a or b")
 using a
 3
 >>> calc("1 and a or c")
 using a
 3
 >>> calc("0 and a or c")
 Undefined symbol: c
 >>>

HTH,
Michael

----------------------------------------------------------------------

import compiler
import operator    # needed by visitCompare below

class CalcError(Exception):
    def __init__(self, error, descr=None, node=None):
        self.error = error
        self.descr = descr
        self.node = node

    def __repr__(self):
        return "%s: %s" % (self.error, self.descr)
    __str__ = __repr__


class LazyCalc(object):

    def __init__(self, namespace):
        self._cache = {}           # dispatch table
        self.context = namespace

    def visit(self, node, **kw):
        cls = node.__class__
        meth = self._cache.setdefault(cls,
            getattr(self, 'visit' + cls.__name__, self.default))
        return meth(node, **kw)

    def visitExpression(self, node, **kw):
        return self.visit(node.node)

    # Binary Ops
    def visitAdd(self, node, **kw):
        return self.visit(node.left) + self.visit(node.right)
    def visitDiv(self, node, **kw):
        return self.visit(node.left) / self.visit(node.right)
    def visitFloorDiv(self, node, **kw):
        return self.visit(node.left) // self.visit(node.right)
    def visitLeftShift(self, node, **kw):
        return self.visit(node.left) << self.visit(node.right)
    def visitMod(self, node, **kw):
        return self.visit(node.left) % self.visit(node.right)
    def visitMul(self, node, **kw):
        return self.visit(node.left) * self.visit(node.right)
    def visitPower(self, node, **kw):
        return self.visit(node.left) ** self.visit(node.right)
    def visitRightShift(self, node, **kw):
        return self.visit(node.left) >> self.visit(node.right)
    def visitSub(self, node, **kw):
        return self.visit(node.left) - self.visit(node.right)

    # Unary ops
    def visitNot(self, node, *kw):
        return not self.visit(node.expr)
    def visitUnarySub(self, node, *kw):
        return -self.visit(node.expr)
    def visitInvert(self, node, *kw):
        return ~self.visit(node.expr)
    def visitUnaryAdd(self, node, *kw):
        return +self.visit(node.expr)

    # Flow Control
    def visitAnd(self, node, **kw):
        for arg in node.nodes:
            val = self.visit(arg)
            if not val:
                return val
        return val

    def visitOr(self, node, **kw):
        for arg in node.nodes:
            val = self.visit(arg)
            if val:
                return val
        return val

    # Logical Ops
    def visitBitand(self, node, **kw):
        return reduce(lambda a, b: a & b,
                      [self.visit(arg) for arg in node.nodes])
    def visitBitor(self, node, **kw):
        return reduce(lambda a, b: a | b,
                      [self.visit(arg) for arg in node.nodes])
    def visitBitxor(self, node, **kw):
        return reduce(lambda a, b: a ^ b,
                      [self.visit(arg) for arg in node.nodes])

    def visitCompare(self, node, **kw):
        comparisons = {
            "<": operator.lt,          # strictly less than
            "<=": operator.le,         # less than or equal
            ">": operator.gt,          # strictly greater than
            ">=": operator.ge,         # greater than or equal
            "==": operator.eq,         # equal
            "!=": operator.ne,         # not equal
            "<>": operator.ne,         # not equal
            "is": operator.is_,        # object identity
            "is not": operator.is_not  # negated object identity
            }
        obj = self.visit(node.expr)
        for op, compnode in node.ops:
            compobj = self.visit(compnode)
            if not comparisons[op](obj, compobj):
                return False
            obj = compobj
        return True

    # Values
    def visitCallFunc(self, node, **kw):
        raise CalcError("Functions not supported", node.node)

    def visitName(self, node, **kw):
        """LazyEvaluation"""
        name = node.name
        try:
            val = eval(name, self.context)
        except NameError:
            raise CalcError("Undefined symbol", name, node)
        # (the archived post is truncated at the except clause above;
        #  what follows is a minimal completion consistent with the
        #  session shown earlier)
        print "using", name
        return val

    def visitConst(self, node, **kw):
        return node.value

    def default(self, node, **kw):
        raise CalcError("Unsupported construct", node.__class__, node)


# A driver in the spirit of the session above (also a reconstruction):
def calc(source, context=None):
    if context is None:
        context = globals()
    return LazyCalc(context).visit(compiler.parse(source, "eval"))
Re: What is "self"?
Ron Adam wrote:
> What I've noticed is you can block the visibility of a class
> attribute, which includes methods, by inserting an object in the
> instance with the same name.
> [snip example of this behavior]

Yes, that's true for "non-data descriptors" (see the last two bullets
below).

Raymond Hettinger [http://users.rcn.com/python/download/Descriptor.htm]
> The important points to remember are:
>
>     * descriptors are invoked by the __getattribute__ method
>     * overriding __getattribute__ prevents automatic descriptor calls
>     * __getattribute__ is only available with new style classes and
>       objects
>     * object.__getattribute__ and type.__getattribute__ make different
>       calls to __get__.
>     * data descriptors always override instance dictionaries.
>     * non-data descriptors may be overridden by instance dictionaries.

Michael

--
http://mail.python.org/mailman/listinfo/python-list
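To make those last two bullets concrete, here is a small illustration
(not from the original thread): a property is a data descriptor, so it
cannot be shadowed by an instance attribute, while a plain function is a
non-data descriptor, so it can.

    class Demo(object):
        def meth(self):                # plain function: non-data descriptor
            return "meth from class"
        def _get(self):
            return "prop from class"
        prop = property(_get)          # property: data descriptor

    d = Demo()
    d.__dict__["meth"] = "shadowed"    # instance dict hides the function
    print d.meth                       # -> shadowed
    d.__dict__["prop"] = "shadowed"    # ignored on lookup
    print d.prop                       # -> prop from class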
Re: Silly function call lookup stuff?
Lucas Lemmens wrote:
> Dear pythonians,
>
> I've been reading/thinking about the famous function call speedup
> trick where you use a function in the local context to represent
> a "remoter" function to speed up the 'function lookup'.
>
> "This is especially useful in a loop where you call the function a
> zillion times" they say.
>
> I think this is very odd behavior.
>
> Why isn't the result of the first function-lookup cached so that
> following function calls don't need to do the function-lookup at all?

I guess because the function name may be re-bound between loop
iterations.  Are there good applications of this?  I don't know.

> And if the context changes (an import-statement say) reset the
> cached 'function-lookups'.

In general an object doesn't know what names are bound to it, and there
are many ways besides an import statement of binding/re-binding, so "if
the context changes" is easier said than done.

> This way any function would only need to be looked up once.
>
> L.

Would you apply this optimization to all lookups in outer scopes, or
just callables?  Why? ;-)

Michael

--
http://mail.python.org/mailman/listinfo/python-list
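For readers who haven't seen it, a sketch of the speedup trick under
discussion (the function names here are illustrative):

    import math

    def slow(values):
        total = 0.0
        for v in values:
            total += math.sqrt(v)     # global + attribute lookup each pass
        return total

    def fast(values):
        sqrt = math.sqrt              # look the function up once...
        total = 0.0
        for v in values:
            total += sqrt(v)          # ...then use the fast local binding
        return total

The two functions compute the same result; only the cost of the repeated
name lookup differs.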
Re: grouping array
[EMAIL PROTECTED] wrote:
> hi if I have an array
>
> say x = [[2,2,0,0,1,1],
>          [1,1,0,0,1,1],
>          [1,1,0,0,1,1]]
> I basically want to group regions that are non zero like I want to get
> the coordinates of non zero regions..as (x1,y1,x2,y2)
> [(0,0,2,1),(0,4,2,5)] which show the top left(x1,y1) and bottom
> right(x2,y2) corners of each group.hope i am clear.
>
> Thanks

How about this:

def getregions(grid):
    """Yield lists of adjacent points, not necessarily rectangular"""
    adj = [(-1,0),(+1,0),(0,-1),(0,+1)]  # horizontal and vertical adjacencies
    # could add diagonals
    points = set((y,x) for y, row in enumerate(grid)
                       for x, cell in enumerate(row) if cell)
    while points:                  # set of (y,x) non-zero points
        region = [points.pop()]    # start a new region with any remaining point
        ptr = 0
        while ptr < len(region):
            y, x = region[ptr]
            adjpoints = set((y + j, x + i) for j, i in adj)
            adjpoints &= points    # keep only the non-zero, unseen points
            points -= adjpoints    # remove these adjacencies from points
            region.extend(adjpoints)  # add them to the region
            ptr += 1
        yield region

def getregioncoords(grid):
    """Get top left and bottom right of *rectangular* regions"""
    regions = getregions(grid)
    return [(reg[0], reg[-1]) for reg in regions if reg.sort() or True]

 >>> x = [[2,2,0,0,1,1],
 ...      [1,1,0,0,1,1],
 ...      [1,1,0,0,1,1]]
 ...
 >>> getregioncoords(x)
 [((0, 0), (2, 1)), ((0, 4), (2, 5))]

 >>> x = [[1,0,1,0,1]]
 >>> getregioncoords(x)
 [((0, 0), (0, 0)), ((0, 2), (0, 2)), ((0, 4), (0, 4))]

 >>> x = [[random.choice([0,1,2]) for x in range(6)] for y in range(6)]
 >>> pprint.pprint(x)
 [[1, 1, 2, 1, 2, 0],
  [2, 0, 0, 2, 0, 1],
  [1, 2, 2, 0, 2, 0],
  [0, 1, 0, 0, 0, 0],
  [2, 0, 0, 1, 1, 0],
  [2, 2, 2, 0, 1, 0]]
 >>> print "\n".join(str(reg) for reg in getregions(x))
 [(0, 1), (0, 0), (0, 2), (1, 0), (0, 3), (2, 0), (1, 3), (0, 4), (2, 1),
 (3, 1), (2, 2)]
 [(5, 4), (4, 4), (4, 3)]
 [(5, 0), (5, 1), (4, 0), (5, 2)]
 [(1, 5)]
 [(2, 4)]
 >>>

Unfortunately, it's rather slow.  This one is much faster, using just
one data structure:

def getregions2(grid):
    """Yield lists of adjacent points, not necessarily rectangular"""
    adj = [(-1,0),(+1,0),(0,-1),(0,+1)]  # horizontal and vertical adjacencies
    # could add diagonals
    rows = len(grid)
    cols = len(grid[0])
    griddata = []
    for row in grid:
        griddata.extend(row)
    for y in xrange(rows):
        ybase = y * cols
        for x in xrange(cols):
            if griddata[ybase + x]:
                griddata[ybase + x] = 0
                region = [(y, x)]
                append = region.append
                ptr = 0
                while ptr < len(region):
                    y1, x1 = region[ptr]
                    for j, i in adj:
                        y2, x2 = y1 + j, x1 + i
                        if y2 < 0 or y2 == rows:
                            continue
                        if x2 < 0 or x2 == cols:
                            continue
                        if griddata[y2 * cols + x2]:
                            append((y2, x2))
                            griddata[y2 * cols + x2] = 0
                    ptr += 1
                yield region

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: grouping array
[EMAIL PROTECTED] wrote:
> fredrick's solution seems to be closer to what I was looking for.  But
> I am still not sure if that could be done without the use of the Image
> module.

What do you mean by "closer to what I was looking for"?  For the single
test case you provided:

> say x = [[2,2,0,0,1,1],
>          [1,1,0,0,1,1],
>          [1,1,0,0,1,1]]
> I basically want to group regions that are non zero like I want to get
> the coordinates of non zero regions..as (x1,y1,x2,y2)
> [(0,0,2,1),(0,4,2,5)] which show the top left(x1,y1) and bottom
> right(x2,y2) corners of each group.hope i am clear.

my solution provides the correct output:

 >>> x = [[2,2,0,0,1,1],
 ...      [1,1,0,0,1,1],
 ...      [1,1,0,0,1,1]]
 ...
 >>> getregioncoords(x)
 [((0, 0), (2, 1)), ((0, 4), (2, 5))]

* except that the points aren't flattened.  If that's important to you,
rewrite getregioncoords as follows:

def getregioncoords(grid):
    """Get top left and bottom right of *rectangular* regions"""
    regions = getregions(grid)
    return [reg[0] + reg[-1] for reg in regions if reg.sort() or True]

 >>> getregioncoords(x)
 [(0, 0, 2, 1), (0, 4, 2, 5)]
 >>>

> Also in your solution I cannot follow this

I broke the solution into two parts:

1) the getregions generator yields a list of all the contiguous
regions.  The output below is the lists of coordinates that are
contiguous non-zero cells in the grid.

> [[1, 1, 2, 1, 2, 0],
>  [2, 0, 0, 2, 0, 1],
>  [1, 2, 2, 0, 2, 0],
>  [0, 1, 0, 0, 0, 0],
>  [2, 0, 0, 1, 1, 0],
>  [2, 2, 2, 0, 1, 0]]
>
> >>> print "\n".join(str(reg) for reg in getregions(x))
> [(0, 1), (0, 0), (0, 2), (1, 0), (0, 3), (2, 0), (1, 3), (0, 4), (2, 1),
> (3, 1), (2, 2)]
> [(5, 4), (4, 4), (4, 3)]
> [(5, 0), (5, 1), (4, 0), (5, 2)]
> [(1, 5)]
> [(2, 4)]

2) If the regions are rectangular, the getregioncoords function returns
the coordinates of the top-left and bottom-right points.

You did not answer the previous post which asked what to do if the
regions were not rectangular.

HTH
Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: Feature Proposal: Sequence .join method
Terry Reedy wrote:
> "David Murmann" <[EMAIL PROTECTED]> wrote in message
> news:[EMAIL PROTECTED]
>
>>> def join(sep, seq):
>>>     return reduce(lambda x, y: x + sep + y, seq, type(sep)())
>>
>> damn, i wanted too much. Proper implementation:
>>
>> def join(sep, seq):
>>     if len(seq):
>>         return reduce(lambda x, y: x + sep + y, seq)
>>     return type(sep)()
>>
>> but still short enough
>
> For general use, this is both too general and not general enough.
>
> If len(seq) exists then seq is probably reiterable, in which case it
> may be possible to determine the output length and preallocate to make
> the process O(n) instead of O(n**2).  I believe str.join does this.  A
> user written join for lists could also.  A tuple function could make a
> list first and then tuple(it) at the end.
>
> If seq is a general (non-empty) iterable, len(seq) may raise an
> exception even though the reduce would work fine.
>
> Terry J. Reedy

For the general iterable case, you could have something like this:

 >>> def interleave(sep, iterable):
 ...     it = iter(iterable)
 ...     next = it.next()
 ...     try:
 ...         while 1:
 ...             item = next
 ...             next = it.next()
 ...             yield item
 ...             yield sep
 ...     except StopIteration:
 ...         yield item
 ...
 >>> list(interleave(100, range(10)))
 [0, 100, 1, 100, 2, 100, 3, 100, 4, 100, 5, 100, 6, 100, 7, 100, 8,
 100, 9]
 >>>

but I can't think of a use for it ;-)

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: Need advice on subclassing code
Kent Johnson wrote:
> Rusty Shackleford wrote:
>> ...
>> C_1_1 and C_1_2 share a common C ancestor, and in practice may be
>> identical, but theoretically, could have the same function name with
>> two different implementations underneath.
>>
>> ...
>
> How are you instantiating the correct class?  You should be able to
> provide a default behaviour.  For example if the classes are all
> defined in module C you could have a factory like this:
>
> import C
> def makeC(x, y):
>     subtype = 'C_%d_%d' % (x, y)
>     cls = getattr(C, subtype, C.C)
>     return cls(x, y)
>
> Then in module C just define the subtypes you need to specialize; all
> other values of x and y will get the base class C.C.
>
> Kent

Or, if you actually want different classes for each set of parameters
(say for debugging or introspection), you could compose the default
ones on the fly:

import C

def makeC(x, y):
    subtype = 'C_%d_%d' % (x, y)
    cls = getattr(C, subtype, None)
    if not cls:
        # No specialized class found, so compose a default
        # This requires C.C to be a new-style class
        cls = type(subtype, (C.C,), {"__autogenerated__": True})
    return cls(x, y)

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: best cumulative sum
David Isaac wrote:
> for a solution when these are available.
> Something like:
> def cumreduce(func, seq, init = None):
>     """Return list of cumulative reductions.

This can be written more concisely as a generator:

 >>> import operator
 >>> def ireduce(func, iterable, init):
 ...     for i in iterable:
 ...         init = func(init, i)
 ...         yield init
 ...
 >>> list(ireduce(operator.mul, range(1,5), init=1))
 [1, 2, 6, 24]
 >>>

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: aligning a set of word substrings to sentence
Steven Bethard wrote:
> I've got a list of word substrings (the "tokens") which I need to
> align to a string of text (the "sentence").  The sentence is basically
> the concatenation of the token list, with spaces sometimes inserted
> between tokens.  I need to determine the start and end offsets of each
> token in the sentence.  For example::
>
>     py> tokens = ['She', "'s", 'gon', 'na', 'write', 'a', 'book', '?']
>     py> text = '''\
>     ... She's gonna write
>     ... a book?'''
>     py> list(offsets(tokens, text))
>     [(0, 3), (3, 5), (6, 9), (9, 11), (12, 17), (18, 19), (20, 24),
>     (24, 25)]
>
> Here's my current definition of the offsets function::
>
>     py> def offsets(tokens, text):
>     ...     start = 0
>     ...     for token in tokens:
>     ...         while text[start].isspace():
>     ...             start += 1
>     ...         text_token = text[start:start+len(token)]
>     ...         assert text_token == token, (text_token, token)
>     ...         yield start, start + len(token)
>     ...         start += len(token)
>     ...
>
> I feel like there should be a simpler solution (maybe with the re
> module?) but I can't figure one out.  Any suggestions?
>
> STeVe

Hi Steve:

Any reason you can't simply use str.find in your offsets function?

 >>> def offsets(tokens, text):
 ...     ptr = 0
 ...     for token in tokens:
 ...         fpos = text.find(token, ptr)
 ...         if fpos != -1:
 ...             end = fpos + len(token)
 ...             yield (fpos, end)
 ...             ptr = end
 ...
 >>> list(offsets(tokens, text))
 [(0, 3), (3, 5), (6, 9), (9, 11), (12, 17), (18, 19), (20, 24), (24, 25)]

and then, for an entry in the wacky category, a difflib solution:

 >>> def offsets(tokens, text):
 ...     from difflib import SequenceMatcher
 ...     s = SequenceMatcher(None, text, "\t".join(tokens))
 ...     for start, _, length in s.get_matching_blocks():
 ...         if length:
 ...             yield start, start + length
 ...
 >>> list(offsets(tokens, text))
 [(0, 3), (3, 5), (6, 9), (9, 11), (12, 17), (18, 19), (20, 24), (24, 25)]
 >>>

cheers
Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: Checking length of each argument - seems like I'm fighting Python
Brendan wrote:
...
> class Things(Object):
>     def __init__(self, x, y, z):
>         #assert that x, y, and z have the same length
>
> But I can't figure out a _simple_ way to check the arguments have the
> same length, since len(scalar) throws an exception.  The only ways
> around this I've found so far are
...
> b) use a separate 'Thing' object, and make the 'Things' initializer
> work only with Thing objects.  This seems like way too much structure
> to me.

Yes, but depending on what you want to do with Things, it might indeed
make sense to convert its arguments to a common sequence type, say a
list.  safelist is barely more complex than sLen, and may simplify
downstream steps.

def safelist(obj):
    """Construct a list from any object."""
    if obj is None:
        return []
    if isinstance(obj, (basestring, int)):
        return [obj]
    if isinstance(obj, list):
        return obj
    try:
        return list(obj)
    except TypeError:
        return [obj]

class Things(object):
    def __init__(self, *args):
        self.args = map(safelist, args)
        assert len(set(len(obj) for obj in self.args)) == 1

    def __repr__(self):
        return "Things%s" % self.args

 >>> Things(0,1,2)
 Things[[0], [1], [2]]
 >>> Things(range(2),xrange(2),(0,1))
 Things[[0, 1], [0, 1], [0, 1]]
 >>> Things(None, 0,1)
 Traceback (most recent call last):
   File "<interactive input>", line 1, in ?
   File "C:\Documents and Settings\Michael\My Documents\PyDev\Junk\safelist.py",
 line 32, in __init__
     assert len(set(len(obj) for obj in self.args)) == 1
 AssertionError

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: Documentation suggestions
A.M. Kuchling wrote:
> Here are some thoughts on reorganizing Python's documentation, with
> one big suggestion.

Thanks for raising this topic, and for your on-going efforts in this
field.

I use the compiled html help file provided by PythonWin, which includes
all the core documentation.  I usually use the index interface, not the
table of contents (the main exception is the LibRef, see below).  In
this form, the structure of the documentation is less important than
how good the index is.  Unfortunately, the "additional documentation",
including, in particular, your re HowTo, is linked but not indexed, and
is therefore less accessible.

> The tutorial seems to be in pretty good shape because Raymond ...

Agreed, but as you say below, there may be friendlier forms available
for the first-timer.
...
> There's another struggle within the LibRef: is it a reference or a
> tutorial?

I want it to help answer questions of the form "What's in the library
that might help me do x?"  For this case, some of the current section
structure is not that helpful.  "Miscellaneous Services", in
particular, gives no clue to the treasures it contains.  I would
prefer, for example, to see the data structure modules: collections,
heapq, array etc... given their own section.  Documentation/testing,
cmd/options might be other candidates to draw together currently
related material more meaningfully.

> Does it list methods in alphabetical order so you can look
> them up, or does it list them in a pedagogically useful order?  I
> think it has to be a reference;

A reference, yes, but not necessarily alphabetical if another
organization is more communicative.  itertools is a good example where
alphabetic presentation makes perfect sense, since the functions are
more-or-less peers; the math functions are usefully classified by
topic; textwrap presents most commonly-used functions first; several
modules document classes before convenience functions.  Each of these
has its merits, and I don't see a lot of mileage in trying to
standardize them, given how varied modules are.  However, whatever the
reference structure, examples add significantly to the value to me.
...
> I suspect the Achilles' heel of the docs is the Language Reference.
> Put aside the fact that it's not up to date with new-style classes and
> other stuff; that would be fixable with some effort.
>
> To some degree, the guide is trying to be very formal; it's written
> like a specification for an implementor, not a document that people
> would read through.  But there's no other way for people to learn
> about all the special object methods like __add__; the tutorial can't
> cover them all, and the LibRef doesn't describe them.  So the newbie
> is stuck.

I find very little of value to me in the Language Ref.  Special methods
are the crucial exception.  Perhaps they, together with a description
of class semantics (including metaclasses and descriptors), could be
moved to the Built-in types section of the LibRef, where some related
material is already.

I don't know whether the rest of the Language reference is of use to
implementers, but given the proliferation of implementations beyond
CPython (Jython, IronPython, pypy) I would speculate that a formal
specification is now more important rather than less.  However, perhaps
it would be possible to express the specification more succinctly via
tests instead of a manual.
...
> Perhaps we need a friendlier counterpart to the RefGuide, something
> like the 20-page introduction to Python at the beginning of Beazley's
> Essential Reference:

I didn't know this source, but I just skimmed it at
http://www.amazon.com/gp/reader/0735709017/ref=sib_dp_pt/103-1276064-0751851#reader-page
(not sure if this is a session link), and I agree it's a very clear
introduction.  Probably better first reading than the existing
tutorial.
...

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: i=2; lst=[i**=2 while i<1000]
Daniel Schüle wrote:
> Hello NG,
>
> I am wondering if there were proposals or previous discussions in this
> NG considering using 'while' in comprehension lists
>
> # pseudo code
> i=2
> lst=[i**=2 while i<1000]

You are actually describing two features that list comps don't natively
support - while-based termination, and calculating based on prior
values of output.  Of course there are work-arounds for both, which
others have shown.  Here's another approach:

The while-based termination can be easily achieved using
itertools.takewhile, e.g.,:

 >>> list(itertools.takewhile(lambda x: x < 10, range(100)))
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 >>>

the harder piece is to access the prior value.  One way is like this:

def chasetail(start, func):
    from itertools import tee
    def mygen():
        yield start
        for i in (func(i) for i in iterators[0]):
            yield i
    iterators = tee(mygen())
    return iterators[1]

the trick is to create two independent iterators, using itertools.tee,
one of which is consumed internally in the

    func(i) for i in iterators[0]

generator expression, the other is returned to user code.

 >>> it = chasetail(2, lambda x: x*x)  # careful - this won't terminate
 >>> it.next()
 2
 >>> it.next()
 4
 >>> it.next()
 16
 >>> it.next()
 256
 >>> it.next()
 65536
 >>>

Then you can combine these two approaches to get something semantically
like what you wanted in the first place (although not as pretty ;-)

 >>> list(itertools.takewhile(lambda x: x < 1000,
 ...                          chasetail(2, lambda x: x*x)))
 [2, 4, 16, 256]
 >>>

If you like this sort of thing, you might want to generalize the
concept with a Stream class.  Here's a minimal implementation:

import itertools as it

class Stream(object):
    """An extendable stream, that provides a separate iterator (using
    itertools.tee) on every iteration request"""
    def __init__(self, *iterables):
        self.queue = list(iterables)
        self.itertee = it.tee(self._chain(self.queue))[0]

    def _chain(self, queue):
        while queue:
            for i in self.queue.pop(0):
                self.head = i
                yield i

    def extend(self, other):
        self.queue.append(other)

    def __iter__(self):
        """Normal iteration over the iterables in self.queue in turn"""
        return self.itertee.__copy__()

then, you can write your squaring algorithm as:

 >>> s = Stream([2])
 >>> s.extend(it.takewhile(lambda x: x < 1000, (i**2 for i in s)))
 >>> list(s)
 [2, 4, 16, 256]

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: Documentation suggestions
A.M. Kuchling wrote:
> On Tue, 06 Dec 2005 10:29:33 -0800,
> Michael Spencer <[EMAIL PROTECTED]> wrote:
>> not that helpful.  "Miscellaneous Services", in particular, gives no
>> clue to the treasures it contains.  I would prefer, for example, to
>> see the data structure modules: collections, heapq, array etc...
>> given their own section.  Documentation/testing, cmd/options might be
>> other candidates to draw together currently related material more
>> meaningfully.
>
> You're right; "Miscellaneous Services" is a grab-bag of stuff, and so
> are 'Generic OS Services' and 'Optional OS Services'.  These chapters
> should be rearranged into more, smaller chapters.
>
> A patch for a draft reorganization is at
> http://www.python.org/sf/1375417
>
> --amk

Thanks!  That looks like a good start.  I experimented with some more
re-organization, but I don't see a way to attach the resulting file in
the SF comments, so I'll post it here instead.

Michael

% experimental re-organization of lib.tex,
% from http://www.python.org/sf/1375417

\tableofcontents

% Chapter title:

\input{libintro}        % Introduction

% =============
% BUILT-INs
% =============

\input{libobjs}         % Built-in Types, Exceptions and Functions
\input{libfuncs}
\input{libstdtypes}
\input{libexcs}
\input{libconsts}

% =============
% BASIC/GENERAL-PURPOSE OBJECTS
% =============

% General object services
\input{libtypes}
\input{libnew}
\input{libweakref}
\input{libcopy}
\input{libpprint}
\input{librepr}

% Strings
\input{libstrings}      % String Services
\input{libstring}
\input{libre}
\input{libreconvert}
\input{libstruct}       % also/better in File Formats?
\input{libdifflib}
\input{libfpformat}
\input{libstringio}
\input{libtextwrap}
\input{libcodecs}
\input{libunicodedata}
\input{libstringprep}

% Data types and structures
%\input{libdata}        % Data types and structures
\input{libdatetime}
\input{libcalendar}
\input{libcollections}
\input{libheapq}
\input{libarray}
\input{libsets}
\input{libsched}
\input{libmutex}
\input{libqueue}
\input{libuserdict}     % From runtime.  What happened to UserList and UserString?

% Numeric/Mathematical modules
\input{libdecimal}
\input{libmath}
\input{libcmath}
\input{librandom}
\input{libbisect}       % is this needed here - more useful in Data types, like heapq?

% Functions, Functional, Generators and Iterators
\input{libitertools}
\input{libfunctional}
\input{liboperator}     % from runtime - better with itertools and functional

%\input{libmisc}        % Miscellaneous Services

% =============
% DATA FORMATS
% =============

%% File formats
\input{libcfgparser}
\input{libnetrc}
\input{librobotparser}
\input{libcsv}
\input{libstruct}       % and in string?

% Big move - include all the markup and internet formats here

% MIME & email stuff
\input{email}
\input{libmailcap}
\input{libmailbox}
\input{libmhlib}
\input{libmimetools}
\input{libmimetypes}
\input{libmimewriter}
\input{libmimify}
\input{libmultifile}
\input{librfc822}

% encoding stuff
\input{libbase64}
\input{libbinascii}
\input{libbinhex}
\input{libquopri}
\input{libuu}
\input{libxdrlib}

\input{markup}          % Structured Markup Processing Tools
\input{libhtmlparser}
\input{libsgmllib}
\input{libhtmllib}
\input{libpyexpat}
\input{xmldom}
\input{xmldomminidom}
\input{xmldompulldom}
\input{xmlsax}
\input{xmlsaxhandler}
\input{xmlsaxutils}
\input{xmlsaxreader}
% \input{libxmllib}

\input{libcrypto}       % Cryptographic Services
\input{libhmac}
\input{libhashlib}
\input{libmd5}
\input{libsha}

% =============
% FILE & DATABASE STORAGE
% =============

\input{liballos}        % File-system services (XXX change header)
\input{libos}
\input{libposixpath}    % os.path
\input{libfileinput}
\input{libstat}
\input{libstatvfs}
\input{libfilecmp}
\input{libtempfile}
\input{libglob}
\input{libfnmatch}
\input{liblinecache}
\input{libshutil}
\input{libdircache}

%% Data compression and archiving
\input{libzlib}
\input{libgzip}
\input{libbz2}
\input{libzipfile}
\input{libtarfile}

%\input{libpersistence} % Persistent storage
\input{libpickle}
\input{libcopyreg}      % really copy_reg
% from runtime...
\input{libshelve}
\input{libmarshal}
\input{libanydbm}
\input{libwhichdb}
\input{libdbm}
\input{libgdbm}
\input{libdbhash}
\input{libbsddb}
\input{libdumbdbm}

% =============
% OS
% =============

\input{liballos}        % Generic Operating System Services
\input{libtime}
\input{libgetpass}
\input{libcurses}
\input{libascii}        % curses.ascii
\input{libcursespanel}
\input{libplatform}
\input{liberrno}

%% Interprocess communication/networking
\input{libsubprocess}
\input{l
Re: Bitching about the documentation...
Fredrik Lundh wrote:
> Rocco Moretti wrote:
>
>> Insert punctuation & capitalization to make the following a correct
>> and coherent (if not a little tortured) sentence.
>>
>> fred where guido had had had had had had had had had had had a better
>> effect on the reader
>
> punctuation, including quote marks, I presume?
>
> it's not time to bring out "d'ä ä e å, å i åa ä e ö" yet, I hope?

Allowing quotation, almost anything is possible, e.g.,

    Fred!  Where Guido had had "had", Had had had "had had".
    "Had had" had a better effect on the reader

or simply

    "fred", where Guido had "had had had had had had had had had",
    had a better effect on the reader

M

--
http://mail.python.org/mailman/listinfo/python-list
Re: newby question: Splitting a string - separator
Thomas Liesner wrote:
> Hi all,
>
> i am having a textfile which contains a single string with names.
> I want to split this string into its records an put them into a list.
> In "normal" cases i would do something like:
>
>> #!/usr/bin/python
>> inp = open("file")
>> data = inp.read()
>> names = data.split()
>> inp.close()
>
> The problem is, that the names contain spaces an the records are also
> just seprarated by spaces. The only thing i can rely on, ist that the
> recordseparator is always more than a single whitespace.
>
> I thought of something like defining the separator for split() by using
> a regex for "more than one whitespace". RegEx for whitespace is \s, but
> what would i use for "more than one"? \s+?
>
> TIA,
> Tom

\s+ gives one or more; you need \s{2,} for two or more:

 >>> import re
 >>> re.split("\s{2,}", "Guido van Rossum  Tim Peters  Thomas Liesner")
 ['Guido van Rossum', 'Tim Peters', 'Thomas Liesner']
 >>>

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: Dynamically add Class to Modules
[EMAIL PROTECTED] wrote:
> I'm trying to add a class to a module at runtime.  I've seen examples
> of adding a method to a class, but I haven't been able to suit it to
> my needs.
>
> As part of a testsuite, I have a main process X that searches
> recursively for python test files.  Those files typically have a
> global "isSupported" method, in which the file tells the test searcher
> "do or do not run me", as well as the typical TestName_TestCase class,
> with a testMyTest method.
>
> For numerous and reasonable reasons, the TestName_TestCase class must
> be generated at runtime (i cannot use any pre-processing scripts to
> generate the testcase files).  So the external runner has to look into
> each testcase file, determine if it is supported, expand out the
> test-class code, and add that new class to that testcase in memory.
>
> I hope this picture helps:
>
> # atestcase.py
> def isSupported():
>     """ do a real check"""
>     return True
>
> ThisTestName = "foo"
> TestCode = \
> """
> class %s_TestCase:
>     def __init__( self ):
>         """ do some stuff"""
>
>     def test_%s( self ):
>         """ run the test """
> """
>
> #--- The external runner
>
> (essentially)
> import atestcase.py
> if atestcase.isSupported():
>     # Run this test
>
> (here's what i'm trying to figure out)
> #--> expand atestcase.TestCode out to include "foo"
> #--> make the testcode a class
> #--> add the new foo_TestCase class to
> #    the atestcase module
>
> So: Does anyone know how to dynamically generate a class, and add it
> to a "module" that is already in memory?
>
> Thanks so much in advance.  My flu is heating up my brain pretty
> badly, so please ask me if I have to clarify anything above.

Bill, I think this should do it:

    import atestcase as T
    exec T.TestCode % T.ThisTestName in T.__dict__

If you want to substitute ThisTestName more than once, you might be
better off using the %(name)s form, supplied with a dictionary
{name: "foo"}, or you could look at the new string.Template class for
easier string substitution.

Michael

--
http://mail.python.org/mailman/listinfo/python-list
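For what it's worth, a sketch of the string.Template alternative
mentioned above (the template text is illustrative; note the ${name}
form is needed wherever the placeholder abuts an identifier character):

    from string import Template

    test_code = Template('''
    class ${name}_TestCase:
        def test_${name}(self):
            """run the test"""
    ''')

    print test_code.substitute(name="foo")

This substitutes every occurrence of the placeholder in one call,
instead of requiring one positional argument per %s.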
Re: Dynamically add Class to Modules
[EMAIL PROTECTED] wrote:
...
> exec testModule.TheTestCode % (testModule.TheTestName,
>                                testModule.TheTestName)
...

Try changing that to

    exec ~ in testModule.__dict__

otherwise, your class statement gets executed in the current scope.

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: getting host and path from url
Steve Young wrote:
> Hi, this is probably an easy question but is there a way to get the
> host and path separately out of an url?
>
> Example:
>
> url = http://news.yahoo.com/fc/world/iraq
>
> and i want some way of getting:
>
> host = http://news.yahoo.com
> and
> path = /fc/world/iraq
>
> thanks.
>
> -Steve

check out urlparse in the stdlib

Michael

--
http://mail.python.org/mailman/listinfo/python-list
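For example (a sketch, not from the original reply):

    import urlparse

    parts = urlparse.urlsplit("http://news.yahoo.com/fc/world/iraq")
    # parts is (scheme, netloc, path, query, fragment)
    host = parts[0] + "://" + parts[1]   # 'http://news.yahoo.com'
    path = parts[2]                      # '/fc/world/iraq'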
Re: newby question: Splitting a string - separator
[EMAIL PROTECTED] wrote:
> Thomas Liesner wrote:
>> Hi all,
>>
>> i am having a textfile which contains a single string with names.
>> I want to split this string into its records an put them into a list.
>> In "normal" cases i would do something like:
>>
>>> #!/usr/bin/python
>>> inp = open("file")
>>> data = inp.read()
>>> names = data.split()
>>> inp.close()
>>
>> The problem is, that the names contain spaces an the records are also
>> just seprarated by spaces. The only thing i can rely on, ist that the
>> recordseparator is always more than a single whitespace.
>>
>> I thought of something like defining the separator for split() by
>> using a regex for "more than one whitespace". RegEx for whitespace is
>> \s, but what would i use for "more than one"? \s+?
>
> Can I just use "two spaces" as the separator?
>
> [ x.strip() for x in data.split("  ") ]

If you like, but it will create dummy entries if there are more than
two spaces:

 >>> data = "Guido van Rossum  Tim Peters    Thomas Liesner"
 >>> [ x.strip() for x in data.split("  ") ]
 ['Guido van Rossum', 'Tim Peters', '', 'Thomas Liesner']

You could add a condition to the listcomp:

 >>> [name.strip() for name in data.split("  ") if name]
 ['Guido van Rossum', 'Tim Peters', 'Thomas Liesner']

but what if there is some other whitespace character?

 >>> data = "Guido van Rossum  Tim Peters  \t  Thomas Liesner"
 >>> [name.strip() for name in data.split("  ") if name]
 ['Guido van Rossum', 'Tim Peters', '', 'Thomas Liesner']
 >>>

perhaps a smarter condition?

 >>> [name.strip() for name in data.split("  ") if name.strip(" \t")]
 ['Guido van Rossum', 'Tim Peters', 'Thomas Liesner']

but this is beginning to feel like hard work.  I think this is a case
where it's not worth the effort to try to avoid the regexp:

 >>> import re
 >>> re.split("\s{2,}", data)
 ['Guido van Rossum', 'Tim Peters', 'Thomas Liesner']
 >>>

Michael

--
http://mail.python.org/mailman/listinfo/python-list
args (was Re: Lambda as declarative idiom (was RE: what is lambda used for in real code?))
Roman Suzi wrote:
> Maybe this is too outlandish, but I see lambdas as a "quote" mechanism,
> which presents a possibility to postpone (precisely control, delegate)
> evaluation.  That is, an overhead for lambda must be much lower but at
> the same time visible to the programmer:
>
>     d = a + (lambda x, y: x + y)(3, 4)

[...]

I believe that this "possibility to postpone" divides into two related
but separate concepts: controlling the moment of evaluation, and
assembling the arguments required at that moment.  They are both
species of 'eval', but managing arguments is more specialized, because
it includes possibly renaming parameters, assigning default values,
processing positional and keyword arguments, and, perhaps in the
future, dealing with argument types.

Meanwhile, GvR wrote (about defining Interfaces in the context of
Optional Static Type Checking):

> Method declarations can be inspected to find out their signature.  I
> propose a __signature__ attribute (also for methods defined in
> classes!) which might be an object whose attributes make the signature
> easily inspectable.  This might take the form of a list of argument
> declaration objects giving the name, type and default (if any) for
> each argument, and a separate argument for the return type.  For
> signatures that include *args and/or **kwds, the type of the
> additional arguments should also be given (so you can write for
> example a varargs method whose arguments are all strings).

GvR's method.__signature__ object might be related to the args object I
proposed as part of the syntax for anonymous functions without
'lambda', i.e.,

    args(a, *b, **kw) --> an object that specifies but does not
    evaluate its parameters until it is supplied to a callable,
    possibly with calling parameters

This object would contain the default values, and could contain type
annotations, explicit or inferred, as well as more complex assertions
used in several contexts.

* Current function syntax:

    def func(a, *b, **c): pass

  creates func with

    func.__signature__ = args(a, *b, **c)

  and when func is called, the args are evaluated using a mechanism in
  args.__call__, so, roughly, eval(func.__signature__) --> func.locals

* Anonymous functions

  Syntax alternatives at
  http://www.python.org/moin/AlternateLambdaSyntax
  e.g.,

    (f(a) + o(b) - o(c) for args(a, b, c))

  args would be evaluated with the calling parameters and made
  available in the local scope defined by ()

* A stricter alternative to keyword arguments:

    argspec = args(arg1, arg2, arg3)

    def func(**argspec): pass

  is equivalent to

    def func(arg1, arg2, arg3): pass
    args["arg1"]

  (i.e., only args defined in argspec are accepted)

* Useful infrastructure for user-supplied type-based
  dispatch/lightweight multimethods:

    argspec = args([(a:int, b:int), (a:str, b:str)])

  then a framework can provide a custom args.__call__ method that does
  conformance-checking, adaptation or whatever

Michael

--
http://mail.python.org/mailman/listinfo/python-list
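As an aside (not part of the proposal above): current Python already
exposes a flat version of this signature information, which hints at
what __signature__ might carry; a quick sketch:

    import inspect

    def func(a, b=2, *args, **kwds):
        pass

    # returns (argnames, varargs name, varkw name, defaults)
    print inspect.getargspec(func)   # -> (['a', 'b'], 'args', 'kwds', (2,))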
Re: pure python code to do modular-arithmetic unit conversions?
Dan Stromberg wrote:
> Is there already a pure python module that can do modular-arithmetic
> unit conversions, like converting a huge number of seconds into
> months, weeks... or a bandwidth measure into megabits/s or gigabits/s
> or megabytes/s or gigabytes/s, whatever's the most useful (ala df -h)?
>
> Thanks!

Take a look at: http://home.tiscali.be/be052320/Unum_tutorial.html

From the intro: "Unum stands for 'unit-numbers'.  It is a Python module
that allows to define and manipulate true quantities, i.e. numbers with
units such as 60 seconds, 500 watts, 42 miles-per-hour, 100 kg per
square meter, 14400 bits per second, 30 dollars etc.  The module
validates unit consistency in arithmetic expressions; it provides also
automatic conversion and output formatting."

Michael

--
http://mail.python.org/mailman/listinfo/python-list
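For the seconds-into-weeks side of the question, a minimal plain-Python
sketch of the divmod chain involved (no third-party module; the function
name is illustrative):

    def split_seconds(total):
        """Break a count of seconds into weeks/days/hours/minutes/seconds."""
        result = []
        for name, size in (("weeks", 7*24*3600), ("days", 24*3600),
                           ("hours", 3600), ("minutes", 60), ("seconds", 1)):
            count, total = divmod(total, size)
            result.append((name, count))
        return result

    print split_seconds(10000000)
    # [('weeks', 16), ('days', 3), ('hours', 17), ('minutes', 46),
    #  ('seconds', 40)]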
Re: Reload Tricks
Kamilche wrote:
> I want my program to be able to reload its code dynamically.  I have a
> large hierarchy of objects in memory.  The inheritance hierarchy of
> these objects are scattered over several files.
>
> I find that after reloading the appropriate files, and overwriting the
> __class__ of object instances, one more thing is necessary: reloading
> the __bases__ of each reloaded class.  If I don't do this, the modules
> reloaded first point to old versions of the classes from later
> modules, and when the later module is reloaded, it doesn't update the
> inheritance hierarchy of classes already loaded.
>
> This appears to be working... but now I'm wondering, what else did it
> not change?  Can I expect more toes to be blown off?
>
> --Kamilche

There are some cases when re-assigning __class__ isn't possible, for
example:

 >>> class A(object):
 ...     pass
 ...
 >>> class B(dict):
 ...     pass
 ...
 >>> class C:
 ...     pass
 ...
 >>> a = A()
 >>> a.__class__ = B
 Traceback (most recent call last):
   File "<stdin>", line 1, in ?
 TypeError: __class__ assignment: 'A' object layout differs from 'B'
 >>> a.__class__ = C
 Traceback (most recent call last):
   File "<stdin>", line 1, in ?
 TypeError: __class__ must be set to new-style class, not 'classobj'
 object
 >>>

An alternative approach (with some pros and cons) is to modify the
class in place, using something like:

 >>> import types
 >>> def reclass(cls, to_cls):
 ...     """Updates attributes of cls to match those of to_cls"""
 ...
 ...     DONOTCOPY = ("__name__", "__bases__", "__base__",
 ...                  "__dict__", "__doc__", "__weakref__")
 ...
 ...     fromdict = cls.__dict__
 ...     todict = to_cls.__dict__
 ...
 ...     # Delete any attribute not present in the new class
 ...     [delattr(cls, attr) for attr in fromdict.keys()
 ...         if not ((attr in todict) or (attr in DONOTCOPY))]
 ...
 ...     for to_attr, to_obj in todict.iteritems():
 ...
 ...         if to_attr in DONOTCOPY:
 ...             continue
 ...
 ...         # This overwrites all functions, even if they haven't changed.
 ...         if type(to_obj) is types.MethodType:
 ...             func = to_obj.im_func
 ...             to_obj = types.MethodType(func, None, cls)
 ...
 ...         setattr(cls, to_attr, to_obj)
 ...
 >>> class A(object):
 ...     attr = "A"
 ...
 >>> class B(object):
 ...     attr = "B"
 ...
 >>> a = A()
 >>> reclass(A, B)
 >>> a.attr
 'B'
 >>>

This copies attributes of old and new-style classes (in fact anything
with a __dict__, so probably a module would work too).

You still run into problems trying to re-assign __bases__ to
incompatible objects, but this one-attribute-at-a-time approach gives
you the potential to intercept problem cases.  In the example above,
problems are avoided by not copying __bases__.

An additional advantage of this approach is that you don't need to keep
track of class instances in order to change their __class__.  Instances
automatically acquire the new behavior.

One wart is that class docstrings are not writeable, so cannot be
copied.  Why?

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: Reload Tricks
Kamilche wrote:
> I want my program to be able to reload its code dynamically.  I have a
> large hierarchy of objects in memory.  The inheritance hierarchy of
> these objects are scattered over several files.

Michael Spencer wrote:
> An alternative approach (with some pros and cons) is to modify the
> class in place, using something like:
>
>  >>> def reclass(cls, to_cls):
>  ...     """Updates attributes of cls to match those of to_cls"""
>  ...
>  ...     DONOTCOPY = ("__name__", "__bases__", "__base__",
>  ...                  "__dict__", "__doc__", "__weakref__")
> etc...

Kamilche wrote:
> Would it be possible to just not copy any attribute that starts and
> ends with '__'?  Or are there some important attributes being copied?

Possible?  of course, it's Python ;-)  But there are many 'magic'
attributes for behavior that you probably do want to copy: e.g.,
__getitem__, __setitem__ etc...

See: http://docs.python.org/ref/specialnames.html

Michael Hudson's recipe:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/160164
does auto-reloading "automatically", at the price of changing the type
of the classes you want to manage.  It's a very convenient approach for
interactive development (which is the recipe's stated purpose).  It
works by tracking instances and automatically updating their class.

If your program relies on class identity, you may run into problems.

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: default value in a list
Alex Martelli wrote:
[explanation and the following code:]

 >>> a, b, c = it.islice(
 ...     it.chain(
 ...         line.split(':'),
 ...         it.repeat(some_default),
 ...     ),
 ...     3)

...

 >>> def pad_with_default(N, iterable, default=None):
 ...     it = iter(iterable)
 ...     for x in it:
 ...         if N <= 0:
 ...             break
 ...         yield x
 ...         N -= 1
 ...     while N > 0:
 ...         yield default
 ...         N -= 1

Why not put these together and put it in itertools, since the
requirement seems to crop up every other week?

 >>> line = "A:B:C".split(":")
 ...
 >>> def ipad(N, iterable, default=None):
 ...     return it.islice(it.chain(iterable, it.repeat(default)), N)
 ...
 >>> a, b, c, d = ipad(4, line)
 >>> a, b, c, d
 ('A', 'B', 'C', None)

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: What YAML engine do you use?
Paul Rubin wrote:
> YAML looks to me to be completely insane, even compared to Python
> lists.  I think it would be great if the Python library exposed an
> interface for parsing constant list and dict expressions, e.g.:
>
>   [1, 2, 'Joe Smith', 8237972883334L,   # comment
>    {'Favorite fruits': ['apple', 'banana', 'pear']},  # another comment
>    'xyzzy', [3, 5, [3.14159, 2.71828, [
>
> I don't see what YAML accomplishes that something like the above
> wouldn't.  Note that all the values in the above have to be constant
> literals.  Don't suggest using eval.  That would be a huge security
> hole.

Not hard at all, thanks to compiler.ast:

 >>> import compiler
 ...
 >>> class AbstractVisitor(object):
 ...     def __init__(self):
 ...         self._cache = {}  # dispatch table
 ...
 ...     def visit(self, node, **kw):
 ...         cls = node.__class__
 ...         meth = self._cache.setdefault(cls,
 ...             getattr(self, 'visit' + cls.__name__, self.default))
 ...         return meth(node, **kw)
 ...
 ...     def default(self, node, **kw):
 ...         for child in node.getChildNodes():
 ...             return self.visit(child, **kw)
 ...
 >>> class ConstEval(AbstractVisitor):
 ...     def visitConst(self, node, **kw):
 ...         return node.value
 ...
 ...     def visitName(self, node, **kw):
 ...         raise NameError, "Names are not resolved"
 ...
 ...     def visitDict(self, node, **kw):
 ...         return dict([(self.visit(k), self.visit(v))
 ...                      for k, v in node.items])
 ...
 ...     def visitTuple(self, node, **kw):
 ...         return tuple(self.visit(i) for i in node.nodes)
 ...
 ...     def visitList(self, node, **kw):
 ...         return [self.visit(i) for i in node.nodes]
 ...
 >>> ast = compiler.parse(source, "eval")
 >>> walker = ConstEval()
 >>> walker.visit(ast)
 [1, 2, 'Joe Smith', 8237972883334L, {'Favorite fruits': ['apple',
 'banana', 'pear']}, 'xyzzy', [3, 5, [3.14158999, 2.71828, [

Add sugar to taste

Regards
Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: What YAML engine do you use?
Fredrik Lundh wrote:
> Sion Arrowsmith wrote:
>
>> I'm probably not thinking deviously enough here, but how are you
>> going to exploit an eval() which has very tightly controlled globals
>> and locals (eg. eval(x, {"__builtins__": None}, {}) ?
>
> try this:
>
>     eval("'*'*100*2*2*2*2*2*2*2*2*2")

I updated the safe eval recipe I posted yesterday to add the option of
reporting unsafe source, rather than silently ignoring it.  Is this
completely safe?  I'm interested in feedback.

Michael

Some source to try:

 >>> good_source = """[1, 2, 'Joe Smith', 8237972883334L, # comment
 ... {'Favorite fruits': ['apple', 'banana', 'pear']}, # another comment
 ... 'xyzzy', [3, 5, [3.14159, 2.71828, ["""
 ...

Unquoted string literal:

 >>> bad_source = """[1, 2, JoeSmith, 8237972883334L, # comment
 ... {'Favorite fruits': ['apple', 'banana', 'pear']}, # another comment
 ... 'xyzzy', [3, 5, [3.14159, 2.71828, ["""
 ...

Non-constant expression:

 >>> effbot = "'*'*100*2*2*2*2*2*2*2*2*2"

 >>> safe_eval(good_source)
 [1, 2, 'Joe Smith', 8237972883334L, {'Favorite fruits': ['apple',
 'banana', 'pear']}, 'xyzzy', [3, 5, [3.14158999, 2.71828, [
 >>> assert _ == eval(good_source)

 >>> safe_eval(bad_source)
 Traceback (most recent call last):
 [...]
 Unsafe_Source_Error: Line 1. Strings must be quoted: JoeSmith

 >>> safe_eval(bad_source, fail_on_error = False)
 [1, 2, None, 8237972883334L, {'Favorite fruits': ['apple', 'banana',
 'pear']}, 'xyzzy', [3, 5, [3.14158999, 2.71828, [

 >>> safe_eval(effbot)
 Traceback (most recent call last):
 [...]
 Unsafe_Source_Error: Line 1. Unsupported source construct:
 compiler.ast.Mul

 >>> safe_eval(effbot, fail_on_error = False)
 '*'
 >>>

Source:

import compiler

class Unsafe_Source_Error(Exception):
    def __init__(self, error, descr=None, node=None):
        self.error = error
        self.descr = descr
        self.node = node
        self.lineno = getattr(node, "lineno", None)

    def __repr__(self):
        return "Line %d. %s: %s" % (self.lineno, self.error, self.descr)
    __str__ = __repr__

class AbstractVisitor(object):
    def __init__(self):
        self._cache = {}  # dispatch table

    def visit(self, node, **kw):
        cls = node.__class__
        meth = self._cache.setdefault(cls,
            getattr(self, 'visit' + cls.__name__, self.default))
        return meth(node, **kw)

    def default(self, node, **kw):
        for child in node.getChildNodes():
            return self.visit(child, **kw)

    visitExpression = default

class SafeEval(AbstractVisitor):

    def visitConst(self, node, **kw):
        return node.value

    def visitDict(self, node, **kw):
        return dict([(self.visit(k), self.visit(v))
                     for k, v in node.items])

    def visitTuple(self, node, **kw):
        return tuple(self.visit(i) for i in node.nodes)

    def visitList(self, node, **kw):
        return [self.visit(i) for i in node.nodes]

class SafeEvalWithErrors(SafeEval):

    def default(self, node, **kw):
        raise Unsafe_Source_Error("Unsupported source construct",
                                  node.__class__, node)

    def visitName(self, node, **kw):
        raise Unsafe_Source_Error("Strings must be quoted",
                                  node.name, node)

    # Add more specific errors if desired

def safe_eval(source, fail_on_error=True):
    walker = fail_on_error and SafeEvalWithErrors() or SafeEval()
    try:
        ast = compiler.parse(source, "eval")
    except SyntaxError, err:
        raise
    try:
        return walker.visit(ast)
    except Unsafe_Source_Error, err:
        raise

--
http://mail.python.org/mailman/listinfo/python-list
Re: Classical FP problem in python : Hamming problem
Francis Girard wrote:
> The following implementation is even more speaking as it makes
> self-evident and almost mechanical how to translate algorithms that
> run after their tail from recursion to "tee" usage:

Thanks, Francis and Jeff, for raising a fascinating topic.  I've
enjoyed trying to get my head around both the algorithm and your
non-recursive implementation.

Here's a version of your implementation that uses a helper class to
make the algorithm itself prettier.

from itertools import tee, imap

def hamming():
    def _hamming():
        yield 1
        for n in imerge(2 * hamming, imerge(3 * hamming, 5 * hamming)):
            yield n
    hamming = Tee(_hamming())
    return iter(hamming)

class Tee(object):
    """Provides an independent iterator (using tee) on every iteration
    request.  Also implements lazy iterator arithmetic"""
    def __init__(self, iterator):
        self.iter = tee(iterator, 1)[0]
    def __iter__(self):
        return self.iter.__copy__()
    def __mul__(self, number):
        return imap(lambda x: x * number, self.__iter__())
    # assumed missing from the archived text: __rmul__ is needed for
    # the '2 * hamming' spelling used above
    __rmul__ = __mul__

def imerge(xs, ys):
    x = xs.next()
    y = ys.next()
    while True:
        if x == y:
            yield x
            x = xs.next()
            y = ys.next()
        elif x < y:
            yield x
            x = xs.next()
        else:  # if y < x:
            yield y
            y = ys.next()

 >>> hg = hamming()
 >>> for i in range(10000):
 ...     n = hg.next()
 ...     if i % 1000 == 0: print i, n
 ...
 0 1
 1000 5184
 2000 81
 3000 27993600
 4000 4707158941350
 5000 5096079360
 6000 4096000
 7000 2638827906662400
 8000 143327232
 9000 680244480

Regards
Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: Classical FP problem in python : Hamming problem
Nick Craig-Wood wrote:
> Steven Bethard <[EMAIL PROTECTED]> wrote:
>> Nick Craig-Wood wrote:
>>> Thinking about this some more leads me to believe a general purpose
>>> imerge taking any number of arguments will look neater, eg
>>>
>>> def imerge(*generators):
>>>     values = [ g.next() for g in generators ]
>>>     while True:
>>>         x = min(values)
>>>         yield x
>>>         for i in range(len(values)):
>>>             if values[i] == x:
>>>                 values[i] = generators[i].next()
>>
>> This kinda looks like it dies after the first generator is exhausted,
>> but I'm not certain.
>
> Yes it will stop iterating then (rather like zip() on lists of unequal
> size).  Not sure what the specification should be!  It works for the
> hamming problem though.
>
>     list(imerge(iter([1, 2]), iter([1, 2, 3]), iter([1, 2, 3, 4])))
>     [1, 2]
>
>> An alternate version that doesn't search for 'i':
>>
>> py> def imerge(*iterables):
>> ...     iters = [iter(i) for i in iterables]
>> ...     values = [i.next() for i in iters]
>> ...     while iters:
>> ...         x, i = min((val, i) for i, val in enumerate(values))
>> ...         yield x
>> ...         try:
>> ...             values[i] = iters[i].next()
>> ...         except StopIteration:
>> ...             del iters[i]
>> ...             del values[i]
>> ...
>> py> list(imerge([1, 4, 7], [2, 5, 8], [3, 6, 9]))
>> [1, 2, 3, 4, 5, 6, 7, 8, 9]
>> py> list(imerge([3, 6, 9], [1, 4, 7], [2, 5, 8]))
>> [1, 2, 3, 4, 5, 6, 7, 8, 9]
>> py> list(imerge([1, 4, 7], [3, 6, 9], [2, 5, 8]))
>> [1, 2, 3, 4, 5, 6, 7, 8, 9]
>
> This isn't quite right...
>
>     list(imerge([1, 2, 3], [1, 2, 3], [1, 2, 3]))
>     [1, 1, 1, 2, 2, 2, 3, 3, 3]
>
> This should produce [1, 2, 3].
>
> So I'm afraid the searching *is* necessary - you've got to find all
> the generators with the min value and move them on.

Here's a dict-based implementation: cute, but slow, at least for a
small number of iterators

 >>> def imerge(*iterables):
 ...     cache = {}
 ...     iterators = map(iter, iterables)
 ...     number = len(iterables)
 ...     exhausted = 0
 ...     while 1:
 ...         for it in iterators:
 ...             try:
 ...                 cache.setdefault(it.next(), []).append(it)
 ...             except StopIteration:
 ...                 exhausted += 1
 ...                 if exhausted == number:
 ...                     raise StopIteration
 ...         val = min(cache)
 ...         iterators = cache.pop(val)
 ...         yield val
 ...
 >>> list(imerge([1, 2, 3, 6], [1, 2, 3, 7], [1, 2, 3, 4, 5]))
 [1, 2, 3, 4, 5, 6, 7]
 >>>

Michael

--
http://mail.python.org/mailman/listinfo/python-list
Re: limited python virtual machine (WAS: Another scripting language implemented into Python itself?)
Steven Bethard wrote:
> I wish there was a way to, say, exec something with no builtins and
> with import disabled, so you would have to specify all the available
> bindings, e.g.:
>
>     exec user_code in dict(ClassA=ClassA, ClassB=ClassB)
>
> but I suspect that even this wouldn't really solve the problem,
> because you can do things like:
>
> py> class ClassA(object):
> ...     pass
> ...
> py> object, = ClassA.__bases__
> py> object
> <type 'object'>
> py> int = object.__subclasses__()[2]
> py> int
> <type 'int'>
>
> so you can retrieve a lot of the builtins.  I don't know how to
> retrieve __import__ this way, but as soon as you figure that out, you
> can then do pretty much anything you want to.
>
> Steve

Steve

Safe eval recipe posted to cookbook:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/364469

Couldn't safe exec be programmed similarly?

'import' and 'from' are syntax, so trivially avoided

Likewise, function calls are easily intercepted

As you say, attribute access to core functions appears to present the
challenge.  It is easy to intercept attribute access, harder to know
what's safe.  If there were a known set of 'dangerous' objects e.g.,
sys, file, os etc... then these could be checked by identity against
any attribute returned

Of course, execution would be painfully slow, due to double
interpretation.

Michael

--
http://mail.python.org/mailman/listinfo/python-list
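To illustrate the 'easily intercepted' point: in the AST-walking
approach of that recipe, both calls and attribute accesses have their
own node types, so a checker needs only an extra visit method or two.  A
sketch, building on the SafeEval classes from the safe-eval recipe
discussion elsewhere in this digest (the attribute black-list here is
illustrative, and - as the rest of the thread argues - probably not
sufficient on its own):

    UNSAFE_ATTRS = set(["__subclasses__", "__bases__", "__globals__",
                        "func_globals"])

    class SaferEval(SafeEvalWithErrors):
        def visitGetattr(self, node, **kw):
            # compiler.ast.Getattr carries the attribute name and the
            # expression it is accessed on
            if node.attrname in UNSAFE_ATTRS:
                raise Unsafe_Source_Error("Forbidden attribute",
                                          node.attrname, node)
            return getattr(self.visit(node.expr), node.attrname)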
Re: limited python virtual machine (WAS: Another scripting language implemented into Python itself?)
Steven Bethard wrote: Michael Spencer wrote: Safe eval recipe posted to cookbook: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/364469 This recipe only evaluates constant expressions: "Description: Evaluate constant expressions, including list, dict and tuple using the abstract syntax tree created by compiler.parse" It means you can't eval arbitrary Python code -- it's basically just a data parser. Handy in some situations, but not the equivalent of a limited Python virtual machine. Indeed. But it's easy to extend this to arbitrary constructs. You just need to decide what code to emit for the other 50 or so ast node types. Many of those are boiler-plate binops. Likewise, function calls are easily intercepted I'm not sure I follow this... How do you intend to intercept all function calls? Sorry, should have been more precise. In the AST, function calls have their own node type, so it is easy to 'intercept' them and execute them conditionally [snip] It sounds like you're suggesting overriding the global attribute access mechanism. Is that right? So that every time Python encountered an attribute access, you would verify that the attribute being accessed is not on the 'dangerous' list? Just in the context of the AST-walker, yes. I don't know how to do that without basically rewriting some of Python's C code, though certainly I'm no expert in the area... Not messing with the CPython interpreter. Also, I'm not sure identity is sufficient: py> import sys py> import new py> new.module('newsys') py> newsys = new.module('newsys') py> newsys.__dict__.update(sys.__dict__) py> newsys is sys False py> newsys == sys False Right - the crux of the problem is how to identify dangerous objects. My point is that if such a test is possible, then safe exec is very easily implemented within current Python. If it is not, then it is essentially impossible. Let's assume that it is indeed not possible to know in general whether an object is safe, either by inspecting its attributes, or by matching its identity against a black list. It might still be possible to have a reliable test within a problem-specific domain, i.e., white-listing. This, I think, is what you meant when you said: I wish there was a way to, say, exec something with no builtins and with import disabled, so you would have to specify all the available bindings, e.g.: exec user_code in dict(ClassA=ClassA, ClassB=ClassB) I believe that if you can come up with a white-list, then the rest of the problem is easy. Michael -- http://mail.python.org/mailman/listinfo/python-list
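To make the white-list idea concrete, the plumbing is only a few lines (a sketch - ClassA and ClassB stand for whatever you choose to expose, and, as shown above, hiding __builtins__ alone does not stop __subclasses__-style escapes):

def safe_exec(user_code, **bindings):
    env = {'__builtins__': {}}  # suppress the real builtins
    env.update(bindings)        # expose only the white-listed objects
    exec user_code in env
    return env

env = safe_exec("obj = ClassA()", ClassA=ClassA, ClassB=ClassB)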
Re: limited python virtual machine (WAS: Another scripting language implemented into Python itself?)
Cameron Laird wrote: In article <[EMAIL PROTECTED]>, Michael Spencer <[EMAIL PROTECTED]> wrote: . . . Right - the crux of the problem is how to identify dangerous objects. My point is that if such a test is possible, then safe exec is very easily implemented within current Python. If it is not, then it is essentially impossible. I'll suggest yet another perspective: add another indirection. As the virtual machine becomes more available to introspection, it might become natural to define a *very* restricted interpreter which we can all agree is safe, PLUS a means to extend that specific instance of the VM with, say, new definitions of bindings for particular AST nodes. Then the developer has the means to "build out" his own VM in a way he can judge useful and safe for his own situation. Rather than the Java there-is-one-"safe"-for-all approach, Pythoneers would have the tools to create safety. That does sound good. And evolutionary, because the very restricted VM could be implemented today (in Python), and subsequently PyPy (or whatever) could optimize it. The safe eval recipe I referred to earlier in the thread is IMO a trivial example of this approach. Of course, its restrictions are extreme - only constant expressions - but it is straightforwardly extensible to any subset of the language. The limitation that I see with this approach is that it is not, in general, syntax that is safe or unsafe (with the notable exception of 'import' and its relatives). Rather, it is the library objects, especially the built-ins, that present the main source of risk. So, if I understand your suggestion, it would require assessing the safety of the built-in objects, as well as providing an interpreter that could control access to them, possibly with fine-grain control at the attribute level. M -- http://mail.python.org/mailman/listinfo/python-list
Re: python without OO
Davor wrote: Thanks, I do not hate OO - I just do not need it for the project size I'm dealing with - and the project will eventually become open-source and have additional developers - so I would prefer that we all stick to "simple procedural" stuff rather than having to deal with a developer that will be convincing me that his 50 layers inheritance hierarchy is good since it exists in some weird pattern that he saw somewhere on some Java design patterns discussion board :-) and other "proper" OO design issues... Once I opted for C++ in a very small project and believed everyone will stick with C subset + better type checking offered through C++ - but I simply could not manage to keep them off using OO stuff which was just making program more complicated than it should have been. (note, I am not an experienced developer, nor the others I'll be working with (even though some think they are:-)), so I prefer preemptively dealing with issue of everyone showing off their OO design skills) Davor Perhaps pylint (http://www.logilab.org/projects/pylint) or its ilk can help you enforce a coding style Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: Classical FP problem in python : Hamming problem
Paul Rubin wrote: Francis Girard <[EMAIL PROTECTED]> writes: Thank you Nick and Steven for the idea of a more generic imerge. If you want to get fancy, the merge should use a priority queue (like in the heapsort algorithm) instead of a linear scan through the incoming iters, to find the next item to output. That lowers the running time to O(n log k) instead of O(n*k), where k is the number of iterators and n is the length. I looked at a heapq solution but didn't see any clean way of dealing with multiple iterators having equal values. The dict solution below deals cleanly with that, since one key can be shared by any number of iterators. Extracting the minimum, and the associated iterables is fast, but the overall solution is still slower than the brute force approach for the 3 hamming iterators. >>> def imerge(*iterables): ... cache = {} ... iterators = map(iter,iterables) ... number = len(iterables) ... exhausted = 0 ... while 1: # First time through, looked at all of them # Subsequently, update only the minimum iterators ... for it in iterators: ... try: # Key each iterator by its next() value # Multiple iterators may share the same key ... cache.setdefault(it.next(),[]).append(it) ... except StopIteration: ... exhausted += 1 ... if exhausted == number: ... raise StopIteration # Get the lowest value ... val = min(cache) # and all the iterators that have that value ... iterators = cache.pop(val) ... yield val >>> list(imerge([1, 2, 3, 6], [1, 2, 3, 7], [1, 2, 3, 4, 5])) [1, 2, 3, 4, 5, 6, 7] >>> Michael -- http://mail.python.org/mailman/listinfo/python-list
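For the record, heapq can cope with equal values if, after yielding the minimum, you advance every iterator whose head equals it - a sketch, untested for speed against the dict version:

import heapq

def imerge_heap(*iterables):
    """Merge sorted iterables, yielding each value once - O(n log k)"""
    heap = []
    for src in map(iter, iterables):
        try:
            heap.append((src.next(), src))
        except StopIteration:
            pass
    heapq.heapify(heap)
    while heap:
        val = heap[0][0]
        yield val
        while heap and heap[0][0] == val:
            # advance every iterator that produced the minimum
            _, src = heapq.heappop(heap)
            try:
                heapq.heappush(heap, (src.next(), src))
            except StopIteration:
                pass

>>> list(imerge_heap([1, 2, 3, 6], [1, 2, 3, 7], [1, 2, 3, 4, 5]))
[1, 2, 3, 4, 5, 6, 7]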
Re: remove duplicates from list *preserving order*
Steven Bethard wrote: I'm sorry, I assume this has been discussed somewhere already, but I found only a few hits in Google Groups... If you know where there's a good summary, please feel free to direct me there. I have a list[1] of objects from which I need to remove duplicates. I have to maintain the list order though, so solutions like set(lst), etc. will not work for me. What are my options? So far, I can see: def filterdups(iterable): result = [] for item in iterable: if item not in result: result.append(item) return result def filterdups(iterable): result = [] seen = set() for item in iterable: if item not in seen: result.append(item) seen.add(item) return result def filterdups(iterable): seen = set() for item in iterable: if item not in seen: seen.add(item) yield item Does anyone have a better[2] solution? STeve [1] Well, actually it's an iterable of objects, but I can convert it to a list if that's helpful. [2] Yes I know, "better" is ambiguous. If it helps any, for my particular situation, speed is probably more important than memory, so I'm leaning towards the second or third implementation. How about: >>> def filterdups3(iterable): ... seen = set() ... def _seen(item): ... return item in seen or seen.add(item) ... return itertools.ifilterfalse(_seen,iterable) ... >>> list(filterdups3([1,2,2,3,3,3,4,4,4,2,2,5])) [1, 2, 3, 4, 5] >>> Michael -- http://mail.python.org/mailman/listinfo/python-list
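In case the test expression looks too cute: it leans on set.add returning None, so for a new item the `or` falls through to seen.add(item) - which adds it and evaluates false, hence ifilterfalse keeps it - while a repeat short-circuits to True and is dropped:

>>> seen = set()
>>> 1 in seen or seen.add(1)   # new item: adds it, evaluates to None
>>> 1 in seen or seen.add(1)   # repeat: short-circuits to True
True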
Re: List mapping question
Marc Huffnagle wrote: I have a number of variables that I want to modify (a bunch of strings that I need to convert into ints). Is there an easy way to do that other than saying: > a = int(a) > b = int(b) > c = int(c) It may not matter to you, at the moment, but a = int(a) is not strictly 'modifying a variable'. Instead int(a) creates a new int object, if possible, from the object that a is currently bound to. Then a is rebound to the new object. I tried > [i = int(i) for i in [a, b, c]] You can't make an assignment in a list comprehension. If your 'variables' are object attributes, you could do: [setattr(obj,name,int(getattr(obj,name))) for name in [list of attribute names]] but that didn't work because it was creating a list with the values of a, b and c instead of the actual variables themselves, then trying to set a string equal to an integer, which it really didn't like. Marc For your problem as stated: >>> a=b=c="1" >>> for var in ["a","b","c"]: ... exec "%s = int(%s)" % (var,var) ... >>> a,b,c (1, 1, 1) >>> But don't do this, except as a "one-off" data crunching exercise Michael -- http://mail.python.org/mailman/listinfo/python-list
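If it really is just a handful of local names, plain tuple unpacking is safer than exec and rebinds the names directly:

>>> a, b, c = "1", "2", "3"
>>> a, b, c = map(int, (a, b, c))
>>> a, b, c
(1, 2, 3)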
Re: how to generate SQL SELECT pivot table string
McBooCzech wrote: Hallo all, I am trying to generate SQL SELECT command which will return pivot table. The number of column in the pivot table depends on the data stored in the database. It means I do not know in advance how many columns the pivot table will have. For example I will test the database as following: SELECT DISTINCT T1.YEAR FROM T1 The SELECT command will return: 2002 2003 2004 2005 So I would like to construct following select: select T1.WEEK, SUM (case T1.YEAR when '2002' then T1.PRICE else 0 END) Y_02, SUM (case T1.YEAR when '2003' then T1.PRICE else 0 END) Y_03, SUM (case T1.YEAR when '2004' then T1.PRICE else 0 END) Y_04, SUM (case T1.YEAR when '2005' then T1.PRICE else 0 END) Y_05 from T1 group by T1.week which will return pivot table with 5 columns: WEEK, Y_02, Y_03, Y_04, Y_05, but if the command "SELECT DISTINCT T1.YEAR FROM T1" returns: 2003 2004 I have to construct only following string: select T1.WEEK, SUM (case T1.YEAR when '2003' then T1.PRICE else 0 END) Y_03, SUM (case T1.YEAR when '2004' then T1.PRICE else 0 END) Y_04, from T1 group by T1.week which will return pivot table with 3 columns: WEEK, Y_03, Y_04 Can anyone help and give me a hand or just direct me, how to write a code which will generate SELECT string depending on the data stored in the database as I described? Thanks Petr McBooCzech >>> step1result = """2000 ... 2001 ... 2002 ... 2003""".splitlines() >>> step1result ['2000', '2001', '2002', '2003'] >>> step2query = "Prefix " + ",".join(["Case %s" % year for year in step1result]) + " Postfix" >>> step2query 'Prefix Case 2000,Case 2001,Case 2002,Case 2003 Postfix' HTH Michael -- http://mail.python.org/mailman/listinfo/python-list
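Putting the two steps together, the whole SELECT could be assembled like this (an untested sketch; fetching the year list through your DB module's cursor is left to you):

def build_pivot_select(years):
    """years: list of year strings, e.g. ['2003', '2004']"""
    cases = ["SUM (case T1.YEAR when '%s' then T1.PRICE else 0 END) Y_%s"
             % (year, year[-2:]) for year in years]
    return ("select T1.WEEK, " + ", ".join(cases)
            + " from T1 group by T1.week")

>>> print build_pivot_select(['2003', '2004'])
select T1.WEEK, SUM (case T1.YEAR when '2003' then T1.PRICE else 0 END) Y_03, SUM (case T1.YEAR when '2004' then T1.PRICE else 0 END) Y_04 from T1 group by T1.week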
Re: Popularizing SimpleHTTPServer and CGIHTTPServer
Jorey Bump wrote: > ... Is there a NotSoSimpleHTTPServer? ... Steve Holden wrote: > ... You want ExtremelyBloodyComplicatedHTTPServer :-) Lee Harr wrote: ... I think I would point to twisted for that. Michael :-) -- http://mail.python.org/mailman/listinfo/python-list
Re: returning True, False or None
Steven Bethard wrote: I have lists containing values that are all either True, False or None, e.g.: [True, None, None, False] [None, False, False, None ] [False, True, True, True ] etc. For a given list: * If all values are None, the function should return None. * If at least one value is True, the function should return True. * Otherwise, the function should return False. Right now, my code looks like: if True in lst: return True elif False in lst: return False else: return None This has a light code smell for me though -- can anyone see a simpler way of writing this? STeVe max(lst) ;-) Michael -- http://mail.python.org/mailman/listinfo/python-list
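(Not entirely a joke: in CPython 2, None compares less than everything else and False < True, so max returns True if any value is True, else False if any False is present, else None - exactly the spec:

>>> max([None, None]), max([None, False]), max([False, None, True])
(None, False, True)

Relying on cross-type comparison is implementation-specific, though.)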
Re: changing local namespace of a function
Bo Peng wrote: Dear list, I have many dictionaries with the same set of keys and I would like to write a function to calculate something based on these values. For example, I have a = {'x':1, 'y':2} b = {'x':3, 'y':3} def fun(dict): dict['z'] = dict['x'] + dict['y'] fun(a) and fun(b) will set z in each dictionary as the sum of x and y. My function and dictionaries are a lot more complicated than these so I would like to set dict as the default namespace of fun. Is this possible? The ideal code would be: def fun(dict): # set dict as local namespace # locals() = dict? z = x + y As you no doubt have discovered from the docs and this group, that isn't doable with CPython. If you must write your functions as real functions, then you might do something like this: >>> a = {'x':1, 'y':2} >>> b = {'x':3, 'y':3} ... >>> def funa(x,y, **kw): ... del kw #Careful of unwanted names in locals with this approach ... z = x + y ... return locals() ... >>> a.update(funa(**a)) >>> b.update(funa(**b)) >>> a {'y': 2, 'x': 1, 'z': 3} >>> b {'y': 3, 'x': 3, 'z': 6} >>> Alternatively, you could use exec: >>> a = {'x':1, 'y':2} >>> b = {'x':3, 'y':3} >>> exec "z = x + y" in globals(), a >>> a {'y': 2, 'x': 1, 'z': 3} >>> exec "z = x + y" in globals(), b >>> b {'y': 3, 'x': 3, 'z': 6} >>> Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: changing local namespace of a function
Bo Peng wrote: Michael Spencer wrote: > There are hundreds of items in the dictionary (that will be needed in the calculation) so passing the whole dictionary is a lot better than passing individual items. ... def fun(d): exec 'z = x + y' in globals(), d seems to be more readable than def fun(d): d['z'] = d['x'] + d['y'] But how severe will the performance penalty be? Try it and see. Bo Compare it with Jeff Shannon's suggestion, and with a lazy dict-wrapper like this: >>> class wrapbigdict(object): ... """Lazy attribute access to dictionary keys. Will not access ... keys that are not valid attribute names!""" ... def __init__(self, mydict): ... object.__setattr__(self, "mydict",mydict) ... def __getattr__(self, attrname): ... return self.mydict[attrname] ... def __setattr__(self, attrname, value): ... self.mydict[attrname] = value ... ... >>> a = {'x':1, 'y':2} >>> b = {'x':3, 'y':3} ... >>> w_a = wrapbigdict(a) >>> w_b = wrapbigdict(b) ... >>> def fun(d): ... d.z = d.x + d.y ... >>> fun(w_a) >>> fun(w_b) ... >>> w_a.mydict {'y': 2, 'x': 1, 'z': 3} >>> w_b.mydict {'y': 3, 'x': 3, 'z': 6} >>> -- http://mail.python.org/mailman/listinfo/python-list
Re: returning True, False or None
Fahri Basegmez wrote: reduce(lambda x, y: x or y, lst) works but when I tried import operator reduce(operator.or_, lst) this did not work. It pukes Traceback (most recent call last): File "", line 1, in ? TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool' Any comments? Fahri TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool' operator.or_ is "|" i.e., bitwise, not logical or Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: changing local namespace of a function
Nick Coghlan wrote: Michael Spencer wrote: def fun(dict): # set dict as local namespace # locals() = dict? z = x + y As you no doubt have discovered from the docs and this group, that isn't doable with CPython. Not entirely impossible: Py> def f(d): ... exec "locals().update(d)" ... return x + y ... Py> f(dict(x=1, y=2)) 3 Due to the way 'exec' is implemented, modifications to locals() inside an exec statement actually take effect (basically, they're freeloading on the code which allows 'exec "x = 1"' to work properly). This is an evil, evil hack and almost certainly not what anyone should be doing. Also, variables created this way will be slower than normal variables due to the way the associated code works. Cheers, Nick. Oooh - evil indeed, but thanks for the pointer. I debated including a link to one of the 'writable locals' threads, when I settled on not 'doable', but gambled on being probably useful rather than certainly accurate. Just goes to show you can't get away with anything in this NG ;-) Cheers Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: returning True, False or None
Fahri Basegmez wrote: "Michael Spencer" <[EMAIL PROTECTED]> wrote in message news:[EMAIL PROTECTED] Fahri Basegmez wrote: reduce(lambda x, y: x or y, lst) works but when I tried import operator reduce(operator.or_, lst) this did not work. It pukes Traceback (most recent call last): File "", line 1, in ? TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool' Any comments? Fahri TypeError: unsupported operand type(s) for |: 'NoneType' and 'bool' operator.or_ is "|" i.e., bitwise, not logical or Michael That explains it. Is there a logical or we can use with reduce? Fahri Yes, but it's not quite the same as the 'or' operator >>> bool.__or__(True, False) True >>> bool.__or__(False, False) False >>> bool.__or__(False, None) NotImplemented >>> this may not be intentional... Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: changing local namespace of a function
Alex Martelli wrote: Hmmm, you do realize that wrapdict uses a lot of indirection while my equivalent approach, just posted, is very direct, right? To reiterate the latter, and dress it up nicely too, it's class wrapwell(object): def __init__(self, somedict): self.__dict__ = somedict Bad mistake on my part, sorry! Nick Coghlan wrote: ... a class that combined property access with the above... In a similar vein to Nick's solution: class AutoProperty(object): def __init__(self, meth): self.meth = meth self.name = meth.__name__ self.__doc__ = meth.__doc__ def __get__(self, obj, cls): if isinstance(obj, cls): return obj.__dict__.setdefault(self.name, self.meth(obj)) else: return self.__doc__ # You could define __set__ and __del__ but they don't seem # necessary based on what you've said so far class DataManipulator(object): def __init__(self, data): self.__dict__ = data class Model(DataManipulator): def z(self): """x+y""" return self.x+self.y z = AutoProperty(z) def z1(self): """Or any other useful information""" return self.z + self.x z1 = AutoProperty(z1) # You could automate these calls to AutoProperty in a metaclass >>> a = {'x':1, 'y':2} >>> b = {'x':3, 'y':3} >>> d = Model(a) >>> d.z 3 >>> d.z1 4 >>> a {'y': 2, 'x': 1, 'z': 3, 'z1': 4} >>> d= Model(b) >>> d.z1 9 >>> b {'y': 3, 'x': 3, 'z': 6, 'z1': 9} >>> Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: empty classes as c structs?
Alex Martelli wrote: Nick Coghlan <[EMAIL PROTECTED]> wrote: ... Michael Spencer also posted ... Wasted indirection, IMHO. A better implementation: class attr_view(object): def __init__(self, data): self.__dict__ = data Alex Indeed! A complete brain-blip Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: empty classes as c structs?
Steven Bethard wrote: Nick Coghlan wrote: class attr_view(object): def __init__(self, data): self.__dict__ = data I think the idea definitely deserves mention as a possible implementation strategy in the generic objects PEP, with the data argument made optional: That's basically what the current implementation does (although I use 'update' instead of '='). The code is complicated because the implementation also takes all the argument types that dicts take. STeVe Have you noted the similarity of bunch and types.ModuleType? perhaps module.__init__ could take an additional keyword argument to set its __dict__ Michael -- http://mail.python.org/mailman/listinfo/python-list
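For instance, with the current constructor (new.module takes a name and an optional docstring), the dict has to be filled in afterwards:

>>> import new
>>> ns = new.module('ns', 'a module doing duty as a bunch')
>>> ns.__dict__.update({'x': 1, 'y': 2})
>>> ns.x, ns.y
(1, 2)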
Re: Read-only class properties
Bengt Richter wrote: ... > > class Foo(object): > class __metaclass__(type): > def __setattr__(cls, name, value): > if type(cls.__dict__.get(name)).__name__ == 'Descriptor': > raise AttributeError, 'setting Foo.%s to %r is not allowed' > %(name, value) > type.__setattr__(cls, name, value) > @classproperty > def TheAnswer(cls): > return "The Answer according to %s is 42" % cls.__name__ > @classproperty > def AnotherAnswer(cls): > return "Another Answer according to %s is 43" % cls.__name__ > or, simply put the read-only descriptor in the metaclass: Python 2.4 (#60, Nov 30 2004, 11:49:19) [MSC v.1310 32 bit (Intel)] on win32 Type "help", "copyright", "credits" or "license" for more information. >>> def classproperty(function): ... class Descriptor(object): ... def __get__(self, obj, objtype): ...return function(objtype) ... def __set__(self, obj, value): ... raise AttributeError, "can't set class attribute" ... return Descriptor() ... >>> class A(object): ... class __metaclass__(type): ... @classproperty ... def TheAnswer(cls): ... return "The Answer according to %s is 42" % cls.__name__ ... >>> A.TheAnswer 'The Answer according to __metaclass__ is 42' >>> A.TheAnswer = 3 Traceback (most recent call last): File "", line 1, in ? File "", line 6, in __set__ AttributeError: can't set class attribute >>> class B(A): pass ... >>> B.TheAnswer 'The Answer according to __metaclass__ is 42' >>> this means that the getter doesn't automatically get a reference to the class (since it is a method of metaclass), which may or may not matter, depending on the application Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: redefining a function through assignment
Daniel Britt wrote: > Hello All, > I am new to Python so if there is an obvious answer to my question please > forgive me. Lets say I have the following code in mod1.py > > class test: > def func1(self): > print 'hello' > > > Now lets say I have another file called main.py: > > import mod1 > > inst = mod1.test() > inst.func1() > > > This will print out hello. Now if I added the following to main: > def newFunc(var): > print 'new method' > > mod1.test.func1 = newFunc > > inst.func1() > > > This will print out 'new method'. If any other instance of mod1.test is > created calling func1, func1 will always reference the newFunc function. > This is less than desirable to say the least. Is there any way of preventing > this from ever happening? I searched around for quite a while and I haven't > been able to find anyone who has a solution. The reason I am asking this is > b/c I want to build an application in python that has plugins. I want to > make sure that a programmer could not accidently or intentionally clobber > over another plugins code, which they could easily do. Any help would be > appreciated. Thanks > > ~DJ > > The obvious answer is not to give a programmer access to an object that you don't want to be messed with. However, you've probably thought of that... You could deter (not completely prevent) modification of the class by intercepting the __setattr__ of its metaclass: >>> class meta_writeonce(type): ... def __setattr__(cls, attrname, val): ... raise TypeError ... >>> class A(object): ... __metaclass__ = meta_writeonce ... def func(self): ... print "hello from the unmodified class A" ... >>> A.func = None Traceback (most recent call last): File "", line 1, in ? File "", line 3, in __setattr__ TypeError >>> a = A() >>> a.func() hello from the unmodified class A >>> If you want only to deter overwriting existing class attributes, you could do: >>> class meta_writeonlyattributes(type): ... def __setattr__(cls, attrname, val): ... if hasattr(cls, attrname): ... raise TypeError ... else: ... type.__setattr__(cls, attrname, val) ... >>> class B(object): ... __metaclass__ = meta_writeonlyattributes ... def func(self): ... print "hello from the unmodified class B" ... >>> B.func = None Traceback (most recent call last): File "", line 1, in ? File "", line 4, in __setattr__ TypeError >>> B.func2 = lambda self: "hello from func2" >>> b = B() >>> b.func() hello from the unmodified class B >>> b.func2() 'hello from func2' >>> This is good enough to prevent accidental 'clobbering', but would not prevent a programmer rebinding an attribute deliberately: >>> type.__setattr__(B,"func",lambda self: "I've got you now") >>> b = B() >>> b.func() "I've got you now" >>> Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: unusual exponential formatting puzzle
Neal Becker wrote: > Like a puzzle? I need to interface python output to some strange old > program. It wants to see numbers formatted as: > > e.g.: 0.23456789E01 > > That is, the leading digit is always 0, instead of the first significant > digit. It is fixed width. I can almost get it with '% 16.9E', but not > quite. > > My solution is to print to a string with the '% 16.9E' format, then parse it > with re to pick off the pieces and fix it up. Pretty ugly. Any better > ideas? > > Does this do what you want? >>> from math import log10, modf, fabs >>> def format(n, mantplaces = 9, expplaces = 2): ... sign, n = n/fabs(n), fabs(n)# preserve the sign ... c, m = modf(log10(n)) ... c, m = c - (c>0), m + (c>0) # redistribute mantissa to exponent ... return "%.*fE%0*d" % (mantplaces, sign * 10**c, expplaces, m) ... >>> >>> def test_format(n): ... for exp in range(-5, 5): ... N = n*(10**exp) ... print format(n*(10**exp)) ... >>> test_format(234.56789) 0.234567890E-2 0.234567890E-1 0.234567890E00 0.234567890E01 0.234567890E02 0.234567890E03 0.234567890E04 0.234567890E05 0.234567890E06 0.234567890E07 >>> Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: unusual exponential formatting puzzle
Michael Spencer wrote: > Neal Becker wrote: > >>Like a puzzle? I need to interface python output to some strange old >>program. It wants to see numbers formatted as: >> >>e.g.: 0.23456789E01 >> >>That is, the leading digit is always 0, instead of the first significant >>digit. It is fixed width. I can almost get it with '% 16.9E', but not >>quite. >> >>My solution is to print to a string with the '% 16.9E' format, then parse it >>with re to pick off the pieces and fix it up. Pretty ugly. Any better >>ideas? >> >> > > Does this do what you want? Not if the input is 0 or 1. Here's a correction, with a more comprehensive test: from math import log10, modf, fabs def format(n, mantplaces = 9, expplaces = 2): """Formats n as '0.mEee'""" if n: sign, absn = n/fabs(n), fabs(n) f, i = modf(log10(absn)) mant, exp = sign * 10** (f - (f>=0)), i + (f>=0) else: mant, exp = 0, 0 return "%.*fE%0*d" % (mantplaces, mant, expplaces, exp) def test_format(N = 1, step = 1): """Verifies format(n) and format(1/n) for -N < n < N""" assert format(0,9) == '0.000000000E00' assert format(0, 7, 3) == '0.0000000E000' def verify(n): DIGITS = '123456789' try: f = format(n) assert round(float(format(n)),6) == round(n, 6) assert f[0] == "-" and f[3] in DIGITS or f[2] in DIGITS except AssertionError: raise AssertionError("Failed on: %f, formatted as %s" % (n, f)) for n in xrange(-N, N, step): if n: verify(n) verify(1.0/n) Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: How do I convert arithemtic string (like "2+2") to a number?
John J. Lee wrote: "Adomas" <[EMAIL PROTECTED]> writes: Well, a bit more secure would be eval(expression, {'__builtins__': {}}, {}) or alike. Don't believe this without (or even with ;-) very careful thought, anyone. Google for rexec. John This module provides a more systematic way to set up restricted evaluation: """Restricted evaluation Main entry point: r_eval() For usage see class tests or run them using testall()""" import types import compiler import operator import sys, os # used only for testing ast = compiler.ast class Eval_Error(Exception): def __init__(self,error,descr = None,node = None): self.error = error self.descr = descr def __repr__(self): return "%s: %s" % (self.error, self.descr) __str__ = __repr__ class AbstractVisitor(object): """Standard depth-first AST walker - dispatches to methods based on Node class name""" def __init__(self): self._cache = {} # dispatch table def visit(self, node,**kw): if node is None: return None cls = node.__class__ meth = self._cache.setdefault(cls, getattr(self,'visit'+cls.__name__,self.default)) return meth(node, **kw) def default(self, node, **kw): for child in node.getChildNodes(): return self.visit(child, **kw) visitExpression = default class Eval(AbstractVisitor): """An AST walker that implements a replacement to built-in eval. See r_eval for entry point/usage. Provides hooks for managing name resolution, proxying objects, and controlling attribute access Does not implement: List Comprehensions, Generator Expressions, Lambda Ellipsis (can this be used without numpy?) """ def __init__(self, context = globals()): super(Eval,self).__init__() self.context = context # Namespace interface. Safe implementations should override these methods # to implement restricted evaluation. This implementation simply # evals the name in self.context and provides no proxying or # attribute lookup restrictions def lookup(self, objname): """Called only by visitName. Raise an exception here to prevent any direct name resolution, but note that objects may be returned by callables or attribute lookups""" return eval(objname, self.context) def getObject(self, obj): """Called by all name resolvers and by CallFunc. Provides a hook for proxying unsafe objects""" return obj def getAttribute(self,obj,attrname): """Called by visitGetattr""" return getattr(obj,attrname) # End Namespace interface # Syntax nodes follow by topic group. Delete methods to disallow # certain syntax. 
# Object constructor nodes def visitConst(self, node, **kw): return node.value def visitDict(self,node,**kw): return dict([(self.visit(k),self.visit(v)) for k,v in node.items]) def visitTuple(self,node, **kw): return tuple(self.visit(i) for i in node.nodes) def visitList(self,node, **kw): return [self.visit(i) for i in node.nodes] def visitSliceobj(self,node,**kw): return slice(*[self.visit(i) for i in node.nodes]) def visitEllipsis(self,node,**kw): raise NotImplementedError, "Ellipsis" # Binary Ops def visitAdd(self,node,**kw): return self.visit(node.left) + self.visit(node.right) def visitDiv(self,node,**kw): return self.visit(node.left) / self.visit(node.right) def visitFloorDiv(self,node,**kw): return self.visit(node.left) // self.visit(node.right) def visitLeftShift(self,node,**kw): return self.visit(node.left) << self.visit(node.right) def visitMod(self,node,**kw): return self.visit(node.left) % self.visit(node.right) def visitMul(self,node,**kw): return self.visit(node.left) * self.visit(node.right) def visitPower(self,node,**kw): return self.visit(node.left) ** self.visit(node.right) def visitRightShift(self,node,**kw): return self.visit(node.left) >> self.visit(node.right) def visitSub(self,node,**kw): return self.visit(node.left) - self.visit(node.right) # Unary ops def visitNot(self,node,*kw): return not self.visit(node.expr) def visitUnarySub(self,node,*kw): return -self.visit(node.expr) def visitInvert(self,node,*kw): return ~self.visit(node.expr) def visitUnaryAdd(self,node,*kw): return +self.visit(node.expr) # Logical Ops def visitAnd(self,node,**kw): return reduce(lambda a,b: a and b,[self.visit(arg) for arg in node.nodes]) def visitBitand(self,node,**kw): return reduce(lambda a,b: a & b,[self.visit(arg) for arg in node.nodes]) def visitBitor(self,node,**kw): return reduce(lambda a,b: a | b,[self.visit(arg) for arg in node.nodes]) def visitBitxor(self,node,**kw): return reduce(lambda a,b: a ^ b,[self.visit(arg) for arg in node.nodes])
Re: empty classes as c structs?
Alex Martelli wrote: Steven Bethard <[EMAIL PROTECTED]> wrote: Hmm... interesting. This isn't the main intended use of Bunch/Struct/whatever, but it does seem like a useful thing to have... I wonder if it would be worth having, say, a staticmethod of Bunch that produced such a view, e.g.: class Bunch(object): ... @staticmethod def view(data): result = Bunch() result.__dict__ = data return result Then you could write your code as something like: gbls = Bunch.view(globals()) I'm probably gonna need more feedback though from people though to know if this is a commonly desired use case... Reasonably so, is my guess. Witness the dict.fromkeys classmethod -- it gives you, on dict creation, the same kind of nice syntax sugar that wrapping a dict in a bunch gives you for further getting and setting (and with similar constraints: all keys must be identifiers and not happen to clash with reserved words). I think this ``view'' or however you call it should be a classmethod too, for the same reason -- let someone handily subclass Bunch and still get this creational pattern w/o extra work. Maybe a good factoring could be something like: class Bunch(object): def __init__(self, *a, **k): self.__dict__.update(*a, **k) def getDict(self): return self.__dict__ def setDict(self, adict): self.__dict__ = adict theDict = property(getDict, setDict, None, "direct access to the instance dictionary" ) @classmethod def wrapDict(cls, adict, *a, **k): result = cls.__new__(cls, *a, **k) result.setDict(adict) cls.__init__(result, *a, **k) return result I'm thinking of use cases where a subclass of Bunch might override setDict (to do something else in addition to Bunch.setDict, e.g. maintain some auxiliary data structure for example) -- structuring wrapDict as a classmethod in a ``Template Method'' DP might minimize the amount of work, and the intrusiveness, needed for the purpose. (I don't have a real-life use case for such a subclass, but it seems to cost but little to provide for it as a possibility anyway). [[given the way property works, one would need extra indirectness in getDict and setDict -- structuring THEM as Template Methods, too -- to fully support such a subclass; but that's a well-known general issue with property, and the cost of the extra indirection -- mostly in terms of complication -- should probably not be borne here, it seems to me]] Alex Steven et al I like the idea of making the 'bunch' concept a little more standard. I also like the suggestion Nick Coghlan cited (not sure who suggested the term in this context) of calling this 'namespace' in part because it can lead to easily-explained semantics. ISTM that 'bunch' or 'namespace' is in effect the complement of vars i.e., while vars(object) => object.__dict__, namespace(somedict) gives an object whose __dict__ is somedict. Looked at this way, namespace (or bunch) is a minimal implementation of an object that implements the hasattr(object,__dict__) protocol. The effect of the class is to make operations on __dict__ simpler. namespace instances can be compared with any other object that has a __dict__. This differs from the PEP reference implementation which compares only with other bunch instances. In practice, comparisons with module and class may be useful. The class interface implements the protocol and little else. 
For 'bunch' applications, namespace can be initialized or updated with keyword args (just like a dict) i.e., >>> bunch = namespace({"a":1,"b":2}) can also be written as >>> bunch = namespace(a=1,b=2) For dict-wrapping applications: >>> wrappeddict = namespace(bigdict) but, unlike the PEP implmentation, this sets wrappeddict.__dict__ = bigdict I think that this interface allows for either use case, without introducing 'fromdict' classmethod. Some dict-operations e.g., __copy__ might carry over to the namespace class Michael An implementation follows: # An alternative implementation of Steven Bethard's PEP XXX 'bunch' with # slightly different interface and semantics: class namespace(object): """ namespace(somedict) => object (with object.__dict__ = somedict) NB, complement of vars: vars(object) => object.__dict__ namespace objects provide attribute access to their __dict__ In general, operations on namespace equate to the operations on namespace.__dict__ """ def __init__(self, E = None, **F): """__init__([namespace|dict], **kwds) -> None""" if isinstance(E, dict): self.__dict__ = E elif hasattr(E, "__dict__"): self.__dict__ = E.__dict__ self.__dict__.update(**F) # define only magic methods to limit pollution def __update__(self, E = None, **F): """update([namespace|dict], **kwds) -> None equivalent to self.__dict__.update with the addition of
Re: empty classes as c structs?
Nick Coghlan wrote: Steven Bethard wrote: It was because these seem like two separate cases that I wanted two different functions for them (__init__ and, say, dictview)... I see this, but I think it weakens the case for a single implementation, given that each implementation is essentially one method. The other issue is that a namespace *is* a mutable object, so the default behaviour should be to make a copy I don't follow this argument. Why does mutability demand copy? Given that somedict here is either a throwaway (in the classic bunch application) or a dict that must be updated (the wrap-dict case), copying doesn't make much sense to me. OTOH, dict.__init__(dict) copies. I think Michael's implementation also fell into a trap whereby 'E' couldn't be used as an attribute name. The version below tries to avoid this (using magic-style naming for the other args in the methods which accept keyword dictionaries). You're right - I hadn't considered that. In case it wasn't obvious, I was matching the argspec of dict. Your solution avoids the problem. To limit the special casing in update, I've switched to only using __dict__ for the specific case of instances of namespace. That seems a pity to me. (otherwise the semantics are too hard to explain). This is to allow easy copying of an existing namespace - Can't this be spelled namespace(somenamespace).__copy__()? > for anything else, invoking vars() is easy enough. If there is potential for confusion, I'd be tempted to disallow namespaces as an argument to update/__update__. We could use __add__ instead, for combining namespaces. And I was reading Carlos's page on MetaTemplate, so I threw in an extra class "record" which inherits from namespace and allows complex data structures to be defined via class syntax (see the record.__init__ docstring for details). That bit's entirely optional, but I thought it was neat. Good idea. The implementation ought to be tested against several plausible specializations. Finally, I've just used normal names for the functions. I think the issue of function shadowing is best handled by recommending that all of the functions be called using the class explicitly - this works just as well for instance methods as it does for class or static methods. I don't like the sound of that. The whole point here - whether as Steven's nice straightforward bunch, as originally conceived, or the other cases you and I and others have been 'cluttering' the discussion with ;-) is convenience, and readability. If there are hoops to jump through to use it, then the benefit is quickly reduced to zero. Regards Michael Cheers, Nick. + from types import ClassType class namespace(object): """ namespace([namespace|dict]) => object namespace objects provide attribute access to their __dict__ Complement of vars: vars(object) => object.__dict__ Non-magic methods should generally be invoked via the class to avoid inadvertent shadowing by instance attributes Using attribute names that look like magic attributes is not prohibited but can lead to surprising behaviour. In general, operations on namespace equate to the operations on namespace.__dict__ """ def __init__(__self__, __orig__ = None, **__kwds__): """__init__([namespace|dict], **kwds) -> None""" type(__self__).update(__self__, __orig__, **__kwds__) @classmethod def view(cls, orig): """namespace.view(dict) -> namespace Creates a namespace that is a view of the original dictionary.
Allows modification of an existing dictionary via namespace syntax""" new = cls() new.__dict__ = orig return new def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.__dict__)) # Recommend calling non-magic methods via class form to # avoid problems with inadvertent attribute shadowing def _checked_update(self, other): try: self.__dict__.update(other) except (TypeError): raise TypeError("Namespace update requires mapping " "keyed with valid Python identifiers") def update(__self__, __other__ = None, **__kwds__): """type(self).update(self, [namespace|dict], **kwds) -> None equivalent to self.__dict__.update""" # Handle direct updates if __other__ is not None: if isinstance(__other__, namespace): type(__self__)._checked_update(__self__, __other__.__dict__) else: type(__self__)._checked_update(__self__, __other__) # Handle keyword updates if __kwds__ is not None: type(__self__)._checked_update(__self__, __kwds__) class record(namespace): def __init__(self, definition=None): """record([definition]) -> record Constructs a namespace based on the given class definition
Re: empty classes as c structs?
Steven Bethard wrote: Do you mean there should be a separate Namespace and Bunch class? Or do you mean that an implementation with only a single method is less useful? The former. If the former, then you either have to repeat the methods __repr__, __eq__ and update for both Namespace and Bunch, or one of Namespace and Bunch can't be __repr__'d, __eq__'d or updated. I see no problem in repeating the methods, or inheriting the implementation. However, if namespace and bunch are actually different concepts (one with reference semantics, the other with copy), then __repr__ at least would need to be specialized, to highlight the difference. So, on balance, if copy semantics are important to bunch uses, and references for namespace (though Nick changed his mind on this, and I don't yet know why) I think they would be better as two small implementations. I remain unsure about why you need or want copying, aside from matching the behavior of the builtins. If the latter (setting aside the fact that the implementation provides 4 methods, not 1), I would argue that even if an implementation is only one method, if enough users are currently writing their own version, adding such an implementation to the stdlib is still a net benefit. Yes, I agree with this: I was not picking on the class size ;-) ... Another way to avoid the problem is to use *args, like the current Bunch implementation does: def update(*args, **kwargs): """bunch.update([bunch|dict|seq,] **kwargs) -> None Sure - nice trick to avoid shadowing self too ... Is it that much worse to require the following code: Namespace.update(namespace, obj.__dict__) or: namespace.update(obj.__dict__) if you really want to update a Namespace object with the attributes of a non-Namespace object? No problem at all - just a question of what the class is optimized for, and making the interface as convenient as possible, given the use case. I agree that for straight attribute access to a dictionary, your update interface is clearly superior. For that matter, do you have a use-case for where this would be useful? I definitely see the view-of-a-dict example, but I don't see the view-of-an-object example since an object already has dotted-attribute style access... Yes, I have various cases in mind relating to argument-passing, dispatching, interface-checking and class composition. Here the class becomes useful if it grows some namespace-specific semantics. For example, I could write something like: namespace(obj1) >= namespace(obj2) to mean obj1 has at least the attributes of obj2 implemented like: def __ge__(self, other): for attrname in other.__dict__.keys(): if not attrname in self.__dict__: return False return True I realize that interfaces may be addressed formally by a current PEP, but, even if they are, this "cheap and cheerful" approach appeals to me for duck-typing. However, as I think more about this, I realize that I am stretching your concept past its breaking point, and that whatever the merits of this approach, it's not helping you with bunch. Thanks for knocking the ideas around with me. Cheers Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: turing machine in an LC
Jeremy Bowers wrote: I can't figure out how to write a TM in a Python List Comprehension without one of either "variable binding" (so we can store the last symbol list and manipulate it in the next iteration) or "recursive function" (to express the whole tape as a recursive function), both of which require statements. I can figure out how to write a single state transition, but in a single LC I can't figure out how to feed the new state into the next iteration; the previous values generated in the LC are, to my knowledge, not accessible to the LC as it is running. (If they are, I *think* that would indeed be enough.) How about: >>> def fact_ge(n): ... f = [1] ... f.extend(i*j for i,j in it.izip(xrange(1,1+n), f)) ... return f ... >>> fact_ge(10) [1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800] >>> as a "stateful" genexp? Michael -- http://mail.python.org/mailman/listinfo/python-list
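For what it's worth, the same trick gives Fibonacci, since list.extend consumes the generator one item at a time (in CPython, at least), so each new element is appended before the next one is computed:

>>> def fib_ge(n):
...     f = [1, 1]
...     f.extend(f[i] + f[i+1] for i in xrange(n - 2))
...     return f
...
>>> fib_ge(10)
[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]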
Re: turing machine in an LC
Jeremy Bowers wrote: On Tue, 08 Feb 2005 17:36:19 +0100, Bernhard Herzog wrote: Now you *can* get at the previous state and write a state-transition expression in perfectly legal Python. What do you know, generator comprehensions are Turing Complete and list comprehensions aren't. I wouldn't have expected that. I see no difference between LCs and GEs in this respect: >>> import itertools as it >>> >>> def fact_ge(n): ... f = [1] ... f.extend(i*j for i,j in it.izip(xrange(1,1+n), f)) ... return f ... >>> def fact_lc(n): ... f = [1] ... [f.append(i*j) for i,j in it.izip(xrange(1,1+n), f)] ... return f ... ... >>> fact_ge(10) [1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800] >>> fact_lc(10) [1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800] Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: turing machine in an LC
Jeremy Bowers wrote: That's not a generator expression, that's a generator function. Nobody contests they can reference earlier states, that's most of their point :-) Are you sure? I just wrote my examples in functions to label them Here's your example with this method: >>> import itertools as it >>> results = [0] >>> magicGenerator = (i+1 for i,lastresult in it.izip(xrange(5),results)) >>> results.extend(magicGenerator) >>> results [0, 1, 2, 3, 4, 5] >>> > For context, we're trying to build Turing Completeness into Python without > indentation. I bailed out of a Xah Lee thread because people have > probably killed it :-) Didn't see it, but this looked interesting - presumably your point and this is entirely unrelated by now, except in > the vague sense he started with an (I'm sure entirely accidentally) > thought-provoking question. Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: turing machine in an LC
Jeremy Bowers wrote: OK then, I still don't quite see how you can build a Turing Machine in one LC, but an LC and one preceding list assignment should be possible, although the resulting list from the LC is garbage; Not necessarily garbage - could be anything, say a copy of the results: >>> results = [0] >>> [(results.append(lastresult+1) or lastresult) for i, lastresult in it.izip(xrange(5),results)] [0, 1, 2, 3, 4] >>> # ok, missing the 5, but close! I don't think the assignment is avoidable though. I should clarify a point I made earlier I see no difference between LCs and GEs in this respect: What I meant was that both LCs and GEs can reference their prior state in the same way. Of course, there is an important difference in that the LC returns its list as soon as it is executed whereas the executing the genexp returns an iterator that can delay the evaluation of all but the outer loop until its next() is called. This makes a genexp equivalent to (at least some) functions, and perhaps that was part of your point that I missed. Michael -- http://mail.python.org/mailman/listinfo/python-list
A ListComp that maintains its own state (Was: Re: turing machine in an LC)
Jeremy Bowers <[EMAIL PROTECTED]> writes: On Tue, 08 Feb 2005 17:36:19 +0100, Bernhard Herzog wrote: Nick Vargish <[EMAIL PROTECTED]> writes: "Xah Lee" <[EMAIL PROTECTED]> writes: is it possible to write python code without any indentation? Not if Turing-completeness is something you desire. Bernhard Herzog wrote: a Turing Machine in one line plus assignments - nice! Turns out that pypy is more verbose than strictly necessary ;-) ... BTW, I realized that it is indeed possible for a LC to maintain its own state without being passed an external mutable. The trick is to use itertools.repeat to return the same mutable object on each iteration. So, here's factorial in one line: # state refers to list of state history - it is initialized to [1] # on any iteration, the previous state is in state[-1] # the expression also uses the trick of list.append() => None # to both update the state, and return the last state >>> [state.append(state[-1] * symbol) or state[-1] ... for symbol, state in it.izip(range(1,10),it.repeat([1])) ... ] [1, 2, 6, 24, 120, 720, 5040, 40320, 362880] >>> Now, who was claiming that 'reduce' was opaque? Michael ;-) -- http://mail.python.org/mailman/listinfo/python-list
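And the same state-history trick yields Fibonacci in one line:

>>> [state.append(state[-2] + state[-1]) or state[-1]
...  for symbol, state in it.izip(range(8), it.repeat([1, 1]))]
[2, 3, 5, 8, 13, 21, 34, 55]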
Re: A ListComp that maintains its own state (Was: Re: turing machine in an LC)
Carl Banks wrote: Pay attention, chief. I suggested this days ago to remove duplicates from a list. from itertools import * [ x for (x,s) in izip(iterable,repeat(set())) if (x not in s,s.add(x))[0] ] ;) Sorry, I gave up on that thread after the first 10 Million* posts. Who knows what other pearls I may have missed? Anyway, the good news is that you appear to have identified a new design pattern, and will soon become very famous: According to: http://www.cmcrossroads.com/bradapp/docs/patterns-nutshell.html#Patterns_What A "pattern" is ... * An abstraction from a concrete form which keeps recurring in specific, non-arbitrary contexts. [twice in one week] * A recurring solution to a common problem [perl-python spam] in a given context and system of forces. * A named "nugget" of instructive insight, conveying the essence of a proven solution to a recurring problem in a given context amidst competing concerns. [who could doubt it?] * A successfully recurring "best practice" that has proven itself in the "trenches". [of this list anyway] * A literary format for capturing the wisdom and experience of expert designers, and communicating it to novices [I think we're 5 for 5] So, I would get the book out without further delay, before some other Johnny-come-lately lays claim. BTW, Do you have a 1-line-LC-wiki yet? Michael * with due respect to Marvin -- http://mail.python.org/mailman/listinfo/python-list
Re: A ListComp that maintains its own state
Bernhard Herzog wrote: Michael Spencer <[EMAIL PROTECTED]> writes: So, here's factorial in one line: # state refers to list of state history - it is initialized to [1] # on any iteration, the previous state is in state[-1] # the expression also uses the trick of list.append() => None # to both update the state, and return the last state >>> [state.append(state[-1] * symbol) or state[-1] ... for symbol, state in it.izip(range(1,10),it.repeat([1])) ... ] [1, 2, 6, 24, 120, 720, 5040, 40320, 362880] >>> There's no need for repeat: [state.append(state[-1] * symbol) or state[-1] for state in [[1]] for symbol in range(1, 10)] [1, 2, 6, 24, 120, 720, 5040, 40320, 362880] While we're at it, a while back I posted a list comprehension that implements a 'recursive' flatten: http://groups.google.de/groups?selm=s9zy8eyzcnl.fsf%40salmakis.intevation.de Bernhard Much better - that also cleanly extends to any number of initializers. I also like the approach you take in flatten (and as suggested by Carl Banks) of putting the update mechanism in the if clause So that gives: def factorial(n): return [state[-1] for state in [[1]] for count in xrange(1,n+1) if state.append(state[-1] * count) or True ] Probably of limited practical value, but fun to explore the language. Thanks Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: listerator clonage
Cyril BAZIN wrote: Hello, I want to build a function which return values which appear two or more times in a list: This is very similar to removing duplicate items from a list which was the subject of a long recent thread, full of suggested approaches. Here's one way to do what you want: >>> l = [1, 7, 3, 4, 3, 2, 1] >>> seen = set() >>> set(x for x in l if x in seen or seen.add(x)) set([1, 3]) >>> This is a 'generator expression' applied as an argument to the set constructor. It relies on the fact that seen.add returns None, and is therefore always false. this is equivalent to: >>> def _generate_duplicates(iterable): ... seen = set() ... for x in iterable: ... if x in seen: # it's a duplicate ... yield x ... else: ... seen.add(x) ... >>> generator = _generate_duplicates(l) >>> generator >>> set(generator) set([1, 3]) >>> # In case you want to preserve the order and number of the duplicates, you >>> # would use a list >>> generator = _generate_duplicates(l) >>> list(generator) [3, 1] >>> So, I decided to write a little example which doesn't work: #l = [1, 7, 3, 4, 3, 2, 1] #i = iter(l) #for x in i: #j = iter(i) #for y in j: #if x == y: #print x In thinked that the instruction 'j= iter(i)' create a new iterator 'j' based on 'i' (some kind of clone). I wrote this little test which show that 'j = iter(i)' is the same as 'j = i' (that makes me sad): I don't think your algorithm would work even if iter(iterator) did return a copy or separate iterator. If, however, you do have an algorithm that needs that capability, you can use itertools.tee Cheers Michael -- http://mail.python.org/mailman/listinfo/python-list
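For completeness, here is tee giving the two independent iterators the original nested loop was reaching for (just don't touch the original iterator afterwards):

>>> import itertools
>>> i = iter([1, 7, 3, 4])
>>> j, k = itertools.tee(i)
>>> j.next(), j.next()
(1, 7)
>>> k.next()   # k is unaffected by advancing j
1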
Re: Hack with os.walk()
Frans Englich wrote: Hello, Have a look at this recursive function: def walkDirectory( directory, element ): element = element.newChild( None, "directory", None ) # automatically appends to parent element.setProp( "name", os.path.basename(directory)) for root, dirs, files in os.walk( directory ): for fileName in files: element.addChild( parseFile( os.path.join( root, fileName )) for dirName in filter( acceptDirectory, dirs): walkDirectory( os.path.join( root, dirName ), element ) return ### Note, this is inside for loop What it does, is it recurses through all directories, and, with libxml2's bindings, builds an XML document which maps directly to the file hierarchy. For every file is parseFile() called, which returns a "file element" which is appended; the resulting structure looks the way one expect -- like a GUI tree view. The current code works, but I find it hackish, and it probably is inefficient, considering that os.walk() is not called once(as it usually is), but for every directory level. My problem, AFAICT, with using os.walk() the usual way, is that in order to construct the /hierarchial/ XML document, I need to be aware of the directory depth, and a recursive function handles that nicely; os.walk() simply concentrates on figuring out paths to all files in a directory, AFAICT. I guess I could solve it with using os.walk() in a traditional way, by somehow pushing libxml2 nodes on a stack, after keeping track of the directory levels etc(string parsing..). Or, one could write ones own recursive directory parser.. My question is: what is the least ugly? What is the /proper/ solution for my problem? How would you write it in the cleanest way? Cheers, Frans The path module by Jorendorff: http://www.jorendorff.com/articles/python/path/ wraps various os functions into an interface that can make this sort of thing cleaner Michael -- http://mail.python.org/mailman/listinfo/python-list
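With path, the recursion reads almost like the pseudocode - a sketch from memory of the path API (.files() and .dirs() return lists of path objects, .name is the base name), reusing your parseFile and acceptDirectory:

from path import path

def walkDirectory(directory, element):
    d = path(directory)
    element = element.newChild(None, "directory", None)
    element.setProp("name", d.name)
    for f in d.files():
        element.addChild(parseFile(f))
    for sub in filter(acceptDirectory, d.dirs()):
        walkDirectory(sub, element)

(Note that acceptDirectory here receives a full path object rather than a bare directory name.)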
Re: check if object is number
Steven Bethard wrote: Peter Hansen wrote: Of course, most of the other definitions of "is a number" that have been posted may likewise fail (defined as not doing what the OP would have wanted, in this case) with a numarray arange. Or maybe not. (Pretty much all of them will call an arange a number... would the OP's function work properly with that?) No, but it will fail properly since my code basically looks like: def f(max=None): ... while max is None or n <= max: ... # complicated incrementing of n So if max is an array, though all of the proposed isnumber checks will call it a number, my code will (rightly) fail when the array (n <= max) gets __nonzero__ called in the while condition. I guess I'd prefer it to fail in the isnumber check, but overall, I'm more concerned that _some_ error is produced, not necessarily which one. (I'm also not thrilled that bool(array) raises a RuntimeError instead of a TypeError...) Steve Steve, How about explicitly calling an adapter in your function, e.g.? > def f(max=None): > max = number(max) > while max is None or n <= max: > ... > # complicated incrementing of n then you can define number to document the required behavior and return more useful exceptions if the object fails. At the same time, anyone who wants to use a custom number class with your function has a ready-made unittest. >>> def number(obj): ... """Adapts obj to be numeric, or fails helpfully""" ... if isinstance(obj, (int, float, long, )): # these types conform ...return obj ... elif isinstance(obj, basestring): # these types have a known adaptation ...return int(obj) ... else: # check the object exhibits the required behavior ...try: ...assert obj+1 >= 1 ...except Exception, err: ...raise TypeError, "obj does not support addition and comparisons with numbers (%s)" % err ...return obj ... >>> class MyNumber(object): ... def __init__(self, data): ...self.data = data ... def __add__(self, other): ...return MyNumber(self.data + other) ... def __cmp__(self, other): ...return self.data.__cmp__(other) ... >>> a = MyNumber(42) >>> a is number(a) True >>> >>> number(1+2j) Traceback (most recent call last): File "", line 1, in ? File "", line 11, in number TypeError: obj does not support addition and comparisons with numbers (cannot compare complex numbers using <, <=, >, >=) >>> number(array.array("i",[1])) Traceback (most recent call last): File "", line 1, in ? File "", line 11, in number TypeError: obj does not support addition and comparisons with numbers (can only append array (not "int") to array) >>> Cheers Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: check if object is number
Steven Bethard wrote: Michael Spencer wrote: Steven Bethard wrote: Peter Hansen wrote: Of course, most of the other definitions of "is a number" that have been posted may likewise fail (defined as not doing what the OP would have wanted, in this case) with a numarray arange. How about explicitly calling an adapter in your function, e.g.? Yup, that's basically what I'm doing right now. The question was really how to define that adapter function. =) Steve OK - then my entry is: assert obj+1 >= 1 :-) Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: Hack with os.walk()
Tim Peters wrote:
[Frans Englich]
... [snip]

class HasPath:
    def __init__(self, path):
        self.path = path
    def __lt__(self, other):
        return self.path < other.path

class Directory(HasPath):
    def __init__(self, path):
        HasPath.__init__(self, path)
        self.files = []    # list of File objects
        self.subdirs = []  # list of sub-Directory objects

class File(HasPath):
    pass

[snip]

def build_tree(path, Directory=Directory, File=File):
    top = Directory(path)
    path2dir = {path: top}
    for root, dirs, files in os.walk(path):
        dirobj = path2dir[root]
        for name in dirs:
            subdirobj = Directory(os.path.join(root, name))
            path2dir[subdirobj.path] = subdirobj
            dirobj.subdirs.append(subdirobj)
        for name in files:
            dirobj.files.append(File(os.path.join(root, name)))
    return top

That looks short and sweet to me. It could be made shorter, but not without losing clarity to my eyes.

The aforementioned path class makes this even easier. No need to build the tree - that is done automatically by the path constructor. ListingDirectory can then inherit from path.path with few changes:

from path import path
libpath = r"C:\Python24\Lib"

class ListingDirectory(path):
    # Display directory tree as a tree, with 4-space indents.
    # Files listed before subdirectories, both in alphabetical order.
    # Full path displayed for topmost directory, base names for all
    # other entries. Directories listed with trailing os.sep.
    def display(self, level=0):
        name = self.abspath()        # path method
        if level:
            name = self.basename()   # path method
        print "%s%s%s" % (' ' * level, name, os.sep)
        for f in self.files():       # path method
            print "%s%s" % (' ' * (level + 4), f.basename())
        for d in self.dirs():        # path.dirs returns an iterator over path objects
            ListingDirectory(d).display(level + 4)

mytree = ListingDirectory(libpath)
mytree.display()
[snip about 15000 lines...]

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: Iterator / Iteratable confusion
Francis Girard wrote:

"""
Example 8

Running after your tail with itertools.tee

The beauty of it is that recursive running after their tail FP algorithms are quite straightforwardly expressed with this Python idiom.
"""

from itertools import izip, tee, islice

def Ex8_Fibonacci():
    print "Entering Ex8_Fibonacci"
    def _Ex8_Fibonacci():
        print "Entering _Ex8_Fibonacci"
        yield 1
        yield 1
        fibTail.next()  # Skip the first one
        for n in (head + tail for (head, tail) in izip(fibHead, fibTail)):
            yield n
    fibHead, fibTail, fibRes = tee(_Ex8_Fibonacci(), 3)
    return fibRes

print
print sEx8Doc
print
print list(islice(Ex8_Fibonacci(), 5))

Absolutely: ever since you brought up the Hamming sequence I've been interested in this approach. However, if iterators could be extended in place, these solutions would be even more attractive. Here are some examples for infinite series constructed with an extendable iterator. This iterator is returned by an iterable class 'Stream', shown below the examples:

def factorial():
    """
    >>> f = factorial()
    >>> f.tolist(10)
    [1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880]
    """
    factorial = Stream([1])
    factorial.extend(factorial * it.count(1))
    return factorial

def fib():
    """Example:
    >>> f = fib()
    >>> f.tolist(10)
    [1, 1, 2, 3, 5, 8, 13, 21, 34, 55]"""
    fib = Stream([1,1])
    fib.extend(x+y for x, y in it.izip(fib, fib[1:]))
    return fib

def multimerge(*iterables):
    """Yields the items in iterables in order, without duplicates"""
    cache = {}
    iterators = map(iter, iterables)
    number = len(iterables)
    exhausted = 0
    while 1:
        for it in iterators:
            try:
                cache.setdefault(it.next(), []).append(it)
            except StopIteration:
                exhausted += 1
                if exhausted == number:
                    raise StopIteration
        val = min(cache)
        iterators = cache.pop(val)
        yield val

def hamming():
    """
    Example:
    >>> h = hamming()
    >>> list(h[20:40])
    [40, 45, 48, 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144]
    >>> h[1]
    288555831593533440L
    """
    hamming = Stream([1])
    hamming.extend(i for i in multimerge(2 * hamming, 3 * hamming, 5 * hamming))
    return hamming

def compounds():
    """Extension of Hamming series to compounds of primes(2..13)
    Example:
    >>> c = compounds()
    >>> list(c[20:30])
    [24, 25, 26, 27, 28, 30, 32, 33, 35, 36]"""
    compounds = Stream([1])
    compounds.extend(i for i in multimerge(2 * compounds, 3 * compounds,
        5 * compounds, 7 * compounds, 9 * compounds, 11 * compounds,
        13 * compounds))
    return compounds

# Stream class for the above examples:

import itertools as it
import operator as op

class Stream(object):
    """Provides an independent iterator (using tee) on every iteration request
    Also implements lazy iterator arithmetic"""

    def __init__(self, *iterables, **kw):
        """iterables: tuple of iterables (including iterators).
        A sequence of iterables will be chained
        kw: not used in this base class"""
        self.queue = list(iterables)
        self.itertee = it.tee(self._chain(self.queue))[0]  # We may not need this in every case

    def extend(self, other):
        """extend(other: iterable) => None
        appends iterable to the end of the Stream instance"""
        self.queue.append(other)

    def _chain(self, queue):
        while queue:
            for i in self.queue.pop(0):
                self.head = i
                yield i

    # Iterator methods:

    def __iter__(self):
        """Normal iteration over the iterables in self.queue in turn"""
        return self.itertee.__copy__()

    def _binop(self, other, op):
        """See injected methods - __add__, __mul__ etc..."""
        if hasattr(other, "__iter__"):
            return (op(i, j) for i, j in it.izip(self, other))
        else:
            return (op(i, other) for i in self)

    def __getitem__(self, index):
        """__getitem__(index: integer | slice)
        index: integer => element at position index
        index: slice
            if slice.stop is given => Stream(it.islice(iter(self),
                index.start, index.stop, index.step or 1))
            else: consumes self up to start then => Stream(iter(self))
            Note slice.step is ignored in this case
        """
        if isinstance(index, slice):
            if index.stop:
                return (it.islice(iter(self),
                        index.start or 0, index.stop, index.step or 1))
            else:
                iterator = iter(self)
                for i in range(index.start):
                    iterator.next()
                return iterator
        else:
            return it.islice(iter(self), index, index + 1).next()
Re: builtin functions for and and or?
Roose wrote:
> Yeah, as we can see there are a million ways to do it. But none of them is as desirable as just having a library function to do the same thing. I'd argue that since there are so many different ways, we should just collapse them into one: any() and all(). That is more in keeping with the python philosophy I suppose -- having one canonical way to do things. Otherwise you could see any of these several ways of doing it in any program, and each time you have to make sure it's doing what you think. Each of them requires more examination than is justified for such a trivial operation. And this definitely hurts the readability of the program.

Previous discussion on this topic:
http://groups-beta.google.com/group/comp.lang.python/msg/a76b4c2caf6c435c
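Until then, rolling your own is a one-off cost; an obvious pure-Python sketch of the two candidates, short-circuiting on the first decisive element:

def any(iterable):
    """True if bool(x) is True for any x in the iterable"""
    for element in iterable:
        if element:
            return True
    return False

def all(iterable):
    """True if bool(x) is True for all x in the iterable"""
    for element in iterable:
        if not element:
            return False
    return True

Michael
--
http://mail.python.org/mailman/listinfo/python-list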
Re: Iterator / Iteratable confusion
"Francis Girard" <[EMAIL PROTECTED]> wrote in message an "iterator" doesn't have to support the "__iter__" method Terry Reedy wrote: Yes it does. iter(iterator) is iterator is part of the iterater protocol for the very reason you noticed... But, notwithstanding the docs, it is not essential that iter(iterator) is iterator >>> class A(object): ... def __iter__(self): ... return AnIterator() ... ... >>> class AnIterator(object): # an iterator that copies itself ... def next(self): ... return "Something" ... def __iter__(self): ... return AnIterator() ... >>> a=A() >>> i = iter(a) ... >>> i.next() 'Something' >>> j = iter(i) >>> j.next() 'Something' >>> i is j False >>> Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: builtin functions for and and or?
Roose wrote:
>> Previous discussion on this topic:
>> http://groups-beta.google.com/group/comp.lang.python/msg/a76b4c2caf6c435c
>> Michael
>
> OK, well then. That's really the exact same thing, down to the names of the functions. So what ever happened to that?

I don't recall: probably
http://www.google.com/search?sourceid=mozclient&ie=utf-8&oe=utf-8&q=alltrue+site%3Amail.python.org+python-dev
would lead you to the answer.

> That was over a year ago! I don't see any mention of it in PEP 289?

No, PEP 289 was for generator expressions - the any/all discussion arose as one application of those/itertools.

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: nested lists as arrays
naturalborncyborg wrote:
> Hi, I'm using nested lists as arrays and having some problems with that approach. In my puzzle class there is a swapelement method which doesn't work out.

What "doesn't work out"? On casual inspection that method seems to "work":

>>> p = Puzzle(2)
>>> p.elements[0][0] = 1
>>> p.elements[1][1] = 2
>>> p.elements
[[1, 0], [0, 2]]
>>> p.swapElements(0,0,1,1)
>>> p.elements
[[2, 0], [0, 1]]
>>>

What should it do instead?

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: nested lists as arrays
Terry Reedy wrote:
<[EMAIL PROTECTED]> wrote in message news:[EMAIL PROTECTED]

def setRandomState(self):
    # container for the elements to pick from
    container = [1,2,3,4,5,6,7,8,-1]
    # create elements of puzzle randomly
    i = 0
    j = 0
    while i <= self.dim-1:
        while j <= self.dim-1:
            if len(container) > 0:
                randomindex = random.randint(0, len(container)-1)
                self.elements[j][i] = container[randomindex]
                del container[randomindex]
                j = j+1
            else:
                break
        j = 0
        i = i+1

> Without reading closely, I believe that the above can generate any possible position. Are you aware that half are unsolvable? If that matters, you need to either find a book or site that explains the parity test for solvability or generate the start position from the goal position by a series of random moves.
>
> Terry J. Reedy

This covers the test for solvability - enjoy ;-):
http://www.cs.tcd.ie/publications/tech-reports/reports.01/TCD-CS-2001-24.pdf

BTW, just because your puzzle looks like a grid doesn't necessarily mean that representing the data as nested arrays is easiest. A flat list might be just as good here. It simplifies some of the operations (creating a random ordering becomes a one-liner), at the expense of a little more complexity in some others:

import random

class n2grid(object):
    """A grid for the n squared puzzle"""
    def __init__(self, dim=4):
        self.cells = range(dim*dim)
        self.dim = dim
        self.pos = (0,0)

    def shuffle(self):
        random.shuffle(self.cells)
        self.pos = divmod(self.cells.index(0), self.dim)

    def show(self):
        for row in self._asarray():
            print "".join("[%2s]" % (cell or "") for cell in row)

    def _move(self, dy, dx):
        dim = self.dim
        cells = self.cells
        oldy, oldx = self.pos
        newy, newx = oldy + dy, oldx + dx
        if 0 <= newx < dim and 0 <= newy < dim:
            ix = newy * dim + newx
            ox = oldy * dim + oldx
            cells[ix], cells[ox] = cells[ox], cells[ix]
            self.pos = newy, newx
        else:
            raise Exception, "Illegal move to: (%s,%s)" % (newy, newx)

    def move(self, dx, dy):
        try:
            self._move(dx, dy)
            self.show()
        except:
            pass

    def _asarray(self):
        # create the array representation when needed
        cells = iter(self.cells)
        dim = self.dim
        return [[cells.next() for j in range(dim)] for i in range(dim)]

    def __repr__(self):
        return repr(self._asarray())

>>> p = n2grid()
>>> p.show()
[  ][ 1][ 2][ 3]
[ 4][ 5][ 6][ 7]
[ 8][ 9][10][11]
[12][13][14][15]
>>> p.shuffle()
>>> p.show()
[ 3][15][ 6][ 7]
[10][  ][12][ 5]
[ 4][ 1][14][ 8]
[ 2][11][13][ 9]
>>> p.move(1,1)
[ 3][15][ 6][ 7]
[10][14][12][ 5]
[ 4][ 1][  ][ 8]
[ 2][11][13][ 9]
>>> p.move(1,0)
[ 3][15][ 6][ 7]
[10][14][12][ 5]
[ 4][ 1][13][ 8]
[ 2][11][  ][ 9]
>>> p.move(1,0)  # illegal (does nothing)
>>>
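As for the parity test, it can be sketched directly on the flat representation: every move transposes the blank with one tile, flipping the permutation's parity, and changes the blank's taxicab distance from its home square by exactly one, so the two parities stay equal in every reachable position. A rough, untested sketch against n2grid's conventions (0 is the blank, [0, 1, 2, ...] the goal):

def solvable(grid):
    """Is this n2grid position reachable from the ordered goal?"""
    cells = grid.cells
    # permutation parity via inversion count (blank included)
    inversions = sum(1 for i in range(len(cells))
                       for j in range(i + 1, len(cells))
                       if cells[i] > cells[j])
    # blank's taxicab distance from its home square (0, 0)
    y, x = divmod(cells.index(0), grid.dim)
    return inversions % 2 == (y + x) % 2

Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list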
Re: Iterator / Iteratable confusion
Michael Spencer wrote:
> But, notwithstanding the docs, it is not essential that iter(iterator) is iterator

Terry Reedy wrote:
> iter(iterator) is iterator is part of the iterator protocol
> [...] I interpret [your post] as saying three things:
> 1. "There is more than one possible definition of 'iterator'."

Terry, thanks for responding in depth.

> 2. "It is not essential to not do something wasteful as long as it is otherwise inconsequential."

Not that "iter(iterator) is iterator" is somehow wasteful (actually it seems conservative), but rather that alternative behavior is readily implemented. You point out, reasonably, that if I do that, then what I get is not then an iterator, because it fails to conform with the protocol. However, I suggest that there may be cases where "iter(iterator) is not iterator" is useful behavior. What to call such an object is another matter. For example, consider:

import itertools as it

def tee2(iterable):
    class itertee(object):
        def __init__(self, iterator):
            self.iterator = iterator
        def __iter__(self):
            return itertee(self.iterator.__copy__())
        def next(self):
            return self.iterator.next()
    return itertee(it.tee(iterable, 1)[0])

This returns an itertee instance which simply wraps the tee iterator returned by itertools. However iter(itertee instance) returns a copy of its iterator. So this object creates as many independent iterators over iterable as are required.

In an earlier post in this thread, I included several examples of generating infinite series using iterator-copying like this. I implemented the copying as a method of a containing iterable 'Stream', rather than of the iterators themselves, partly to respect the 'iterator protocol'.

> 3. "You can substitute a copy of an object that is never mutated for the object itself."

This was not my intended point, although I accept that my example was too abstract.

Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: Can __new__ prevent __init__ from being called?
Peter Hansen wrote:
> Felix Wiemann wrote:
>> Sometimes (but not always) the __new__ method of one of my classes returns an *existing* instance of the class. However, when it does that, the __init__ method of the existing instance is called nonetheless, so that the instance is initialized a second time. For example, please consider the following class (a singleton in this case): [snip] How can I prevent __init__ from being called on the already-initialized object?
>
> Is this an acceptable kludge?
>
> >>> class C(object):
> ...     instance=None
> ...     def __new__(cls):
> ...         if C.instance is None:
> ...             print 'creating'
> ...             C.instance = object.__new__(cls)
> ...         else:
> ...             cls.__init__ = lambda self: None
> ...         return cls.instance
> ...     def __init__(self):
> ...         print 'in init'
> ...
> >>> a = C()
> creating
> in init
> >>> b = C()
> >>>
>
> (Translation: dynamically override now-useless __init__ method. But if that works, why do you need __init__ in the first place?)
>
> -Peter

Or this one: use an alternative constructor:

class C(object):
    instance = None

    @classmethod
    def new(cls, *args, **kw):
        if cls.instance is None:
            print 'Creating instance.'
            cls.instance = object.__new__(cls)
            print 'Created.'
            cls.instance.__init__(*args, **kw)
        return cls.instance

    def __init__(self):
        print 'In init.'

>>> c = C.new()
Creating instance.
Created.
In init.
>>> c = C.new()
>>>
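A third route, sketched here for comparison, is to intercept the call itself with a metaclass: type.__call__ is what runs __new__ and then __init__, so overriding it skips __init__ entirely for the cached instance. The names are my own, not from the thread:

class Singleton(type):
    def __call__(cls, *args, **kw):
        # type.__call__ would run cls.__new__ then cls.__init__;
        # bypass both once an instance exists
        if getattr(cls, '_instance', None) is None:
            cls._instance = super(Singleton, cls).__call__(*args, **kw)
        return cls._instance

class D(object):
    __metaclass__ = Singleton
    def __init__(self):
        print 'in init'

>>> d = D()
in init
>>> d is D()   # no second 'in init'
True

Michael
--
http://mail.python.org/mailman/listinfo/python-list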
Re: renaming 'references' to functions can give recursive problems
peter wrote:
> Hello, nice solution: but it puzzles me :) can anyone tell me why
>
> ---correct solution---
>
> def fA(input):
>     return input
>
> def newFA(input, f=fA):
>     return f(input)
>
> fA = newFA
>
> is correct, and
>
> ---infinite loop---
>
> def fA(input):
>     return input
>
> def newFA(input):
>     return fA(input)
>
> fA = newFA
>
> gives an infinite recursive loop?
>
> kind regards
> Peter

In newFA, fA is not bound until you call newFA. By which time you've re-bound fA to newFA, causing the recursion. In the 'correct' solution above, f is bound to the original fA function at the time the def newFA statement is executed, which is what you want.
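The same early-versus-late binding difference is easy to see without any recursion; a minimal demonstration:

x = 1

def late():
    print x        # x is looked up when late() is called

def early(x=x):
    print x        # the default was evaluated at def time, so x is 1 here

x = 2
late()     # prints 2
early()    # prints 1

Regards
Michael
--
http://mail.python.org/mailman/listinfo/python-list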
Re: sampling items from a nested list
Steven Bethard wrote:
> So, I have a list of lists, where the items in each sublist are of basically the same form. It looks something like: ... Can anyone see a simpler way of doing this? Steve

You just make these up to keep us amused, don't you? ;-)

If you don't need to preserve the ordering, would the following work?:

>>> data = [[('a', 0),
...          ('b', 1),
...          ('c', 2)],
...
...         [('d', 2),
...          ('e', 0)],
...
...         [('f', 0),
...          ('g', 2),
...          ('h', 1),
...          ('i', 0),
...          ('j', 0)]]
...
>>> def resample2(data):
...     bag = {}
...     random.shuffle(data)
...     return [[(item, label)
...              for item, label in group
...              if bag.setdefault(label, []).append(item)
...              or len(bag[label]) < 3]
...             for group in data if not random.shuffle(group)]
...
>>> resample2(data)
[[('a', 0), ('c', 2), ('b', 1)], [('h', 1), ('g', 2), ('i', 0)], []]
>>> resample2(data)
[[('h', 1), ('f', 0), ('j', 0), ('g', 2)], [('b', 1), ('c', 2)], []]
>>> resample2(data)
[[('e', 0), ('d', 2)], [('i', 0), ('h', 1), ('g', 2)], [('b', 1)]]
>>>

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: Iterator / Iteratable confusion
Terry Reedy wrote:
"Michael Spencer" <[EMAIL PROTECTED]> wrote in message

We are both interested in the murky edges at and beyond conventional usage.
...
I am quite aware that multiple iterators for the same iterable (actual or conceptual) can be useful (cross products, for example). But I am dubious that initialized clones of 'iterators' are *more* useful, especially for Python, than multiple iterators derived from repeated calling of the callable that produced the first iterator.

I'm not sure they are. In the one 'real' example I posted on infinite series, I implemented the approach you advocate here. But I'm keeping copyable iterators in mind.

Here are some related reasons why I think it useful if not essential to restrict the notion of iterator by restricting iterator.__iter__ to returning self unmodified.

Leaving Python aside, one can think of iterable as something that represents a collection and that can produce an iterator that produces the items of the collection one at a time. In this general conceptioning, iterables and iterators seem distinct (if one ignores self-iterables).

The separation is appealing, but blurrier in practice, I believe. Neither itertools.cycle nor itertools.tee fits cleanly into this model. Neither do the self-iterables, as you point out.

... giving iterators an __iter__ method, while quite useful, erases (confuses) the (seeming) distinction, but giving them a minimal __iter__ does so minimally, keeping iterators a distinct subcategory of iterable.

Iterators that could not be presented to other functions for filtering or whatnot would be pretty limited. Unless every iterator is to be derived from some special-cased object, how could they not have an __iter__ method?

I accept your point that keeping the functionality of iterator.__iter__ minimal and predictable limits the confusion between iterators and iterables. But since that distinction is already blurred in several places, I don't find that argument alone decisive.

> ... Taking Python as it is, a useful subcategory of iterable is 'reiterable'. This is distinct from iterator strictly defined.

What about itertools.cycle? Not strictly an iterator?

Thus we have iterables divided into iterators, reiterables, and other. I think this is didactically useful.

Spencerators are reiterables.

They may be: they are no more and no less than a thought experiment in which iterator.__iter__ does not return self unmodified.

iter(iterator) returning iterator unchanged makes iterator a fixed point of iter. It ends any chain of objects returned by repeated iter calls. Spencerators prolong any iter chain, making it infinite instead of finite.

Essential? Repeat the paragraph above with 'a fixed point' substituted for 'minimal'.

I don't understand this point except in the loosest sense that deviating from the iterator protocol makes it harder to reason about the code. Do you mean something more specific?

I have been thinking about iterator.__iter__ rather like object.__new__. Not returning a new instance may be surprising and inadvisable in most cases. But still there are accepted uses for the technique.

Do you think these cases are comparable? Do you see the iterator protocol as the vanguard of a new set of python protocols that are more semantically restrictive than the "mapping, container, file-like object etc..." interfaces? Defining iterator method semantics strictly seems like a departure from the existing situation.

Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: Iterator / Iteratable confusion
Adam DePrince wrote: How is a spencerator [an iterator that doesn't return itself unmodified on iter] > different than itertools.tee? Taking your question literally, it changes the behavior of an itertools.tee object 'tee', so that iter(tee) returns tee.__copy__(), rather than tee itself. It was created for rhetorical purposes and has no known practical application. Depending on your point of view it is evidence either for (a) why the iterator protocol must be strictly adhered to, or (b) that iterators and iterables cannot be disjoint sets. Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: renaming 'references' to functions can give recursive problems
peter wrote:
> brain reset and understood thx a lot for all your answers
> Peter

Now that you've got reset, you might want to consider an alternative solution:

def fA(input):
    return input

oldfA = fA  # Hold a reference to the old function

def newFA(input):
    "Do something new"
    return oldfA(input)

fA = newFA

The advantage of this is that you don't need to change the function newFA at all when you're ready to rename it.

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: sampling items from a nested list
Michael Spencer wrote:
> >>> def resample2(data):
> ...     bag = {}
> ...     random.shuffle(data)
> ...     return [[(item, label)
> ...              for item, label in group
> ...              if bag.setdefault(label, []).append(item)
> ...              or len(bag[label]) < 3]
> ...             for group in data if not random.shuffle(group)]

...which failed to calculate the minimum count of labels; try this instead (while I was at it, I removed the insane LC):

>>> def resample3(data):
...     bag = {}
...     sample = []
...     labels = [label for group in data for item, label in group]
...     min_count = min(labels.count(label) for label in set(labels))
...     random.shuffle(data)
...     for subgroup in data:
...         random.shuffle(subgroup)
...         subgroupsample = []
...         for item, label in subgroup:
...             bag.setdefault(label, []).append(item)
...             if len(bag[label]) <= min_count:
...                 subgroupsample.append((item, label))
...         sample.append(subgroupsample)
...     return sample
...
>>>

Cheers
Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: sampling items from a nested list
Steven Bethard wrote:
> Michael Spencer wrote:
>> Steven Bethard wrote:
>>> So, I have a list of lists, where the items in each sublist are of basically the same form. It looks something like: ... Can anyone see a simpler way of doing this? Steve
>>
>> You just make these up to keep us amused, don't you? ;-)
>
> Heh heh. I wish. It's actually about resampling data read in the Yamcha data format: http://chasen.org/~taku/software/yamcha/
>
> So each sublist is a "sentence" and each tuple is the feature vector for a "word". The point is to even out the number of positive and negative examples because support vector machines typically work better with balanced data sets.
>
>> If you don't need to preserve the ordering, would the following work?: [snip resample2]
>
> It would be preferable to preserve ordering, but it's not absolutely crucial. Thanks for the suggestion! STeVe

Maybe combine this with a DSU pattern? Not sure whether the result would be better than what you started with.
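In case it helps, the DSU (decorate-sort-undecorate) idea might look like the sketch below: tag every item with its (group, position) indices before shuffling, then sort the survivors to restore the original order. Untested against real Yamcha data, and the names are my own:

import random

def resample_ordered(data, limit=3):
    """Like resample2, but restores the original order within each group"""
    bag = {}
    decorated = [(gi, wi, item, label)          # decorate with indices
                 for gi, group in enumerate(data)
                 for wi, (item, label) in enumerate(group)]
    random.shuffle(decorated)
    keep = []
    for gi, wi, item, label in decorated:
        bag.setdefault(label, []).append(item)
        if len(bag[label]) <= limit:
            keep.append((gi, wi, item, label))
    keep.sort()                                 # sorting restores order
    result = [[] for group in data]             # undecorate
    for gi, wi, item, label in keep:
        result[gi].append((item, label))
    return result

Michael
--
http://mail.python.org/mailman/listinfo/python-list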
Re: check if object is number
Christos TZOTZIOY Georgiou wrote: On Sat, 12 Feb 2005 16:01:26 -0800, rumours say that Michael Spencer <[EMAIL PROTECTED]> might have written: Yup, that's basically what I'm doing right now. The question was really how to define that adapter function. =) Steve OK - then my entry is: assert obj+1 >= 1 :-) So -1 is not a number. At least not a legal one for Steven's function as I understood it Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: Alternative to standard C "for"
James Stroud wrote:
> It seems I need constructs like this all of the time:
>
> i = 0
> while i < len(somelist):
>     if oughta_pop_it(somelist[i]):
>         somelist.pop(i)
>     else:
>         i += 1
>
> There has to be a better way...

Do you have to modify your list in place? If not, just create a copy with the filtered items:

somelist = [item for item in somelist if not oughta_pop_it(item)]

or you could use filter or itertools.ifilter to do much the same thing.
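If other names must continue to reference the same list object, two in-place variants of the same idea (standard idioms, nothing specific to this thread):

# Replace the contents, not the binding: other references see the change
somelist[:] = [item for item in somelist if not oughta_pop_it(item)]

# Or delete while iterating backwards, so shifting indices skip nothing
for i in range(len(somelist) - 1, -1, -1):
    if oughta_pop_it(somelist[i]):
        del somelist[i]

Michael
--
http://mail.python.org/mailman/listinfo/python-list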
Re: How to wrap a class's methods?
Grant Edwards wrote:
> On 2005-02-17, Steven Bethard <[EMAIL PROTECTED]> wrote:
>
> py> class C(object):
> ...     def f(self, *args):
> ...         print "f:", args
> ...     def g(self, *args):
> ...         print "g:", args
> ...
> py> class D(C):
> ...     pass
> ...
> py> class Wrapper(object):
> ...     def __init__(self, func):
> ...         self.func = func
> ...     def __call__(self, *args):
> ...         print "wrapped"
> ...         return self.func(*args)
> ...
> py> for name in ['f', 'g']:
> ...     wrapper = Wrapper(getattr(C, name))
> ...     setattr(D, name, new.instancemethod(wrapper, None, D))
>
> Thanks. The stuff provided by the "new" module is what I was missing.

No magic in the 'new' module - new.instancemethod is just a synonym for the method type:

>>> import new, types
>>> new.instancemethod is types.MethodType
True
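Note too that plain functions are already descriptors, so a closure can replace both the Wrapper class and instancemethod. A sketch of the same wrapping done that way, continuing from the session above (im_func fetches the underlying function from the unbound method in Python 2):

def wrap(func):
    def wrapper(self, *args):
        print "wrapped"
        return func(self, *args)
    return wrapper

for name in ['f', 'g']:
    # a plain function stored on a class becomes a method automatically
    setattr(D, name, wrap(getattr(C, name).im_func))

Michael
--
http://mail.python.org/mailman/listinfo/python-list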
Re: How to wrap a class's methods?
John Lenton wrote: On Thu, Feb 17, 2005 at 07:32:55PM +, Grant Edwards wrote: I'd usually put big fat warnings around this code, and explain exaclty why I need to do things this way... As a low-tech alternative, what about sourcecode generation, since you are targetting a python module? This gives two advantages vs the wrapping function: 1) the magic all occurs at coding time 2) the method signatures are documented. Michael import imaplib import inspect import types instancemethod = types.MethodType # The function template funcwrapper = \ """ def %(name)s%(argspec)s: s,r = imaplib.IMAP4_SSL.%(name)s%(callspec)s if s!='OK': raise NotOK((s,r)) return r""" # A helper function to get the template parameters def getargs(method): argspec = inspect.getargspec(method) callspec = tuple(argspec[:3] + (None,))# No default return {"name": method.__name__, "argspec": inspect.formatargspec(*argspec), "callspec": inspect.formatargspec(*callspec)} # Do the stuff manually: >>> obj = imaplib.IMAP4_SSL >>> attrnames = [meth for meth in dir(imaplib.IMAP4_SSL) if not meth.startswith("_")] >>> attributes = [getattr(obj, attrname) for attrname in attrnames] >>> methods = [attribute for attribute in attributes if inspect.ismethod(attribute)] >>> print "\n".join(funcwrapper % getargs(method) for method in methods) def append(self, mailbox, flags, date_time, message): s,r = imaplib.IMAP4_SSL.append(self, mailbox, flags, date_time, message) if s!='OK': raise NotOK((s,r)) return r def authenticate(self, mechanism, authobject): s,r = imaplib.IMAP4_SSL.authenticate(self, mechanism, authobject) if s!='OK': raise NotOK((s,r)) return r def check(self): s,r = imaplib.IMAP4_SSL.check(self) if s!='OK': raise NotOK((s,r)) return r def close(self): s,r = imaplib.IMAP4_SSL.close(self) if s!='OK': raise NotOK((s,r)) return r def copy(self, message_set, new_mailbox): s,r = imaplib.IMAP4_SSL.copy(self, message_set, new_mailbox) if s!='OK': raise NotOK((s,r)) return r def create(self, mailbox): s,r = imaplib.IMAP4_SSL.create(self, mailbox) if s!='OK': raise NotOK((s,r)) return r def delete(self, mailbox): s,r = imaplib.IMAP4_SSL.delete(self, mailbox) if s!='OK': raise NotOK((s,r)) return r def deleteacl(self, mailbox, who): s,r = imaplib.IMAP4_SSL.deleteacl(self, mailbox, who) if s!='OK': raise NotOK((s,r)) return r def expunge(self): s,r = imaplib.IMAP4_SSL.expunge(self) if s!='OK': raise NotOK((s,r)) return r def fetch(self, message_set, message_parts): s,r = imaplib.IMAP4_SSL.fetch(self, message_set, message_parts) if s!='OK': raise NotOK((s,r)) return r def getacl(self, mailbox): s,r = imaplib.IMAP4_SSL.getacl(self, mailbox) if s!='OK': raise NotOK((s,r)) return r def getquota(self, root): s,r = imaplib.IMAP4_SSL.getquota(self, root) if s!='OK': raise NotOK((s,r)) return r def getquotaroot(self, mailbox): s,r = imaplib.IMAP4_SSL.getquotaroot(self, mailbox) if s!='OK': raise NotOK((s,r)) return r def list(self, directory='""', pattern='*'): s,r = imaplib.IMAP4_SSL.list(self, directory, pattern) if s!='OK': raise NotOK((s,r)) return r def login(self, user, password): s,r = imaplib.IMAP4_SSL.login(self, user, password) if s!='OK': raise NotOK((s,r)) return r def login_cram_md5(self, user, password): s,r = imaplib.IMAP4_SSL.login_cram_md5(self, user, password) if s!='OK': raise NotOK((s,r)) return r def logout(self): s,r = imaplib.IMAP4_SSL.logout(self) if s!='OK': raise NotOK((s,r)) return r def lsub(self, directory='""', pattern='*'): s,r = imaplib.IMAP4_SSL.lsub(self, directory, pattern) if s!='OK': raise NotOK((s,r)) return r 
def myrights(self, mailbox): s,r = imaplib.IMAP4_SSL.myrights(self, mailbox) if s!='OK': raise NotOK((s,r)) return r def namespace(self): s,r = imaplib.IMAP4_SSL.namespace(self) if s!='OK': raise NotOK((s,r)) return r def noop(self): s,r = imaplib.IMAP4_SSL.noop(self) if s!='OK': raise NotOK((s,r)) return r def open(self, host='', port=993): s,r = imaplib.IMAP4_SSL.open(self, host, port) if s!='OK': raise NotOK((s,r)) return r def partial(self, message_num, messa
Re: Solution for architecure dependence in Numeric ?
"Johannes Nix|Johannes.Nix"@uni-oldenburg.de wrote: Hi, I have a tricky problem with Numeric. Some time ago, I have generated a huge and complex data structure, and stored it using the cPickle module. Now I want to evaluate it quickly again on a workstation cluster with 64-Bit Opteron CPUs - I have no more than three days to do this. Compiling Python and running Numeric has been no problem at all. However, I get an error message when accessing the data pickled before. (I can load it regularly on 32 bit computers, but it is a quite complex data object, so I really don't want to store every element as ASCII data). The problem seems to be with 64 Bit integers (with 32-bit-floats, no problem was observed). This looks like that (from the Unix command shell): [EMAIL PROTECTED]:~> python ~/python/test_npickle.py -dump test.pck [EMAIL PROTECTED]:~> python ~/python/test_npickle.py test.pck [0 1 2 3 4 5 6 7 8 9] [EMAIL PROTECTED]:~> ssh 64bithost python ~/python/test_npickle.py test.pck Traceback (most recent call last): File "/home/jnix/python/test_npickle.py", line 16, in ? a = cPickle.load(file(filename)) File "/home/jnix/lib/python2.4/SuSE-9.0/x86_64/Numeric/Numeric.py", line 520, in array_constructor x.shape = shape ValueError: ('total size of new array must be unchanged', , ((10,), 'l', '\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x07\x00\x00\x00\x08\x00\x00\x00\t\x00\x00\x00', 1)) also I get: [EMAIL PROTECTED]:~> python -c "import Numeric; print Numeric.arange(0).itemsize()" 4 [EMAIL PROTECTED]:~> python -c "import Numeric; print Numeric.arange(0).itemsize()" 8 The script used to produce the example above is: - #/usr/bin/python # -*- coding: latin1 -*- import Numeric import cPickle import sys if len(sys.argv) > 1 and sys.argv[1] == '-dump': filename = sys.argv[2] binary=1 a = Numeric.arange(10) cPickle.dump(a, file(filename,'w',binary)) else: filename = sys.argv[1] a = cPickle.load(file(filename)) print a - So what would you suggest ? Can I hack Numeric to assume non-native 32 bit integer numbers ? Many thanks for any help, Johannes It might be worth posting to the Numeric mailing list, mirrored at http://news.gmane.org/gmane.comp.python.numeric.general -- http://mail.python.org/mailman/listinfo/python-list
Re: Style guide for subclassing built-in types?
[EMAIL PROTECTED] wrote:
> Kent Johnson wrote:
>> [EMAIL PROTECTED] wrote:
>>> p.s. the reason I'm not sticking to reversed or even reverse: suppose the size of the list is huge.
>>
>> reversed() returns an iterator so list size shouldn't be an issue. What problem are you actually trying to solve? Kent
>
> Oh, you are right. Actually, it's more complicated than simple reversion. The list order should be somewhat "twisted" and the list is big. For example:
>
> [1,2,3,4,5,6,7,8,9,10] --> [10,9,8,7,6,1,2,3,4,5]
>
> so __getitem__(self, i) => __getitem__(self, -i-1) if i < len(self)/2.
>
> I'd like to have a TwistedList class that takes in an original list and pretends as if it is twisted actually. However, I have to have duplicate code here and there to make it act like a "list", say assert twisted_list == [10,9,...] and for each in twisted_list and etc.

If you want a twisted 'view' of an existing list, then a wrapper makes most sense. If, however, you only need the twisted version, why not simply override list.__init__ (and extend, append etc... as required):

>>> class rev_list(list):
...     def __init__(self, iterable):
...         list.__init__(self, iterable[::-1])
...
>>> l = rev_list([1,2,3])
>>> l
[3, 2, 1]
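The wrapper route might look like the sketch below for the 'twisted' order above - only indexing and length are implemented here; equality and friends would be added the same way (this is illustrative, not from the thread):

class TwistedList(object):
    """Read-only 'twisted' view of a list: nothing is copied,
    indices are remapped on the fly"""
    def __init__(self, data):
        self.data = data
    def __len__(self):
        return len(self.data)
    def __getitem__(self, i):
        n = len(self.data)
        if i < 0:
            i += n
        if not 0 <= i < n:
            raise IndexError(i)       # also terminates for-loop iteration
        if i < n // 2:
            return self.data[-i-1]    # first half: the reversed tail
        return self.data[i - n // 2]  # second half: the original head

>>> t = TwistedList([1,2,3,4,5,6,7,8,9,10])
>>> list(t)
[10, 9, 8, 7, 6, 1, 2, 3, 4, 5]

Michael
--
http://mail.python.org/mailman/listinfo/python-list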
Re: [perl-python] generic equivalence partition
David Eppstein wrote:
> In article <[EMAIL PROTECTED]>, "Xah Lee" <[EMAIL PROTECTED]> wrote:
>> given a list aList of n elements, we want to return a list that is a range of numbers from 1 to n, partition by the predicate function of equivalence equalFunc.
>
> In the worst case, this is going to have to take quadratic time (consider an equalFunc that always returns false) so we might as well do something really simple rather than trying to be clever.
>
> def parti(aList, equalFunc):
>     eqv = []
>     for i in range(len(aList)):
>         print i, eqv
>         for L in eqv:
>             if equalFunc(aList[i], aList[L[0]]):
>                 L.append(i)
>                 break
>         else:
>             eqv.append([i])

Unless we can inspect the predicate function and derive a hash function such that hash(a) == hash(b) => predicate(a,b) is True. Then the partition can take linear time i.e.,

>>> def equal(a, b):
...     return a[-1] == b[-1]
...
>>> def hashFunc(obj):
...     return hash(obj[-1])
...
>>> def parti(aList, hashFunc):
...     eqv = {}
...     for i, obj in enumerate(aList):
...         eqv.setdefault(hashFunc(obj), []).append(i)
...     return eqv.values()
...

In the case where the predicate is a "black box", would a logistic regression over a sample of inputs enable a hash function to be derived experimentally?

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: accessor/mutator functions
[EMAIL PROTECTED] wrote: When I look at how classes are set up in other languages (e.g. C++), I often observe the following patterns: 1) for each data member, the class will have an accessor member function (a Get function) 2) for each data member, the class will have a mutator member function (a Set function) 3) data members are never referenced directly; they are always referenced with the accessor and mutator functions My questions are: a) Are the three things above considered pythonic? No b) What are the tradeoffs of using getattr() and setattr() rather than creating accessor and mutator functions for each data member? Use property descriptors instead: http://www.python.org/2.2.1/descrintro.html#property http://users.rcn.com/python/download/Descriptor.htm#properties Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: accessor/mutator functions
[EMAIL PROTECTED] wrote:
> If the class had two attributes--x and y--would the code look like something like this:
>
> class C(object):
>     def __init__(self):
>         self.__x = 0
>         self.__y = 0
>     def getx(self):
>         return self.__x
>     def setx(self, x):
>         if x < 0:
>             x = 0
>         self.__x = x
>     def gety(self):
>         return self.__y
>     def sety(self, y):
>         if y < 0:
>             y = 0
>         self.__y = y
>     x = property(getx, setx)
>     y = property(gety, sety)

It could do - that works. One feature of this solution is that it leaves the accessor/mutator functions in the namespace. That may be a good or a bad thing. If bad, you could simply delete them after the property call (which is probably better written as close as possible to the functions) i.e.,

class C(object):
    def __init__(self):
        self.__x = 0
        self.__y = 0

    def getx(self):
        return self.__x
    def setx(self, x):
        if x < 0:
            x = 0
        self.__x = x
    x = property(getx, setx)
    del getx, setx

    def gety(self):
        return self.__y
    def sety(self, y):
        if y < 0:
            y = 0
        self.__y = y
    y = property(gety, sety)
    del gety, sety

There are also recipes in the cookbook for defining property "suites" more elegantly.

Note that it is also easy to "roll your own" descriptor, which may be worthwhile if you have a lot of similar properties, for example (not tested beyond what you see):

from weakref import WeakKeyDictionary

class Property(object):
    def __init__(self, adapter):
        """adapter is a single argument function that will be
        applied to the value before setting it"""
        self.objdict = WeakKeyDictionary()
        self.adapter = adapter
    def __get__(self, obj, cls):
        if isinstance(obj, cls):
            return self.objdict[obj]
        else:
            return self
    def __set__(self, obj, value):
        self.objdict[obj] = self.adapter(value)

class C(object):
    x = Property(lambda val: max(val, 0))
    y = Property(lambda val: val % 2)
    z = Property(abs)

>>> c = C()
>>> c.x = -3
>>> c.x
0
>>> c.y = -3
>>> c.y
1
>>> c.z = -3
>>> c.z
3
>>>

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: Converting HTML to ASCII
gf gf wrote:
> [wants to extract ASCII from badly-formed HTML and thinks BeautifulSoup is too complex]

You haven't specified what you mean by "extracting" ASCII, but I'll assume that you want to start by eliminating html tags and comments, which is easy enough with a couple of regular expressions:

>>> import re
>>> comments = re.compile('<!--.*?-->', re.DOTALL)
>>> tags = re.compile('<.*?>', re.DOTALL)
>>> def striptags(text):
...     text = re.sub(comments, '', text)
...     text = re.sub(tags, '', text)
...     return text
...
>>> def collapsenewlines(text):
...     return "\n".join(line for line in text.splitlines() if line)
...
>>> import urllib2
>>> f = urllib2.urlopen('http://www.python.org/')
>>> source = f.read()
>>> text = collapsenewlines(striptags(source))
>>>

This will of course fail if there is a "<" without a ">", probably in other cases too. But it is indifferent to whether the html is well-formed.

This leaves you with the additional task of substituting the html escaped characters e.g., "&nbsp;", not all of which will have ASCII representations.
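For that last task, the stdlib htmlentitydefs module supplies the lookup table (name2codepoint has been in the stdlib since 2.3). A rough sketch - the substitution policy (spaces for non-ASCII, hex forms left alone) is my own choice:

import re, htmlentitydefs

entity = re.compile(r'&(#?)(\w+);')

def unescape(text):
    def fix(match):
        numeric, name = match.groups()
        if numeric:
            try:
                code = int(name)
            except ValueError:      # e.g. hex form &#xA0; - not handled here
                return match.group(0)
        else:
            code = htmlentitydefs.name2codepoint.get(name)
            if code is None:        # unknown entity: leave it alone
                return match.group(0)
        if code < 128:
            return chr(code)        # plain ASCII
        return ' '                  # no ASCII equivalent: substitute a space
    return entity.sub(fix, text)

HTH
Michael
--
http://mail.python.org/mailman/listinfo/python-list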
Re: Converting HTML to ASCII
Mike Meyer wrote:
> It also fails on tags with a ">" in a string in the tag. That's well-formed but ill-used HTML.

True enough... however, it doesn't fail too horribly:

>>> striptags("""the text""")
"'>the text"
>>>

and I think that case could be rectified rather easily, by stripping any content up to '>' in the result without breaking anything else.

BTW, I took a first look at BeautifulSoup. As far as I could tell, there is no built-in way to extract text from its parse tree, however adding one is trivial:

>>> from bsoup import BeautifulSoup, Tag
>>> def extracttext(obj):
...     if isinstance(obj, Tag):
...         return "".join(extracttext(c) for c in obj.contents)
...     else:
...         return str(obj)
...
>>> def bsouptext(text):
...     souptree = BeautifulSoup(text)
...     bodytext = extracttext(souptree.first())
...     text = re.sub(comments, '', bodytext)
...     text = collapsenewlines(text)
...     return text
...
>>> bsouptext("""the text""")
"'>the text"

On one 'real world test' (nytimes.com), I find the regexp approach to be more accurate, but I won't load up this message with the output to prove it ;-)

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: ListMixin (WAS: How do you control _all_ items added to a list?)
Steven Bethard wrote: Nick Coghlan wrote: > Hmm, it might be nice if there was a UserList.ListMixin that was the > counterpart to UserDict.DictMixin I've thought this occasionally too. One of the tricky issues though is that often you'd like to define __getitem__ for single items and have ListMixin add the code for slices. I haven't figured out how to do this cleanly yet... STeVe I agree that would be useful. One solution would be to ask users to implement __getsingleitem__ (and not __getitem__) if they want the mixin to handle slice logic. The following illustrates that, and also falls back to slicing the iterator if it is provided: class ProtoListMixin(object): """Prototype ListMixin, exploring slice interface and semantics""" def __getitem__(self, index): if isinstance(index, slice): start, stop, step = index.start or 0, index.stop, index.step or 1 if start < 0 or stop < 0 or not stop: try: start, stop, step = index.indices(len(self)) except TypeError: raise TypeError, "unsized object" try: getter = self.__getsingleitem__ return [getter(i) for i in range(start, stop, step)] except AttributeError: pass else: if index < 0: try: index = len(self) + index except TypeError: raise TypeError, "unsized object" try: return self.__getsingleitem__(index) except AttributeError: pass start, stop, step = index, index + 1, None # Alternatively, try to use the iterator, if available import itertools try: args = [iter(self)] except AttributeError: raise TypeError, "Must implement __getsingleitem__ or __iter__" if start: args.append(start) args.append(stop) if step: if step < 1: raise ValueError, "slicing an iterable requires step >=1" args.append(step) iterator = itertools.islice(*args) if isinstance(index, slice): return list(iterator) else: try: return iterator.next() except StopIteration: raise IndexError, "index out of range" # Users should implement __getsingleitem__ for positive indices class Index(ProtoListMixin): def __init__(self, data): """For testing, provide a list""" self._data = data def __getsingleitem__(self, index): return self._data[index] # If __len__ is implemented, negative indices are supported class IndexLen(Index): def __len__(self): return len(self._data) # If __getsingleitem__ is not implemented, positive slices are returned # from an iterator class Iter(ProtoListMixin): def __init__(self, data): """For testing, provide an iterable""" self._data = data def __iter__(self): return iter(self._data) >>> a = Index(range(10)) >>> a[4] 4 >>> a[4:8] [4, 5, 6, 7] >>> a[-4] Traceback (most recent call last): File "", line 1, in ? File "ListMixin", line 22, in __getitem__ TypeError: unsized object >>> b = IndexLen(range(10)) >>> b[-4] 6 >>> c = Iter(xrange(10)) >>> c[3] 3 >>> c[3:6] [3, 4, 5] >>> c[-3] Traceback (most recent call last): File "", line 1, in ? File "ListMixin", line 22, in __getitem__ TypeError: unsized object >>> -- http://mail.python.org/mailman/listinfo/python-list
Re: reuse validation logic with descriptors
David S. wrote:
> This still fails to work for instance variables of the class. That is, if I use your property in the following:
>
> py> class Flags(object):
> ...     def __init__(self):
> ...         a = singlechar

you should write that as:

class Flags(object):
    a = singlechar
    def __init__(self):
        self.a = "a"

> py> f = Flags()
> py> f.a = "a"
>
> Now f.a.__class__.__name__ returns 'str'. So the property was not used at all.
>
> Also, it seems that using a property, I can not do the other useful things I can do with a proper class, like provide an __init__, __str__, or __repr__.

If you want "other useful things" then you can write a custom descriptor, like:

from weakref import WeakKeyDictionary

class SingleChar(object):
    def __init__(self):
        """raises ValueError if attribute is set to something
        other than a single char"""
        self.objdict = WeakKeyDictionary()
    def __get__(self, obj, cls):
        if isinstance(obj, cls):
            try:
                return self.objdict[obj]
            except KeyError:
                raise AttributeError, "property not set"
        else:
            return self
    def __set__(self, obj, value):
        if isinstance(value, str) and len(value) == 1:
            self.objdict[obj] = value
        else:
            raise ValueError, value

class Flags(object):
    a = SingleChar()
    b = SingleChar()

See also:
http://groups-beta.google.com/group/comp.lang.python/msg/30c61a30a90133d2
for another example of this approach

Michael

> Again, thanks,
> David S.

--
http://mail.python.org/mailman/listinfo/python-list
Re: Is it possible to specify the size of list at construction?
Anthony Liu wrote:
> I cannot figure out how to specify a list of a particular size. For example, I want to construct a list of size 10, how do I do this?

A list does not have a fixed size (as you probably know). But you can initialize it with 10 somethings:

>>> [None]*10
[None, None, None, None, None, None, None, None, None, None]
>>> range(10)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>>

Michael
--
http://mail.python.org/mailman/listinfo/python-list
Re: Is it possible to specify the size of list at construction?
Anthony Liu wrote:
> Yes, that's helpful. Thanks a lot. But what if I wanna construct an array of arrays like we do in C++ or Java:
>
> myArray [][]
>
> Basically, I want to do the following in Python:
>
> myArray[0][1] = list1
> myArray[1][2] = list2
> myArray[2][3] = list3

Here you have to be careful to create N different lists. A list comprehension provides a convenient way to do it:

myArray = [[] for i in range(N)]
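The care is warranted because the multiplication shortcut copies references, not lists - a quick demonstration of the trap:

>>> rows = [[]] * 3                  # three references to the *same* list
>>> rows[0].append(1)
>>> rows
[[1], [1], [1]]
>>> rows = [[] for i in range(3)]    # three distinct lists
>>> rows[0].append(1)
>>> rows
[[1], [], []]

--
http://mail.python.org/mailman/listinfo/python-list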
Re: Best way to make a list unique?
Delaney, Timothy C (Timothy) wrote: Michael Hoffman wrote: For those who don't know, these implement a hash set/map which iterates in the order that the keys were first added to the set/map. I would love to see such a thing. I've proposed this on python-dev, but the general feeling so far is against it. So far the only use case is to remove duplicates without changing order, and there are iterator-based solutions which would normally be preferable. It's pretty simple to roll your own, and I'll probably put together a Cookbook recipe for it. Tim Delaney Here's something to work with: class OrdSet(object): def __init__(self, iterable): """Build an ordered, unique collection of hashable items""" self._data = {None:[None, None]} # None is the pointer to the first # element. This is unsatisfactory # because it cannot then be a member # of the collection self._last = None self.update(iterable) def add(self, obj): """Add an element to the collection""" data = self._data if not obj in data: last = self._last data[last][1] = obj data[obj] = [last, None] self._last = obj def update(self, iterable): """Update the collection with the union of itself and another""" obj = self._last data = self._data last = data[obj][0] for item in iterable: if item not in data: data[obj] = [last, item] last, obj = obj, item data[obj] = [last, None] self._last = obj def remove(self, item): """Remove an element from a set; it must be a member. If the element is not a member, raise a KeyError.""" data = self._data prev, next = data[item] data[prev][1] = next data[next][0] = prev def discard(self, item): """Remove an element from a set if it is a member. If the element is not a member, do nothing.""" try: self.remove(item) except KeyError: pass def __contains__(self, item): return item in self._data def pop(self): """Remove and the return the oldest element""" data = self._data prev, first = data[None] data[None] = [None,data[first][1]] return first def clear(self): self.__init__([]) def __iter__(self): """Iterate over the collection in order""" data = self._data prev, next = data[None] while next is not None: yield next prev, next = data[next] def __len__(self): return len(self._data)-1 def __repr__(self): return "%s(%s)" % (self.__class__.__name__,list(self)) >>> a= OrdSet(range(10)) >>> a OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> a.update(range(5,15)) >>> a OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]) >>> a.discard(8) >>> a OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14]) >>> Michael -- http://mail.python.org/mailman/listinfo/python-list
Re: Best way to make a list unique?
Marc Christiansen wrote: Michael Spencer <[EMAIL PROTECTED]> wrote: Nice. When you replace None by an object(), you have no restriction on the elements any more: Thanks for the suggestion, Marc. Note that if there is no need to access the middle of the collection, then the implementation is simpler, and less resource-intensive, since the items can be singly-linked class UniqueQueue(object): def __init__(self, iterable): self._data = _data = {} self._last = self._root = object() # An object the user is unlikely to # reference - thanks Marc self.update(iterable) def push(self, obj): if not obj in self._data: self._data[self._last] = obj self._last = obj def pop(self): data = self._data first = data.pop(self._root) self._root = first return first def update(self, iterable): last = self._last data = self._data for item in iterable: if item not in data: data[last] = item last = item self._last = last def __iter__(self): data = self._data next = self._root try: while 1: next = data[next] yield next except KeyError: raise StopIteration def __repr__(self): return "%s(%s)" % (self.__class__.__name__,list(self)) >>> q = UniqueQueue(range(5)) >>> q.update(range(3,8)) >>> q UniqueQueue([0, 1, 2, 3, 4, 5, 6, 7]) >>> q.pop() 0 >>> q UniqueQueue([1, 2, 3, 4, 5, 6, 7]) >>> >>> q.push(None) >>> q UniqueQueue([1, 2, 3, 4, 5, 6, 7, None]) >>> Michael -- http://mail.python.org/mailman/listinfo/python-list