Hi, why don't you use the code snippet I posted? It already solves most of your problems. The rest should be easy for you to add.
Stefan amitesh kumar wrote: > I've updated my code to this extent: > > import sys > import xml.parsers.expat > import dircache > > rec = {} > rec2 = {} > oli = {} > ordtagname = '*' > recList = {} > cnt = 0 > cnt2 = 0 > > ordtags = > set(['orrfnbr','orrfnbr','afidlog','orprtot','ortxtot','orshtot','orcpcur','orpstmp','orustmp','orappstat','orappdt']) > > shptags = > set(['strfnbr','stprnbr','stvdnbr','stprice','stquant','stpstmp','stustmp','starwbll','stdspchstat','stlogistics','stentrydt','stcpprice','stlstprice']) > > omptags = set(['ompaymthd','ommaxaamt']) > > def start_element(name, attrs): > global ordtagname, rec,recList,cnt,ordtags,rec2,cnt2 > if name in ordtags or name in shptags or name in omptags: > ordtagname = name > if name == 'shipto': > rec[cnt2] = rec2 > if name == 'order': > recList[cnt] = rec > sys.stdout.flush() > > def end_element(name): > global ordtagname, rec,recList,cnt,ordtags,rec2,cnt2 > if name in ordtags or name in shptags or name in omptags: > ordtagname = '' > if name == 'shipto': > cnt2 = cnt2+1 > rec2 = {} > #rec[cnt2] = rec2 > if name == 'order': > cnt2 = 0 > #recList[cnt] = rec > rec = {} > cnt = cnt+1 > sys.stdout.flush() > > def char_data(data): > global ordtagname, rec,recList,cnt,ordtags,rec2,cnt2 > if None != data: > if ordtagname in ordtags or ordtagname in shptags or ordtagname > in omptags: > if ordtagname in shptags : > rec2[repr(ordtagname).strip('u\'')] = > repr(data).strip('u\'') > else: > rec[repr(ordtagname).strip('u\'')] = repr(data).strip('u\'') > sys.stdout.flush() > > for f in iter(dircache.listdir('./xmls/')): > #print f > g=open('./xmls/'+f, 'r') > p = xml.parsers.expat.ParserCreate() > p.StartElementHandler = start_element > p.CharacterDataHandler = char_data > p.EndElementHandler = end_element > p.ParseFile(g) > g.close() > print recList > > ----------- > > > Now I have to access the recList elements in an iterative manner and do > further processing. Will you please help me in this effort?
> > Current output is: > > {0: {0: {'stentrydt': 'null', 'stustmp': '2007-07-18 14:49:43.0', > 'stlogistics': '7', 'stprnbr': '10197436', 'stlstprice': '284', > 'stdspchstat': '0', 'starwbll': 'null', 'strfnbr': '4491691', 'stquant': > '1', 'stprice': ' 284.0', 'stpstmp': '2007-07-18 14:49:43.0', 'stvdnbr': > '4143', 'stcpprice': '221.52'}, 'orustmp': '2007-07-19 18:29:23.0', 2: > {'stentrydt': 'null', 'stustmp': '2007-07-18 14:49: 44.0', > 'stlogistics': '7', 'stprnbr': '10158532', 'stlstprice': '325', > 'stdspchstat': '0', 'starwbll': 'null', 'strfnbr': '4491693', 'stquant': > '1', 'stprice': ' 325.0', 'stpstmp': '2007-07-18 14:49:44.0', 'stvdnbr': > '4285', 'stcpprice': '276.25'}, 'orappstat': '1', 4: {'stentrydt': > 'null', 'stustmp': '2007-07-18 14:49: 44.0', 'stlogistics': '0', > 'stprnbr': '10193438', 'stlstprice': '199', 'stdspchstat': '0', > 'starwbll': 'null', 'strfnbr': '4491695', 'stquant': '1', 'stprice': ' > 129.0', 'stpstmp': '2007-07-18 14:49:44.0', 'stvdnbr': '956', > 'stcpprice': '90.3'}, 3: {'stentrydt': 'null', 'stustmp': '2007-07-18 > 14:49:44.0 ', 'stlogistics': '7', 'stprnbr': '10092402', 'stlstprice': > '199', 'stdspchstat': '0', 'starwbll': 'null', 'strfnbr': '4491694', > 'stquant': '1', 'stprice': ' 189.0', 'stpstmp': '2007-07-18 14:49:44.0', > 'stvdnbr': '4094', 'stcpprice': '151.2'}, 1: {'stentrydt': 'null', > 'stustmp': '2007-07-18 14:49:43.0 ', 'stlogistics': '7', 'stprnbr': > '10188562', 'stlstprice': '1299', 'stdspchstat': '0', 'starwbll': 'll', > 'strfnbr': '4491692', 'stquant': '1', 'stprice': ' 909.0', 'stpstmp': > '2007-07-18 14:49:43.0', 'stvdnbr': '3557', 'stcpprice': '727.2'}, > 'orpstmp': '2007-07-18 14:49:44.0', 'ompaymthd': 'ICI ', 'orappdt': > '2007-07-19 18:29: 23.0', 'orshtot': '241.0', 'orcpcur': 'INR', > 'ommaxaamt': '2077.0', 'orrfnbr': '3992187', 'orprtot': '1836.0', > 'ortxtot': '0.0 '}, 1: {0: {'stentrydt': 'null', 'stustmp': '2007-07-19 > 22:52:14.0', 'stlogistics': '0', 'stprnbr': '1030470', 'stlstprice': > 
'2475', 'stdspchstat': '0', 'starwbll': 'null', 'strfnbr': '4494126', > 'stquant': '1', 'stprice': ' 2475.0', 'stpstmp': '2007-07-19 > 22:52:14.0', 'stvdnbr': '2179', 'stcpprice': '1750.0'}, 'orustmp': > '2007-07-19 22:52:16.0', 'orappstat': '-1', 1: {'stentrydt': 'null', > 'stustmp': '2007-07-19 22:52: 14.0', 'stlogistics': '0', 'stprnbr': > '1048790', 'stlstprice': '2475', 'stdspchstat': '0', 'starwbll': 'null', > 'strfnbr': '4494127', 'stquant': '1', 'stprice': ' 2475.0', 'stpstmp': > '2007-07-19 22:52:14.0', 'stvdnbr': '2179', 'stcpprice': '0.0'}, > 'orpstmp': '2007-07-19 22:52:14.0', 'ompaymthd': 'MAST ', 'orappdt': > 'null', 'orshtot': ' 0.0', 'orcpcur': 'INR', 'ommaxaamt': '4950.0', > 'orrfnbr': '3994456', 'orprtot': '4950.0', 'ortxtot': '0.0'}, 2: {0: > {'stentrydt': 'null', 'stustmp': '2007-07-19 23:05: 05.0', > 'stlogistics': '0', 'stprnbr': '3539177', 'stlstprice': '1', > 'stdspchstat': '0', 'starwbll': 'null', 'strfnbr': '4494139', 'stquant': > '1', 'stprice': ' 500.0', 'stpstmp': '2007-07-19 23:05:05.0', 'stvdnbr': > '4370', 'stcpprice': '465.0'}, 'orustmp': '2007-07-20 00:20:06.0', > 'orappstat': '5', 'orpstmp': '2007-07-19 23:05: 05.0', 'ompaymthd': > 'ICI ', 'orappdt': 'null', 'afidlog': 'Auction', 'orshtot': '0.0', > 'orcpcur': 'INR', 'ommaxaamt': '500.0 ', 'orrfnbr': '3994466', > 'orprtot': '500.0', 'ortxtot': '0.0'}, 3: {0: {'stentrydt': 'null', > 'stustmp': '2007-07-19 23:38:56.0', 'stlogistics': '0', 'stprnbr': > '2771831', 'stlstprice': '843', 'stdspchstat': '0', 'starwbll': 'null', > 'strfnbr': '4494158', 'stquant': '1', 'stprice': ' 900.0', 'stpstmp': > '2007-07-19 23:38:56.0', 'stvdnbr': '3991', 'stcpprice': '543.0'}, > 'orustmp': '2007-07-19 23:38:57.0', 'orappstat': '-1', 'orpstmp': > '2007-07-19 23:38: 56.0', 'ompaymthd': 'AMEX ', 'orappdt': 'null', > 'orshtot': '0.0', 'orcpcur': 'INR', 'ommaxaamt': '900.0', 'orrfnbr': > '3994481', 'orprtot': ' 900.0', 'ortxtot': '0.0'}} > > > Thanks, > > Amitesh > > > > > On 8/10/07, *Stefan Behnel* < 
[EMAIL PROTECTED] > <mailto:[EMAIL PROTECTED]>> wrote: > > Hi, > > first thing: don't use expat directly. Use (c)ElementTree's > iterparse. It's in > Python 2.5, but is also available as an external package for older > Python > versions. There's also lxml (which is mostly compatible with > ElementTree), in > case you ever need features like XPath, XSLT or whatever. > > > amitesh kumar wrote: > > Please review the following code and help me. > > > > Here I'm trying to: > > 1. Read each XML file in a folder. > > 2. Parse file. > > 3. Store some of the tag values as key-value pairs in a map > > 4. Similarly maintain another collection that'll store one list > per file. > > > ------------------------------------------------------------------------ > > > > ordtags = set() > > shptags = set() > > omptags = set() > > > > ordtags.add('orrfnbr') > > ordtags.add('afidlog') > [...] > > Better: > > ordtags = set(['orrfnbr', 'afidlog', ...]) > > from xml.etree.cElementTree import iterparse > > for onefile in allfiles: > for event, element in iterparse(onefile): > if element.tag in ordtags: > # do something like > values[ element.tag] = element.text > elif element.tag in shptags: > # do something else > else: > # don't do anything? > element.clear() > > Stefan > > > > > -- > With Regards > Amitesh K. > 9850638640 _______________________________________________ XML-SIG maillist - XML-SIG@python.org http://mail.python.org/mailman/listinfo/xml-sig