import xml.sax.handler

class BookHandler(xml.sax.handler.ContentHandler):
  """SAX content handler that collects book data keyed by ISBN.

  While parsing, every ``<book>`` element contributes one entry to
  ``mapping``: the key is the value of the element's ``isbn`` attribute and
  the value is a list holding the text of the ``<title>`` and ``<author>``
  elements found inside that book, in document order.

  ``<title>`` and ``<author>`` elements that occur outside of a ``<book>``
  are ignored.
  """

  def __init__(self):
    # Explicit base-class call (instead of super()) so the handler also
    # works where ContentHandler is an old-style class.
    xml.sax.handler.ContentHandler.__init__(self)
    self.inTitle = 0
    self.inAuthor = 0
    self.mapping = {}
    # Per-book state; initialised here so that every callback can rely on
    # these attributes existing even before the first <book> is seen.
    self.inBook = 0
    self.bookdata = []
    self.title = ""
    self.author = ""
    self.isbn = None

  def startElement(self, name, attributes):
    """Begin a book record or start capturing title/author text.

    Raises KeyError if a ``<book>`` element has no ``isbn`` attribute.
    """
    if name == "book":
      # The data list of a book is made up of its title and author.
      self.inBook = 1
      self.bookdata = []
      self.title = ""
      self.author = ""
      self.isbn = attributes["isbn"]
    elif self.inBook and name == "title":
      self.inTitle = 1
      # Start from an empty buffer so a repeated element does not inherit
      # the text of the previous one.
      self.title = ""
    elif self.inBook and name == "author":
      self.inAuthor = 1
      self.author = ""

  def characters(self, data):
    """Accumulate character data for the element currently being captured.

    The parser may deliver the text of one element in several chunks, so
    the chunks are concatenated.
    """
    if self.inTitle:
      self.title += data
    elif self.inAuthor:
      self.author += data

  def endElement(self, name):
    """Finish a title/author capture or store the completed book."""
    if name == "title" and self.inTitle:
      self.inTitle = 0
      self.bookdata.append(self.title)
    elif name == "author" and self.inAuthor:
      self.inAuthor = 0
      self.bookdata.append(self.author)
    elif name == "book":
      # Store the record only once the book is complete; storing on every
      # closing tag would fail for documents without any <book>.
      self.mapping[self.isbn] = self.bookdata
      self.inBook = 0

import io
import pprint
import xml.sax

# Parse the book catalogue; afterwards handler.mapping holds one entry per
# book, keyed by ISBN.
parser = xml.sax.make_parser()
handler = BookHandler()
parser.setContentHandler(handler)
parser.parse("books5.xml")

# Write the keys and items of the dictionary, one record per book.
# The parsed strings are text, so the file is opened with an explicit cp1250
# encoding rather than encoding every field by hand before formatting; the
# `with` block closes the file even if writing a record fails.
with io.open('knihy_SAX.txt', 'w', encoding='cp1250') as sf:
  for key, bookdata in handler.mapping.items():
    # bookdata[0] and bookdata[1] are the first two values the handler
    # collected for the book (title, then author, when the XML lists them
    # in that order); a book with fewer than two values raises IndexError.
    s1 = bookdata[1].ljust(25)
    s2 = bookdata[0].ljust(30)
    sf.write(u"%s:  %s\n(ISBN %s)\n" % (s1, s2, key))
