The escaping is way off in NTSerializer.py. Here's an attempt at a better one (in Python). What it really needs is to be run against the W3C N-Triples test suite, which I have not tried yet.
# ==== rdflib/syntax/serializers/NTSerializer.py ====
"""Pure-Python N-Triples serializer for rdflib graphs.

I tried using raptor via redland, but redland couldn't make literals from
unicode!  So here's a pure-python, not-well-tested nt serializer.  w3 has a
test suite that should be incorporated.

See http://www.w3.org/TR/rdf-testcases/#ntriples
"""
from rdflib.syntax.serializers import Serializer
from rdflib import Literal, URIRef

# Characters that have a dedicated short escape in N-Triples strings;
# see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
_NT_SHORT_ESCAPES = {
    u"\t": u"\\t",
    u"\n": u"\\n",
    u"\r": u"\\r",
    u'"': u'\\"',
    u"\\": u"\\\\",
}


def _nt_escape(text):
    """Return *text* with N-Triples string escapes applied.

    Printable ASCII (0x20-0x7E) other than ``"`` and ``\\`` passes through
    unchanged.  Tab, newline, carriage return, double quote and backslash
    use their short escapes.  Every other code point becomes ``\\uXXXX``
    (up to 0xFFFF) or ``\\UXXXXXXXX`` (above 0xFFFF).
    """
    pieces = []
    for char in text:
        short = _NT_SHORT_ESCAPES.get(char)
        if short is not None:
            pieces.append(short)
            continue
        code = ord(char)
        if code > 0xFFFF:
            pieces.append(u"\\U%08X" % code)
        elif code < 0x20 or code > 0x7E:
            # Formatted by hand rather than via the 'unicode_escape' codec:
            # the codec emits lowercase hex digits (and \x## for
            # 0x80-0xFF), and returns bytes instead of text on Python 3.
            pieces.append(u"\\u%04X" % code)
        else:
            pieces.append(char)
    return u"".join(pieces)


def ntNode(node):
    """Return the N-Triples text for a single RDF node.

    Parameters:
        node: a ``Literal``, a ``URIRef``, or any other node object that
            provides an ``n3()`` method (its result is returned as-is).

    Returns:
        ``<escaped-uri>`` for a ``URIRef``; ``"escaped-text"`` for a
        ``Literal``, followed by ``@language`` or ``^^<datatype>`` when the
        literal carries one of them.

    Raises:
        ValueError: if a literal has both a language and a datatype.
    """
    if not isinstance(node, (Literal, URIRef)):
        return node.n3()

    escaped = _nt_escape(node)

    if isinstance(node, Literal):
        ret = u'"%s"' % escaped
        if node.language:
            if node.datatype:
                raise ValueError("NT can't represent a literal with both "
                                 "language and datatype")
            ret = ret + u"@" + node.language
        elif node.datatype:
            # The datatype is itself a URI, so it gets the same escaping as
            # any other URI written to the output.
            ret = ret + u"^^<%s>" % _nt_escape(node.datatype)
        return ret

    # Only URIRef can reach this point (see the isinstance guard above).
    return u"<%s>" % escaped


class NTSerializer(Serializer):
    """Serializer that writes a store's triples in N-Triples format."""

    def __init__(self, store):
        """
        I serialize RDF graphs in NTriples format.
        """
        super(NTSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None):
        """Write every triple in ``self.store`` to *stream*, one per line.

        Parameters:
            stream: object with a ``write`` method accepting text.
            base: not supported; a TODO notice is printed when it is given.
            encoding: accepted for interface compatibility; not used here.
        """
        if base is not None:
            print("TODO: NTSerializer does not support base")
        # An explicit loop instead of map(): map() is lazy on Python 3, so
        # using it purely for side effects would write nothing at all.
        for triple in self.store:
            stream.write(ntNode(triple[0]) + u" " +
                         ntNode(triple[1]) + u" " +
                         ntNode(triple[2]) + u".\n")

# _______________________________________________
# Dev mailing list
# Dev@rdflib.net
# http://rdflib.net/mailman/listinfo/dev