Here's something I've used that has worked reasonably well, though not perfectly.
   Suggestions/improvements welcome.

adam


def force_to_utf8(text):
  """Return `text` as a UTF-8 encoded byte string (Python 2 `str`).

  Getting this right in Python 2 and App Engine has been complicated and
  painful.  See preso: http://farmdev.com/talks/unicode/

  Conversion rules:
    * `unicode` input is encoded as UTF-8.
    * Non-string input is converted with `str()` first; if that fails
      because its text form is non-ASCII, `unicode()` is used instead.
    * A byte string that is already valid UTF-8 is returned unchanged.
    * Any other byte string is treated as Latin-1 and transcoded to UTF-8.

  Side effects:
    Increments the module-level ENCODING_ERRORS counter once for each
    byte string that turned out not to be valid UTF-8.
  """
  global ENCODING_ERRORS
  if isinstance(text, unicode):
    # Encoding unicode always yields valid UTF-8; no re-check needed.
    return text.encode('utf8')
  if not isinstance(text, basestring):
    try:
      text = str(text)
    except UnicodeError:
      # str() implicitly encodes as ASCII and fails on objects whose text
      # form is non-ASCII unicode; go through unicode() instead.
      return unicode(text).encode('utf8')
  try:
    # detect if it's already utf8
    text.decode('utf8')
  except UnicodeError:
    ENCODING_ERRORS += 1
  else:
    return text
  # Latin-1 maps every possible byte value to a code point, so this decode
  # cannot fail; any fallback placed after it would never be reached.
  return text.decode('latin1').encode('utf8')


-- 
You received this message because you are subscribed to the Google Groups 
"Google App Engine" group.
To view this discussion on the web visit 
https://groups.google.com/d/msg/google-appengine/-/ez2AnX78zFcJ.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/google-appengine?hl=en.

Reply via email to