Marcus Christie created AIRAVATA-3697:
-----------------------------------------

             Summary: Handle unicode characters in file names
                 Key: AIRAVATA-3697
                 URL: https://issues.apache.org/jira/browse/AIRAVATA-3697
             Project: Airavata
          Issue Type: Bug
          Components: Airavata API, Django Portal
            Reporter: Marcus Christie
            Assignee: Marcus Christie


The Django portal raises the following error when browsing experiment output
files that have Unicode characters in their file names:

{code}
Traceback (most recent call last):
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/rest_framework/views.py",
 line 506, in dispatch
   response = handler(request, *args, **kwargs)
 File 
"/var/www/portals/django-seagrid/airavata-django-portal/django_airavata/apps/api/views.py",
 line 1467, in get
   return self._create_response(request, experiment_id, path)
 File 
"/var/www/portals/django-seagrid/airavata-django-portal/django_airavata/apps/api/views.py",
 line 1471, in _create_response
   directories, files = user_storage.list_experiment_dir(request, 
experiment_id, path)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/airavata_django_portal_sdk/user_storage/api.py",
 line 629, in list_experiment_dir
   backend=backend, owner=experiment.userName)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/airavata_django_portal_sdk/user_storage/api.py",
 line 760, in _get_data_product_uri
   if user_file.exists():
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/models/query.py",
 line 808, in exists
   return self.query.has_results(using=self.db)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/models/sql/query.py",
 line 561, in has_results
   return compiler.has_results()
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/models/sql/compiler.py",
 line 1145, in has_results
   return bool(self.execute_sql(SINGLE))
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/models/sql/compiler.py",
 line 1175, in execute_sql
   cursor.execute(sql, params)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/backends/utils.py",
 line 66, in execute
   return self._execute_with_wrappers(sql, params, many=False, 
executor=self._execute)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/backends/utils.py",
 line 75, in _execute_with_wrappers
   return executor(sql, params, many, context)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/backends/utils.py",
 line 84, in _execute
   return self.cursor.execute(sql, params)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/backends/mysql/base.py",
 line 73, in execute
   return self.cursor.execute(query, args)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/MySQLdb/cursors.py",
 line 199, in execute
   args = tuple(map(db.literal, args))
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/MySQLdb/connections.py",
 line 280, in literal
   s = self.string_literal(o.encode(self.encoding))

During handling of the above exception ('utf-8' codec can't encode characters 
in position 197-199: surrogates not allowed), another exception occurred:
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/handlers/exception.py",
 line 47, in inner
   response = get_response(request)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/handlers/base.py",
 line 181, in _get_response
   response = wrapped_callback(request, *callback_args, **callback_kwargs)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/views/decorators/csrf.py",
 line 54, in wrapped_view
   return view_func(*args, **kwargs)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/views/generic/base.py",
 line 70, in view
   return self.dispatch(request, *args, **kwargs)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/rest_framework/views.py",
 line 509, in dispatch
   response = self.handle_exception(exc)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/rest_framework/views.py",
 line 466, in handle_exception
   response = exception_handler(exc, context)
 File 
"/var/www/portals/django-seagrid/airavata-django-portal/django_airavata/apps/api/exceptions.py",
 line 54, in custom_exception_handler
   log.error("API exception", exc_info=exc, extra={'request': 
context['request']})
 File "/usr/lib64/python3.6/logging/__init__.py", line 1337, in error
   self._log(ERROR, msg, args, **kwargs)
 File "/usr/lib64/python3.6/logging/__init__.py", line 1444, in _log
   self.handle(record)
 File "/usr/lib64/python3.6/logging/__init__.py", line 1454, in handle
   self.callHandlers(record)
 File "/usr/lib64/python3.6/logging/__init__.py", line 1516, in callHandlers
   hdlr.handle(record)
 File "/usr/lib64/python3.6/logging/__init__.py", line 865, in handle
   self.emit(record)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/utils/log.py",
 line 122, in emit
   self.send_mail(subject, message, fail_silently=True, 
html_message=html_message)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/utils/log.py",
 line 125, in send_mail
   mail.mail_admins(subject, message, *args, connection=self.connection(), 
**kwargs)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/__init__.py",
 line 104, in mail_admins
   mail.send(fail_silently=fail_silently)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
 line 284, in send
   return self.get_connection(fail_silently).send_messages([self])
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/backends/smtp.py",
 line 109, in send_messages
   sent = self._send(message)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/backends/smtp.py",
 line 123, in _send
   message = email_message.message()
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
 line 247, in message
   msg = self._create_message(msg)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
 line 436, in _create_message
   return self._create_attachments(self._create_alternatives(msg))
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
 line 446, in _create_alternatives
   msg.attach(self._create_mime_attachment(*alternative))
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
 line 361, in _create_mime_attachment
   attachment = SafeMIMEText(content, subtype, encoding)
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
 line 159, in __init__
   MIMEText.__init__(self, _text, _subtype=_subtype, _charset=_charset)
 File "/usr/lib64/python3.6/email/mime/text.py", in __init__

 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
 line 169, in set_payload
   for line in payload.splitlines()
 File 
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
 line 169, in <genexpr>
   for line in payload.splitlines()

Exception Type: UnicodeEncodeError at 
/api/experiment-storage/CDE-Extract_on_Dec_5,_2022_2:59_PM_0f641c8b-63ac-4281-981e-176ef47c820d/ARCHIVE/ScrapeData/completed
Exception Value: 'utf-8' codec can't encode characters in position 70-72: 
surrogates not allowed

{code}

There are a couple of different issues here. First, the Django server thinks the
file system encoding is 'ascii'. That can be fixed by setting the language and
locale of the Python process to en_US.UTF-8 with the lang and locale options of
the WSGIDaemonProcess directive
(https://modwsgi.readthedocs.io/en/master/configuration-directives/WSGIDaemonProcess.html?highlight=lang#wsgidaemonprocess).

The second issue is on the API server. The replica_catalog tables use the
latin1 character set, so they cannot store file names or paths that contain
Unicode characters outside latin1. This can be fixed by converting these
tables to the 'utf8' character set.




--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to