Vikas Gupta created AIRAVATA-3703:
-------------------------------------
Summary: CLONE - Handle unicode characters in file names
Key: AIRAVATA-3703
URL: https://issues.apache.org/jira/browse/AIRAVATA-3703
Project: Airavata
Issue Type: Bug
Components: Airavata API, Django Portal
Reporter: Vikas Gupta
Assignee: Marcus Christie
Getting the following Django error when browsing experiment output files with
Unicode characters in the file names:
{code}
Traceback (most recent call last):
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/rest_framework/views.py",
line 506, in dispatch
response = handler(request, *args, **kwargs)
File
"/var/www/portals/django-seagrid/airavata-django-portal/django_airavata/apps/api/views.py",
line 1467, in get
return self._create_response(request, experiment_id, path)
File
"/var/www/portals/django-seagrid/airavata-django-portal/django_airavata/apps/api/views.py",
line 1471, in _create_response
directories, files = user_storage.list_experiment_dir(request,
experiment_id, path)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/airavata_django_portal_sdk/user_storage/api.py",
line 629, in list_experiment_dir
backend=backend, owner=experiment.userName)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/airavata_django_portal_sdk/user_storage/api.py",
line 760, in _get_data_product_uri
if user_file.exists():
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/models/query.py",
line 808, in exists
return self.query.has_results(using=self.db)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/models/sql/query.py",
line 561, in has_results
return compiler.has_results()
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/models/sql/compiler.py",
line 1145, in has_results
return bool(self.execute_sql(SINGLE))
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/models/sql/compiler.py",
line 1175, in execute_sql
cursor.execute(sql, params)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/backends/utils.py",
line 66, in execute
return self._execute_with_wrappers(sql, params, many=False,
executor=self._execute)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/backends/utils.py",
line 75, in _execute_with_wrappers
return executor(sql, params, many, context)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/backends/utils.py",
line 84, in _execute
return self.cursor.execute(sql, params)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/db/backends/mysql/base.py",
line 73, in execute
return self.cursor.execute(query, args)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/MySQLdb/cursors.py",
line 199, in execute
args = tuple(map(db.literal, args))
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/MySQLdb/connections.py",
line 280, in literal
s = self.string_literal(o.encode(self.encoding))
During handling of the above exception ('utf-8' codec can't encode characters
in position 197-199: surrogates not allowed), another exception occurred:
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/handlers/exception.py",
line 47, in inner
response = get_response(request)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/handlers/base.py",
line 181, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/views/decorators/csrf.py",
line 54, in wrapped_view
return view_func(*args, **kwargs)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/views/generic/base.py",
line 70, in view
return self.dispatch(request, *args, **kwargs)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/rest_framework/views.py",
line 509, in dispatch
response = self.handle_exception(exc)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/rest_framework/views.py",
line 466, in handle_exception
response = exception_handler(exc, context)
File
"/var/www/portals/django-seagrid/airavata-django-portal/django_airavata/apps/api/exceptions.py",
line 54, in custom_exception_handler
log.error("API exception", exc_info=exc, extra={'request':
context['request']})
File "/usr/lib64/python3.6/logging/__init__.py", line 1337, in error
self._log(ERROR, msg, args, **kwargs)
File "/usr/lib64/python3.6/logging/__init__.py", line 1444, in _log
self.handle(record)
File "/usr/lib64/python3.6/logging/__init__.py", line 1454, in handle
self.callHandlers(record)
File "/usr/lib64/python3.6/logging/__init__.py", line 1516, in callHandlers
hdlr.handle(record)
File "/usr/lib64/python3.6/logging/__init__.py", line 865, in handle
self.emit(record)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/utils/log.py",
line 122, in emit
self.send_mail(subject, message, fail_silently=True,
html_message=html_message)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/utils/log.py",
line 125, in send_mail
mail.mail_admins(subject, message, *args, connection=self.connection(),
**kwargs)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/__init__.py",
line 104, in mail_admins
mail.send(fail_silently=fail_silently)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
line 284, in send
return self.get_connection(fail_silently).send_messages([self])
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/backends/smtp.py",
line 109, in send_messages
sent = self._send(message)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/backends/smtp.py",
line 123, in _send
message = email_message.message()
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
line 247, in message
msg = self._create_message(msg)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
line 436, in _create_message
return self._create_attachments(self._create_alternatives(msg))
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
line 446, in _create_alternatives
msg.attach(self._create_mime_attachment(*alternative))
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
line 361, in _create_mime_attachment
attachment = SafeMIMEText(content, subtype, encoding)
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
line 159, in __init__
MIMEText.__init__(self, _text, _subtype=_subtype, _charset=_charset)
File "/usr/lib64/python3.6/email/mime/text.py", in __init__
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
line 169, in set_payload
for line in payload.splitlines()
File
"/var/www/portals/django-seagrid/venv/lib64/python3.6/site-packages/django/core/mail/message.py",
line 169, in <genexpr>
for line in payload.splitlines()
Exception Type: UnicodeEncodeError at
/api/experiment-storage/CDE-Extract_on_Dec_5,_2022_2:59_PM_0f641c8b-63ac-4281-981e-176ef47c820d/ARCHIVE/ScrapeData/completed
Exception Value: 'utf-8' codec can't encode characters in position 70-72:
surrogates not allowed
{code}
There are a couple of different issues here. First, the Django server thinks the
file system encoding is 'ascii'. That can be fixed by setting the language and
locale of the Python process to en_US.UTF-8 using the lang and locale options of
the WSGIDaemonProcess directive
(https://modwsgi.readthedocs.io/en/master/configuration-directives/WSGIDaemonProcess.html?highlight=lang#wsgidaemonprocess).
The second issue is on the API server. The replica_catalog tables use a
character encoding of latin1, so they are not able to store file names or paths
with Unicode characters. This can be fixed by converting these tables to the
'utf8' character set.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)