zackmeso edited a comment on issue #4114: [AIRFLOW-3259] Fix internal server 
error when displaying charts
URL: 
https://github.com/apache/incubator-airflow/pull/4114#issuecomment-433942970
 
 
   @Fokko 
   Hello. I created another PR in order to fully comply with the contribution 
guidelines.
   As for your question "Would it be possible to test this?": if you mean 
testing the behavior of `sort_values` with a unit test, I am not sure. The 
`sort` call lives inside the `chart_data` view method, which is a big chunk of 
code that does a lot of things before it renders the chart. 
   ```python
   
       @expose('/chart_data')
       @data_profiling_required
       @wwwutils.gzipped
       # @cache.cached(timeout=3600, key_prefix=wwwutils.make_cache_key)
       def chart_data(self):
           from airflow import macros
           import pandas as pd
           if conf.getboolean('core', 'secure_mode'):
               abort(404)
   
           with create_session() as session:
               chart_id = request.args.get('chart_id')
               csv = request.args.get('csv') == "true"
               chart = 
session.query(models.Chart).filter_by(id=chart_id).first()
               db = session.query(
                   models.Connection).filter_by(conn_id=chart.conn_id).first()
   
           payload = {
               "state": "ERROR",
               "error": ""
           }
   
           # Processing templated fields
           try:
               args = ast.literal_eval(chart.default_params)
               if not isinstance(args, dict):
                   raise AirflowException('Not a dict')
           except Exception:
               args = {}
               payload['error'] += (
                   "Default params is not valid, string has to evaluate as "
                   "a Python dictionary. ")
   
           request_dict = {k: request.args.get(k) for k in request.args}
           args.update(request_dict)
           args['macros'] = macros
           sandbox = ImmutableSandboxedEnvironment()
           sql = sandbox.from_string(chart.sql).render(**args)
           label = sandbox.from_string(chart.label).render(**args)
           payload['sql_html'] = Markup(highlight(
               sql,
               lexers.SqlLexer(),  # Lexer call
               HtmlFormatter(noclasses=True))
           )
           payload['label'] = label
   
           pd.set_option('display.max_colwidth', 100)
           hook = db.get_hook()
           try:
               df = hook.get_pandas_df(
                   wwwutils.limit_sql(sql, CHART_LIMIT, conn_type=db.conn_type))
               df = df.fillna(0)
           except Exception as e:
               payload['error'] += "SQL execution failed. Details: " + str(e)
   
           if csv:
               return Response(
                   response=df.to_csv(index=False),
                   status=200,
                   mimetype="application/text")
   
           if not payload['error'] and len(df) == CHART_LIMIT:
               payload['warning'] = (
                   "Data has been truncated to {0}"
                   " rows. Expect incomplete results.").format(CHART_LIMIT)
   
           if not payload['error'] and len(df) == 0:
               payload['error'] += "Empty result set. "
           elif (
                   not payload['error'] and
                   chart.sql_layout == 'series' and
                   chart.chart_type != "datatable" and
                   len(df.columns) < 3):
               payload['error'] += "SQL needs to return at least 3 columns. "
           elif (
                   not payload['error'] and
                   chart.sql_layout == 'columns' and
                   len(df.columns) < 2):
               payload['error'] += "SQL needs to return at least 2 columns. "
           elif not payload['error']:
               import numpy as np
               chart_type = chart.chart_type
   
               data = None
               if chart.show_datatable or chart_type == "datatable":
                   data = df.to_dict(orient="split")
                   data['columns'] = [{'title': c} for c in data['columns']]
                   payload['data'] = data
   
               # Trying to convert time to something Highcharts likes
               x_col = 1 if chart.sql_layout == 'series' else 0
               if chart.x_is_date:
                   try:
                       # From string to datetime
                       df[df.columns[x_col]] = pd.to_datetime(
                           df[df.columns[x_col]])
                       df[df.columns[x_col]] = df[df.columns[x_col]].apply(
                           lambda x: int(x.strftime("%s")) * 1000)
                   except Exception as e:
                       payload['error'] = "Time conversion failed"
   
               if chart_type == 'datatable':
                   payload['state'] = 'SUCCESS'
                   return wwwutils.json_response(payload)
               else:
                   if chart.sql_layout == 'series':
                       # User provides columns (series, x, y)
                       df[df.columns[2]] = df[df.columns[2]].astype(np.float)
                       df = df.pivot_table(
                           index=df.columns[1],
                           columns=df.columns[0],
                           values=df.columns[2], aggfunc=np.sum)
                   else:
                       # User provides columns (x, y, metric1, metric2, ...)
                       df.index = df[df.columns[0]]
   ```
   ```python
                       df = df.sort(df.columns[0])  # here's the code needing replacement
   ```
   ```python
                       del df[df.columns[0]]
                       for col in df.columns:
                           df[col] = df[col].astype(np.float)
   
                   df = df.fillna(0)
                   NVd3ChartClass = chart_mapping.get(chart.chart_type)
                   NVd3ChartClass = getattr(nvd3, NVd3ChartClass)
                   nvd3_chart = NVd3ChartClass(x_is_date=chart.x_is_date)
   
                   for col in df.columns:
                       nvd3_chart.add_serie(name=col, y=df[col].tolist(), 
x=df[col].index.tolist())
                   try:
                       nvd3_chart.buildcontent()
                       payload['chart_type'] = nvd3_chart.__class__.__name__
                       payload['htmlcontent'] = nvd3_chart.htmlcontent
                   except Exception as e:
                       payload['error'] = str(e)
   
               payload['state'] = 'SUCCESS'
               payload['request_dict'] = request_dict
           return wwwutils.json_response(payload)
   
   ```
   However, if left unchanged, the 500 error will persist, because the 
`DataFrame.sort` method is no longer part of pandas; `sort_values` is its 
replacement.
   Also, is there a possibility of retriggering the build? It stalled on
   > 'TOX_ENV=py27-backend_postgres-env_kubernetes KUBERNETES_VERSION=v1.9.0'
   
   then it failed.
   
   Thanks.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to