[
https://issues.apache.org/jira/browse/MADLIB-1395?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Frank McQuillan closed MADLIB-1395.
-----------------------------------
Resolution: Fixed
https://github.com/apache/madlib/pull/460
> Term frequency and LDA - turn off notices
> -----------------------------------------
>
> Key: MADLIB-1395
> URL: https://issues.apache.org/jira/browse/MADLIB-1395
> Project: Apache MADlib
> Issue Type: Bug
> Components: Module: Utilities
> Reporter: Nikhil Kak
> Priority: Major
> Fix For: v1.17
>
>
> Turn off these notices by using a MinWarning("Error") decorator in Python.
> {code}
> madlib=# SELECT madlib.term_frequency('documents', -- input table
> madlib(# 'docid', -- document id column
> madlib(# 'words', -- vector of words in document
> madlib(# 'documents_tf', -- output documents table with term frequency
> madlib(# TRUE); -- TRUE to create vocabulary table
> NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy
> entry.
> CONTEXT: SQL statement "
> CREATE TABLE documents_tf_vocabulary AS
> SELECT (row_number() OVER (order by word))::INTEGER - 1 as wordid,
> word::TEXT
> FROM (
> SELECT distinct(words) as word
> FROM (
> SELECT unnest(words::TEXT[]) as words
> FROM documents
> ) q1
> ) q2
> "
> PL/Python function "term_frequency"
> NOTICE: One or more columns in the following table(s) do not have statistics:
> documents
> HINT: For non-partitioned tables, run analyze <table_name>(<column_list>).
> For partitioned tables, run analyze rootpartition
> <table_name>(<column_list>). See log for columns missing statistics.
> CONTEXT: SQL statement "
> CREATE TABLE documents_tf_vocabulary AS
> SELECT (row_number() OVER (order by word))::INTEGER - 1 as wordid,
> word::TEXT
> FROM (
> SELECT distinct(words) as word
> FROM (
> SELECT unnest(words::TEXT[]) as words
> FROM documents
> ) q1
> ) q2
> "
> PL/Python function "term_frequency"
> NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named
> 'docid' as the Greenplum Database data distribution key for this table.
> HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make
> sure column(s) chosen are the optimal data distribution key to minimize skew.
> CONTEXT: SQL statement "
> CREATE TABLE documents_tf(
> docid INTEGER,
> wordid INTEGER,
> count INTEGER
> )
> "
> PL/Python function "term_frequency"
> NOTICE: One or more columns in the following table(s) do not have statistics:
> documents
> HINT: For non-partitioned tables, run analyze <table_name>(<column_list>).
> For partitioned tables, run analyze rootpartition
> <table_name>(<column_list>). See log for columns missing statistics.
> CONTEXT: SQL statement "
> INSERT INTO documents_tf
> SELECT docid, w.wordid as wordid, word_count as count
> FROM (
> SELECT docid, word::TEXT, count(*) as word_count
> FROM
> (
> SELECT docid, unnest(words::TEXT[]) as word
> FROM documents
> WHERE
> docid IS NOT NULL
> ) q1
> GROUP BY docid, word
> ) q2
>
> , documents_tf_vocabulary as w
> WHERE
> q2.word = w.word
>
> "
> PL/Python function "term_frequency"
> term_frequency
> ------------------------------------------------------------------------------------------
> Term frequency output in table documents_tf, vocabulary in table
> documents_tf_vocabulary
> (1 row)
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)