Github user kaknikhil commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/230#discussion_r165511625
  
    --- Diff: src/ports/postgres/modules/sample/balance_sample.py_in ---
    @@ -0,0 +1,748 @@
    +# coding=utf-8
    +#
    +# Licensed to the Apache Software Foundation (ASF) under one
    +# or more contributor license agreements.  See the NOTICE file
    +# distributed with this work for additional information
    +# regarding copyright ownership.  The ASF licenses this file
    +# to you under the Apache License, Version 2.0 (the
    +# "License"); you may not use this file EXCEPT in compliance
    +# with the License.  You may obtain a copy of the License at
    +#
    +#   http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing,
    +# software distributed under the License is distributed on an
    +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    +# KIND, either express or implied.  See the License for the
    +# specific language governing permissions and limitations
    +# under the License.
    +
    +m4_changequote(`<!', `!>')
    +
    +import math
    +
    +if __name__ != "__main__":
    +    import plpy
    +    from utilities.control import MinWarning
    +    from utilities.utilities import _assert
    +    from utilities.utilities import extract_keyvalue_params
    +    from utilities.utilities import unique_string
    +    from utilities.validate_args import columns_exist_in_table
    +    from utilities.validate_args import get_cols
    +    from utilities.validate_args import table_exists
    +    from utilities.validate_args import table_is_empty
    +else:
    +    # Used only for Unit Testing
    +    # FIXME: repeating a function from utilities that is needed by the 
unit test.
    +    # This should be removed once a unittest framework in used for testing.
    +    import random
    +    import time
    +
    +    def unique_string(desp='', **kwargs):
    +        """
    +        Generate random remporary names for temp table and other names.
    +        It has a SQL interface so both SQL and Python functions can call 
it.
    +        """
    +        r1 = random.randint(1, 100000000)
    +        r2 = int(time.time())
    +        r3 = int(time.time()) % random.randint(1, 100000000)
    +        u_string = "__madlib_temp_" + desp + str(r1) + "_" + str(r2) + "_" 
+ str(r3) + "__"
    +        return u_string
    +# 
------------------------------------------------------------------------------
    +
    +UNIFORM = 'uniform'
    +UNDERSAMPLE = 'undersample'
    +OVERSAMPLE = 'oversample'
    +NOSAMPLE = 'nosample'
    +
    +NEW_ID_COLUMN = '__madlib_id__'
    +NULL_IDENTIFIER = '__madlib_null_id__'
    +
    +def _get_frequency_distribution(source_table, class_col):
    +    """ Returns a dict containing the number of rows associated with each 
class
    --- End diff --
    
    Minor suggestion: should we add that `None` can also be a key in this 
dictionary


---

Reply via email to