Bug#950932: q2-types: FTBFS with pandas 1.0: test failures

Rebecca N. Palmer Sat, 08 Feb 2020 06:36:46 -0800

Source: q2-types
Version: 2019.10.0-1
Control: block 950430 by -1


With pandas 1.0 from experimental:

=================================== FAILURES ===================================
____________ TestTaxonomyFormatsToDataFrame.test_duplicate_columns _____________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_duplicate_columns>


    def test_duplicate_columns(self):
        with self.assertRaisesRegex(ValueError, 'duplicated: Column1'):
            _taxonomy_formats_to_dataframe(
                self.get_data_path(os.path.join(
>                   'taxonomy', 'duplicate-columns.tsv')))

q2_types/feature_data/tests/test_transformer.py:355:

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ _ _


    def _taxonomy_formats_to_dataframe(filepath, has_header=None):
        """Read any of the three taxonomy formats into a dataframe.

        Parameters
        ----------
        filepath : str
            The taxonomy-formatted file to be read.
        has_header : bool, optional
            If `None`, autodetect the header: only `Feature ID<tab>Taxon` is
            recognized, optionally followed by other columns. If `True`, the file
            must have the expected header described above otherwise an error is
            raised. If `False`, the file is read without assuming a header.

        Returns
        -------
        pd.DataFrame
            Dataframe containing parsed contents of the taxonomy file. The
            dataframe will have its index name set to `Feature ID` and its first
            column will be `Taxon`, followed by any other columns in the input
            file.

        """

        # Using `dtype=object` and `set_index()` to avoid typecasting/inference of
        # any columns or the index.
        df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
                         header=None, dtype=object)

        if len(df.columns) < 2:
            raise ValueError(
                "Taxonomy format requires at least two columns, found %d."
                % len(df.columns))

        if has_header and not _has_expected_header(df):
            raise ValueError(
                "Taxonomy format requires a header with `Feature ID` and `Taxon` "
                "as the first two columns.")

        if has_header or (has_header is None and _has_expected_header(df)):
            # Make first row the header:
            #     https://stackoverflow.com/a/26147330/3776794
            df.columns = df.iloc[0]
            df.columns.name = None
            df = df.reindex(df.index.drop(0))
        else:
            # No header
            unnamed_columns = ['Unnamed Column %d' % (i + 1)
                               for i in range(len(df.columns[2:]))]
            df.columns = TSVTaxonomyFormat.HEADER + unnamed_columns

        df.set_index(df.columns[0], drop=True, append=False, inplace=True)

        if len(df.index) < 1:
            raise ValueError("Taxonomy format requires at least one row of data.")


        if df.index.has_duplicates:
            raise ValueError(
                "Taxonomy format feature IDs must be unique. The following IDs "
                "are duplicated: %s" % ','.join(df.index.get_duplicates()))


        if df.columns.has_duplicates:
            raise ValueError(
                "Taxonomy format column names must be unique. The following "
                "column names are duplicated: %s" %
>               ', '.join(df.columns.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:89: AttributeError

______________ TestTaxonomyFormatsToDataFrame.test_duplicate_ids _______________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_duplicate_ids>


    def test_duplicate_ids(self):
        with self.assertRaisesRegex(ValueError, 'duplicated: SEQUENCE1'):
            _taxonomy_formats_to_dataframe(
                self.get_data_path(os.path.join(
>                   'taxonomy', 'duplicate-ids.tsv')))

q2_types/feature_data/tests/test_transformer.py:349:

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ _ _


    def _taxonomy_formats_to_dataframe(filepath, has_header=None):
        """Read any of the three taxonomy formats into a dataframe.

        Parameters
        ----------
        filepath : str
            The taxonomy-formatted file to be read.
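        has_header : bool, optional
            If `None`, autodetect the header: only `Feature ID<tab>Taxon` is
            recognized, optionally followed by other columns. If `True`, the file
            must have the expected header described above otherwise an error is
            raised. If `False`, the file is read without assuming a header.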

        Returns
        -------
        pd.DataFrame
            Dataframe containing parsed contents of the taxonomy file. The
            dataframe will have its index name set to `Feature ID` and its first
            column will be `Taxon`, followed by any other columns in the input
            file.

        """

        # Using `dtype=object` and `set_index()` to avoid typecasting/inference of
        # any columns or the index.
        df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
                         header=None, dtype=object)

        if len(df.columns) < 2:
            raise ValueError(
                "Taxonomy format requires at least two columns, found %d."
                % len(df.columns))

        if has_header and not _has_expected_header(df):
            raise ValueError(
                "Taxonomy format requires a header with `Feature ID` and `Taxon` "
                "as the first two columns.")

        if has_header or (has_header is None and _has_expected_header(df)):
            # Make first row the header:
            #     https://stackoverflow.com/a/26147330/3776794
            df.columns = df.iloc[0]
            df.columns.name = None
            df = df.reindex(df.index.drop(0))
        else:
            # No header
            unnamed_columns = ['Unnamed Column %d' % (i + 1)
                               for i in range(len(df.columns[2:]))]
            df.columns = TSVTaxonomyFormat.HEADER + unnamed_columns

        df.set_index(df.columns[0], drop=True, append=False, inplace=True)

        if len(df.index) < 1:
            raise ValueError("Taxonomy format requires at least one row of data.")


        if df.index.has_duplicates:
            raise ValueError(
                "Taxonomy format feature IDs must be unique. The following IDs "
>               "are duplicated: %s" % ','.join(df.index.get_duplicates()))

E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:83: AttributeError

__________________ TestTaxonomyFormatsToDataFrame.test_jagged __________________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_jagged>


    def test_jagged(self):
>       with self.assertRaises(pandas.io.common.CParserError):
E       AttributeError: module 'pandas.io.common' has no attribute 'CParserError'


q2_types/feature_data/tests/test_transformer.py:341: AttributeError

___________ TestDataFrameToTSVTaxonomyFormat.test_duplicate_columns ____________

self = <q2_types.feature_data.tests.test_transformer.TestDataFrameToTSVTaxonomyFormat testMethod=test_duplicate_columns>


    def test_duplicate_columns(self):
        index = pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object)
        columns = ['Taxon', 'Taxon']
        df = pd.DataFrame([['abc', 'def'], ['ghi', 'jkl']], index=index,
                          columns=columns, dtype=object)

        with self.assertRaisesRegex(ValueError, "duplicated: Taxon"):
>           _dataframe_to_tsv_taxonomy_format(df)

q2_types/feature_data/tests/test_transformer.py:497:

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ _ _


    def _dataframe_to_tsv_taxonomy_format(df):
        if len(df.index) < 1:
            raise ValueError("Taxonomy format requires at least one row of data.")


        if len(df.columns) < 1:
            raise ValueError(
                "Taxonomy format requires at least one column of data.")

        if df.index.name != 'Feature ID':
            raise ValueError(
                "Taxonomy format requires the dataframe index name to be "
                "`Feature ID`, found %r" % df.index.name)

        if df.columns[0] != 'Taxon':
            raise ValueError(
                "Taxonomy format requires the first column name to be `Taxon`, "
                "found %r" % df.columns[0])

        if df.index.has_duplicates:
            raise ValueError(
                "Taxonomy format feature IDs must be unique. The following IDs "
                "are duplicated: %s" % ','.join(df.index.get_duplicates()))


        if df.columns.has_duplicates:
            raise ValueError(
                "Taxonomy format column names must be unique. The following "
                "column names are duplicated: %s" %
>               ', '.join(df.columns.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:126: AttributeError

_____________ TestDataFrameToTSVTaxonomyFormat.test_duplicate_ids ______________

self = <q2_types.feature_data.tests.test_transformer.TestDataFrameToTSVTaxonomyFormat testMethod=test_duplicate_ids>


    def test_duplicate_ids(self):
        index = pd.Index(['seq1', 'seq2', 'seq1'], name='Feature ID',
                         dtype=object)
        columns = ['Taxon']
        df = pd.DataFrame([['abc'], ['def'], ['ghi']], index=index,
                          columns=columns, dtype=object)

        with self.assertRaisesRegex(ValueError, "duplicated: seq1"):
>           _dataframe_to_tsv_taxonomy_format(df)

q2_types/feature_data/tests/test_transformer.py:488:

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ _ _


    def _dataframe_to_tsv_taxonomy_format(df):
        if len(df.index) < 1:
            raise ValueError("Taxonomy format requires at least one row of data.")


        if len(df.columns) < 1:
            raise ValueError(
                "Taxonomy format requires at least one column of data.")

        if df.index.name != 'Feature ID':
            raise ValueError(
                "Taxonomy format requires the dataframe index name to be "
                "`Feature ID`, found %r" % df.index.name)

        if df.columns[0] != 'Taxon':
            raise ValueError(
                "Taxonomy format requires the first column name to be `Taxon`, "
                "found %r" % df.columns[0])

        if df.index.has_duplicates:
            raise ValueError(
                "Taxonomy format feature IDs must be unique. The following IDs "
>               "are duplicated: %s" % ','.join(df.index.get_duplicates()))

E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:120: AttributeError

Bug#950932: q2-types: FTBFS with pandas 1.0: test failures

Reply via email to