Source: q2-types
Version: 2019.10.0-1
Control: block 950430 by -1

With pandas 1.0 from experimental:

=================================== FAILURES ===================================
____________ TestTaxonomyFormatsToDataFrame.test_duplicate_columns _____________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_duplicate_columns>

    def test_duplicate_columns(self):
        with self.assertRaisesRegex(ValueError, 'duplicated: Column1'):
            _taxonomy_formats_to_dataframe(
                self.get_data_path(os.path.join(
>                   'taxonomy', 'duplicate-columns.tsv')))

q2_types/feature_data/tests/test_transformer.py:355:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    def _taxonomy_formats_to_dataframe(filepath, has_header=None):
        """Read any of the three taxonomy formats into a dataframe.

        Parameters
        ----------
        filepath : str
            The taxonomy-formatted file to be read.
        has_header : bool, optional
If `None`, autodetect the header: only `Feature ID<tab>Taxon` is recognized, optionally followed by other columns. If `True`, the file must have the expected header described above otherwise an error is
            raised. If `False`, the file is read without assuming a header.

        Returns
        -------
        pd.DataFrame
            Dataframe containing parsed contents of the taxonomy file. The
dataframe will have its index name set to `Feature ID` and its first column will be `Taxon`, followed by any other columns in the input
            file.

        """
# Using `dtype=object` and `set_index()` to avoid type casting/inference of
        # any columns or the index.
        df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
                         header=None, dtype=object)

        if len(df.columns) < 2:
            raise ValueError(
                "Taxonomy format requires at least two columns, found %d."
                % len(df.columns))

        if has_header and not _has_expected_header(df):
            raise ValueError(
"Taxonomy format requires a header with `Feature ID` and `Taxon` "
                "as the first two columns.")

        if has_header or (has_header is None and _has_expected_header(df)):
            # Make first row the header:
            #     https://stackoverflow.com/a/26147330/3776794
            df.columns = df.iloc[0]
            df.columns.name = None
            df = df.reindex(df.index.drop(0))
        else:
            # No header
            unnamed_columns = ['Unnamed Column %d' % (i + 1)
                               for i in range(len(df.columns[2:]))]
            df.columns = TSVTaxonomyFormat.HEADER + unnamed_columns

        df.set_index(df.columns[0], drop=True, append=False, inplace=True)

        if len(df.index) < 1:
raise ValueError("Taxonomy format requires at least one row of data.")

        if df.index.has_duplicates:
            raise ValueError(
                "Taxonomy format feature IDs must be unique. The following IDs "
                "are duplicated: %s" % ', '.join(df.index.get_duplicates()))

        if df.columns.has_duplicates:
            raise ValueError(
"Taxonomy format column names must be unique. The following "
                "column names are duplicated: %s" %
>               ', '.join(df.columns.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:89: AttributeError
______________ TestTaxonomyFormatsToDataFrame.test_duplicate_ids _______________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_duplicate_ids>

    def test_duplicate_ids(self):
        with self.assertRaisesRegex(ValueError, 'duplicated: SEQUENCE1'):
            _taxonomy_formats_to_dataframe(
                self.get_data_path(os.path.join(
>                   'taxonomy', 'duplicate-ids.tsv')))

q2_types/feature_data/tests/test_transformer.py:349:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    def _taxonomy_formats_to_dataframe(filepath, has_header=None):
        """Read any of the three taxonomy formats into a dataframe.

        Parameters
        ----------
        filepath : str
            The taxonomy-formatted file to be read.
        has_header : bool, optional
If `None`, autodetect the header: only `Feature ID<tab>Taxon` is recognized, optionally followed by other columns. If `True`, the file must have the expected header described above otherwise an error is
            raised. If `False`, the file is read without assuming a header.

        Returns
        -------
        pd.DataFrame
            Dataframe containing parsed contents of the taxonomy file. The
dataframe will have its index name set to `Feature ID` and its first column will be `Taxon`, followed by any other columns in the input
            file.

        """
# Using `dtype=object` and `set_index()` to avoid type casting/inference of
        # any columns or the index.
        df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
                         header=None, dtype=object)

        if len(df.columns) < 2:
            raise ValueError(
                "Taxonomy format requires at least two columns, found %d."
                % len(df.columns))

        if has_header and not _has_expected_header(df):
            raise ValueError(
"Taxonomy format requires a header with `Feature ID` and `Taxon` "
                "as the first two columns.")

        if has_header or (has_header is None and _has_expected_header(df)):
            # Make first row the header:
            #     https://stackoverflow.com/a/26147330/3776794
            df.columns = df.iloc[0]
            df.columns.name = None
            df = df.reindex(df.index.drop(0))
        else:
            # No header
            unnamed_columns = ['Unnamed Column %d' % (i + 1)
                               for i in range(len(df.columns[2:]))]
            df.columns = TSVTaxonomyFormat.HEADER + unnamed_columns

        df.set_index(df.columns[0], drop=True, append=False, inplace=True)

        if len(df.index) < 1:
raise ValueError("Taxonomy format requires at least one row of data.")

        if df.index.has_duplicates:
            raise ValueError(
                "Taxonomy format feature IDs must be unique. The following IDs "
>               "are duplicated: %s" % ', '.join(df.index.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:83: AttributeError
__________________ TestTaxonomyFormatsToDataFrame.test_jagged __________________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_jagged>

    def test_jagged(self):
>       with self.assertRaises(pandas.io.common.CParserError):
E       AttributeError: module 'pandas.io.common' has no attribute 'CParserError'

q2_types/feature_data/tests/test_transformer.py:341: AttributeError
___________ TestDataFrameToTSVTaxonomyFormat.test_duplicate_columns ____________

self = <q2_types.feature_data.tests.test_transformer.TestDataFrameToTSVTaxonomyFormat testMethod=test_duplicate_columns>

    def test_duplicate_columns(self):
        index = pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object)
        columns = ['Taxon', 'Taxon']
        df = pd.DataFrame([['abc', 'def'], ['ghi', 'jkl']], index=index,
                          columns=columns, dtype=object)

        with self.assertRaisesRegex(ValueError, "duplicated: Taxon"):
>           _dataframe_to_tsv_taxonomy_format(df)

q2_types/feature_data/tests/test_transformer.py:497:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    def _dataframe_to_tsv_taxonomy_format(df):
        if len(df.index) < 1:
raise ValueError("Taxonomy format requires at least one row of data.")

        if len(df.columns) < 1:
            raise ValueError(
                "Taxonomy format requires at least one column of data.")

        if df.index.name != 'Feature ID':
            raise ValueError(
                "Taxonomy format requires the dataframe index name to be "
                "`Feature ID`, found %r" % df.index.name)

        if df.columns[0] != 'Taxon':
            raise ValueError(
"Taxonomy format requires the first column name to be `Taxon`, "
                "found %r" % df.columns[0])

        if df.index.has_duplicates:
            raise ValueError(
                "Taxonomy format feature IDs must be unique. The following IDs "
                "are duplicated: %s" % ', '.join(df.index.get_duplicates()))

        if df.columns.has_duplicates:
            raise ValueError(
"Taxonomy format column names must be unique. The following "
                "column names are duplicated: %s" %
>               ', '.join(df.columns.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:126: AttributeError
_____________ TestDataFrameToTSVTaxonomyFormat.test_duplicate_ids ______________

self = <q2_types.feature_data.tests.test_transformer.TestDataFrameToTSVTaxonomyFormat testMethod=test_duplicate_ids>

    def test_duplicate_ids(self):
        index = pd.Index(['seq1', 'seq2', 'seq1'], name='Feature ID',
                         dtype=object)
        columns = ['Taxon']
        df = pd.DataFrame([['abc'], ['def'], ['ghi']], index=index,
                          columns=columns, dtype=object)

        with self.assertRaisesRegex(ValueError, "duplicated: seq1"):
>           _dataframe_to_tsv_taxonomy_format(df)

q2_types/feature_data/tests/test_transformer.py:488:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    def _dataframe_to_tsv_taxonomy_format(df):
        if len(df.index) < 1:
raise ValueError("Taxonomy format requires at least one row of data.")

        if len(df.columns) < 1:
            raise ValueError(
                "Taxonomy format requires at least one column of data.")

        if df.index.name != 'Feature ID':
            raise ValueError(
                "Taxonomy format requires the dataframe index name to be "
                "`Feature ID`, found %r" % df.index.name)

        if df.columns[0] != 'Taxon':
            raise ValueError(
"Taxonomy format requires the first column name to be `Taxon`, "
                "found %r" % df.columns[0])

        if df.index.has_duplicates:
            raise ValueError(
                "Taxonomy format feature IDs must be unique. The following IDs "
>               "are duplicated: %s" % ', '.join(df.index.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:120: AttributeError

Reply via email to