Hi,
i found a possible bug in selectClause.cpp of version 1.3.9 when using a
select-statement with
duplicate column names i.e. "v_1,v_1,v_2".
I know this is not a valid use-case for a select clause but it may lead to
a segfault so i dug into it.
Explaining the problem is somewhat tricky so i attached a simple testcase
that shows the issue with verbosity set to 5:
selectClause::fillNames -- select clause internal details:
low-level expressions (names_[2], aggr_[2], atms_[2]):
0: v_1, v_1
1: v_2, v_2
high-level expressions (xnames_[3], xtms_[3]):
0: v_1, v_1
1: v_1, v_1
2: _2, v_2
As you can see the size of internal terms-vectors in selectClause.cpp
differs
between high- and low-level expressions.
In method ibis::selectClause::fillNames() a loop iterates over xtms_ vector
and accesses aggr_ in line 197:
// fill the external names
for (uint32_t j = 0; j < xtms_.size(); ++ j) {
if (xnames_[j].empty() && aggr_[j] == ibis::selectClause::NIL_AGGR &&
The test against "aggr_[j] == ibis::selectClause::NIL_AGGR" will result
in an access out of range
in the last iteration of the for-loop due to different sizes of aggr_
and xtms_.
On initial load out of range access at aggr_[j] will access
null-initialized heap and so the test will
result in true and bug will not appear but a segfault may be possible.
If memory outside aggr_ allocation is dirty and contains -1 test will
result in false and leads to
renaming of the last column in select from "v_2" into "_2".
If the result of that query is stored within in-memory table by
ibis::table::select() any subsequent
access to column "v_2" will fail (that's how i came accross the issue).
To fix this i patched selectClause.cpp a bit to avoid duplicate columns
wihtin high-level expressions.
(patch is attached also)
But i'm not sure whether that's the correct place to fix it nor if it is
conceptually correct
to remove duplicate columns from the select-clause.
Could you verify the bug and if so fix it please?
Thanks alot,
Bernd
#include "ibis.h"
#include <memory>
class tester
{
public:
tester();
~tester();
void load(const char *datadir);
void query(const char *datadir, const char *where);
private:
void builtindata(const char* datadir);
};
tester::tester()
{
}
tester::~tester()
{
}
void tester::builtindata(const char *datadir)
{
ibis::table::row irow;
ibis::tablex* ta(ibis::tablex::create());
ta->addColumn("v_1", ibis::SHORT);
ta->addColumn("v_2", ibis::SHORT);
irow.clear();
irow.shortsnames.push_back("v_1");
irow.shortsvalues.push_back(1);
irow.shortsnames.push_back("v_2");
irow.shortsvalues.push_back(2);
ta->appendRow(irow);
irow.clear();
irow.shortsnames.push_back("v_1");
irow.shortsvalues.push_back(4);
irow.shortsnames.push_back("v_2");
irow.shortsvalues.push_back(3);
ta->appendRow(irow);
irow.clear();
irow.shortsnames.push_back("v_1");
irow.shortsvalues.push_back(5);
irow.shortsnames.push_back("v_2");
irow.shortsvalues.push_back(2);
ta->appendRow(irow);
ta->write(datadir);
LOGGER(ibis::gVerbose > 0) << "generated " << ta->mRows() << " rows in directory " << datadir;
}
void tester::load(const char *datadir)
{
if (datadir == 0 || *datadir == 0) {
return;
}
try {
std::auto_ptr<ibis::table> table(ibis::table::create(datadir));
if (table.get() == 0) {
LOGGER(ibis::gVerbose >= 0) << "failed to load table from " << datadir;
return;
}
if (table->nRows() == 0 || table->nColumns() == 0) {
builtindata(datadir);
table->addPartition(datadir);
}
std::auto_ptr<ibis::table> select(table->select("", "1=1"));
if (select.get() == 0) {
LOGGER(ibis::gVerbose >= 0) << "failed to select all rows from table " << table->name();
return;
}
LOGGER(select->nRows() != table->nRows() && ibis::gVerbose >= 0)
<< "expected to select " << table->nRows() << " row"
<< (table->nRows()>1?"s":"") << ", but got " << select->nRows();
} catch (...) {
builtindata(datadir);
}
}
void tester::query(const char *datadir, const char *selectString)
{
if (datadir == 0 || selectString == 0 || *datadir == 0 || *selectString == 0) {
return;
}
const char *where = "1=1";
std::auto_ptr<ibis::table> table(ibis::table::create(datadir));
if (table.get() == 0) {
LOGGER(ibis::gVerbose >= 0) << "failed to load table from " << datadir;
return;
}
if (table->name() == 0 || *(table->name()) == 0) {
LOGGER(ibis::gVerbose >= 0) << "failed to find any data records in directory " << datadir;
return;
}
if (table->nColumns() == 0) {
LOGGER(ibis::gVerbose >= 0) << "Table " << table->name() << " in " << datadir << " is empty";
return;
}
ibis::table::stringList cnames = table->columnNames();
if (cnames.empty()) {
LOGGER(ibis::gVerbose >= 0) << "failed to retrieve column names from table " << table->name() << " in " << datadir;
return;
}
ibis::util::setVerboseLevel(5);
std::auto_ptr<ibis::table> select(table->select(selectString, where));
if (select.get() == 0) {
LOGGER(ibis::gVerbose >= 0) << "failed to select \"" << selectString << "\" on table " << table->name();
return;
}
ibis::util::setVerboseLevel(0);
ibis::table::stringList tableColumns = select->columnNames();
std::string selStr(selectString);
std::istringstream sel(selStr);
std::string column;
while (std::getline(sel, column, ',')) {
if (std::find(tableColumns.begin(), tableColumns.end(), column) == tableColumns.end()) {
LOGGER(ibis::gVerbose >= 0) << "missing column \"" << column << "\" in table with columns \" " << tableColumns << "\"";
}
}
}
int main(int argc, char** argv)
{
if (argc < 2) {
printf("\nUsage:\n\t%s <datadir> [select_1 [select_2...]]\n\n", *argv);
return 0;
}
char *datadir = argv[1];
tester tests0r;
tests0r.load(datadir);
if (argc == 2) {
/* Test for "aggr_[j] == ibis::selectClause::NIL_AGGR" will access out of range when columnnames
* in select-clause are duplicated.
* On initial load out of range access at aggr_[j] will access null-initialized heap and so the test will
* result in true and bug will not appear.
* So repeat query to make memory outside of aggr_ allocation dirty and test will result in false:
*/
tests0r.query(datadir, "v_1,v_1,v_2");
tests0r.query(datadir, "v_1,v_1,v_2");
tests0r.query(datadir, "v_1,v_1,v_2");
}
for (int iarg = 2; iarg < argc; ++ iarg) {
tests0r.query(datadir, argv[iarg]);
}
return 0;
}
--- src/selectClause.cpp 2014-04-25 09:50:19.666641782 +0200
+++ src/selectClause.cpp 2014-04-25 11:16:48.373750453 +0200
@@ -444,6 +444,17 @@
/// added to xtms_.
void ibis::selectClause::addTerm(ibis::math::term *tm, const std::string* al) {
if (tm == 0) return;
+
+ if (tm->termType() == ibis::math::VARIABLE) {
+ ibis::selectClause::variable *var = dynamic_cast<ibis::selectClause::variable *>(tm);
+ if (var == 0) {
+ const char* vname = static_cast<ibis::math::variable*>(tm)->variableName();
+ if (ordered_.find(vname) != ordered_.end()) {
+ return;
+ }
+ }
+ }
+
ibis::math::term *xtm = addRecursive(tm);
if (xtm != 0) {
if (al != 0 && !al->empty())
_______________________________________________
FastBit-users mailing list
[email protected]
https://hpcrdm.lbl.gov/cgi-bin/mailman/listinfo/fastbit-users