Hi,
I guess I introduced a further bug in the patch I sent before. This fix
should now work.
Cheers,
Hendrik
-------- Original-Nachricht --------
Betreff: Bug in ibis::bundle when using groupby
Datum: Wed, 21 May 2014 14:54:36 +0200
Von: Hendrik Heinemann <[email protected]>
An: FastBit Users <[email protected]>
Hi John,
I just found out that there is a bug in ibis::bundle that shows up with the
attached code and dataset. I guess FastBit determines the wrong
column names when accessing the results of the aggregation.
I fixed the bug and attached the diff (diffed with rev 720) so you can
review it.
Cheers,
Hendrik
/*
export LD_LIBRARY_PATH=/opt/pdp/fastbit/lib
g++ -g -o group groupBy.cpp -lfastbit -std=c++11
*/
#include <iostream>
#include <memory>
#include <string>

#include "ibis.h"
#include "capi.h"
int main(int argc, char** argv)
{
std::string path = std::string("/home/heinemann/src/fastbit4java/tests/fastbit_data/test3");
ibis::part *partition = new ibis::part(path.c_str());
ibis::table *hans = ibis::table::create(*partition);
ibis::table *temp = hans->select("a,b,c,d,group,value","d>0");
if (temp > 0) {
std::cout << "\n+++++++++SELI1+++++++++ " << temp->nRows() << " ++++++++++++++++++++++\n";
temp->dumpNames(std::cout);
temp->dump(std::cout, 20);
std::cout << "\n++++++++++++++++++++++++++++++++++++++++\n";
}
// ibis::gVerbose = 7;
ibis::table *group = temp->groupby("group, MAX(d) as maxval, MIN(value) as minval, COUNT(*) as total");
if (group > 0) {
std::cout << "\n+++++++++SELI1+++++++++ " << group->nRows() << " ++++++++++++++++++++++\n";
group->dumpNames(std::cout);
group->dump(std::cout, 20);
std::cout << "\n++++++++++++++++++++++++++++++++++++++++\n";
}
return 0;
}
"C","abc","1",5,"A",1
"E","cba","2",4,"B",2
"D","dgh","3",7,"A",1
"G","zyk","4",3,"A",2
"F","hpl","5",6,"B",3
"B","dmr","6",2,"C",4
"A","ogl","7",1,"C",3
Index: bundle.cpp
===================================================================
--- bundle.cpp (Revision 720)
+++ bundle.cpp (Arbeitskopie)
@@ -1450,86 +1450,95 @@
/// Constructor. It creates a bundle from all rows of tbl.
ibis::bundles::bundles(const ibis::part& tbl, const ibis::selectClause& cmps,
- int dir) : bundle(cmps) {
- id = tbl.name();
- try {
- ibis::bitvector msk;
- tbl.getNullMask(msk);
- for (unsigned ic = 0; ic < comps.aggSize(); ++ ic) {
- const ibis::math::term& expr = *comps.aggExpr(ic);
- const char* cn = comps.aggName(ic);
- if (comps.getAggregator(ic) == ibis::selectClause::CNT) {
- continue;
- }
+ int dir) : bundle(cmps) {
+ id = tbl.name();
+ try {
+ ibis::bitvector msk;
+ tbl.getNullMask(msk);
- ibis::column* c = tbl.getColumn(cn);
- if (c == 0 && expr.termType() == ibis::math::VARIABLE) {
- c = tbl.getColumn(static_cast<const ibis::math::variable&>
- (expr).variableName());
- }
- if (c == 0) {
- clear();
- LOGGER(ibis::gVerbose >= 0)
- << "Warning -- bundles(" << tbl.name() << ", "
- << comps << ") can not find a column named "
- << (cn ? cn : "");
- throw "bundle1::ctor can not find a column name";
- }
+ for (unsigned ic = 0; ic < cmps.aggSize(); ++ ic) {
+ const ibis::math::term& expr = *cmps.aggExpr(ic);
+ const char* cn = cmps.aggName(ic);
+ if (cmps.getAggregator(ic) == ibis::selectClause::CNT) {
+ continue;
+ }
- LOGGER(ibis::gVerbose > 6)
- << "bundles::ctor is to start a colValues for \""
- << *(comps.aggExpr(ic)) << "\" as cols[" << cols.size() << ']';
- ibis::colValues* cv = 0;
- switch (comps.getAggregator(ic)) {
- case ibis::selectClause::AVG:
- case ibis::selectClause::SUM:
- case ibis::selectClause::VARPOP:
- case ibis::selectClause::VARSAMP:
- case ibis::selectClause::STDPOP:
- case ibis::selectClause::STDSAMP:
- cv = new ibis::colDoubles(c, msk);
- break;
- case ibis::selectClause::CONCAT:
- cv = new ibis::colStrings(c, msk);
- break;
- default:
- cv = ibis::colValues::create(c, msk);
- break;
- }
- if (cv != 0) {
- cols.push_back(cv);
- aggr.push_back(comps.getAggregator(ic));
- LOGGER(ibis::gVerbose > 2)
- << "bundles::ctor created a colValues for \""
- << *(comps.aggExpr(ic)) << "\" as cols[" << cols.size()
- << "] with size " << cv->size();
- }
- else {
- LOGGER(ibis::gVerbose > 0)
- << "Warning -- bundles(" << tbl.name() << ", " << comps
- << ") failed to create an in-memory column for \""
- << *(comps.aggExpr(ic)) << '"';
- }
- }
+ // Look the column up by the underlying variable name first, but only
+ // when the expression really is a plain variable; the cast is invalid
+ // for any other term type. Fall back to the aggregate name otherwise.
+ const char* varname = 0;
+ ibis::column* c = 0;
+ if (expr.termType() == ibis::math::VARIABLE) {
+ varname = static_cast<const ibis::math::variable&>(expr).variableName();
+ c = tbl.getColumn(varname);
+ }
+ if (c == 0) {
+ varname = cn;
+ c = tbl.getColumn(varname);
+ }
- if (cols.size() > 0)
- sort(dir);
+ if (c == 0) {
+ clear();
+ LOGGER(ibis::gVerbose >= 0)
+ << "Warning -- bundles(" << tbl.name() << ", "
+ << cmps << ") can not find a column named "
+ << (varname ? varname : "");
+ throw "bundle1::ctor can not find a column name";
+ }
- if (ibis::gVerbose > 5) {
- ibis::util::logger lg;
- lg() << "bundles -- generated the bundle for \"" << *comps
- << "\"\n";
- if ((1U << ibis::gVerbose) > cols.size() || ibis::gVerbose > 30)
- print(lg());
- }
+ LOGGER(ibis::gVerbose > 6)
+ << "bundles::ctor is to start a colValues for \""
+ << *(cmps.aggExpr(ic)) << "\" as cols[" << cols.size() << ']';
+ ibis::colValues* cv = 0;
+ switch (cmps.getAggregator(ic)) {
+ case ibis::selectClause::AVG:
+ case ibis::selectClause::SUM:
+ case ibis::selectClause::VARPOP:
+ case ibis::selectClause::VARSAMP:
+ case ibis::selectClause::STDPOP:
+ case ibis::selectClause::STDSAMP:
+ cv = new ibis::colDoubles(c, msk);
+ break;
+ case ibis::selectClause::CONCAT:
+ cv = new ibis::colStrings(c, msk);
+ break;
+ default:
+ cv = ibis::colValues::create(c, msk);
+ break;
+ }
+ if (cv != 0) {
+ cols.push_back(cv);
+ aggr.push_back(cmps.getAggregator(ic));
+ LOGGER(ibis::gVerbose > 2)
+ << "bundles::ctor created a colValues for \""
+ << *(cmps.aggExpr(ic)) << "\" as cols[" << cols.size()
+ << "] with size " << cv->size();
+ }
+ else {
+ LOGGER(ibis::gVerbose > 0)
+ << "Warning -- bundles(" << tbl.name() << ", " << cmps
+ << ") failed to create an in-memory column for \""
+ << *(cmps.aggExpr(ic)) << '"';
+ }
}
- catch (...) {
- LOGGER(ibis::gVerbose >= 0)
- << "Error -- bundles::ctor received an exception, "
- "start cleaning up";
- clear();
- throw; // rethrow the exception
+
+ if (cols.size() > 0)
+ sort(dir);
+
+ if (ibis::gVerbose > 5) {
+ ibis::util::logger lg;
+ lg() << "bundles -- generated the bundle for \"" << *cmps
+ << "\"\n";
+ if ((1U << ibis::gVerbose) > cols.size() || ibis::gVerbose > 30)
+ print(lg());
}
+ }
+ catch (...) {
+ LOGGER(ibis::gVerbose >= 0)
+ << "Error -- bundles::ctor received an exception, "
+ "start cleaning up";
+ clear();
+ throw; // rethrow the exception
+ }
} // ibis::bundles::bundles
/// Print out the bundles without RIDs.
_______________________________________________
FastBit-users mailing list
[email protected]
https://hpcrdm.lbl.gov/cgi-bin/mailman/listinfo/fastbit-users