Author: cutting
Date: Tue Apr 5 18:14:02 2011
New Revision: 1089150
URL: http://svn.apache.org/viewvc?rev=1089150&view=rev
Log:
Merge r1089128, r1089131 from trunk to 1.5 branch. Fixes AVRO-794, AVRO-795.
Removed:
avro/branches/branch-1.5/lang/c++/Makefile.am
Modified:
avro/branches/branch-1.5/ (props changed)
avro/branches/branch-1.5/CHANGES.txt
avro/branches/branch-1.5/lang/c++/api/DataFile.hh
avro/branches/branch-1.5/lang/c++/impl/DataFile.cc
avro/branches/branch-1.5/lang/c++/test/DataFileTests.cc
Propchange: avro/branches/branch-1.5/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Apr 5 18:14:02 2011
@@ -1 +1 @@
-/avro/trunk:1075938,1075993,1078917,1079055,1079060,1079063,1083246,1085921,1086727,1086730,1086866,1087076,1087129,1087136,1087439-1087440,1087463,1087472,1087792
+/avro/trunk:1075938,1075993,1078917,1079055,1079060,1079063,1083246,1085921,1086727,1086730,1086866,1087076,1087129,1087136,1087439-1087440,1087463,1087472,1087792,1089128,1089131
Modified: avro/branches/branch-1.5/CHANGES.txt
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.5/CHANGES.txt?rev=1089150&r1=1089149&r2=1089150&view=diff
==============================================================================
--- avro/branches/branch-1.5/CHANGES.txt (original)
+++ avro/branches/branch-1.5/CHANGES.txt Tue Apr 5 18:14:02 2011
@@ -25,6 +25,10 @@ Avro 1.5.1 (unreleased)
AVRO-709. Python: Optimize property lookup. (Justin Azoff via cutting)
+ AVRO-794. Makefile.am is no longer required in C++. (thiru)
+
+ AVRO-795. C++ Datafile reader makes it hard to build adaptive clients.
(thiru)
+
BUG FIXES
AVRO-786. Java: Fix equals() to work on objects containing maps. (cutting)
Modified: avro/branches/branch-1.5/lang/c++/api/DataFile.hh
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.5/lang/c%2B%2B/api/DataFile.hh?rev=1089150&r1=1089149&r2=1089150&view=diff
==============================================================================
--- avro/branches/branch-1.5/lang/c++/api/DataFile.hh (original)
+++ avro/branches/branch-1.5/lang/c++/api/DataFile.hh Tue Apr 5 18:14:02 2011
@@ -67,7 +67,7 @@ class DataFileWriterBase : boost::noncop
*/
void sync();
-protected:
+public:
Encoder& encoder() const { return *encoderPtr_; }
void syncIfNeeded();
@@ -75,7 +75,6 @@ protected:
void incr() {
++objectCount_;
}
-public:
/**
* Constructs a data file writer with the given sync interval and name.
*/
@@ -104,23 +103,40 @@ public:
* An Avro datafile that can store objects of type T.
*/
template <typename T>
-class DataFileWriter : public DataFileWriterBase {
+class DataFileWriter : boost::noncopyable {
+ std::auto_ptr<DataFileWriterBase> base_;
public:
/**
* Constructs a new data file.
*/
DataFileWriter(const char* filename, const ValidSchema& schema,
size_t syncInterval = 16 * 1024) :
- DataFileWriterBase(filename, schema, syncInterval) { }
+ base_(new DataFileWriterBase(filename, schema, syncInterval)) { }
/**
* Writes the given piece of data into the file.
*/
void write(const T& datum) {
- syncIfNeeded();
- avro::encode(encoder(), datum);
- incr();
+ base_->syncIfNeeded();
+ avro::encode(base_->encoder(), datum);
+ base_->incr();
}
+
+ /**
+ * Closes the current file. Once closed this datafile object cannot be
+ * used for writing any more.
+ */
+ void close() { base_->close(); }
+
+ /**
+ * Returns the schema for this data file.
+ */
+ const ValidSchema& schema() const { return base_->schema(); }
+
+ /**
+ * Flushes any unwritten data into the file.
+ */
+ void flush() { base_->flush(); }
};
class DataFileReaderBase : boost::noncopyable {
@@ -140,7 +156,8 @@ class DataFileReaderBase : boost::noncop
void readHeader();
-protected:
+ bool readDataBlock();
+public:
Decoder& decoder() { return *dataDecoder_; }
/**
@@ -149,20 +166,29 @@ protected:
bool hasMore();
void decr() { --objectCount_; }
- bool readDataBlock();
-public:
/**
* Constructs the reader for the given file and the reader is
- * expected to use the given schema.
+ * expected to use the schema that is used with data.
+ * One of the init() overloads must be called exactly once after
+ * constructing the DataFileReaderBase object.
*/
- DataFileReaderBase(const char* filename, const ValidSchema& readerSchema);
+ DataFileReaderBase(const char* filename);
/**
- * Constructs the reader for the given file and the reader is
- * expected to use the schema that is used with data.
+ * Initializes the reader so that the reader and writer schemas
+ * are the same.
*/
- DataFileReaderBase(const char* filename);
+ void init();
+
+ /**
+ * Initializes the reader to read objects according to the given
+ * schema. This gives an opportunity for the reader to see the schema
+ * in the data file before deciding the right schema to use for reading.
+ * This must be called exactly once after constructing the
+ * DataFileReaderBase object.
+ */
+ void init(const ValidSchema& readerSchema);
/**
* Returns the schema for this object.
@@ -181,29 +207,78 @@ public:
};
template <typename T>
-class DataFileReader : public DataFileReaderBase {
+class DataFileReader : boost::noncopyable {
+ std::auto_ptr<DataFileReaderBase> base_;
public:
/**
* Constructs the reader for the given file and the reader is
* expected to use the given schema.
*/
DataFileReader(const char* filename, const ValidSchema& readerSchema) :
- DataFileReaderBase(filename, readerSchema) { }
+ base_(new DataFileReaderBase(filename)) {
+ base_->init(readerSchema);
+ }
/**
* Constructs the reader for the given file and the reader is
* expected to use the schema that is used with data.
*/
- DataFileReader(const char* filename) : DataFileReaderBase(filename) { }
+ DataFileReader(const char* filename) :
+ base_(new DataFileReaderBase(filename)) {
+ base_->init();
+ }
+
+
+ /**
+ * Constructs a reader using the reader base. This form of constructor
+ * allows the user to examine the schema of a given file and then
+ * decide to use the right type of data to be deserialized. Without this
+ * the user must know the type of data for the template _before_
+ * he knows the schema within the file.
+ * The schema present in the data file will be used for reading
+ * from this reader.
+ */
+ DataFileReader(std::auto_ptr<DataFileReaderBase> base) : base_(base) {
+ base_->init();
+ }
+
+ /**
+ * Constructs a reader using the reader base. This form of constructor
+ * allows the user to examine the schema of a given file and then
+ * decide to use the right type of data to be deserialized. Without this
+ * the user must know the type of data for the template _before_
+ * he knows the schema within the file.
+ * The argument readerSchema will be used for reading
+ * from this reader.
+ */
+ DataFileReader(std::auto_ptr<DataFileReaderBase> base,
+ const ValidSchema& readerSchema) : base_(base) {
+ base_->init(readerSchema);
+ }
bool read(T& datum) {
- if (hasMore()) {
- decr();
- avro::decode(decoder(), datum);
+ if (base_->hasMore()) {
+ base_->decr();
+ avro::decode(base_->decoder(), datum);
return true;
}
return false;
}
+
+ /**
+ * Returns the schema for this object.
+ */
+ const ValidSchema& readerSchema() { return base_->readerSchema(); }
+
+ /**
+ * Returns the schema stored with the data file.
+ */
+ const ValidSchema& dataSchema() { return base_->dataSchema(); }
+
+ /**
+ * Closes the reader. No further operation is possible on this reader.
+ */
+ void close() { return base_->close(); }
};
} // namespace avro
Modified: avro/branches/branch-1.5/lang/c++/impl/DataFile.cc
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.5/lang/c%2B%2B/impl/DataFile.cc?rev=1089150&r1=1089149&r2=1089150&view=diff
==============================================================================
--- avro/branches/branch-1.5/lang/c++/impl/DataFile.cc (original)
+++ avro/branches/branch-1.5/lang/c++/impl/DataFile.cc Tue Apr 5 18:14:02 2011
@@ -149,19 +149,27 @@ void DataFileWriterBase::setMetadata(con
metadata_[key] = v;
}
-DataFileReaderBase::DataFileReaderBase(const char* filename,
- const ValidSchema& schema) :
+DataFileReaderBase::DataFileReaderBase(const char* filename) :
filename_(filename), stream_(fileInputStream(filename)),
- decoder_(binaryDecoder()), objectCount_(0), readerSchema_(schema)
+ decoder_(binaryDecoder()), objectCount_(0)
{
readHeader();
}
-DataFileReaderBase::DataFileReaderBase(const char* filename) :
- filename_(filename), stream_(fileInputStream(filename)),
- decoder_(binaryDecoder()), objectCount_(0)
+void DataFileReaderBase::init()
{
- readHeader();
+ readerSchema_ = dataSchema_;
+ dataDecoder_ = binaryDecoder();
+ readDataBlock();
+}
+
+void DataFileReaderBase::init(const ValidSchema& readerSchema)
+{
+ readerSchema_ = readerSchema;
+ dataDecoder_ = (toString(readerSchema_) != toString(dataSchema_)) ?
+ resolvingDecoder(dataSchema_, readerSchema_, binaryDecoder()) :
+ binaryDecoder();
+ readDataBlock();
}
static void drain(InputStream& in)
@@ -310,12 +318,7 @@ void DataFileReaderBase::readHeader()
throw Exception("Unknown codec in data file: " + toString(it->second));
}
- dataDecoder_ = (toString(readerSchema_) != toString(dataSchema_)) ?
- resolvingDecoder(dataSchema_, readerSchema_, binaryDecoder()) :
- binaryDecoder();
-
avro::decode(*decoder_, sync_);
- readDataBlock();
}
} // namespace avro
Modified: avro/branches/branch-1.5/lang/c++/test/DataFileTests.cc
URL:
http://svn.apache.org/viewvc/avro/branches/branch-1.5/lang/c%2B%2B/test/DataFileTests.cc?rev=1089150&r1=1089149&r2=1089150&view=diff
==============================================================================
--- avro/branches/branch-1.5/lang/c++/test/DataFileTests.cc (original)
+++ avro/branches/branch-1.5/lang/c++/test/DataFileTests.cc Tue Apr 5 18:14:02 2011
@@ -33,6 +33,7 @@ using std::pair;
using std::vector;
using std::map;
using std::istringstream;
+using std::ostringstream;
using boost::array;
using boost::shared_ptr;
@@ -55,15 +56,17 @@ struct Integer {
int64_t re;
Integer() : re(0) { }
Integer(int64_t r) : re(r) { }
-
- bool operator==(const Integer& oth) const {
- return re == oth.re;
- }
};
typedef Complex<int64_t> ComplexInteger;
typedef Complex<double> ComplexDouble;
+struct Double {
+ double re;
+ Double() : re(0) { }
+ Double(double r) : re(r) { }
+};
+
namespace avro {
template <typename T> struct codec_traits<Complex<T> > {
@@ -84,6 +87,12 @@ template <> struct codec_traits<Integer>
}
};
+template <> struct codec_traits<Double> {
+ static void decode(Decoder& d, Double& c) {
+ avro::decode(d, c.re);
+ }
+};
+
}
static ValidSchema makeValidSchema(const char* schema)
@@ -108,6 +117,18 @@ static const char dsch[] = "{\"type\": \
"{\"name\":\"re\", \"type\":\"double\"},"
"{\"name\":\"im\", \"type\":\"double\"}"
"]}";
+static const char dblsch[] = "{\"type\": \"record\","
+ "\"name\":\"ComplexDouble\", \"fields\": ["
+ "{\"name\":\"re\", \"type\":\"double\"}"
+ "]}";
+
+
+string toString(const ValidSchema& s)
+{
+ ostringstream oss;
+ s.toJson(oss);
+ return oss.str();
+}
class DataFileTest {
const char* filename;
@@ -263,6 +284,52 @@ public:
BOOST_CHECK_EQUAL(i, 1000);
}
+ /**
+ * Constructs the DataFileReader in two steps.
+ */
+ void testReadDoubleTwoStep() {
+ auto_ptr<avro::DataFileReaderBase>
+ base(new avro::DataFileReaderBase(filename));
+ avro::DataFileReader<ComplexDouble> df(base);
+ BOOST_CHECK_EQUAL(toString(writerSchema), toString(df.readerSchema()));
+ BOOST_CHECK_EQUAL(toString(writerSchema), toString(df.dataSchema()));
+ int i = 0;
+ ComplexDouble ci;
+ double re = 3.0;
+ double im = 5.0;
+ while (df.read(ci)) {
+ BOOST_CHECK_CLOSE(ci.re, re, 0.0001);
+ BOOST_CHECK_CLOSE(ci.im, im, 0.0001);
+ re += (im - 0.7);
+ im += 3.1;
+ ++i;
+ }
+ BOOST_CHECK_EQUAL(i, 1000);
+ }
+
+ /**
+ * Constructs the DataFileReader in two steps using a different
+ * reader schema.
+ */
+ void testReadDoubleTwoStepProject() {
+ auto_ptr<avro::DataFileReaderBase>
+ base(new avro::DataFileReaderBase(filename));
+ avro::DataFileReader<Double> df(base, readerSchema);
+
+ BOOST_CHECK_EQUAL(toString(readerSchema), toString(df.readerSchema()));
+ BOOST_CHECK_EQUAL(toString(writerSchema), toString(df.dataSchema()));
+ int i = 0;
+ Double ci;
+ double re = 3.0;
+ double im = 5.0;
+ while (df.read(ci)) {
+ BOOST_CHECK_CLOSE(ci.re, re, 0.0001);
+ re += (im - 0.7);
+ im += 3.1;
+ ++i;
+ }
+ BOOST_CHECK_EQUAL(i, 1000);
+ }
};
void addReaderTests(test_suite* ts, const shared_ptr<DataFileTest>& t)
@@ -288,9 +355,12 @@ init_unit_test_suite( int argc, char* ar
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testWriteGeneric, t2));
addReaderTests(ts, t2);
- shared_ptr<DataFileTest> t3(new DataFileTest("test3.df", dsch, dsch));
+ shared_ptr<DataFileTest> t3(new DataFileTest("test3.df", dsch, dblsch));
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testWriteDouble, t3));
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testReadDouble, t3));
+ ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testReadDoubleTwoStep, t3));
+ ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testReadDoubleTwoStepProject,
+ t3));
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t3));
return ts;
}