[
https://issues.apache.org/jira/browse/AVRO-2014?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16649704#comment-16649704
]
ASF GitHub Bot commented on AVRO-2014:
--------------------------------------
thiru-apache closed pull request #270: AVRO-2014 Add support for custom streams for the DataFile interfaces.
URL: https://github.com/apache/avro/pull/270
This is a PR merged from a forked repository. As GitHub hides the original
diff on merge, it is displayed below for the sake of provenance:
diff --git a/lang/c++/api/DataFile.hh b/lang/c++/api/DataFile.hh
index bff309770..6206bff09 100644
--- a/lang/c++/api/DataFile.hh
+++ b/lang/c++/api/DataFile.hh
@@ -85,6 +85,11 @@ class AVRO_DECL DataFileWriterBase : boost::noncopyable {
*/
void sync();
+ /**
+ * Shared constructor portion since we aren't using C++11
+ */
+ void init(const ValidSchema &schema, size_t syncInterval, const Codec &codec);
+
public:
/**
* Returns the current encoder for this writer.
@@ -108,6 +113,8 @@ public:
*/
DataFileWriterBase(const char* filename, const ValidSchema& schema,
size_t syncInterval, Codec codec = NULL_CODEC);
+ DataFileWriterBase(std::auto_ptr<OutputStream> outputStream,
+ const ValidSchema& schema, size_t syncInterval, Codec codec);
~DataFileWriterBase();
/**
@@ -141,6 +148,10 @@ public:
size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC) :
base_(new DataFileWriterBase(filename, schema, syncInterval, codec)) {
}
+ DataFileWriter(std::auto_ptr<OutputStream> outputStream, const ValidSchema& schema,
+ size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC) :
+ base_(new DataFileWriterBase(outputStream, schema, syncInterval, codec)) { }
+
/**
* Writes the given piece of data into the file.
*/
@@ -218,6 +229,8 @@ public:
*/
DataFileReaderBase(const char* filename);
+ DataFileReaderBase(std::auto_ptr<InputStream> inputStream);
+
/**
* Initializes the reader so that the reader and writer schemas
* are the same.
@@ -265,6 +278,11 @@ public:
base_->init(readerSchema);
}
+ DataFileReader(std::auto_ptr<InputStream> inputStream, const ValidSchema& readerSchema) :
+ base_(new DataFileReaderBase(inputStream)) {
+ base_->init(readerSchema);
+ }
+
/**
* Constructs the reader for the given file and the reader is
* expected to use the schema that is used with data.
@@ -274,6 +292,10 @@ public:
base_->init();
}
+ DataFileReader(std::auto_ptr<InputStream> inputStream) :
+ base_(new DataFileReaderBase(inputStream)) {
+ base_->init();
+ }
/**
* Constructs a reader using the reader base. This form of constructor
diff --git a/lang/c++/impl/DataFile.cc b/lang/c++/impl/DataFile.cc
index ee8f62c6a..1949ce0b3 100644
--- a/lang/c++/impl/DataFile.cc
+++ b/lang/c++/impl/DataFile.cc
@@ -72,19 +72,42 @@ static string toString(const ValidSchema& schema)
return oss.str();
}
-DataFileWriterBase::DataFileWriterBase(const char* filename,
- const ValidSchema& schema, size_t syncInterval, Codec codec) :
- filename_(filename), schema_(schema), encoderPtr_(binaryEncoder()),
+
+DataFileWriterBase::DataFileWriterBase(const char* filename, const ValidSchema& schema, size_t syncInterval,
+ Codec codec) :
+ filename_(filename),
+ schema_(schema),
+ encoderPtr_(binaryEncoder()),
syncInterval_(syncInterval),
codec_(codec),
stream_(fileOutputStream(filename)),
buffer_(memoryOutputStream()),
- sync_(makeSync()), objectCount_(0)
+ sync_(makeSync()),
+ objectCount_(0)
+{
+ init(schema, syncInterval, codec);
+}
+
+DataFileWriterBase::DataFileWriterBase(std::auto_ptr<OutputStream> outputStream,
+ const ValidSchema& schema, size_t syncInterval, Codec codec) :
+ filename_(nullptr),
+ schema_(schema),
+ encoderPtr_(binaryEncoder()),
+ syncInterval_(syncInterval),
+ codec_(codec),
+ stream_(std::move(outputStream)),
+ buffer_(memoryOutputStream()),
+ sync_(makeSync()),
+ objectCount_(0)
{
+ init(schema, syncInterval, codec);
+}
+
+void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, const Codec &codec) {
if (syncInterval < minSyncInterval || syncInterval > maxSyncInterval) {
throw Exception(boost::format("Invalid sync interval: %1%. "
"Should be between %2% and %3%") % syncInterval %
- minSyncInterval % maxSyncInterval);
+ minSyncInterval % maxSyncInterval);
}
setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC);
@@ -105,6 +128,7 @@ DataFileWriterBase::DataFileWriterBase(const char* filename,
encoderPtr_->init(*buffer_);
}
+
DataFileWriterBase::~DataFileWriterBase()
{
if (stream_.get()) {
@@ -255,6 +279,13 @@ DataFileReaderBase::DataFileReaderBase(const char* filename) :
readHeader();
}
+DataFileReaderBase::DataFileReaderBase(std::auto_ptr<InputStream> inputStream) :
+ filename_(nullptr), stream_(inputStream),
+ decoder_(binaryDecoder()), objectCount_(0), eof_(false)
+{
+ readHeader();
+}
+
void DataFileReaderBase::init()
{
readerSchema_ = dataSchema_;
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> C++ DataFile support custom stream
> ----------------------------------
>
> Key: AVRO-2014
> URL: https://issues.apache.org/jira/browse/AVRO-2014
> Project: Avro
> Issue Type: Improvement
> Components: c++
> Reporter: Zoyo Pei
> Priority: Major
>
> It is recommended that C++ DataFile support custom streams, e.g.,
> DataFileWriter(OutputStream *stream, ...);
> So we can write into hdfs like this
> auto writer = new DataFileWriter<T>(new HDFSOutputStream(...), ...);
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)