Repository: incubator-singa
Updated Branches:
  refs/heads/dev 396f9bf71 -> 833f46195


SINGA-200 - Implement Encoder and Decoder for data pre-processing

Add CMake scripts for singa io libs.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/bef1db0f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/bef1db0f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/bef1db0f

Branch: refs/heads/dev
Commit: bef1db0fc20145411c41a5d4082e25dab2e13bc8
Parents: 396f9bf
Author: jixin <[email protected]>
Authored: Mon Jun 20 22:09:58 2016 +0800
Committer: jixin <[email protected]>
Committed: Thu Jun 23 17:20:17 2016 +0800

----------------------------------------------------------------------
 include/singa/io/decoder.h           | 45 ++++++++++++++++++++
 include/singa/io/encoder.h           | 44 ++++++++++++++++++++
 include/singa/io/image2jpg_decoder.h | 40 ++++++++++++++++++
 include/singa/io/image2jpg_encoder.h | 40 ++++++++++++++++++
 src/CMakeLists.txt                   |  5 +++
 src/io/image2jpg_decoder.cc          | 54 ++++++++++++++++++++++++
 src/io/image2jpg_encoder.cc          | 69 +++++++++++++++++++++++++++++++
 src/proto/model.proto                | 23 +++++++++++
 test/singa/test_io.cc                | 65 +++++++++++++++++++++++++++++
 9 files changed, 385 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/include/singa/io/decoder.h
----------------------------------------------------------------------
diff --git a/include/singa/io/decoder.h b/include/singa/io/decoder.h
new file mode 100644
index 0000000..8330e34
--- /dev/null
+++ b/include/singa/io/decoder.h
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SINGA_IO_DECODER_H_
+#define SINGA_IO_DECODER_H_
+
+#include <vector>
+#include <string>
+#include "singa/core/tensor.h"
+
+namespace singa {
+namespace io {
+
+class Decoder {
+  public:
+    Decoder() { }
+    virtual ~Decoder() { }
+
+    /**
+     * Decode a serialized value to get tensors such as data and labels.
+     * The base implementation ignores `value` and returns an empty vector.
+     */
+    virtual std::vector<Tensor> Decode(std::string value) {
+      return std::vector<Tensor>();
+    }
+};
+
+} // namespace io
+} // namespace singa
+#endif // SINGA_IO_DECODER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/include/singa/io/encoder.h
----------------------------------------------------------------------
diff --git a/include/singa/io/encoder.h b/include/singa/io/encoder.h
new file mode 100644
index 0000000..66a2fe5
--- /dev/null
+++ b/include/singa/io/encoder.h
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SINGA_IO_ENCODER_H_
+#define SINGA_IO_ENCODER_H_
+
+#include <vector>
+#include <string>
+#include "singa/core/tensor.h"
+
+namespace singa {
+namespace io {
+
+class Encoder {
+  public:
+    Encoder() { }
+    virtual ~Encoder() { }
+
+    /**
+     * Format one sample (a list of tensors) as a string whose structure
+     * depends on the proto definition, e.g., {key, shape, label, type, ...}.
+     * The base implementation ignores `data` and returns an empty string.
+     */
+    virtual std::string Encode(std::vector<Tensor>& data) { return ""; }
+};
+
+} // namespace io
+} // namespace singa
+#endif  // SINGA_IO_ENCODER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/include/singa/io/image2jpg_decoder.h
----------------------------------------------------------------------
diff --git a/include/singa/io/image2jpg_decoder.h 
b/include/singa/io/image2jpg_decoder.h
new file mode 100644
index 0000000..6895c29
--- /dev/null
+++ b/include/singa/io/image2jpg_decoder.h
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SINGA_IO_IMAGE2JPG_DECODER_H_
+#define SINGA_IO_IMAGE2JPG_DECODER_H_
+
+#include <vector>
+#include <string>
+#include "singa/core/tensor.h"
+#include "singa/io/decoder.h"
+#include "singa/proto/model.pb.h"
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+namespace singa {
+namespace io {
+
+class Image2JPGDecoder : public Decoder {  // decodes JPEG image records
+  public:
+    std::vector<Tensor> Decode(std::string value) override;
+};
+
+} // namespace io
+} // namespace singa
+#endif // SINGA_IO_IMAGE2JPG_DECODER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/include/singa/io/image2jpg_encoder.h
----------------------------------------------------------------------
diff --git a/include/singa/io/image2jpg_encoder.h 
b/include/singa/io/image2jpg_encoder.h
new file mode 100644
index 0000000..2e94b64
--- /dev/null
+++ b/include/singa/io/image2jpg_encoder.h
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SINGA_IO_IMAGE2JPG_ENCODER_H_
+#define SINGA_IO_IMAGE2JPG_ENCODER_H_
+
+#include <vector>
+#include <string>
+#include "singa/core/tensor.h"
+#include "singa/io/encoder.h"
+#include "singa/proto/model.pb.h"
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+namespace singa {
+namespace io {
+
+class Image2JPGEncoder : public Encoder {  // encodes images as JPEG records
+  public:
+    std::string Encode(std::vector<Tensor>& data) override;
+};
+
+} // namespace io
+} // namespace singa
+#endif  // SINGA_IO_IMAGE2JPG_ENCODER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a4fa22f..90343d5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -55,6 +55,11 @@ ADD_LIBRARY(singa_model SHARED ${model_source})
 TARGET_LINK_LIBRARIES(singa_model ${SINGA_LINKER_LIBS})
 LIST(APPEND SINGA_LINKER_LIBS singa_model)
 
+AUX_SOURCE_DIRECTORY(io io_source)  # collect every source file under io/
+ADD_LIBRARY(singa_io SHARED ${io_source})
+TARGET_LINK_LIBRARIES(singa_io ${SINGA_LINKER_LIBS})
+LIST(APPEND SINGA_LINKER_LIBS singa_io)
+
 #ADD_LIBRARY(singa_layer SHARED ${LAYER_SOURCE})
 #ADD_LIBRARY(singa_model SHARED ${MODEL_SOURCE})
 #ADD_LIBRARY(singa_utils SHARED ${UTILS_SOURCE})

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/src/io/image2jpg_decoder.cc
----------------------------------------------------------------------
diff --git a/src/io/image2jpg_decoder.cc b/src/io/image2jpg_decoder.cc
new file mode 100644
index 0000000..da0c909
--- /dev/null
+++ b/src/io/image2jpg_decoder.cc
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "singa/io/image2jpg_decoder.h"
+#include <vector>
+//#include <opencv2/core/core.hpp>
+//#include <opencv2/highgui/highgui.hpp>
+//#include "opencv2/opencv.hpp"
+
+using namespace cv;
+using namespace std;
+
+namespace singa {
+
+namespace io {
+  // Parse a serialized RecordProto and return {features, labels} as kInt.
+  vector<Tensor> Image2JPGDecoder::Decode(string value) {
+    RecordProto image;
+    CHECK(image.ParseFromString(value)) << "Failed to parse RecordProto";
+    CHECK(!image.pixel().empty()) << "Record has no pixel data";
+    Shape shape(image.shape().begin(), image.shape().end());
+    // The encoder only accepts kInt tensors, so produce the same type here.
+    Tensor features(shape, kInt), labels(Shape{1}, kInt);
+
+    // imdecode takes the encoded byte stream as a flat buffer; wrapping it
+    // in a height x width Mat would describe more bytes than it contains.
+    vector<unsigned char> pixel(image.pixel().begin(), image.pixel().end());
+    Mat mat = imdecode(pixel, CV_LOAD_IMAGE_COLOR);
+    CHECK(mat.data != nullptr) << "Failed to decode image bytes";
+    // Widen each decoded byte to one int element.
+    vector<int> data(mat.datastart, mat.dataend);
+    CHECK_EQ(data.size(), features.Size());
+    features.CopyDataFromHostPtr<int>(data.data(), data.size());
+    int l[1] = {image.label()};
+    labels.CopyDataFromHostPtr<int>(l, 1);
+    return vector<Tensor>{features, labels};
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/src/io/image2jpg_encoder.cc
----------------------------------------------------------------------
diff --git a/src/io/image2jpg_encoder.cc b/src/io/image2jpg_encoder.cc
new file mode 100644
index 0000000..4a1f428
--- /dev/null
+++ b/src/io/image2jpg_encoder.cc
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "singa/io/image2jpg_encoder.h"
+//#include <opencv2/core/core.hpp>
+//#include <opencv2/highgui/highgui.hpp>
+//#include "opencv2/opencv.hpp"
+
+using namespace cv;
+using namespace std;
+
+namespace singa {
+
+namespace io{
+  // Serialize {image, label} into a RecordProto holding JPEG-encoded pixels.
+  string Image2JPGEncoder::Encode(vector<Tensor>& data) {
+    // suppose data[0]: data, data[1]: label
+    CHECK_GE(data.size(), 2u);
+    // suppose data[0] has a shape as {channel, height, width}
+    CHECK_EQ(data[0].nDim(), 3u);
+    // mat below is CV_8UC3, i.e. it stores exactly 3 bytes per pixel
+    CHECK_EQ(data[0].shape()[0], 3u);
+    if (data[0].data_type() != kInt)
+      LOG(FATAL) << "Data type is invalid for an raw image";
+    // suppose each image is attached with only one label
+    if (data[1].data_type() != kInt)
+      LOG(FATAL) << "Data type is invalid for image label";
+
+    size_t height = data[0].shape()[1];
+    size_t width = data[0].shape()[2];
+    Mat mat = Mat(height, width, CV_8UC3);
+    // Narrow each int element to one byte; a raw memcpy of the int buffer
+    // would write sizeof(int) times more bytes than mat owns.
+    const int* pixel = data[0].data<const int*>();
+    for (size_t i = 0; i < data[0].Size(); i++)
+      mat.data[i] = static_cast<uchar>(pixel[i]);
+    const int* label = data[1].data<const int*>();
+
+    vector<uchar> buff;
+    vector<int> param{CV_IMWRITE_JPEG_QUALITY, 95};  // default is 95
+    imencode(".jpg", mat, buff, param);
+    string buf(buff.begin(), buff.end());
+
+    RecordProto image;
+    image.set_label(label[0]);
+    for (size_t i = 0; i < data[0].nDim(); i++)
+      image.add_shape(data[0].shape()[i]);
+    image.set_pixel(buf);
+    string output;
+    image.SerializeToString(&output);
+    return output;
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index c06deec..77a7f75 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -51,6 +51,29 @@ message BlobProto {
   optional int32 width = 4 [default = 0];
 }
 
+/// directly copied from v0.3
+message Record {
+  enum Type {
+    // each record contains the raw image feature and its label.
+    kSingleLabelImage = 0;
+  }
+  optional Type type = 1 [default = kSingleLabelImage];
+  optional string user_type =2;
+  // image record payload; presumably for kSingleLabelImage -- TODO confirm
+  optional RecordProto image = 5;
+
+  extensions 101 to 200;
+}
+
+// rename SingleLabelImageRecord to RecordProto
+message RecordProto {
+  repeated int32 shape = 1;  // one entry per tensor dimension
+  optional int32 label = 2;  // single integer label of the sample
+  optional bytes pixel = 3;  // image bytes (JPEG when from Image2JPGEncoder)
+  repeated float data = 4 [packed = true];
+}
+/// end of copy v0.3
+
 message FillerConf {
   // The filler type, case insensitive
   optional string type = 1 [default = 'constant'];

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bef1db0f/test/singa/test_io.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_io.cc b/test/singa/test_io.cc
new file mode 100644
index 0000000..16853c4
--- /dev/null
+++ b/test/singa/test_io.cc
@@ -0,0 +1,65 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "../include/singa/io/image2jpg_encoder.h"
+#include "../include/singa/io/image2jpg_decoder.h"
+#include "gtest/gtest.h"
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+using namespace singa;
+using namespace singa::io;
+using namespace cv;
+
+TEST(Decoder, Decode) {
+  // The base Encoder/Decoder are no-ops, so test the JPEG implementations.
+  Image2JPGEncoder encoder;
+  Image2JPGDecoder decoder;
+  string path = "test/samples/test.jpeg";
+  size_t resize_height = 256;
+  size_t resize_width = 256;
+  size_t channel = 3;
+  int raw_label[] = {1};
+  Mat image = imread(path, CV_LOAD_IMAGE_COLOR);
+  ASSERT_FALSE(image.empty()) << "cannot read " << path;
+  Mat transformed;
+  Size size(resize_width, resize_height);  // cv::Size is (width, height)
+  resize(image, transformed, size);
+
+  vector<int> buff(transformed.datastart, transformed.dataend);
+  Shape shape{channel, (size_t)transformed.size().height,
+              (size_t)transformed.size().width};
+  // The encoder rejects anything but kInt tensors.
+  Tensor pixel(shape, kInt), label(Shape{1}, kInt);
+  pixel.CopyDataFromHostPtr<int>(buff.data(), buff.size());
+  label.CopyDataFromHostPtr<int>(raw_label, 1);
+  vector<Tensor> input{pixel, label};
+  vector<Tensor> output = decoder.Decode(encoder.Encode(input));
+  ASSERT_EQ(2u, output.size());
+  const int* out_pixel = output.at(0).data<const int *>();
+  const int* out_label = output.at(1).data<const int *>();
+  EXPECT_EQ(raw_label[0], out_label[0]);
+  for (size_t i = 0; i < shape.size(); i++)
+    EXPECT_EQ(shape[i], output.at(0).shape()[i]);
+  // NOTE(review): JPEG is lossy, so exact pixel equality may need a tolerance.
+  for (size_t i = 0; i < 10; i++)
+    EXPECT_EQ(buff[i], out_pixel[i]);
+}

Reply via email to