Repository: marmotta Updated Branches: refs/heads/develop 4ab20b3d8 -> a165b8e49
- code cleanups, better reuse Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/a165b8e4 Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/a165b8e4 Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/a165b8e4 Branch: refs/heads/develop Commit: a165b8e494619da77697c98b814cda87654f902b Parents: 4ab20b3 Author: Sebastian Schaffert <[email protected]> Authored: Sat Feb 13 18:00:36 2016 +0100 Committer: Sebastian Schaffert <[email protected]> Committed: Sat Feb 13 18:00:36 2016 +0100 ---------------------------------------------------------------------- libraries/ostrich/backend/CMakeLists.txt | 2 +- libraries/ostrich/backend/parser/rdf_parser.cc | 26 +++- libraries/ostrich/backend/parser/rdf_parser.h | 2 + .../backend/persistence/marmotta_updatedb.cc | 12 +- .../ostrich/backend/serializer/CMakeLists.txt | 10 +- .../backend/serializer/serializer_base.cc | 16 ++- .../backend/serializer/serializer_raptor.cc | 125 +++---------------- libraries/ostrich/backend/util/raptor_util.cc | 64 ++++++++++ libraries/ostrich/backend/util/raptor_util.h | 6 +- 9 files changed, 134 insertions(+), 129 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/CMakeLists.txt b/libraries/ostrich/backend/CMakeLists.txt index 5a8f110..61156a5 100644 --- a/libraries/ostrich/backend/CMakeLists.txt +++ b/libraries/ostrich/backend/CMakeLists.txt @@ -17,7 +17,7 @@ find_package (GLog REQUIRED) find_package (Boost 1.54.0 COMPONENTS iostreams filesystem system) find_package (Tcmalloc) -#add_definitions(-DNDEBUG) +add_definitions(-DNDEBUG) if (Boost_IOSTREAMS_FOUND) message(STATUS "Enabling gzip/bzip2 support (Boost iostreams found)") http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/parser/rdf_parser.cc ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/parser/rdf_parser.cc b/libraries/ostrich/backend/parser/rdf_parser.cc index cf2dc4d..ebfdc6b 100644 --- a/libraries/ostrich/backend/parser/rdf_parser.cc +++ b/libraries/ostrich/backend/parser/rdf_parser.cc @@ -18,15 +18,18 @@ #include "rdf_parser.h" #include <raptor2/raptor2.h> #include <util/raptor_util.h> +#include <glog/logging.h> namespace marmotta { namespace parser { + Parser::Parser(const rdf::URI& baseUri, Format format) : stmt_handler([](const rdf::Statement& stmt) { }) , ns_handler([](const rdf::Namespace& ns) { }) { world = raptor_new_world(); base = raptor_new_uri(world, (unsigned char const *) baseUri.getUri().c_str()); + raptor_world_set_log_handler(world, this, raptor_error_handler); switch (format) { case RDFXML: @@ -79,15 +82,34 @@ void Parser::raptor_ns_handler(void *user_data, raptor_namespace *nspace) { (const char*)raptor_uri_as_string(raptor_namespace_get_uri(nspace)))); } +void Parser::raptor_error_handler(void *user_data, raptor_log_message* message) { + Parser* p = static_cast<Parser*>(user_data); + p->error = std::string("parse error (") + + std::to_string(message->locator->line) + ":" + + std::to_string(message->locator->column) + "): " + + message->text; + + LOG(ERROR) << p->error; +} + + void Parser::parse(std::istream &in) { if(in) { raptor_parser_parse_start(parser, base); + int status = 0; + char buffer[8192]; while (in.read(buffer, 8192)) { - raptor_parser_parse_chunk(parser, (unsigned char const *) buffer, in.gcount(), 0); + status = raptor_parser_parse_chunk(parser, (unsigned char const *) buffer, in.gcount(), 0); + if (status != 0) { + throw ParseError(error); + } + } + status = raptor_parser_parse_chunk(parser, (unsigned char const *) buffer, in.gcount(), 1); + if (status != 0) { + throw ParseError(error); } - raptor_parser_parse_chunk(parser, (unsigned char const *) buffer, in.gcount(), 1); } } http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/parser/rdf_parser.h ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/parser/rdf_parser.h b/libraries/ostrich/backend/parser/rdf_parser.h index de2ebdf..b4ff346 100644 --- a/libraries/ostrich/backend/parser/rdf_parser.h +++ b/libraries/ostrich/backend/parser/rdf_parser.h @@ -66,12 +66,14 @@ class Parser { raptor_parser* parser; raptor_world* world; raptor_uri* base; + std::string error; std::function<void(const rdf::Statement&)> stmt_handler; std::function<void(const rdf::Namespace&)> ns_handler; static void raptor_stmt_handler(void* user_data, raptor_statement* statement); static void raptor_ns_handler(void* user_data, raptor_namespace *nspace); + static void raptor_error_handler(void *user_data, raptor_log_message* message); }; class ParseError : std::exception { http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/persistence/marmotta_updatedb.cc ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/persistence/marmotta_updatedb.cc b/libraries/ostrich/backend/persistence/marmotta_updatedb.cc index b26b019..f29bef9 100644 --- a/libraries/ostrich/backend/persistence/marmotta_updatedb.cc +++ b/libraries/ostrich/backend/persistence/marmotta_updatedb.cc @@ -118,17 +118,7 @@ class MarmottaClient { } int64_t size() { - /* - ClientContext context; - google::protobuf::Int64Value result; - - Status status = stub_->Size(&context, r, &result); - if (status.ok()) { - return result.value(); - } else { - return -1; - } - */ + return db->Size(); } private: marmotta::persistence::LevelDBPersistence* db; http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/serializer/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/serializer/CMakeLists.txt b/libraries/ostrich/backend/serializer/CMakeLists.txt index 0fe9c4c..b050fb9 100644 --- a/libraries/ostrich/backend/serializer/CMakeLists.txt +++ b/libraries/ostrich/backend/serializer/CMakeLists.txt @@ -1,4 +1,10 @@ include_directories(.. ${CMAKE_CURRENT_BINARY_DIR}/..) -add_library(marmotta_serializer serializer_raptor.h serializer_raptor.cc serializer_raptor.cc serializer_proto.cc serializer_proto.h serializer_base.cc serializer_base.h serializer.cc serializer.h) -target_link_libraries(marmotta_serializer marmotta_model ${CMAKE_THREAD_LIBS_INIT} ${RAPTOR_LIBRARY}) \ No newline at end of file +add_library(marmotta_serializer + serializer_raptor.h serializer_raptor.cc + serializer_proto.cc serializer_proto.h + serializer_base.cc serializer_base.h + serializer.cc serializer.h) +target_link_libraries(marmotta_serializer + marmotta_model marmotta_raptor_util + ${CMAKE_THREAD_LIBS_INIT} ${RAPTOR_LIBRARY}) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/serializer/serializer_base.cc ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/serializer/serializer_base.cc b/libraries/ostrich/backend/serializer/serializer_base.cc index 4b3e86d..c168dae 100644 --- a/libraries/ostrich/backend/serializer/serializer_base.cc +++ b/libraries/ostrich/backend/serializer/serializer_base.cc @@ -21,6 +21,14 @@ namespace marmotta { namespace serializer { namespace { + +const std::map<std::string, rdf::URI> kDefaultNamespaces = { + {"skos", "http://www.w3.org/2004/02/skos/core#"}, + {"rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}, + {"rdfs", "http://www.w3.org/2000/01/rdf-schema#"}, + {"owl", "http://www.w3.org/2002/07/owl#"}, +}; + static std::map<std::string, rdf::URI> namespacesMap(std::vector<rdf::Namespace> list) { std::map<std::string, rdf::URI> result; for (auto it = list.cbegin(); it != list.cend(); it++) { @@ -54,10 +62,14 @@ Format FormatFromString(const std::string &name) { } SerializerBase::SerializerBase(const rdf::URI& baseUri, Format format, std::vector<rdf::Namespace> namespaces) - : baseUri(baseUri), format(format), namespaces(namespacesMap(namespaces)) { } + : baseUri(baseUri), format(format), namespaces(namespacesMap(namespaces)) { + this->namespaces.insert(kDefaultNamespaces.cbegin(), kDefaultNamespaces.cend()); +} SerializerBase::SerializerBase(const rdf::URI& baseUri, Format format, std::map<std::string, rdf::URI> namespaces) - : baseUri(baseUri), format(format), namespaces(namespaces) { } + : baseUri(baseUri), format(format), namespaces(namespaces) { + this->namespaces.insert(kDefaultNamespaces.cbegin(), kDefaultNamespaces.cend()); +} } // namespace serializer http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/serializer/serializer_raptor.cc ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/serializer/serializer_raptor.cc b/libraries/ostrich/backend/serializer/serializer_raptor.cc index ff37691..f7788fb 100644 --- a/libraries/ostrich/backend/serializer/serializer_raptor.cc +++ b/libraries/ostrich/backend/serializer/serializer_raptor.cc @@ -16,6 +16,11 @@ * limitations under the License. */ #include "serializer_raptor.h" +#include <raptor2/raptor2.h> +#include <util/raptor_util.h> + +#define STR(s) (const unsigned char*)s.c_str() +#define CPSTR(s) (const unsigned char*)strdup(s.c_str()) namespace marmotta { namespace serializer { @@ -97,7 +102,7 @@ RaptorSerializer::RaptorSerializer(const rdf::URI& baseUri, Format format) : SerializerBase(baseUri, format) { world = raptor_new_world(); - base = raptor_new_uri(world, (unsigned char const *) baseUri.getUri().c_str()); + base = raptor_new_uri(world, STR(baseUri.getUri())); initRaptor(); } @@ -105,7 +110,7 @@ RaptorSerializer::RaptorSerializer(const rdf::URI& baseUri, Format format, std:: : SerializerBase(baseUri, format, namespaces) { world = raptor_new_world(); - base = raptor_new_uri(world, (unsigned char const *) baseUri.getUri().c_str()); + base = raptor_new_uri(world, STR(baseUri.getUri())); initRaptor(); } @@ -113,77 +118,30 @@ RaptorSerializer::RaptorSerializer(const rdf::URI& baseUri, Format format, std:: : SerializerBase(baseUri, format, namespaces) { world = raptor_new_world(); - base = raptor_new_uri(world, (unsigned char const *) baseUri.getUri().c_str()); + base = raptor_new_uri(world, STR(baseUri.getUri())); initRaptor(); } RaptorSerializer::~RaptorSerializer() { // check for NULL in case a move operation has set the fields to a null pointer - if(serializer != NULL) + if(serializer != nullptr) raptor_free_serializer(serializer); - if(base != NULL) + if(base != nullptr) raptor_free_uri(base); - if(world != NULL) + if(world != nullptr) raptor_free_world(world); } -/* -RaptorSerializer::RaptorSerializer(const RaptorSerializer &other) { - format = other.format; - namespaces = other.namespaces; - - world = raptor_new_world(); - base = raptor_new_uri(world, raptor_uri_as_string(other.base)); - initRaptor(); -} - -RaptorSerializer::RaptorSerializer(RaptorSerializer &&other) { - format = other.format; - namespaces = other.namespaces; - base = other.base; - world = other.world; - serializer = other.serializer; - - other.serializer = NULL; - other.base = NULL; - other.world = NULL; -} - -RaptorSerializer &RaptorSerializer::operator=(const RaptorSerializer &other) { - format = other.format; - namespaces = other.namespaces; - - world = raptor_new_world(); - base = raptor_new_uri(world, raptor_uri_as_string(other.base)); - initRaptor(); - - return *this; -} - -RaptorSerializer &RaptorSerializer::operator=(RaptorSerializer &&other) { - format = other.format; - namespaces = other.namespaces; - serializer = other.serializer; - base = other.base; - world = other.world; - - other.serializer = NULL; - other.base = NULL; - other.world = NULL; - - return *this; -} -*/ void RaptorSerializer::initRaptor() { serializer = raptor_new_serializer(world, raptorFormat(format).c_str()); for(const auto &e : namespaces) { - raptor_uri* uri = raptor_new_uri(world, (unsigned char const *) e.second.getUri().c_str()); - raptor_serializer_set_namespace(serializer, uri, (unsigned char const *) e.first.c_str()); + raptor_uri* uri = raptor_new_uri(world, STR(e.second.getUri())); + raptor_serializer_set_namespace(serializer, uri, CPSTR(e.first)); } raptor_world_set_log_handler(world, this, [](void *user_data, raptor_log_message* message){ std::cerr << message->level << ": " << message->text << std::endl; @@ -198,59 +156,10 @@ void RaptorSerializer::prepare(std::ostream &out) { void RaptorSerializer::serialize(const rdf::Statement &stmt) { raptor_statement* triple = raptor_new_statement(world); - if (stmt.getMessage().subject().has_uri()) { - triple->subject = raptor_new_term_from_uri_string( - world, (unsigned char const *) stmt.getMessage().subject().uri().uri().c_str()); - } else if (stmt.getMessage().subject().has_bnode()) { - triple->subject = raptor_new_term_from_blank( - world, (unsigned char const *) stmt.getMessage().subject().bnode().id().c_str()); - } else { - throw SerializationError("invalid subject type"); - } - - triple->predicate = raptor_new_term_from_uri_string( - world, (unsigned char const *) stmt.getMessage().predicate().uri().c_str()); - - if (stmt.getMessage().object().has_resource()) { - const marmotta::rdf::proto::Resource& r = stmt.getMessage().object().resource(); - if (r.has_uri()) { - triple->object = raptor_new_term_from_uri_string( - world, (unsigned char const *) r.uri().uri().c_str()); - } else if(r.has_bnode()) { - triple->object = raptor_new_term_from_blank( - world, (unsigned char const *) r.bnode().id().c_str()); - } else { - throw SerializationError("invalid object resource type"); - } - } else if (stmt.getMessage().object().has_literal()) { - const marmotta::rdf::proto::Literal& l = stmt.getMessage().object().literal(); - if (l.has_stringliteral()) { - triple->object = raptor_new_term_from_counted_literal( - world, - (unsigned char const *) l.stringliteral().content().c_str(), l.stringliteral().content().size(), NULL, - (unsigned char const *) l.stringliteral().language().c_str(), l.stringliteral().language().size()); - } else if(l.has_dataliteral()) { - triple->object = raptor_new_term_from_counted_literal( - world, - (unsigned char const *) l.dataliteral().content().c_str(), l.dataliteral().content().size(), - raptor_new_uri(world, (unsigned char const *) l.dataliteral().datatype().uri().c_str()), - (unsigned char const *) "", 0); - } else { - throw SerializationError("invalid object literal type"); - } - } else { - throw SerializationError("invalid object type"); - } - - if (stmt.getMessage().context().has_uri()) { - triple->graph = raptor_new_term_from_uri_string( - world, (unsigned char const *) stmt.getMessage().context().uri().uri().c_str()); - } else if (stmt.getMessage().context().has_bnode()) { - triple->graph = raptor_new_term_from_blank( - world, (unsigned char const *) stmt.getMessage().context().bnode().id().c_str()); - } else { - triple->graph = nullptr; - } + triple->subject = util::raptor::AsTerm(world, stmt.getSubject()); + triple->predicate = util::raptor::AsTerm(world, stmt.getPredicate()); + triple->object = util::raptor::AsTerm(world, stmt.getObject()); + triple->graph = util::raptor::AsTerm(world, stmt.getContext()); raptor_serializer_serialize_statement(serializer, triple); http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/util/raptor_util.cc ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/util/raptor_util.cc b/libraries/ostrich/backend/util/raptor_util.cc index bbeaecd..e20b265 100644 --- a/libraries/ostrich/backend/util/raptor_util.cc +++ b/libraries/ostrich/backend/util/raptor_util.cc @@ -109,6 +109,70 @@ rdf::Statement ConvertStatement(raptor_statement *triple) { } } + +namespace { +raptor_term *AsStringLiteral(raptor_world* world, const rdf::Value &v) { + rdf::StringLiteral l(v.getMessage().literal().stringliteral()); + + return raptor_new_term_from_counted_literal( + world, + STR(l.getContent()), l.getContent().size(), + nullptr, + STR(l.getLanguage()), l.getLanguage().size()); +} + +raptor_term *AsDatatypeLiteral(raptor_world* world, const rdf::Value &v) { + rdf::DatatypeLiteral l(v.getMessage().literal().dataliteral()); + + return raptor_new_term_from_counted_literal( + world, + STR(l.getContent()), l.getContent().size(), + raptor_new_uri(world, STR(l.getDatatype().getUri())), + (unsigned char const *) "", 0); +} +} // namespace + + +/* + * Convert a Marmotta Resource into a raptor term. + */ +raptor_term* AsTerm(raptor_world* world, const rdf::Resource& r) { + switch (r.type) { + case rdf::Resource::URI: + return raptor_new_term_from_uri_string(world, STR(r.stringValue())); + case rdf::Resource::BNODE: + return raptor_new_term_from_blank(world, STR(r.stringValue())); + default: + return nullptr; + } +} + +/* + * Convert a Marmotta Value into a raptor term. + */ +raptor_term* AsTerm(raptor_world* world, const rdf::Value& v) { + switch (v.type) { + case rdf::Value::URI: + return raptor_new_term_from_uri_string(world, STR(v.stringValue())); + case rdf::Value::BNODE: + return raptor_new_term_from_blank(world, STR(v.stringValue())); + case rdf::Value::STRING_LITERAL: + return AsStringLiteral(world, v); + case rdf::Value::DATATYPE_LITERAL: + return AsDatatypeLiteral(world, v); + default: + return nullptr; + } + +} + +/* + * Convert a Marmotta URI into a raptor term. + */ +raptor_term* AsTerm(raptor_world* world, const rdf::URI& u) { + return raptor_new_term_from_uri_string(world, STR(u.stringValue())); +} + } // namespace raptor } // namespace util } // namespace marmotta http://git-wip-us.apache.org/repos/asf/marmotta/blob/a165b8e4/libraries/ostrich/backend/util/raptor_util.h ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/util/raptor_util.h b/libraries/ostrich/backend/util/raptor_util.h index 1899235..445d61e 100644 --- a/libraries/ostrich/backend/util/raptor_util.h +++ b/libraries/ostrich/backend/util/raptor_util.h @@ -54,17 +54,17 @@ rdf::Statement ConvertStatement(raptor_statement* triple); /* * Convert a Marmotta Resource into a raptor term. */ -raptor_term* AsLiteral(raptor_world* world, const rdf::Resource& r); +raptor_term* AsTerm(raptor_world* world, const rdf::Resource& r); /* * Convert a Marmotta Value into a raptor term. */ -raptor_term* AsLiteral(raptor_world* world, const rdf::Value& v); +raptor_term* AsTerm(raptor_world* world, const rdf::Value& v); /* * Convert a Marmotta URI into a raptor term. */ -raptor_term* AsLiteral(raptor_world* world, const rdf::URI& u); +raptor_term* AsTerm(raptor_world* world, const rdf::URI& u); } // namespace raptor } // namespace util
