Kelson has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/296914 )
Change subject: Add an API to get the offset of an article in the zimfile. ...................................................................... Add an API to get the offset of an article in the zimfile. To get the offset of an article: - get the article - use article.getOffset() If the offset cannot be found (not a regular article (redirection...) or the cluster is compressed), 0 is returned. Change-Id: I5b4aced056c16aa8fc62ce4b8048553ae1f96c25 --- M zimlib/include/zim/article.h M zimlib/include/zim/cluster.h M zimlib/include/zim/file.h M zimlib/src/cluster.cpp M zimlib/src/file.cpp 5 files changed, 32 insertions(+), 7 deletions(-) Approvals: Kelson: Verified; Looks good to me, approved diff --git a/zimlib/include/zim/article.h b/zimlib/include/zim/article.h index 5172adb..b950173 100644 --- a/zimlib/include/zim/article.h +++ b/zimlib/include/zim/article.h @@ -85,6 +85,15 @@ : const_cast<File&>(file).getBlob(dirent.getClusterNumber(), dirent.getBlobNumber()); } + offset_type getOffset() const + { + Dirent dirent = getDirent(); + return isRedirect() + || isLinktarget() + || isDeleted() ? 
0 + : const_cast<File&>(file).getOffset(dirent.getClusterNumber(), dirent.getBlobNumber()); + } + std::string getPage(bool layout = true, unsigned maxRecurse = 10); void getPage(std::ostream&, bool layout = true, unsigned maxRecurse = 10); diff --git a/zimlib/include/zim/cluster.h b/zimlib/include/zim/cluster.h index bd55cb5..96b16f0 100644 --- a/zimlib/include/zim/cluster.h +++ b/zimlib/include/zim/cluster.h @@ -42,6 +42,7 @@ CompressionType compression; Offsets offsets; Data data; + offset_type startOffset; void read(std::istream& in); void write(std::ostream& out) const; @@ -49,14 +50,15 @@ public: ClusterImpl(); - void setCompression(CompressionType c) { compression = c; } - CompressionType getCompression() const { return compression; } - bool isCompressed() const { return compression == zimcompZip || compression == zimcompBzip2 || compression == zimcompLzma; } + void setCompression(CompressionType c) { compression = c; } + CompressionType getCompression() const { return compression; } + bool isCompressed() const { return compression == zimcompZip || compression == zimcompBzip2 || compression == zimcompLzma; } - size_type getCount() const { return offsets.size() - 1; } - const char* getData(unsigned n) const { return &data[ offsets[n] ]; } - size_type getSize(unsigned n) const { return offsets[n+1] - offsets[n]; } - size_type getSize() const { return offsets.size() * sizeof(size_type) + data.size(); } + size_type getCount() const { return offsets.size() - 1; } + const char* getData(unsigned n) const { return &data[ offsets[n] ]; } + size_type getSize(unsigned n) const { return offsets[n+1] - offsets[n]; } + size_type getSize() const { return offsets.size() * sizeof(size_type) + data.size(); } + offset_type getOffset(size_type n) const { return startOffset + offsets[n]; } Blob getBlob(size_type n) const; void clear(); @@ -85,6 +87,7 @@ const char* getBlobPtr(size_type n) const { return impl->getData(n); } size_type getBlobSize(size_type n) const { return 
impl->getSize(n); } + offset_type getBlobOffset(size_type n) const { return impl->getOffset(n); } Blob getBlob(size_type n) const; size_type count() const { return impl ? impl->getCount() : 0; } diff --git a/zimlib/include/zim/file.h b/zimlib/include/zim/file.h index a6ac75b..0a3a2c3 100644 --- a/zimlib/include/zim/file.h +++ b/zimlib/include/zim/file.h @@ -62,6 +62,7 @@ Blob getBlob(size_type clusterIdx, size_type blobIdx) { return getCluster(clusterIdx).getBlob(blobIdx); } + offset_type getOffset(size_type clusterIdx, size_type blobIdx); size_type getNamespaceBeginOffset(char ch) { return impl->getNamespaceBeginOffset(ch); } diff --git a/zimlib/src/cluster.cpp b/zimlib/src/cluster.cpp index 3630042..3b24fee 100644 --- a/zimlib/src/cluster.cpp +++ b/zimlib/src/cluster.cpp @@ -79,6 +79,9 @@ size_type n = offset / 4; size_type a = offset; + // offset are from start of cluster !after the char telling the compression! + // but startOffset is offset from start of the cluster. + startOffset = offset + sizeof(char); log_debug1("first offset is " << offset << " n=" << n << " a=" << a); diff --git a/zimlib/src/file.cpp b/zimlib/src/file.cpp index b6777e6..c5f25a4 100644 --- a/zimlib/src/file.cpp +++ b/zimlib/src/file.cpp @@ -201,6 +201,15 @@ File::const_iterator File::findByTitle(char ns, const std::string& title) { return findxByTitle(ns, title).second; } + offset_type File::getOffset(size_type clusterIdx, size_type blobIdx) + { + Cluster cluster = getCluster(clusterIdx); + if (cluster.isCompressed()) + return 0; + offset_type blobOffset = cluster.getBlobOffset(blobIdx); + return getClusterOffset(clusterIdx) + blobOffset; + } + std::string urldecode(const std::string& url) { std::string ret; -- To view, visit https://gerrit.wikimedia.org/r/296914 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I5b4aced056c16aa8fc62ce4b8048553ae1f96c25 Gerrit-PatchSet: 1 Gerrit-Project: openzim Gerrit-Branch: master Gerrit-Owner: 
Mgautierfr <mgaut...@kymeria.fr> Gerrit-Reviewer: Kelson <kel...@kiwix.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits