Kelson has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/296914 )

Change subject: Add an API to get the offset of an article in the zimfile.
......................................................................


Add an API to get the offset of an article in the zimfile.

To get the offset of an article:

- get the article
- use article.getOffset()

If the offset cannot be determined (the entry is not a regular article, e.g.
a redirect, link target or deleted entry, or its cluster is compressed),
0 is returned.

Change-Id: I5b4aced056c16aa8fc62ce4b8048553ae1f96c25
---
M zimlib/include/zim/article.h
M zimlib/include/zim/cluster.h
M zimlib/include/zim/file.h
M zimlib/src/cluster.cpp
M zimlib/src/file.cpp
5 files changed, 32 insertions(+), 7 deletions(-)

Approvals:
  Kelson: Verified; Looks good to me, approved



diff --git a/zimlib/include/zim/article.h b/zimlib/include/zim/article.h
index 5172adb..b950173 100644
--- a/zimlib/include/zim/article.h
+++ b/zimlib/include/zim/article.h
@@ -85,6 +85,15 @@
                            : 
const_cast<File&>(file).getBlob(dirent.getClusterNumber(), 
dirent.getBlobNumber());
       }
 
+      offset_type getOffset() const
+      {
+        Dirent dirent = getDirent();
+        return isRedirect()
+            || isLinktarget()
+            || isDeleted() ? 0
+                           : 
const_cast<File&>(file).getOffset(dirent.getClusterNumber(), 
dirent.getBlobNumber());
+      }
+
       std::string getPage(bool layout = true, unsigned maxRecurse = 10);
       void getPage(std::ostream&, bool layout = true, unsigned maxRecurse = 
10);
 
diff --git a/zimlib/include/zim/cluster.h b/zimlib/include/zim/cluster.h
index bd55cb5..96b16f0 100644
--- a/zimlib/include/zim/cluster.h
+++ b/zimlib/include/zim/cluster.h
@@ -42,6 +42,7 @@
       CompressionType compression;
       Offsets offsets;
       Data data;
+      offset_type startOffset;
 
       void read(std::istream& in);
       void write(std::ostream& out) const;
@@ -49,14 +50,15 @@
     public:
       ClusterImpl();
 
-      void setCompression(CompressionType c)  { compression = c; }
-      CompressionType getCompression() const  { return compression; }
-      bool isCompressed() const               { return compression == 
zimcompZip || compression == zimcompBzip2 || compression == zimcompLzma; }
+      void setCompression(CompressionType c)   { compression = c; }
+      CompressionType getCompression() const   { return compression; }
+      bool isCompressed() const                { return compression == 
zimcompZip || compression == zimcompBzip2 || compression == zimcompLzma; }
 
-      size_type getCount() const              { return offsets.size() - 1; }
-      const char* getData(unsigned n) const   { return &data[ offsets[n] ]; }
-      size_type getSize(unsigned n) const     { return offsets[n+1] - 
offsets[n]; }
-      size_type getSize() const               { return offsets.size() * 
sizeof(size_type) + data.size(); }
+      size_type getCount() const               { return offsets.size() - 1; }
+      const char* getData(unsigned n) const    { return &data[ offsets[n] ]; }
+      size_type getSize(unsigned n) const      { return offsets[n+1] - 
offsets[n]; }
+      size_type getSize() const                { return offsets.size() * 
sizeof(size_type) + data.size(); }
+      offset_type getOffset(size_type n) const { return startOffset + 
offsets[n]; }
       Blob getBlob(size_type n) const;
       void clear();
 
@@ -85,6 +87,7 @@
 
       const char* getBlobPtr(size_type n) const     { return impl->getData(n); 
}
       size_type getBlobSize(size_type n) const      { return impl->getSize(n); 
}
+      offset_type getBlobOffset(size_type n) const  { return 
impl->getOffset(n); }
       Blob getBlob(size_type n) const;
 
       size_type count() const   { return impl ? impl->getCount() : 0; }
diff --git a/zimlib/include/zim/file.h b/zimlib/include/zim/file.h
index a6ac75b..0a3a2c3 100644
--- a/zimlib/include/zim/file.h
+++ b/zimlib/include/zim/file.h
@@ -62,6 +62,7 @@
 
       Blob getBlob(size_type clusterIdx, size_type blobIdx)
         { return getCluster(clusterIdx).getBlob(blobIdx); }
+      offset_type getOffset(size_type clusterIdx, size_type blobIdx);
 
       size_type getNamespaceBeginOffset(char ch)
         { return impl->getNamespaceBeginOffset(ch); }
diff --git a/zimlib/src/cluster.cpp b/zimlib/src/cluster.cpp
index 3630042..3b24fee 100644
--- a/zimlib/src/cluster.cpp
+++ b/zimlib/src/cluster.cpp
@@ -79,6 +79,9 @@
 
     size_type n = offset / 4;
     size_type a = offset;
+    // offset are from start of cluster !after the char telling the 
compression!
+    // but startOffset is offset from start of the cluster.
+    startOffset = offset + sizeof(char);
 
     log_debug1("first offset is " << offset << " n=" << n << " a=" << a);
 
diff --git a/zimlib/src/file.cpp b/zimlib/src/file.cpp
index b6777e6..c5f25a4 100644
--- a/zimlib/src/file.cpp
+++ b/zimlib/src/file.cpp
@@ -201,6 +201,15 @@
   File::const_iterator File::findByTitle(char ns, const std::string& title)
   { return findxByTitle(ns, title).second; }
 
+  offset_type File::getOffset(size_type clusterIdx, size_type blobIdx)
+  {
+    Cluster cluster = getCluster(clusterIdx);
+    if (cluster.isCompressed())
+        return 0;
+    offset_type blobOffset = cluster.getBlobOffset(blobIdx);
+    return getClusterOffset(clusterIdx) + blobOffset;
+  }
+
   std::string urldecode(const std::string& url)
   {
     std::string ret;

-- 
To view, visit https://gerrit.wikimedia.org/r/296914
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5b4aced056c16aa8fc62ce4b8048553ae1f96c25
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr <mgaut...@kymeria.fr>
Gerrit-Reviewer: Kelson <kel...@kiwix.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to