Kelson has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/295517 )

Change subject: Move articleSource's related stuffs in articlesource.(h|cpp).
......................................................................


Move articleSource's related stuffs in articlesource.(h|cpp).

Change-Id: Iee91484679bf401a693af1ca7e1c7e34f2c741d0
---
M zimwriterfs/Makefile.am
A zimwriterfs/articlesource.cpp
A zimwriterfs/articlesource.h
M zimwriterfs/zimwriterfs.cpp
4 files changed, 305 insertions(+), 229 deletions(-)

Approvals:
  Kelson: Verified; Looks good to me, approved



diff --git a/zimwriterfs/Makefile.am b/zimwriterfs/Makefile.am
index 3383e35..6e46553 100644
--- a/zimwriterfs/Makefile.am
+++ b/zimwriterfs/Makefile.am
@@ -4,4 +4,5 @@
 zimwriterfs_SOURCES= \
         zimwriterfs.cpp \
         tools.cpp \
-        article.cpp
+        article.cpp \
+        articlesource.cpp
diff --git a/zimwriterfs/articlesource.cpp b/zimwriterfs/articlesource.cpp
new file mode 100644
index 0000000..8b0b34c
--- /dev/null
+++ b/zimwriterfs/articlesource.cpp
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2013-2016 Emmanuel Engelhart <kel...@kiwix.org>
+ * Copyright 2016 Matthieu Gautier <mgaut...@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "articlesource.h"
+#include "article.h"
+#include "tools.h"
+
+#include <zim/blob.h>
+
+#include <iomanip>
+#include <sstream>
+#include <map>
+
+/* Helpers implemented outside this file; only their declarations are
+   needed here. */
+bool popFromFilenameQueue(std::string &filename);
+bool isVerbose();
+
+/* Settings defined in another translation unit. `welcome` is returned
+   as the main page, `directoryPath` prefixes the path of every file
+   read by getData(), and the others are served as "/M/..." metadata
+   values. */
+extern std::string welcome;
+extern std::string language;
+extern std::string creator;
+extern std::string publisher;
+extern std::string title;
+extern std::string description;
+extern std::string directoryPath;
+
+/* Number of non-redirect articles returned by getNextArticle() so far,
+   keyed by MIME type; serialised by getData() for "/M/Counter". */
+std::map<std::string, unsigned int> counters;
+/* Buffer (allocated with new char[]) and its size in bytes, used to
+   build the blob returned by the latest getData() call. */
+char *data = NULL;
+unsigned int dataSize = 0;
+
+
+
+/*
+ * Fill metadataQueue with the names of the metadata entries, in the
+ * order in which getNextArticle() will hand them out.
+ */
+ArticleSource::ArticleSource() {
+  static const char* const metadataNames[] = {
+    "Language",
+    "Publisher",
+    "Creator",
+    "Title",
+    "Description",
+    "Date",
+    "Favicon",
+    "Counter"
+  };
+  const size_t metadataCount = sizeof(metadataNames) / sizeof(metadataNames[0]);
+
+  for (size_t i = 0; i < metadataCount; ++i) {
+    metadataQueue.push(metadataNames[i]);
+  }
+}
+
+/*
+ * Read the file at `path` line by line and append every line, in file
+ * order, to redirectsQueue. If the file cannot be opened, no line is
+ * read and the queue is left unchanged.
+ */
+void ArticleSource::init_redirectsQueue_from_file(const std::string& path){
+    std::ifstream redirectsFile(path.c_str());
+
+    for (std::string entry; std::getline(redirectsFile, entry); ) {
+      redirectsQueue.push(entry);
+    }
+    redirectsFile.close();
+}
+
+/* Return the main page of the ZIM file: a copy of the global `welcome`. */
+std::string ArticleSource::getMainPage() {
+  const std::string& mainPage = welcome;
+  return mainPage;
+}
+
+/* Article returned by the latest getNextArticle() call. It is owned by
+   this file and released at the start of the following call. */
+Article *article = NULL;
+
+/*
+ * Return the next article to write, or NULL when nothing is left.
+ *
+ * Sources are drained in this order: metadata entries, then redirects,
+ * then files popped from the filename queue. Files whose Article
+ * reports isInvalid() are skipped as long as another filename can be
+ * popped.
+ *
+ * Every non-redirect article returned is counted by MIME type in the
+ * global `counters` map.
+ *
+ * The returned pointer stays valid only until the next call.
+ */
+const zim::writer::Article* ArticleSource::getNextArticle() {
+  std::string path;
+
+  /* Release the article handed out by the previous call. Resetting the
+     pointer keeps it from dangling if an allocation below throws. */
+  delete article;
+  article = NULL;
+
+  if (!metadataQueue.empty()) {
+    path = metadataQueue.front();
+    metadataQueue.pop();
+    article = new MetadataArticle(path);
+  } else if (!redirectsQueue.empty()) {
+    std::string line = redirectsQueue.front();
+    redirectsQueue.pop();
+    article = new RedirectArticle(line);
+  } else if (popFromFilenameQueue(path)) {
+    article = new Article(path);
+
+    /* Skip invalid files, freeing each rejected article before trying
+       the next filename (they used to be leaked).
+       NOTE(review): if the last popped file is invalid, that invalid
+       article is still returned, as before - confirm this is intended. */
+    while (article->isInvalid() && popFromFilenameQueue(path)) {
+      delete article;
+      article = new Article(path);
+    }
+  }
+
+  /* Count mimetypes */
+  if (article != NULL && !article->isRedirect()) {
+
+    if (isVerbose())
+      std::cout << "Creating entry for " << article->getAid() << std::endl;
+
+    /* operator[] creates a missing entry with the value 0 */
+    ++counters[article->getMimeType()];
+  }
+
+  return article;
+}
+
+/*
+ * Copy `content` into a freshly allocated buffer and publish it through
+ * the globals `data` / `dataSize`. The caller must already have
+ * released the previous buffer.
+ */
+static void fillDataBuffer(const std::string& content) {
+  dataSize = content.length();
+  data = new char[dataSize];
+  memcpy(data, content.c_str(), dataSize);
+}
+
+/*
+ * Build the content stored for the article identified by `aid`.
+ *
+ * - Ids starting with "/M/" are metadata entries: the value comes from
+ *   the global settings, the current local date (YYYY-MM-DD) or the
+ *   MIME type counters. Any other "/M/" id yields an empty value.
+ * - Files whose MIME type starts with "text/html" get every link found
+ *   by getLinks() replaced by computeNewUrl(), except empty links,
+ *   links starting with '#' or '?' and "data:" URLs.
+ * - Files whose MIME type starts with "text/css" get their url()
+ *   values rewritten; fonts are inlined as base64 "data:" URLs.
+ * - Any other file is copied unchanged.
+ *
+ * The blob is built from the global `data` buffer, which is released at
+ * the start of the next call.
+ */
+zim::Blob ArticleSource::getData(const std::string& aid) {
+
+  if (isVerbose())
+    std::cout << "Packing data for " << aid << std::endl;
+
+  /* `data` is always allocated with new[], so it has to be released
+     with delete[] (deleting NULL is a no-op) */
+  delete[] data;
+  data = NULL;
+
+  if (aid.substr(0, 3) == "/M/") {
+    std::string value;
+
+    if (aid == "/M/Language") {
+      value = language;
+    } else if (aid == "/M/Creator") {
+      value = creator;
+    } else if (aid == "/M/Publisher") {
+      value = publisher;
+    } else if (aid == "/M/Title") {
+      value = title;
+    } else if (aid == "/M/Description") {
+      value = description;
+    } else if (aid == "/M/Date") {
+      time_t t = time(0);
+      struct tm * now = localtime( & t );
+      std::stringstream stream;
+      stream << (now->tm_year + 1900) << '-'
+             << std::setw(2) << std::setfill('0') << (now->tm_mon + 1) << '-'
+             << std::setw(2) << std::setfill('0') << now->tm_mday;
+      value = stream.str();
+    } else if (aid == "/M/Counter") {
+      /* Serialise the counters as "mimetype=count;" pairs */
+      std::stringstream stream;
+      std::map<std::string, unsigned int>::iterator it;
+      for (it = counters.begin(); it != counters.end(); ++it) {
+        stream << it->first << "=" << it->second << ";";
+      }
+      value = stream.str();
+    }
+
+    fillDataBuffer(value);
+  } else {
+    const std::string aidPath = directoryPath + "/" + aid;
+    const std::string aidMimeType = getMimeTypeForFile(aid);
+
+    if (aidMimeType.find("text/html") == 0) {
+      std::string html = getFileContent(aidPath);
+
+      /* Rewrite links (src|href|...) attributes */
+      GumboOutput* output = gumbo_parse(html.c_str());
+      GumboNode* root = output->root;
+
+      std::map<std::string, bool> links;
+      getLinks(root, links);
+      std::map<std::string, bool>::iterator it;
+
+      /* If a link appears several times in the HTML, it occurs only
+         once in the links variable */
+      for (it = links.begin(); it != links.end(); ++it) {
+        const std::string& link = it->first;
+        if (!link.empty() && link[0] != '#' && link[0] != '?' &&
+            link.substr(0, 5) != "data:") {
+          replaceStringInPlace(html,
+                               "\"" + link + "\"",
+                               "\"" + computeNewUrl(aid, link) + "\"");
+        }
+      }
+      gumbo_destroy_output(&kGumboDefaultOptions, output);
+
+      fillDataBuffer(html);
+    } else if (aidMimeType.find("text/css") == 0) {
+      std::string css = getFileContent(aidPath);
+
+      /* Rewrite url() values in the CSS */
+      size_t startPos = 0;
+      size_t endPos = 0;
+      std::string url;
+
+      /* Compare against npos only: a match at offset 0 is a valid hit
+         and must not end the loop */
+      while ((startPos = css.find("url(", endPos)) != std::string::npos) {
+
+        /* URL delimiters */
+        endPos = css.find(")", startPos);
+        if (endPos == std::string::npos) {
+          /* Unterminated "url(": nothing left that can be rewritten */
+          break;
+        }
+        startPos = startPos +
+          (css[startPos+4] == '\'' || css[startPos+4] == '"' ? 5 : 4);
+        endPos = endPos -
+          (css[endPos-1] == '\'' || css[endPos-1] == '"' ? 1 : 0);
+        if (endPos < startPos) {
+          /* Degenerate token such as url("): skip it instead of letting
+             the length computation below wrap around */
+          endPos = startPos;
+          continue;
+        }
+        url = css.substr(startPos, endPos - startPos);
+        std::string startDelimiter = css.substr(startPos-1, 1);
+        std::string endDelimiter = css.substr(endPos, 1);
+
+        if (url.substr(0, 5) != "data:") {
+          /* Deal with URL with arguments (using '? ') */
+          std::string path = url;
+          size_t markPos = url.find("?");
+          if (markPos != std::string::npos) {
+            path = url.substr(0, markPos);
+          }
+
+          /* Embedded fonts need to be inlined because Kiwix is
+             otherwise not able to load them because of the
+             same-origin security */
+          std::string mimeType = getMimeTypeForFile(path);
+          if (mimeType == "application/font-ttf" ||
+              mimeType == "application/font-woff" ||
+              mimeType == "application/vnd.ms-opentype" ||
+              mimeType == "application/vnd.ms-fontobject") {
+
+            try {
+              std::string fontContent =
+                getFileContent(directoryPath + "/" + computeAbsolutePath(aid, path));
+              replaceStringInPlaceOnce(css,
+                                       startDelimiter + url + endDelimiter,
+                                       startDelimiter + "data:" + mimeType + ";base64," +
+                                       base64_encode(reinterpret_cast<const unsigned char*>(fontContent.c_str()),
+                                                     fontContent.length()) +
+                                       endDelimiter
+                                       );
+            } catch (...) {
+              /* Best effort: if inlining the font fails, the original
+                 URL is left untouched */
+            }
+          } else {
+
+            /* Deal with URL with arguments (using '? ') */
+            if (markPos != std::string::npos) {
+              endDelimiter = url.substr(markPos, 1);
+            }
+
+            replaceStringInPlaceOnce(css,
+                                     startDelimiter + url + endDelimiter,
+                                     startDelimiter + computeNewUrl(aid, path) + endDelimiter);
+          }
+        }
+      }
+
+      fillDataBuffer(css);
+    } else {
+      /* Read the file once and size the buffer from what was actually
+         read, so the copy can never run past the end of the content */
+      fillDataBuffer(getFileContent(aidPath));
+    }
+  }
+
+  return zim::Blob(data, dataSize);
+}
+
+
diff --git a/zimwriterfs/articlesource.h b/zimwriterfs/articlesource.h
new file mode 100644
index 0000000..adbdbda
--- /dev/null
+++ b/zimwriterfs/articlesource.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2013-2016 Emmanuel Engelhart <kel...@kiwix.org>
+ * Copyright 2016 Matthieu Gautier <mgaut...@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_ZIMWRITERFS_ARTICLESOURCE_H
+#define OPENZIM_ZIMWRITERFS_ARTICLESOURCE_H
+
+#include <string>
+#include <queue>
+#include <fstream>
+
+#include <zim/writer/zimcreator.h>
+
+/*
+ * zimwriterfs implementation of zim::writer::ArticleSource. It keeps
+ * two queues of pending entries: metadata names and redirect lines.
+ */
+class ArticleSource : public zim::writer::ArticleSource {
+  public:
+    explicit ArticleSource();
+    /* Next article to write. */
+    virtual const zim::writer::Article* getNextArticle();
+    /* Content of the article identified by `aid`. */
+    virtual zim::Blob getData(const std::string& aid);
+    /* Main page of the ZIM file. */
+    virtual std::string getMainPage();
+    
+    /* Load the lines of the file at `path` into redirectsQueue. */
+    virtual void init_redirectsQueue_from_file(const std::string& path);
+    
+  private:
+    /* Names of the metadata entries still to be handed out. */
+    std::queue<std::string> metadataQueue;
+    /* Redirect lines still to be handed out. */
+    std::queue<std::string> redirectsQueue;
+};
+
+#endif //OPENZIM_ZIMWRITERFS_ARTICLESOURCE_H
diff --git a/zimwriterfs/zimwriterfs.cpp b/zimwriterfs/zimwriterfs.cpp
index 93987b4..de44cb8 100644
--- a/zimwriterfs/zimwriterfs.cpp
+++ b/zimwriterfs/zimwriterfs.cpp
@@ -26,20 +26,15 @@
 #include <unistd.h>
 #include <pthread.h>
 
-#include <iomanip>
-#include <fstream>
-#include <sstream>
 #include <queue>
-#include <map>
 #include <cstdio>
 #include <magic.h>
 
 #include <zim/writer/zimcreator.h>
-#include <zim/blob.h>
-
 
 #include "tools.h"
 #include "article.h"
+#include "articlesource.h"
 
 #define MAX_QUEUE_SIZE 100
 
@@ -57,8 +52,6 @@
 pthread_t directoryVisitor;
 pthread_mutex_t filenameQueueMutex;
 std::queue<std::string> filenameQueue;
-std::queue<std::string> metadataQueue;
-std::queue<std::string> redirectsQueue;
 
 bool isDirectoryVisitorRunningFlag = false;
 pthread_mutex_t directoryVisitorRunningMutex;
@@ -68,9 +61,6 @@
 bool uniqueNamespace = false;
 
 magic_t magic;
-std::map<std::string, unsigned int> counters;
-char *data = NULL;
-unsigned int dataSize = 0;
 
 
 void directoryVisitorRunning(bool value) {
@@ -136,204 +126,6 @@
   } while (isDirectoryVisitorRunning() || !isFilenameQueueEmpty());
 
   return retVal;
-}
-
-/* ArticleSource class */
-class ArticleSource : public zim::writer::ArticleSource {
-  public:
-    explicit ArticleSource();
-    virtual const zim::writer::Article* getNextArticle();
-    virtual zim::Blob getData(const std::string& aid);
-    virtual std::string getMainPage();
-};
-
-ArticleSource::ArticleSource() {
-}
-
-std::string ArticleSource::getMainPage() {
-  return welcome;
-}
-
-Article *article = NULL;
-const zim::writer::Article* ArticleSource::getNextArticle() {
-  std::string path;
-
-  if (article != NULL) {
-    delete(article);
-  }
-
-  if (!metadataQueue.empty()) {
-    path = metadataQueue.front();
-    metadataQueue.pop();
-    article = new MetadataArticle(path);
-  } else if (!redirectsQueue.empty()) {
-    std::string line = redirectsQueue.front();
-    redirectsQueue.pop();
-    article = new RedirectArticle(line);
-  } else if (popFromFilenameQueue(path)) {
-    do {
-      article = new Article(path);
-    } while (article && article->isInvalid() && popFromFilenameQueue(path));
-  } else {
-    article = NULL;
-  }
-
-  /* Count mimetypes */
-  if (article != NULL && !article->isRedirect()) {
-
-    if (isVerbose())
-      std::cout << "Creating entry for " << article->getAid() << std::endl;
-
-    std::string mimeType = article->getMimeType();
-    if (counters.find(mimeType) == counters.end()) {
-      counters[mimeType] = 1;
-    } else {
-      counters[mimeType]++;
-    }
-  }
-
-  return article;
-}
-
-zim::Blob ArticleSource::getData(const std::string& aid) {
-
-  if (isVerbose())
-    std::cout << "Packing data for " << aid << std::endl;
-
-  if (data != NULL) {
-    delete(data);
-    data = NULL;
-  }
-
-  if (aid.substr(0, 3) == "/M/") {
-    std::string value; 
-
-    if ( aid == "/M/Language") {
-      value = language;
-    } else if (aid == "/M/Creator") {
-      value = creator;
-    } else if (aid == "/M/Publisher") {
-      value = publisher;
-    } else if (aid == "/M/Title") {
-      value = title;
-    } else if (aid == "/M/Description") {
-      value = description;
-    } else if ( aid == "/M/Date") {
-      time_t t = time(0);
-      struct tm * now = localtime( & t );
-      std::stringstream stream;
-      stream << (now->tm_year + 1900) << '-' 
-            << std::setw(2) << std::setfill('0') << (now->tm_mon + 1) << '-'
-            << std::setw(2) << std::setfill('0') << now->tm_mday;
-      value = stream.str();
-    } else if ( aid == "/M/Counter") {
-      std::stringstream stream;
-      for (std::map<std::string, unsigned int>::iterator it = 
counters.begin(); it != counters.end(); ++it) {
-       stream << it->first << "=" << it->second << ";";
-      }
-      value = stream.str();
-    }
-
-    dataSize = value.length();
-    data = new char[dataSize];
-    memcpy(data, value.c_str(), dataSize);
-  } else {
-    std::string aidPath = directoryPath + "/" + aid;
-    
-    if (getMimeTypeForFile(aid).find("text/html") == 0) {
-      std::string html = getFileContent(aidPath);
-      
-      /* Rewrite links (src|href|...) attributes */
-      GumboOutput* output = gumbo_parse(html.c_str());
-      GumboNode* root = output->root;
-
-      std::map<std::string, bool> links;
-      getLinks(root, links);
-      std::map<std::string, bool>::iterator it;
-      std::string aidDirectory = removeLastPathElement(aid, false, false);
-      
-      /* If a link appearch to be duplicated in the HTML, it will
-        occurs only one time in the links variable */
-      for(it = links.begin(); it != links.end(); it++) {
-       if (!it->first.empty() && it->first[0] != '#' && it->first[0] != '?' && 
it->first.substr(0, 5) != "data:") {
-         replaceStringInPlace(html, "\"" + it->first + "\"", "\"" + 
computeNewUrl(aid, it->first) + "\"");
-       }
-      }
-      gumbo_destroy_output(&kGumboDefaultOptions, output);
-
-      dataSize = html.length();
-      data = new char[dataSize];
-      memcpy(data, html.c_str(), dataSize);
-    } else if (getMimeTypeForFile(aid).find("text/css") == 0) {
-      std::string css = getFileContent(aidPath);
-
-      /* Rewrite url() values in the CSS */
-      size_t startPos = 0;
-      size_t endPos = 0;
-      std::string url;
-
-      while ((startPos = css.find("url(", endPos)) && startPos != 
std::string::npos) {
-
-       /* URL delimiters */
-       endPos = css.find(")", startPos);
-       startPos = startPos + (css[startPos+4] == '\'' || css[startPos+4] == 
'"' ? 5 : 4);
-       endPos = endPos - (css[endPos-1] == '\'' || css[endPos-1] == '"' ? 1 : 
0);
-       url = css.substr(startPos, endPos - startPos);
-       std::string startDelimiter = css.substr(startPos-1, 1);
-       std::string endDelimiter = css.substr(endPos, 1);
-
-       if (url.substr(0, 5) != "data:") {
-         /* Deal with URL with arguments (using '? ') */
-         std::string path = url;
-         size_t markPos = url.find("?");
-         if (markPos != std::string::npos) {
-           path = url.substr(0, markPos);
-         }
-
-         /* Embeded fonts need to be inline because Kiwix is
-            otherwise not able to load same because of the
-            same-origin security */
-         std::string mimeType = getMimeTypeForFile(path);
-         if (mimeType == "application/font-ttf" || 
-             mimeType == "application/font-woff" || 
-             mimeType == "application/vnd.ms-opentype" ||
-             mimeType == "application/vnd.ms-fontobject") {
-
-           try {
-             std::string fontContent = getFileContent(directoryPath + "/" + 
computeAbsolutePath(aid, path));
-             replaceStringInPlaceOnce(css, 
-                                      startDelimiter + url + endDelimiter, 
-                                      startDelimiter + "data:" + mimeType + 
";base64," + 
-                                      base64_encode(reinterpret_cast<const 
unsigned char*>(fontContent.c_str()), fontContent.length()) +
-                                      endDelimiter
-                                      );
-           } catch (...) {
-           }
-         } else {
-
-           /* Deal with URL with arguments (using '? ') */
-           if (markPos != std::string::npos) {
-             endDelimiter = url.substr(markPos, 1);
-           }
-
-           replaceStringInPlaceOnce(css,
-                                    startDelimiter + url + endDelimiter,
-                                    startDelimiter + computeNewUrl(aid, path) 
+ endDelimiter);
-         }
-       }
-      }
-
-      dataSize = css.length();
-      data = new char[dataSize];
-      memcpy(data, css.c_str(), dataSize);
-    } else {
-      dataSize = getFileSize(aidPath);
-      data = new char[dataSize];
-      memcpy(data, getFileContent(aidPath).c_str(), dataSize);
-    }
-  }
-
-  return zim::Blob(data, dataSize);
 }
 
 /* Non ZIM related code */
@@ -554,16 +346,6 @@
     directoryPath = directoryPath.substr(0, directoryPath.length()-1);
   }
 
-  /* Prepare metadata */
-  metadataQueue.push("Language");
-  metadataQueue.push("Publisher");
-  metadataQueue.push("Creator");
-  metadataQueue.push("Title");
-  metadataQueue.push("Description");
-  metadataQueue.push("Date");
-  metadataQueue.push("Favicon");
-  metadataQueue.push("Counter");
-
   /* Check metadata */
   if (!fileExists(directoryPath + "/" + welcome)) {
     std::cerr << "zimwriterfs: unable to find welcome page at '" << 
directoryPath << "/" << welcome << "'. --welcome path/value must be relative to 
HTML_DIRECTORY." << std::endl;
@@ -582,15 +364,8 @@
   } else {
     if (isVerbose())
       std::cout << "Reading redirects CSV file " << redirectsPath << "..." << 
std::endl;
-
-    std::ifstream in_stream;
-    std::string line;
-
-    in_stream.open(redirectsPath.c_str());
-    while (std::getline(in_stream, line)) {
-      redirectsQueue.push(line);
-    }
-    in_stream.close();
+    
+    source.init_redirectsQueue_from_file(redirectsPath);
   }
 
   /* Init */

-- 
To view, visit https://gerrit.wikimedia.org/r/295517
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iee91484679bf401a693af1ca7e1c7e34f2c741d0
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr <mgaut...@kymeria.fr>
Gerrit-Reviewer: Kelson <kel...@kiwix.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to