Kelson has submitted this change and it was merged.

Change subject: Move article's related stuffs in article.(h|cpp).
......................................................................


Move article's related stuffs in article.(h|cpp).

Change-Id: I2a257ea1a0a13eca0748b444838a525666a9090d
---
M zimwriterfs/Makefile.am
A zimwriterfs/article.cpp
A zimwriterfs/article.h
M zimwriterfs/zimwriterfs.cpp
4 files changed, 253 insertions(+), 199 deletions(-)

Approvals:
  Kelson: Verified; Looks good to me, approved



diff --git a/zimwriterfs/Makefile.am b/zimwriterfs/Makefile.am
index ea2ab7a..3383e35 100644
--- a/zimwriterfs/Makefile.am
+++ b/zimwriterfs/Makefile.am
@@ -3,4 +3,5 @@
 
 zimwriterfs_SOURCES= \
         zimwriterfs.cpp \
-        tools.cpp
+        tools.cpp \
+        article.cpp
diff --git a/zimwriterfs/article.cpp b/zimwriterfs/article.cpp
new file mode 100644
index 0000000..f743cde
--- /dev/null
+++ b/zimwriterfs/article.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright 2013-2016 Emmanuel Engelhart <kel...@kiwix.org>
+ * Copyright 2016 Matthieu Gautier <mgaut...@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "article.h"
+#include "tools.h"
+
+
+extern std::string directoryPath;
+
+Article::Article(const std::string& path, const bool detectRedirects) {
+  invalid = false;
+
+  /* aid */
+  aid = path.substr(directoryPath.size()+1);
+
+  /* url */
+  url = aid;
+
+  /* mime-type */
+  mimeType = getMimeTypeForFile(aid);
+  
+  /* namespace */
+  ns = getNamespaceForMimeType(mimeType)[0];
+
+  /* HTML specific code */
+  if (mimeType.find("text/html") != std::string::npos) {
+    std::size_t found;
+    std::string html = getFileContent(path);
+    GumboOutput* output = gumbo_parse(html.c_str());
+    GumboNode* root = output->root;
+
+    /* Search the content of the <title> tag in the HTML */
+    if (root->type == GUMBO_NODE_ELEMENT && root->v.element.children.length >= 2) {
+      const GumboVector* root_children = &root->v.element.children;
+      GumboNode* head = NULL;
+      for (int i = 0; i < root_children->length; ++i) {
+       GumboNode* child = (GumboNode*)(root_children->data[i]);
+       if (child->type == GUMBO_NODE_ELEMENT &&
+           child->v.element.tag == GUMBO_TAG_HEAD) {
+         head = child;
+         break;
+       }
+      }
+
+      if (head != NULL) {
+       GumboVector* head_children = &head->v.element.children;
+       for (int i = 0; i < head_children->length; ++i) {
+         GumboNode* child = (GumboNode*)(head_children->data[i]);
+         if (child->type == GUMBO_NODE_ELEMENT &&
+             child->v.element.tag == GUMBO_TAG_TITLE) {
+           if (child->v.element.children.length == 1) {
+             GumboNode* title_text = (GumboNode*)(child->v.element.children.data[0]);
+             if (title_text->type == GUMBO_NODE_TEXT) {
+               title = title_text->v.text.text;
+             }
+           }
+         }
+       }
+
+       /* Detect if this is a redirection (if no redirects CSV specified) */
+       std::string targetUrl;
+       try {
+         targetUrl = detectRedirects ? extractRedirectUrlFromHtml(head_children) : "";
+       } catch (std::string &error) {
+         std::cerr << error << std::endl;
+       }
+       if (!targetUrl.empty()) {
+         redirectAid = computeAbsolutePath(aid, decodeUrl(targetUrl));
+         if (!fileExists(directoryPath + "/" + redirectAid)) {
+           redirectAid.clear();
+           invalid = true;
+         }
+       }
+      }
+
+      /* If no title, then compute one from the filename */
+      if (title.empty()) {
+       found = path.rfind("/");
+       if (found != std::string::npos) {
+         title = path.substr(found+1);
+         found = title.rfind(".");
+         if (found!=std::string::npos) {
+           title = title.substr(0, found);
+         }
+       } else {
+         title = path;
+       }
+       std::replace(title.begin(), title.end(), '_',  ' ');
+      }
+    }
+
+    gumbo_destroy_output(&kGumboDefaultOptions, output);
+  }
+}
+
+std::string Article::getAid() const
+{
+  return aid;
+}
+
+bool Article::isInvalid() const
+{
+  return invalid;
+}
+
+char Article::getNamespace() const
+{
+  return ns;
+}
+
+std::string Article::getUrl() const
+{
+  return url;
+}
+
+std::string Article::getTitle() const
+{
+  return title;
+}
+
+bool Article::isRedirect() const
+{
+  return !redirectAid.empty();
+}
+
+std::string Article::getMimeType() const
+{
+  return mimeType;
+}
+
+std::string Article::getRedirectAid() const
+{
+  return redirectAid;
+}
+
+bool Article::shouldCompress() const {
+  return (getMimeType().find("text") == 0 || 
+         getMimeType() == "application/javascript" || 
+         getMimeType() == "application/json" ||
+          getMimeType() == "image/svg+xml" ? true : false);
+}
diff --git a/zimwriterfs/article.h b/zimwriterfs/article.h
new file mode 100644
index 0000000..2585fc6
--- /dev/null
+++ b/zimwriterfs/article.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2013-2016 Emmanuel Engelhart <kel...@kiwix.org>
+ * Copyright 2016 Matthieu Gautier <mgaut...@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_ZIMWRITERFS_ARTICLE_H
+#define OPENZIM_ZIMWRITERFS_ARTICLE_H
+
+#include <string>
+#include <zim/writer/zimcreator.h>
+
+extern std::string favicon;
+
+class Article : public zim::writer::Article {
+  protected:
+    char ns;
+    bool invalid;
+    std::string aid;
+    std::string url;
+    std::string title;
+    std::string mimeType;
+    std::string redirectAid;
+    std::string data;
+
+  public:
+    Article() {
+      invalid = false;
+    }
+    explicit Article(const std::string& id, const bool detectRedirects = true);
+    virtual std::string getAid() const;
+    virtual char getNamespace() const;
+    virtual std::string getUrl() const;
+    virtual bool isInvalid() const;
+    virtual std::string getTitle() const;
+    virtual bool isRedirect() const;
+    virtual std::string getMimeType() const;
+    virtual std::string getRedirectAid() const;
+    virtual bool shouldCompress() const;
+};
+
+class MetadataArticle : public Article {
+  public:
+  MetadataArticle(std::string &id) {
+    if (id == "Favicon") {
+      aid = "/-/" + id;
+      mimeType="image/png";
+      redirectAid = favicon;
+      ns = '-';
+      url = "favicon";
+    } else {
+      aid = "/M/" + id;
+      mimeType="text/plain";
+      ns = 'M';
+      url = id;
+    }
+  }
+};
+
+class RedirectArticle : public Article {
+  public:
+  RedirectArticle(const std::string &line) {
+    size_t start;
+    size_t end;
+    ns = line[0];
+    end = line.find_first_of("\t", 2);
+    url = line.substr(2, end - 2);
+    start = end + 1;
+    end = line.find_first_of("\t", start);
+    title = line.substr(start, end - start);
+    redirectAid = line.substr(end + 1);
+    aid = "/" + line.substr(0, 1) + "/" + url;
+    mimeType = "text/plain";
+  }
+};
+
+#endif // OPENZIM_ZIMWRITERFS_ARTICLE_H
diff --git a/zimwriterfs/zimwriterfs.cpp b/zimwriterfs/zimwriterfs.cpp
index 693e6a1..93987b4 100644
--- a/zimwriterfs/zimwriterfs.cpp
+++ b/zimwriterfs/zimwriterfs.cpp
@@ -39,6 +39,7 @@
 
 
 #include "tools.h"
+#include "article.h"
 
 #define MAX_QUEUE_SIZE 100
 
@@ -135,203 +136,6 @@
   } while (isDirectoryVisitorRunning() || !isFilenameQueueEmpty());
 
   return retVal;
-}
-
-/* Article class */
-class Article : public zim::writer::Article {
-  protected:
-    char ns;
-    bool invalid;
-    std::string aid;
-    std::string url;
-    std::string title;
-    std::string mimeType;
-    std::string redirectAid;
-    std::string data;
-
-  public:
-    Article() {
-      invalid = false;
-    }
-    explicit Article(const std::string& id, const bool detectRedirects);
-    virtual std::string getAid() const;
-    virtual char getNamespace() const;
-    virtual std::string getUrl() const;
-    virtual bool isInvalid() const;
-    virtual std::string getTitle() const;
-    virtual bool isRedirect() const;
-    virtual std::string getMimeType() const;
-    virtual std::string getRedirectAid() const;
-    virtual bool shouldCompress() const;
-};
-
-class MetadataArticle : public Article {
-  public:
-  MetadataArticle(std::string &id) {
-    if (id == "Favicon") {
-      aid = "/-/" + id;
-      mimeType="image/png";
-      redirectAid = favicon;
-      ns = '-';
-      url = "favicon";
-    } else {
-      aid = "/M/" + id;
-      mimeType="text/plain";
-      ns = 'M';
-      url = id;
-    }
-  }
-};
-
-class RedirectArticle : public Article {
-  public:
-  RedirectArticle(const std::string &line) {
-    size_t start;
-    size_t end;
-    ns = line[0];
-    end = line.find_first_of("\t", 2);
-    url = line.substr(2, end - 2);
-    start = end + 1;
-    end = line.find_first_of("\t", start);
-    title = line.substr(start, end - start);
-    redirectAid = line.substr(end + 1);
-    aid = "/" + line.substr(0, 1) + "/" + url;
-    mimeType = "text/plain";
-  }
-};
-
-
-Article::Article(const std::string& path, const bool detectRedirects = true) {
-  invalid = false;
-
-  /* aid */
-  aid = path.substr(directoryPath.size()+1);
-
-  /* url */
-  url = aid;
-
-  /* mime-type */
-  mimeType = getMimeTypeForFile(aid);
-  
-  /* namespace */
-  ns = getNamespaceForMimeType(mimeType)[0];
-
-  /* HTML specific code */
-  if (mimeType.find("text/html") != std::string::npos) {
-    std::size_t found;
-    std::string html = getFileContent(path);
-    GumboOutput* output = gumbo_parse(html.c_str());
-    GumboNode* root = output->root;
-
-    /* Search the content of the <title> tag in the HTML */
-    if (root->type == GUMBO_NODE_ELEMENT && root->v.element.children.length >= 2) {
-      const GumboVector* root_children = &root->v.element.children;
-      GumboNode* head = NULL;
-      for (int i = 0; i < root_children->length; ++i) {
-       GumboNode* child = (GumboNode*)(root_children->data[i]);
-       if (child->type == GUMBO_NODE_ELEMENT &&
-           child->v.element.tag == GUMBO_TAG_HEAD) {
-         head = child;
-         break;
-       }
-      }
-
-      if (head != NULL) {
-       GumboVector* head_children = &head->v.element.children;
-       for (int i = 0; i < head_children->length; ++i) {
-         GumboNode* child = (GumboNode*)(head_children->data[i]);
-         if (child->type == GUMBO_NODE_ELEMENT &&
-             child->v.element.tag == GUMBO_TAG_TITLE) {
-           if (child->v.element.children.length == 1) {
-             GumboNode* title_text = (GumboNode*)(child->v.element.children.data[0]);
-             if (title_text->type == GUMBO_NODE_TEXT) {
-               title = title_text->v.text.text;
-             }
-           }
-         }
-       }
-
-       /* Detect if this is a redirection (if no redirects CSV specified) */
-       std::string targetUrl;
-       try {
-         targetUrl = detectRedirects ? extractRedirectUrlFromHtml(head_children) : "";
-       } catch (std::string &error) {
-         std::cerr << error << std::endl;
-       }
-       if (!targetUrl.empty()) {
-         redirectAid = computeAbsolutePath(aid, decodeUrl(targetUrl));
-         if (!fileExists(directoryPath + "/" + redirectAid)) {
-           redirectAid.clear();
-           invalid = true;
-         }
-       }
-      }
-
-      /* If no title, then compute one from the filename */
-      if (title.empty()) {
-       found = path.rfind("/");
-       if (found != std::string::npos) {
-         title = path.substr(found+1);
-         found = title.rfind(".");
-         if (found!=std::string::npos) {
-           title = title.substr(0, found);
-         }
-       } else {
-         title = path;
-       }
-       std::replace(title.begin(), title.end(), '_',  ' ');
-      }
-    }
-
-    gumbo_destroy_output(&kGumboDefaultOptions, output);
-  }
-}
-
-std::string Article::getAid() const
-{
-  return aid;
-}
-
-bool Article::isInvalid() const
-{
-  return invalid;
-}
-
-char Article::getNamespace() const
-{
-  return ns;
-}
-
-std::string Article::getUrl() const
-{
-  return url;
-}
-
-std::string Article::getTitle() const
-{
-  return title;
-}
-
-bool Article::isRedirect() const
-{
-  return !redirectAid.empty();
-}
-
-std::string Article::getMimeType() const
-{
-  return mimeType;
-}
-
-std::string Article::getRedirectAid() const
-{
-  return redirectAid;
-}
-
-bool Article::shouldCompress() const {
-  return (getMimeType().find("text") == 0 || 
-         getMimeType() == "application/javascript" || 
-         getMimeType() == "application/json" ||
-         getMimeType() == "image/svg+xml" ? true : false);
 }
 
 /* ArticleSource class */
@@ -657,7 +461,7 @@
 int main(int argc, char** argv) {
   ArticleSource source;
   int minChunkSize = 2048;
-
+  
 
   /* Argument parsing */
   static struct option long_options[] = {

-- 
To view, visit https://gerrit.wikimedia.org/r/295516
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I2a257ea1a0a13eca0748b444838a525666a9090d
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr <mgaut...@kymeria.fr>
Gerrit-Reviewer: Kelson <kel...@kiwix.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to