Kelson has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/295517 )
Change subject: Move articleSource's related stuffs in articlesource.(h|cpp).
......................................................................
Move articleSource's related stuffs in articlesource.(h|cpp).
Change-Id: Iee91484679bf401a693af1ca7e1c7e34f2c741d0
---
M zimwriterfs/Makefile.am
A zimwriterfs/articlesource.cpp
A zimwriterfs/articlesource.h
M zimwriterfs/zimwriterfs.cpp
4 files changed, 305 insertions(+), 229 deletions(-)
Approvals:
Kelson: Verified; Looks good to me, approved
diff --git a/zimwriterfs/Makefile.am b/zimwriterfs/Makefile.am
index 3383e35..6e46553 100644
--- a/zimwriterfs/Makefile.am
+++ b/zimwriterfs/Makefile.am
@@ -4,4 +4,5 @@
zimwriterfs_SOURCES= \
zimwriterfs.cpp \
tools.cpp \
- article.cpp
+ article.cpp \
+ articlesource.cpp
diff --git a/zimwriterfs/articlesource.cpp b/zimwriterfs/articlesource.cpp
new file mode 100644
index 0000000..8b0b34c
--- /dev/null
+++ b/zimwriterfs/articlesource.cpp
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2013-2016 Emmanuel Engelhart <[email protected]>
+ * Copyright 2016 Matthieu Gautier <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "articlesource.h"
+#include "article.h"
+#include "tools.h"
+
+#include <zim/blob.h>
+
+#include <iomanip>
+#include <sstream>
+#include <map>
+
+bool popFromFilenameQueue(std::string &filename); // declared only; defined outside this file. getNextArticle() calls it to fetch the next file path.
+bool isVerbose(); // declared only; defined outside this file. Gates the progress messages written to std::cout.
+ /* Settings declared extern: their definitions live in another translation unit. */
+extern std::string welcome; // returned as-is by getMainPage()
+extern std::string language; // content of the "/M/Language" entry
+extern std::string creator; // content of the "/M/Creator" entry
+extern std::string publisher; // content of the "/M/Publisher" entry
+extern std::string title; // content of the "/M/Title" entry
+extern std::string description; // content of the "/M/Description" entry
+extern std::string directoryPath; // prefixed to an aid (with "/") to build the path of the file to read
+ /* State shared by getNextArticle() and getData(). */
+std::map<std::string, unsigned int> counters; // entries seen per mime type; serialised into "/M/Counter"
+char *data = NULL; // buffer behind the blob last returned by getData(); released at the start of the next getData() call
+unsigned int dataSize = 0; // number of bytes stored in `data`
+
+
+
+/*
+ * Create the source with its metadata queue already filled.
+ *
+ * The names are queued in the order listed below; getNextArticle() hands
+ * them out one by one before anything else.
+ */
+ArticleSource::ArticleSource() {
+  const char* const metadataNames[] = {
+    "Language",
+    "Publisher",
+    "Creator",
+    "Title",
+    "Description",
+    "Date",
+    "Favicon",
+    "Counter"
+  };
+  const size_t nameCount = sizeof(metadataNames) / sizeof(metadataNames[0]);
+
+  for (size_t index = 0; index < nameCount; ++index) {
+    metadataQueue.push(metadataNames[index]);
+  }
+}
+
+/*
+ * Fill the redirects queue from the text file at `path`.
+ *
+ * Every line of the file is queued unchanged, in file order. If the file
+ * cannot be opened the first read fails and nothing is queued.
+ */
+void ArticleSource::init_redirectsQueue_from_file(const std::string& path) {
+  std::ifstream redirectsFile(path.c_str());
+
+  for (std::string entry; std::getline(redirectsFile, entry); ) {
+    redirectsQueue.push(entry);
+  }
+
+  /* The stream is closed by its destructor when it goes out of scope. */
+}
+
+/* Return a copy of the global `welcome` setting. */
+std::string ArticleSource::getMainPage() {
+  const std::string& mainPage = welcome;
+  return mainPage;
+}
+
+/* Article most recently handed out by getNextArticle(). It stays owned by
+ * this file and is destroyed at the start of the following call. */
+Article *article = NULL;
+
+/*
+ * Produce the next entry to write.
+ *
+ * Entries are taken, in this order, from the metadata queue, from the
+ * redirects queue and finally from the file name queue
+ * (popFromFilenameQueue()). Every non-redirect entry is also counted per
+ * mime type in `counters`.
+ *
+ * Returns NULL when none of the three sources has anything left. The
+ * returned object is only valid until the next call.
+ */
+const zim::writer::Article* ArticleSource::getNextArticle() {
+  std::string path;
+
+  /* Release the previous article and reset the pointer, so that it does
+     not dangle if one of the constructors below throws. */
+  delete article;
+  article = NULL;
+
+  if (!metadataQueue.empty()) {
+    path = metadataQueue.front();
+    metadataQueue.pop();
+    article = new MetadataArticle(path);
+  } else if (!redirectsQueue.empty()) {
+    std::string line = redirectsQueue.front();
+    redirectsQueue.pop();
+    article = new RedirectArticle(line);
+  } else if (popFromFilenameQueue(path)) {
+    article = new Article(path);
+
+    /* Skip invalid files. Each rejected article is destroyed before the
+       next candidate is built (it used to be leaked). As before, when the
+       queue runs dry the last candidate is returned even if it is
+       invalid. */
+    while (article->isInvalid() && popFromFilenameQueue(path)) {
+      delete article;
+      article = NULL;
+      article = new Article(path);
+    }
+  }
+
+  /* Count mimetypes */
+  if (article != NULL && !article->isRedirect()) {
+
+    if (isVerbose())
+      std::cout << "Creating entry for " << article->getAid() << std::endl;
+
+    /* operator[] value-initialises a missing counter to 0 before the
+       increment, so no separate "first time" branch is needed. */
+    ++counters[article->getMimeType()];
+  }
+
+  return article;
+}
+
+/* Copy `content` into a newly allocated buffer and publish it through the
+ * globals `data` / `dataSize`, which back the blob returned by getData(). */
+static void storeData(const std::string& content) {
+  dataSize = content.length();
+  data = new char[dataSize];
+  memcpy(data, content.data(), dataSize);
+}
+
+/* Build the content of the metadata entry named by `aid` ("/M/<Name>").
+ * A name without a dedicated branch below yields an empty string. */
+static std::string getMetadataValue(const std::string& aid) {
+  std::string value;
+
+  if (aid == "/M/Language") {
+    value = language;
+  } else if (aid == "/M/Creator") {
+    value = creator;
+  } else if (aid == "/M/Publisher") {
+    value = publisher;
+  } else if (aid == "/M/Title") {
+    value = title;
+  } else if (aid == "/M/Description") {
+    value = description;
+  } else if (aid == "/M/Date") {
+    /* Current local date, formatted as YYYY-MM-DD */
+    time_t t = time(0);
+    struct tm * now = localtime(&t);
+    std::stringstream stream;
+    stream << (now->tm_year + 1900) << '-'
+           << std::setw(2) << std::setfill('0') << (now->tm_mon + 1) << '-'
+           << std::setw(2) << std::setfill('0') << now->tm_mday;
+    value = stream.str();
+  } else if (aid == "/M/Counter") {
+    /* One "<mimetype>=<count>;" item per mime type counted so far */
+    std::stringstream stream;
+    for (std::map<std::string, unsigned int>::iterator it = counters.begin();
+         it != counters.end(); ++it) {
+      stream << it->first << "=" << it->second << ";";
+    }
+    value = stream.str();
+  }
+
+  return value;
+}
+
+/* Rewrite the links (src|href|...) found in `html`, in place, using
+ * computeNewUrl() relative to the entry `aid`. */
+static void rewriteHtmlLinks(std::string& html, const std::string& aid) {
+  GumboOutput* output = gumbo_parse(html.c_str());
+  GumboNode* root = output->root;
+
+  std::map<std::string, bool> links;
+  getLinks(root, links);
+
+  /* If a link appears several times in the HTML, it occurs only once in
+     the links variable */
+  for (std::map<std::string, bool>::iterator it = links.begin();
+       it != links.end(); ++it) {
+    const std::string& link = it->first;
+
+    /* Anchors, bare query strings and inline data: URLs are left alone */
+    if (!link.empty() && link[0] != '#' && link[0] != '?' &&
+        link.substr(0, 5) != "data:") {
+      replaceStringInPlace(html, "\"" + link + "\"",
+                           "\"" + computeNewUrl(aid, link) + "\"");
+    }
+  }
+  gumbo_destroy_output(&kGumboDefaultOptions, output);
+}
+
+/* Rewrite the url() values found in `css`, in place. Font files are
+ * inlined as base64 data: URLs; other URLs go through computeNewUrl(). */
+static void rewriteCssUrls(std::string& css, const std::string& aid) {
+  size_t startPos = 0;
+  size_t endPos = 0;
+  std::string url;
+
+  /* Compare against npos only: the former `find(...) && ...` condition
+     also ended the loop on a match at offset 0.
+     NOTE(review): endPos is not adjusted after a replacement changes the
+     length of `css`, so a following url() may be skipped -- to be
+     confirmed against replaceStringInPlaceOnce(). */
+  while ((startPos = css.find("url(", endPos)) != std::string::npos) {
+
+    /* URL delimiters */
+    endPos = css.find(")", startPos);
+    if (endPos == std::string::npos) {
+      /* Unterminated url(: stop instead of indexing with npos */
+      break;
+    }
+    startPos = startPos + (css[startPos+4] == '\'' || css[startPos+4] == '"' ? 5 : 4);
+    endPos = endPos - (css[endPos-1] == '\'' || css[endPos-1] == '"' ? 1 : 0);
+    if (endPos < startPos) {
+      /* Malformed value such as url("): nothing to extract */
+      endPos = startPos;
+      continue;
+    }
+    url = css.substr(startPos, endPos - startPos);
+    std::string startDelimiter = css.substr(startPos-1, 1);
+    std::string endDelimiter = css.substr(endPos, 1);
+
+    if (url.substr(0, 5) != "data:") {
+      /* Deal with URL with arguments (using '?') */
+      std::string path = url;
+      size_t markPos = url.find("?");
+      if (markPos != std::string::npos) {
+        path = url.substr(0, markPos);
+      }
+
+      /* Embedded fonts need to be inlined because Kiwix is otherwise
+         not able to load them because of the same-origin security */
+      std::string mimeType = getMimeTypeForFile(path);
+      if (mimeType == "application/font-ttf" ||
+          mimeType == "application/font-woff" ||
+          mimeType == "application/vnd.ms-opentype" ||
+          mimeType == "application/vnd.ms-fontobject") {
+
+        try {
+          std::string fontContent =
+            getFileContent(directoryPath + "/" + computeAbsolutePath(aid, path));
+          replaceStringInPlaceOnce(css,
+            startDelimiter + url + endDelimiter,
+            startDelimiter + "data:" + mimeType + ";base64," +
+            base64_encode(reinterpret_cast<const unsigned char*>(fontContent.c_str()),
+                          fontContent.length()) +
+            endDelimiter
+          );
+        } catch (...) {
+          /* Presumably best effort: on any failure the url() value is
+             left untouched. */
+        }
+      } else {
+
+        /* Deal with URL with arguments (using '?').
+           NOTE(review): `url` still contains the query string here, so
+           the searched pattern ends with "...?<args>?" and looks like it
+           can never match; `path` may have been intended -- confirm. */
+        if (markPos != std::string::npos) {
+          endDelimiter = url.substr(markPos, 1);
+        }
+
+        replaceStringInPlaceOnce(css,
+          startDelimiter + url + endDelimiter,
+          startDelimiter + computeNewUrl(aid, path) + endDelimiter);
+      }
+    }
+  }
+}
+
+/*
+ * Build the content of the entry identified by `aid`.
+ *
+ * - "/M/..." entries get a computed metadata value.
+ * - HTML files (mime type starting with "text/html") are read from
+ *   directoryPath and have their links rewritten.
+ * - CSS files (mime type starting with "text/css") are read from
+ *   directoryPath and have their url() values rewritten.
+ * - Any other file is returned unchanged.
+ *
+ * The returned blob points at the global buffer `data`; that buffer is
+ * released at the start of the next call, so the blob must not be used
+ * after it.
+ */
+zim::Blob ArticleSource::getData(const std::string& aid) {
+
+  if (isVerbose())
+    std::cout << "Packing data for " << aid << std::endl;
+
+  /* The buffer is allocated with new[], so it has to be released with
+     delete[] (plain delete is undefined behaviour here). */
+  delete[] data;
+  data = NULL;
+  dataSize = 0;
+
+  if (aid.substr(0, 3) == "/M/") {
+    storeData(getMetadataValue(aid));
+  } else {
+    const std::string aidPath = directoryPath + "/" + aid;
+    const std::string aidMimeType = getMimeTypeForFile(aid);
+
+    if (aidMimeType.find("text/html") == 0) {
+      std::string html = getFileContent(aidPath);
+      rewriteHtmlLinks(html, aid);
+      storeData(html);
+    } else if (aidMimeType.find("text/css") == 0) {
+      std::string css = getFileContent(aidPath);
+      rewriteCssUrls(css, aid);
+      storeData(css);
+    } else {
+      /* Size the buffer from the bytes actually read rather than from a
+         separate getFileSize() call, so the copy can never read past the
+         end of the string. */
+      storeData(getFileContent(aidPath));
+    }
+  }
+
+  return zim::Blob(data, dataSize);
+}
+
diff --git a/zimwriterfs/articlesource.h b/zimwriterfs/articlesource.h
new file mode 100644
index 0000000..adbdbda
--- /dev/null
+++ b/zimwriterfs/articlesource.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2013-2016 Emmanuel Engelhart <[email protected]>
+ * Copyright 2016 Matthieu Gautier <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_ZIMWRITERFS_ARTICLESOURCE_H
+#define OPENZIM_ZIMWRITERFS_ARTICLESOURCE_H
+
+#include <string>
+#include <queue>
+#include <fstream>
+
+#include <zim/writer/zimcreator.h>
+
+class ArticleSource : public zim::writer::ArticleSource { // zimwriterfs implementation of the zim::writer::ArticleSource interface
+ public:
+ explicit ArticleSource(); // fills metadataQueue with the names of the metadata entries
+ virtual const zim::writer::Article* getNextArticle(); // next entry: metadata first, then redirects, then files; NULL when nothing is left
+ virtual zim::Blob getData(const std::string& aid); // content of the entry identified by `aid`
+ virtual std::string getMainPage(); // returns the global `welcome` setting
+ /* Setup helper, called once the redirects file path is known. */
+ virtual void init_redirectsQueue_from_file(const std::string& path); // queues every line of the file at `path` into redirectsQueue
+ /* Pending entries; both queues are consumed by getNextArticle(). */
+ private:
+ std::queue<std::string> metadataQueue; // metadata entry names still to be handed out
+ std::queue<std::string> redirectsQueue; // redirect lines still to be handed out, one per line of the redirects file
+};
+
+#endif //OPENZIM_ZIMWRITERFS_ARTICLESOURCE_H
diff --git a/zimwriterfs/zimwriterfs.cpp b/zimwriterfs/zimwriterfs.cpp
index 93987b4..de44cb8 100644
--- a/zimwriterfs/zimwriterfs.cpp
+++ b/zimwriterfs/zimwriterfs.cpp
@@ -26,20 +26,15 @@
#include <unistd.h>
#include <pthread.h>
-#include <iomanip>
-#include <fstream>
-#include <sstream>
#include <queue>
-#include <map>
#include <cstdio>
#include <magic.h>
#include <zim/writer/zimcreator.h>
-#include <zim/blob.h>
-
#include "tools.h"
#include "article.h"
+#include "articlesource.h"
#define MAX_QUEUE_SIZE 100
@@ -57,8 +52,6 @@
pthread_t directoryVisitor;
pthread_mutex_t filenameQueueMutex;
std::queue<std::string> filenameQueue;
-std::queue<std::string> metadataQueue;
-std::queue<std::string> redirectsQueue;
bool isDirectoryVisitorRunningFlag = false;
pthread_mutex_t directoryVisitorRunningMutex;
@@ -68,9 +61,6 @@
bool uniqueNamespace = false;
magic_t magic;
-std::map<std::string, unsigned int> counters;
-char *data = NULL;
-unsigned int dataSize = 0;
void directoryVisitorRunning(bool value) {
@@ -136,204 +126,6 @@
} while (isDirectoryVisitorRunning() || !isFilenameQueueEmpty());
return retVal;
-}
-
-/* ArticleSource class */
-class ArticleSource : public zim::writer::ArticleSource {
- public:
- explicit ArticleSource();
- virtual const zim::writer::Article* getNextArticle();
- virtual zim::Blob getData(const std::string& aid);
- virtual std::string getMainPage();
-};
-
-ArticleSource::ArticleSource() {
-}
-
-std::string ArticleSource::getMainPage() {
- return welcome;
-}
-
-Article *article = NULL;
-const zim::writer::Article* ArticleSource::getNextArticle() {
- std::string path;
-
- if (article != NULL) {
- delete(article);
- }
-
- if (!metadataQueue.empty()) {
- path = metadataQueue.front();
- metadataQueue.pop();
- article = new MetadataArticle(path);
- } else if (!redirectsQueue.empty()) {
- std::string line = redirectsQueue.front();
- redirectsQueue.pop();
- article = new RedirectArticle(line);
- } else if (popFromFilenameQueue(path)) {
- do {
- article = new Article(path);
- } while (article && article->isInvalid() && popFromFilenameQueue(path));
- } else {
- article = NULL;
- }
-
- /* Count mimetypes */
- if (article != NULL && !article->isRedirect()) {
-
- if (isVerbose())
- std::cout << "Creating entry for " << article->getAid() << std::endl;
-
- std::string mimeType = article->getMimeType();
- if (counters.find(mimeType) == counters.end()) {
- counters[mimeType] = 1;
- } else {
- counters[mimeType]++;
- }
- }
-
- return article;
-}
-
-zim::Blob ArticleSource::getData(const std::string& aid) {
-
- if (isVerbose())
- std::cout << "Packing data for " << aid << std::endl;
-
- if (data != NULL) {
- delete(data);
- data = NULL;
- }
-
- if (aid.substr(0, 3) == "/M/") {
- std::string value;
-
- if ( aid == "/M/Language") {
- value = language;
- } else if (aid == "/M/Creator") {
- value = creator;
- } else if (aid == "/M/Publisher") {
- value = publisher;
- } else if (aid == "/M/Title") {
- value = title;
- } else if (aid == "/M/Description") {
- value = description;
- } else if ( aid == "/M/Date") {
- time_t t = time(0);
- struct tm * now = localtime( & t );
- std::stringstream stream;
- stream << (now->tm_year + 1900) << '-'
- << std::setw(2) << std::setfill('0') << (now->tm_mon + 1) << '-'
- << std::setw(2) << std::setfill('0') << now->tm_mday;
- value = stream.str();
- } else if ( aid == "/M/Counter") {
- std::stringstream stream;
- for (std::map<std::string, unsigned int>::iterator it =
counters.begin(); it != counters.end(); ++it) {
- stream << it->first << "=" << it->second << ";";
- }
- value = stream.str();
- }
-
- dataSize = value.length();
- data = new char[dataSize];
- memcpy(data, value.c_str(), dataSize);
- } else {
- std::string aidPath = directoryPath + "/" + aid;
-
- if (getMimeTypeForFile(aid).find("text/html") == 0) {
- std::string html = getFileContent(aidPath);
-
- /* Rewrite links (src|href|...) attributes */
- GumboOutput* output = gumbo_parse(html.c_str());
- GumboNode* root = output->root;
-
- std::map<std::string, bool> links;
- getLinks(root, links);
- std::map<std::string, bool>::iterator it;
- std::string aidDirectory = removeLastPathElement(aid, false, false);
-
- /* If a link appearch to be duplicated in the HTML, it will
- occurs only one time in the links variable */
- for(it = links.begin(); it != links.end(); it++) {
- if (!it->first.empty() && it->first[0] != '#' && it->first[0] != '?' &&
it->first.substr(0, 5) != "data:") {
- replaceStringInPlace(html, "\"" + it->first + "\"", "\"" +
computeNewUrl(aid, it->first) + "\"");
- }
- }
- gumbo_destroy_output(&kGumboDefaultOptions, output);
-
- dataSize = html.length();
- data = new char[dataSize];
- memcpy(data, html.c_str(), dataSize);
- } else if (getMimeTypeForFile(aid).find("text/css") == 0) {
- std::string css = getFileContent(aidPath);
-
- /* Rewrite url() values in the CSS */
- size_t startPos = 0;
- size_t endPos = 0;
- std::string url;
-
- while ((startPos = css.find("url(", endPos)) && startPos !=
std::string::npos) {
-
- /* URL delimiters */
- endPos = css.find(")", startPos);
- startPos = startPos + (css[startPos+4] == '\'' || css[startPos+4] ==
'"' ? 5 : 4);
- endPos = endPos - (css[endPos-1] == '\'' || css[endPos-1] == '"' ? 1 :
0);
- url = css.substr(startPos, endPos - startPos);
- std::string startDelimiter = css.substr(startPos-1, 1);
- std::string endDelimiter = css.substr(endPos, 1);
-
- if (url.substr(0, 5) != "data:") {
- /* Deal with URL with arguments (using '? ') */
- std::string path = url;
- size_t markPos = url.find("?");
- if (markPos != std::string::npos) {
- path = url.substr(0, markPos);
- }
-
- /* Embeded fonts need to be inline because Kiwix is
- otherwise not able to load same because of the
- same-origin security */
- std::string mimeType = getMimeTypeForFile(path);
- if (mimeType == "application/font-ttf" ||
- mimeType == "application/font-woff" ||
- mimeType == "application/vnd.ms-opentype" ||
- mimeType == "application/vnd.ms-fontobject") {
-
- try {
- std::string fontContent = getFileContent(directoryPath + "/" +
computeAbsolutePath(aid, path));
- replaceStringInPlaceOnce(css,
- startDelimiter + url + endDelimiter,
- startDelimiter + "data:" + mimeType +
";base64," +
- base64_encode(reinterpret_cast<const
unsigned char*>(fontContent.c_str()), fontContent.length()) +
- endDelimiter
- );
- } catch (...) {
- }
- } else {
-
- /* Deal with URL with arguments (using '? ') */
- if (markPos != std::string::npos) {
- endDelimiter = url.substr(markPos, 1);
- }
-
- replaceStringInPlaceOnce(css,
- startDelimiter + url + endDelimiter,
- startDelimiter + computeNewUrl(aid, path)
+ endDelimiter);
- }
- }
- }
-
- dataSize = css.length();
- data = new char[dataSize];
- memcpy(data, css.c_str(), dataSize);
- } else {
- dataSize = getFileSize(aidPath);
- data = new char[dataSize];
- memcpy(data, getFileContent(aidPath).c_str(), dataSize);
- }
- }
-
- return zim::Blob(data, dataSize);
}
/* Non ZIM related code */
@@ -554,16 +346,6 @@
directoryPath = directoryPath.substr(0, directoryPath.length()-1);
}
- /* Prepare metadata */
- metadataQueue.push("Language");
- metadataQueue.push("Publisher");
- metadataQueue.push("Creator");
- metadataQueue.push("Title");
- metadataQueue.push("Description");
- metadataQueue.push("Date");
- metadataQueue.push("Favicon");
- metadataQueue.push("Counter");
-
/* Check metadata */
if (!fileExists(directoryPath + "/" + welcome)) {
std::cerr << "zimwriterfs: unable to find welcome page at '" <<
directoryPath << "/" << welcome << "'. --welcome path/value must be relative to
HTML_DIRECTORY." << std::endl;
@@ -582,15 +364,8 @@
} else {
if (isVerbose())
std::cout << "Reading redirects CSV file " << redirectsPath << "..." <<
std::endl;
-
- std::ifstream in_stream;
- std::string line;
-
- in_stream.open(redirectsPath.c_str());
- while (std::getline(in_stream, line)) {
- redirectsQueue.push(line);
- }
- in_stream.close();
+
+ source.init_redirectsQueue_from_file(redirectsPath);
}
/* Init */
--
To view, visit https://gerrit.wikimedia.org/r/295517
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iee91484679bf401a693af1ca7e1c7e34f2c741d0
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr <[email protected]>
Gerrit-Reviewer: Kelson <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits