This is an automated email from the ASF dual-hosted git repository. msahyoun pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pdfbox-docs.git
The following commit(s) were added to refs/heads/master by this push: new 3fd6fc3 PDFBOX-5073: build references dynamically; update to https where supported 3fd6fc3 is described below commit 3fd6fc32180bd542a5eea72065c81e2ef3de9696 Author: Maruan Sahyoun <sahy...@fileaffairs.de> AuthorDate: Thu Jan 7 21:51:24 2021 +0100 PDFBOX-5073: build references dynamically; update to https where supported --- .../references/pdfbox-reference-alfresco.json | 6 ++++++ .../references/pdfbox-reference-apache-nutch.json | 6 ++++++ .../references/pdfbox-reference-apache-tika.json | 6 ++++++ .../_data/references/pdfbox-reference-jomic.json | 6 ++++++ .../references/pdfbox-reference-jpdfunit.json | 6 ++++++ .../_data/references/pdfbox-reference-liferay.json | 6 ++++++ .../references/pdfbox-reference-lucegene.json | 6 ++++++ .../_data/references/pdfbox-reference-lutece.json | 6 ++++++ .../references/pdfbox-reference-mmbase-lucene.json | 6 ++++++ .../_data/references/pdfbox-reference-opencms.json | 6 ++++++ .../pdfbox-reference-opensearchserver.json | 6 ++++++ .../_data/references/pdfbox-reference-orbeon.json | 6 ++++++ .../references/pdfbox-reference-pdfjuice.json | 6 ++++++ .../_data/references/pdfbox-reference-rewoo.json | 6 ++++++ .../references/pdfbox-reference-searchblox.json | 6 ++++++ .../pdfbox-reference-semantic-scholar.json | 6 ++++++ .../pdfbox-reference-simplex-repaginator.json | 6 ++++++ .../_data/references/pdfbox-reference-terrier.json | 6 ++++++ .../pdfbox-reference-triboni-ginkgo.json | 6 ++++++ content/references.md | 23 +++------------------- 20 files changed, 117 insertions(+), 20 deletions(-) diff --git a/content/_data/references/pdfbox-reference-alfresco.json b/content/_data/references/pdfbox-reference-alfresco.json new file mode 100644 index 0000000..e919af3 --- /dev/null +++ b/content/_data/references/pdfbox-reference-alfresco.json @@ -0,0 +1,6 @@ +{ + "name": "Alfresco", + "url": "https://www.alfresco.com/", + "license": "LGPL - commercial 
services/support/training is available", + "description": "Alfresco is an open source, open-standards content repository built by the most experienced content management team that includes the co-founder of Documentum." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-apache-nutch.json b/content/_data/references/pdfbox-reference-apache-nutch.json new file mode 100644 index 0000000..978b517 --- /dev/null +++ b/content/_data/references/pdfbox-reference-apache-nutch.json @@ -0,0 +1,6 @@ +{ + "name": "Apache Nutch", + "url": "https://nutch.apache.org/", + "license": "Apache License v2", + "description": "Apache Nutch is open source web-search software. It builds on Apache Lucene, adding web-specifics, such as a crawler, a link-graph database, parsers for HTML and other document formats, etc." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-apache-tika.json b/content/_data/references/pdfbox-reference-apache-tika.json new file mode 100644 index 0000000..09a3c80 --- /dev/null +++ b/content/_data/references/pdfbox-reference-apache-tika.json @@ -0,0 +1,6 @@ +{ + "name": "Apache Tika", + "url": "https://tika.apache.org/", + "license": "Apache License v2", + "description": "Apache Tika is a toolkit for detecting and extracting metadata and structured text content from various documents using existing parser libraries." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-jomic.json b/content/_data/references/pdfbox-reference-jomic.json new file mode 100644 index 0000000..4e34f50 --- /dev/null +++ b/content/_data/references/pdfbox-reference-jomic.json @@ -0,0 +1,6 @@ +{ + "name": "Jomic", + "url": "http://jomic.sourceforge.net/", + "license": "GPL", + "description": "Jomic is a viewer for comic book archives." 
+} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-jpdfunit.json b/content/_data/references/pdfbox-reference-jpdfunit.json new file mode 100644 index 0000000..823d395 --- /dev/null +++ b/content/_data/references/pdfbox-reference-jpdfunit.json @@ -0,0 +1,6 @@ +{ + "name": "JpdfUnit", + "url": "http://jpdfunit.sourceforge.net/", + "license": "Apache License v2", + "description": "pdfUnit is a framework for testing a generated pdf document with the JUnit Test Framework." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-liferay.json b/content/_data/references/pdfbox-reference-liferay.json new file mode 100644 index 0000000..03c9ace --- /dev/null +++ b/content/_data/references/pdfbox-reference-liferay.json @@ -0,0 +1,6 @@ +{ + "name": "Liferay Portal", + "url": "https://www.liferay.com/", + "license": "MIT", + "description": "Liferay Portal is an open source portal that helps organizations collaborate more efficiently by providing a consolidated view of disparate applications." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-lucegene.json b/content/_data/references/pdfbox-reference-lucegene.json new file mode 100644 index 0000000..7511a51 --- /dev/null +++ b/content/_data/references/pdfbox-reference-lucegene.json @@ -0,0 +1,6 @@ +{ + "name": "LuceGene", + "url": "http://gmod.org/wiki/LuceGene", + "license": "Artistic License", + "description": "LuceGene is an open-source document/object search and retrieval system specially tuned for bioinformatics text databases and documents." 
+} diff --git a/content/_data/references/pdfbox-reference-lutece.json b/content/_data/references/pdfbox-reference-lutece.json new file mode 100644 index 0000000..9357db4 --- /dev/null +++ b/content/_data/references/pdfbox-reference-lutece.json @@ -0,0 +1,6 @@ +{ + "name": "Lutece", + "url": "https://www.lutece.paris.fr/", + "license": "BSD-like", + "description": "Lutece is a portal engine which allows you to easily create your websites or intranets based upon HTML, XML content." +} diff --git a/content/_data/references/pdfbox-reference-mmbase-lucene.json b/content/_data/references/pdfbox-reference-mmbase-lucene.json new file mode 100644 index 0000000..79c9d9d --- /dev/null +++ b/content/_data/references/pdfbox-reference-mmbase-lucene.json @@ -0,0 +1,6 @@ +{ + "name": "MMBase Lucene Module", + "url": "http://mmapps.sourceforge.net/lucenemodule/", + "license": "MPL", + "description": "MMBase Lucene Module is a plugin (module) for the MMBase content management system that enables Lucene full text search through its content, and thanks to PDFBox also PDF content." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-opencms.json b/content/_data/references/pdfbox-reference-opencms.json new file mode 100644 index 0000000..add7e4e --- /dev/null +++ b/content/_data/references/pdfbox-reference-opencms.json @@ -0,0 +1,6 @@ +{ + "name": "OpenCms", + "url": "http://www.opencms.org/", + "license": "LGPL", + "description": "OpenCms is a professional level Open Source Website Content Management System." 
+} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-opensearchserver.json b/content/_data/references/pdfbox-reference-opensearchserver.json new file mode 100644 index 0000000..b9d96ec --- /dev/null +++ b/content/_data/references/pdfbox-reference-opensearchserver.json @@ -0,0 +1,6 @@ +{ + "name": "OpenSearchServer", + "url": "https://www.opensearchserver.com/", + "license": "GPLv3", + "description": "An open source search engine and crawler based on best open source technologies. It is a modern search engine and a suite of high-powered full text search algorithms." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-orbeon.json b/content/_data/references/pdfbox-reference-orbeon.json new file mode 100644 index 0000000..ba5de01 --- /dev/null +++ b/content/_data/references/pdfbox-reference-orbeon.json @@ -0,0 +1,6 @@ +{ + "name": "Orbeon PresentationServer", + "url": "https://www.orbeon.com/", + "license": "LGPL", + "description": "Orbeon PresentationServer (OPS) is an open source J2EE-based platform for XML-centric web applications. OPS is built around XHTML, XForms, XSLT, XML pipelines, and Web Services, which makes it ideal for applications that capture, process and present XML data. Commercial consulting/training/support is available through orbeon." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-pdfjuice.json b/content/_data/references/pdfbox-reference-pdfjuice.json new file mode 100644 index 0000000..3fe62fb --- /dev/null +++ b/content/_data/references/pdfbox-reference-pdfjuice.json @@ -0,0 +1,6 @@ +{ + "name": "PDFJuice", + "url": "https://github.com/andrescg2sj/PDFJuice", + "license": "Apache License v2", + "description": "This project provides some tools that help the user to extract structured information from PDF documents. Currently, the program is able to export them to HTML." 
+} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-rewoo.json b/content/_data/references/pdfbox-reference-rewoo.json new file mode 100644 index 0000000..f6282c0 --- /dev/null +++ b/content/_data/references/pdfbox-reference-rewoo.json @@ -0,0 +1,6 @@ +{ + "name": "REWOO Scope", + "url": "https://www.rewoo.de/", + "license": "Commercial", + "description": "REWOO Scope is an Enterprise Content Management (ECM) software to organize, structure and consolidate enterprise data. Apache PDFBox is an integral part to read and index PDF documents." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-searchblox.json b/content/_data/references/pdfbox-reference-searchblox.json new file mode 100644 index 0000000..48247f7 --- /dev/null +++ b/content/_data/references/pdfbox-reference-searchblox.json @@ -0,0 +1,6 @@ +{ + "name": "SearchBlox", + "url": "https://www.searchblox.com/", + "license": "Commercial", + "description": "SearchBlox is a high-performance corporate search software designed for the Java 2 Enterprise Edition (J2EE) platform." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-semantic-scholar.json b/content/_data/references/pdfbox-reference-semantic-scholar.json new file mode 100644 index 0000000..3c29a3d --- /dev/null +++ b/content/_data/references/pdfbox-reference-semantic-scholar.json @@ -0,0 +1,6 @@ +{ + "name": "Semantic Scholar", + "url": "https://www.semanticscholar.org/", + "license": "Web Based", + "description": "Semantic Scholar is a new service from AI2 for scientific literature search and discovery, focusing on semantics and textual understanding." 
+} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-simplex-repaginator.json b/content/_data/references/pdfbox-reference-simplex-repaginator.json new file mode 100644 index 0000000..49beb1a --- /dev/null +++ b/content/_data/references/pdfbox-reference-simplex-repaginator.json @@ -0,0 +1,6 @@ +{ + "name": "SimplexRepaginator", + "url": "https://github.com/esialb/simplexrepaginator", + "license": "Apache License v2", + "description": "Simplex Repaginator converts simplex-scanned PDFs into properly duplex-paginated PDFs and vice versa." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-terrier.json b/content/_data/references/pdfbox-reference-terrier.json new file mode 100644 index 0000000..59ce2c5 --- /dev/null +++ b/content/_data/references/pdfbox-reference-terrier.json @@ -0,0 +1,6 @@ +{ + "name": "Terrier", + "url": "http://terrier.org/", + "license": "MPL", + "description": "Terrier is software for the rapid development of Web, intranet and desktop search engines." +} \ No newline at end of file diff --git a/content/_data/references/pdfbox-reference-triboni-ginkgo.json b/content/_data/references/pdfbox-reference-triboni-ginkgo.json new file mode 100644 index 0000000..9e455ea --- /dev/null +++ b/content/_data/references/pdfbox-reference-triboni-ginkgo.json @@ -0,0 +1,6 @@ +{ + "name": "Triboni GinkGO", + "url": "https://triboni.com/", + "license": "Commercial", + "description": "Triboni GinkGO is a highly scalable J2EE services platform that is based on a simple XML business object definition and scripting language. Together with XSLT, content centric web applications can be configured in a very short time." 
+} \ No newline at end of file diff --git a/content/references.md b/content/references.md index 4c79298..57bc857 100644 --- a/content/references.md +++ b/content/references.md @@ -29,26 +29,9 @@ Please file an [improvement issue](https://issues.apache.org/jira/browse/PDFBOX) | Project Name | License | Project Description | | --- | --- | --- | -| [Alfresco](http://www.alfresco.org/) | LGPL - commercial services/support/training is available | Alfresco is an open source, open-standards content repository built by the most experienced content management team that includes the co-founder of Documentum.| -| [Apache Nutch](http://nutch.apache.org/) | Apache License v2 | Apache Nutch is open source web-search software. It builds on Apache Lucene, adding web-specifics, such as a crawler, a link-graph database, parsers for HTML and other document formats, etc.| -| [Apache Tika](http://tika.apache.org/) | Apache License v2 | Apache Tika is a toolkit for detecting and extracting metadata and structured text content from various documents using existing parser libraries.| -| [Canoo Webtest](http://webtest.canoo.com/webtest/manual/WebTestHome.html) | BSD Like | Free OpenSource tool for XP-style acceptance testing of Java-based Web applications.| -| [ECM REWOO Scope](http://www.rewoo.de/) | Commercial | REWOO Scope is an Enterprise Content Management (ECM) software to organize, structure and consolidate enterprise data. 
Apache PDFBox is an integral part to read and index PDF documents.| -| [Jomic](http://jomic.sourceforge.net/) | GPL | Jomic is a viewer for comic book archives.| -| [JpdfUnit](http://jpdfunit.sourceforge.net/) | Apache License v2 | pdfUnit is a framework for testing a generated pdf document with the JUnit Test Framework.| -| [Liferay Portal](http://www.liferay.com/) | MIT | Liferay Portal is an open source portal that helps organizations collaborate more efficiently by providing a consolidated view of disparate applications.| -| [LuceGene](http://gmod.org/wiki/LuceGene) | Artistic License | LuceGene is an open-source document/object search and retrieval system specially tuned for bioinformatics text databases and documents.| -| [Lutece](http://www.lutece.paris.fr/) | BSD-like | Lutece is a portal engine which allows you to easily create your websites or intranets based upon HTML,XML content.| -| [MMBase Lucene Module](http://mmapps.sourceforge.net/lucenemodule/) | MPL | Lucenemodule is a plugin (module) for the MMBase content management system that enables Lucene full text search through it's content, and thanks to PDFBox also PDF content.| -| [OpenCms](http://www.opencms.org/) | LGPL | OpenCms is a professional level Open Source Website Content Management System.| -| [OpenSearchServer](http://www.open-search-server.com/) | GPLv3 | An open source search engine and crawler based on best open source technologies. It is a modern search engine and a suite of high-powered full text search algorithms.| -| [Orbeon PresentationServer](http://forge.objectweb.org/projects/ops) | LGPL | Orbeon PresentationServer (OPS) is an open source J2EE-based platform for XML-centric web applications. OPS is built around XHTML, XForms, XSLT, XML pipelines, and Web Services, which makes it ideal for applications that capture, process and present XML data. 
Commercial consulting/training/support is available through orbeon.| -| [PDFJuice](https://github.com/andrescg2sj/PDFJuice) | Apache License 2.0 | This project provides some tools that help the user to extract structured information form PDF documents. Currently, the program is able to export them to HTML. | -| [SearchBlox](http://www.searchblox.com/) | Commercial | SearchBlox is a high-performance corporate search software designed for the Java 2 Enterprise Edition (J2EE) platform.| -| [Semantic Scholar](https://www.semanticscholar.org)| Web Based | Semantic Scholar is a new service from AI2 for scientific literature search and discovery, focusing on semantics and textual understanding. -| [SimplexRepaginator](http://www.simplexrepaginator.com/) | Apache License v2 | Simplex Repaginator converts simplex-scanned PDFs into properly duplex-paginated PDFs and vice versa. | -| [Terrier](http://ir.dcs.gla.ac.uk/terrier/) | MPL | Terrier is software for the rapid development of Web, intranet and desktop search engines.| -| [Triboni GinkGO](http://www.triboni.com/triboni/exec/x/int.triboni.website.display/xsl/display/name/Default/chapter/ginkgo/language/en) | Commercial | Triboni GinkGO is a highly scalable J2EE services platform that is based on a simple XML business object defintion and scripting language. Toghether with XSLT content centric web applications can be configured in a very short time.| +{% for reference in references -%} +| [{{ reference[1].name }}]({{ reference[1].url }}) | {{ reference[1].license }} | {{ reference[1].description }} | +{% endfor %} ## Articles/Books