This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nutch-site.git


The following commit(s) were added to refs/heads/main by this push:
     new 4efc5a9  NUTCH-1999 Add /robots.txt to Nutch site (#1)
4efc5a9 is described below

commit 4efc5a9aca57430549b44a30191de041224ab865
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Thu Sep 8 14:19:10 2022 +0200

    NUTCH-1999 Add /robots.txt to Nutch site (#1)
    
    - add robots.txt
    - add template to generate sitemap
    - include sitemap in robots.txt
---
 config.toml                  |  2 ++
 content/robots.txt           |  4 ++++
 layouts/_default/sitemap.xml | 10 ++++++++++
 3 files changed, 16 insertions(+)

diff --git a/config.toml b/config.toml
index cc8832a..a78ef2d 100644
--- a/config.toml
+++ b/config.toml
@@ -11,6 +11,7 @@ Paginate = 4
 unsafe = true # allow raw HTML in markdown content
 
 [Params]
+  siteBaseURL = "https://nutch.apache.org"
   RSSLink = "/index.xml"
   author = "Apache Nutch Project Management Committee"
   github = "https://github.com/apache/nutch"
@@ -41,3 +42,4 @@ unsafe = true # allow raw HTML in markdown content
     name = "Apache"
     weight = -100
     url = "/apache/"
+
diff --git a/content/robots.txt b/content/robots.txt
new file mode 100644
index 0000000..086e6ad
--- /dev/null
+++ b/content/robots.txt
@@ -0,0 +1,4 @@
+User-agent: *
+Allow: /
+
+Sitemap: https://nutch.apache.org/sitemap.xml
\ No newline at end of file
diff --git a/layouts/_default/sitemap.xml b/layouts/_default/sitemap.xml
new file mode 100644
index 0000000..006e6ba
--- /dev/null
+++ b/layouts/_default/sitemap.xml
@@ -0,0 +1,10 @@
+{{ printf "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>" | safeHTML }}
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+  xmlns:xhtml="http://www.w3.org/1999/xhtml">
+  <url/>
+  {{ range .Data.Pages }}{{ if ne .Params.sitemapExclude true }}
+    <url>{{ $url := urls.Parse .Permalink }}
+      <loc>{{ .Site.Params.SiteBaseURL }}{{ $url.Path }}</loc>{{ if not .Lastmod.IsZero }}
+      <lastmod>{{ safeHTML ( .Lastmod.Format "2006-01-02T15:04:05-07:00" ) }}</lastmod>{{ end }}
+    </url>{{ end }}{{ end }}
+</urlset>

Reply via email to