This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nutch-site.git
The following commit(s) were added to refs/heads/main by this push:
     new 4efc5a9 NUTCH-1999 Add /robots.txt to Nutch site (#1)
4efc5a9 is described below

commit 4efc5a9aca57430549b44a30191de041224ab865
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Thu Sep 8 14:19:10 2022 +0200

    NUTCH-1999 Add /robots.txt to Nutch site (#1)

    - add robots.txt
    - add template to generate sitemap
    - include sitemap in robots.txt
---
 config.toml                  |  2 ++
 content/robots.txt           |  4 ++++
 layouts/_default/sitemap.xml | 10 ++++++++++
 3 files changed, 16 insertions(+)

diff --git a/config.toml b/config.toml
index cc8832a..a78ef2d 100644
--- a/config.toml
+++ b/config.toml
@@ -11,6 +11,7 @@ Paginate = 4
 unsafe = true # allow raw HTML in markdown content
 
 [Params]
+  siteBaseURL = "https://nutch.apache.org"
   RSSLink = "/index.xml"
   author = "Apache Nutch Project Management Committee"
   github = "https://github.com/apache/nutch"
@@ -41,3 +42,4 @@ unsafe = true # allow raw HTML in markdown content
   name = "Apache"
   weight = -100
   url = "/apache/"
+
diff --git a/content/robots.txt b/content/robots.txt
new file mode 100644
index 0000000..086e6ad
--- /dev/null
+++ b/content/robots.txt
@@ -0,0 +1,4 @@
+User-agent: *
+Allow: /
+
+Sitemap: https://nutch.apache.org/sitemap.xml
\ No newline at end of file
diff --git a/layouts/_default/sitemap.xml b/layouts/_default/sitemap.xml
new file mode 100644
index 0000000..006e6ba
--- /dev/null
+++ b/layouts/_default/sitemap.xml
@@ -0,0 +1,10 @@
+{{ printf "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>" | safeHTML }}
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+  xmlns:xhtml="http://www.w3.org/1999/xhtml">
+  <url/>
+  {{ range .Data.Pages }}{{ if ne .Params.sitemapExclude true }}
+  <url>{{ $url := urls.Parse .Permalink }}
+    <loc>{{ .Site.Params.SiteBaseURL }}{{ $url.Path }}</loc>{{ if not .Lastmod.IsZero }}
+    <lastmod>{{ safeHTML ( .Lastmod.Format "2006-01-02T15:04:05-07:00" ) }}</lastmod>{{ end }}
+  </url>{{ end }}{{ end }}
+</urlset>