#!/bin/bash
# sitemap.sh by heuzef.com (CC-BY-NC)
# Usage : sh sitemap.sh www.heuzef.com
#
# Provenance: commit a848422b94109fd9d9cc1a953d1602ce10a68ac9 by heuzef,
# Tue, 6 Feb 2018 17:51:52 +0100, "Add sitemap.sh script (sitemap generator)",
# originally added as sitemap/sitemap.sh.
#
# Crawls a site with `wget --spider --recursive` and writes two files into
# the current directory:
#   urls_<host>.txt     every distinct URL found in the wget log, sorted
#   sitemap_<host>.xml  XML sitemap listing only the URLs that end in "/",
#                       ".html" or ".htm"
# <host> is the argument with any "scheme://" prefix and any "/path" suffix
# removed, so a bare domain such as www.heuzef.com is used unchanged.
#
# Only POSIX sh constructs (plus mktemp) are used, because the documented
# invocation is `sh sitemap.sh ...`.
#
# Exit status: 0 on success, 1 on a runtime failure, 2 on a usage error.

# Print the command-line synopsis on stderr.
usage() {
  printf 'Usage: sh %s <domain-or-url>\n' "${0##*/}" >&2
}

# Print an error message on stderr, prefixed with the script name.
fail() {
  printf 'sitemap.sh: %s\n' "$*" >&2
}

# Read a `wget --no-verbose` log on stdin and print, sorted and de-duplicated,
# the first whitespace-delimited token that follows the first "URL:" marker of
# each line. Lines without the marker, or with nothing after it, are dropped.
extract_urls() {
  awk -F 'URL:' '
    NF > 1 {
      n = split($2, tok, " ")
      if (n > 0) print tok[1]
    }
  ' | LC_ALL=C sort -u
}

# Read one URL per line on stdin and print an XML sitemap on stdout.
# Only URLs ending in "/", ".html" or ".htm" are listed; the five XML special
# characters are entity-escaped so the document stays well-formed.
write_sitemap() {
  printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
  printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  # The apostrophe is passed through -v because it cannot appear literally
  # inside the single-quoted awk program. "&" must be escaped first so the
  # entities produced by the later substitutions are not escaped again.
  awk -v apos="'" '
    /\/$/ || /\.html?$/ {
      gsub(/&/, "\\&amp;")
      gsub(/</, "\\&lt;")
      gsub(/>/, "\\&gt;")
      gsub(/"/, "\\&quot;")
      gsub(apos, "\\&apos;")
      print "  <url><loc>" $0 "</loc></url>"
    }
  '
  printf '%s\n' '</urlset>'
}

# Entry point.
# Arguments: $1 - domain or URL to crawl
# Outputs:   progress and a listing of the result files on stdout,
#            diagnostics on stderr
# Returns:   0 on success, 1 on runtime failure, 2 on usage error
#
# The body is a subshell "( ... )" so that its variables, its EXIT trap and
# its `exit` calls stay local to this function.
main() (
  site=${1:-}
  if [ -z "$site" ]; then
    usage
    exit 2
  fi

  # Derive the name used in the output file names.
  host=${site#*://}
  host=${host%%/*}
  case "$host" in
    '' | . | .. | -*)
      fail "cannot derive a host name from '$site'"
      exit 2
      ;;
  esac

  if ! command -v wget >/dev/null 2>&1; then
    fail 'wget is required but was not found'
    exit 1
  fi

  urls_file="urls_${host}.txt"
  sitemap_file="sitemap_${host}.xml"

  # All crawl scratch data (the log and anything wget writes while crawling)
  # lives in a private directory that is removed however this subshell ends,
  # instead of a fixed get_urls.txt and an `rm -r` in the caller's directory.
  workdir=$(mktemp -d) || {
    fail 'could not create a temporary directory'
    exit 1
  }
  trap 'rm -rf -- "$workdir"' EXIT
  trap 'exit 1' HUP INT TERM
  crawl_log="$workdir/get_urls.txt"

  echo "Generating ..."
  echo

  # A non-zero wget status is tolerated on purpose: wget reports failure when
  # any single request got an error response, e.g. one broken link, yet the
  # log still holds the URLs that were reached. Success is judged below by
  # whether any URL was collected.
  wget --spider --recursive --level=inf --no-verbose \
    --directory-prefix="$workdir" \
    --output-file="$crawl_log" \
    -- "$site"
  wget_status=$?

  if [ ! -f "$crawl_log" ]; then
    fail "wget produced no log (exit status $wget_status)"
    exit 1
  fi

  if ! extract_urls < "$crawl_log" > "$urls_file"; then
    fail "could not write $urls_file"
    exit 1
  fi
  if [ ! -s "$urls_file" ]; then
    rm -f -- "$urls_file"
    fail "no URLs found for '$site' (wget exit status $wget_status)"
    exit 1
  fi

  if ! write_sitemap < "$urls_file" > "$sitemap_file"; then
    fail "could not write $sitemap_file"
    exit 1
  fi

  echo "Sitemap generated OK :"
  echo
  ls -lh -- "$urls_file" "$sitemap_file"
)

main "$@"