#!/bin/bash
# sitemap.sh by heuzef.com (CC-BY-NC)
# Usage : bash sitemap.sh www.heuzef.com
#
# Crawls the given domain with `wget --spider` and writes, in the current
# directory:
#   urls_<domain>.txt     every unique URL found in wget's log
#   sitemap_<domain>.xml  a sitemap listing the URLs that end in "/",
#                         ".html" or ".htm"
#
# `set -e` is deliberately not used: the wget crawl is allowed to exit
# non-zero (see main) and the script still builds the sitemap from whatever
# was logged. Critical steps are checked explicitly instead.

#######################################
# Print the unique URLs recorded in a wget log, one per line, sorted.
# For every line containing "URL:", the first whitespace-delimited token
# after that marker is taken as the URL.
# Arguments: $1 - path to the wget log file
# Outputs:   sorted, de-duplicated URLs on stdout
#######################################
extract_urls() {
  awk -F 'URL:' '
    NF > 1 {
      # split() with " " uses default whitespace splitting, so leading
      # blanks after "URL:" are ignored.
      n = split($2, tok, " ")
      if (n > 0) print tok[1]
    }
  ' < "$1" | sort -u
}

#######################################
# Print a sitemap XML document for the page-like URLs of a URL list.
# Only URLs ending in "/", ".html" or ".htm" are kept; XML special
# characters in them are escaped before being wrapped in <url><loc>.
# Arguments: $1 - path to a file with one URL per line
# Outputs:   the XML document on stdout
#######################################
build_sitemap() {
  local urls_file=$1

  printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
  printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  # "&" must be escaped first so the entities added afterwards are not
  # escaped a second time. grep exiting 1 (no match) just yields an empty
  # <urlset>.
  grep -E '(/|\.html?)$' < "$urls_file" \
    | sed -e 's/&/\&amp;/g' \
          -e 's/</\&lt;/g' \
          -e 's/>/\&gt;/g' \
          -e 's/"/\&quot;/g' \
          -e "s/'/\&apos;/g" \
          -e 's|^|<url><loc>|' \
          -e 's|$|</loc></url>|'
  printf '%s\n' '</urlset>'
}

#######################################
# Entry point: crawl the domain, write the URL list and the sitemap, clean
# up the crawl leftovers and list the generated files.
# Arguments: $1 - domain to crawl (e.g. www.heuzef.com)
# Returns:   0 on success, 2 on bad usage, 1 on any other failure
#######################################
main() {
  local sitedomain=${1:-}

  # Reject a missing argument, and one starting with "-" that wget or rm
  # could mistake for an option.
  if [[ -z "$sitedomain" || "$sitedomain" == -* ]]; then
    printf 'Usage: %s <domain>\n' "${0##*/}" >&2
    return 2
  fi

  local log_file="get_urls.txt"
  local urls_file="urls_${sitedomain}.txt"
  local sitemap_file="sitemap_${sitedomain}.xml"
  local rc=0

  echo "Generating ..."
  echo

  # A non-zero wget status is reported but not fatal: the log may still
  # contain the URLs that were reachable.
  wget --spider --recursive --level=inf --no-verbose \
    --output-file="$log_file" "$sitedomain" || rc=$?
  if (( rc != 0 )); then
    printf 'warning: wget exited with status %d\n' "$rc" >&2
  fi
  if [[ ! -s "$log_file" ]]; then
    printf 'error: wget produced no log (%s)\n' "$log_file" >&2
    return 1
  fi

  extract_urls "$log_file" > "$urls_file" || return 1
  build_sitemap "$urls_file" > "$sitemap_file" || return 1

  rm -f -- "$log_file"
  # Remove the directory named after the domain if the crawl left one behind.
  if [[ -d "$sitedomain" ]]; then
    rm -r -- "$sitedomain"
  fi

  echo "Sitemap generated OK :"
  echo
  ls -lrth -- "$urls_file" "$sitemap_file"
}

main "$@"