#!/bin/bash
# sitemap.sh by heuzef.com (CC-BY-NC)
#
# Crawl a web site with wget in spider mode and write an XML sitemap that
# lists every crawled URL ending in "/", ".html" or ".htm".
#
# Usage : sh sitemap.sh www.heuzef.com
#
# Files written to the current directory (<host> is the host part of the
# argument):
#   urls_<host>.txt     sorted, de-duplicated list of all crawled URLs
#   sitemap_<host>.xml  the generated sitemap
#
# The code avoids bash-only syntax because the documented usage runs it
# through "sh".

set -u

#######################################
# Print the usage message on stderr.
#######################################
usage() {
  printf 'Usage : sh sitemap.sh <domain>   (e.g. sh sitemap.sh www.heuzef.com)\n' >&2
}

#######################################
# Reduce a domain or URL to its host part.
# Arguments: $1 - domain or URL, e.g. "www.heuzef.com" or "https://host/path"
# Outputs:   the text between an optional "scheme://" and the first "/"
#######################################
site_host() {
  local host
  host=${1#*://}
  host=${host%%/*}
  printf '%s\n' "$host"
}

#######################################
# Extract the unique URLs recorded in a "wget --no-verbose" log.
# Arguments: $1 - path to the wget log
# Outputs:   one URL per line on stdout, sorted, without duplicates
#######################################
extract_urls() {
  # Only lines containing the "URL:" marker are of interest; the address is
  # the first whitespace-delimited word after it (whitespace may or may not
  # separate the marker from the address).
  awk -F 'URL:' 'NF > 1 && split($2, word, " ") > 0 { print word[1] }' "$1" \
    | sort -u
}

#######################################
# Turn a URL list into a sitemap document.
# Only URLs ending in "/", ".html" or ".htm" are kept.
# Arguments: $1 - file with one URL per line
# Outputs:   the XML document on stdout
#######################################
build_sitemap() {
  printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
  printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  # "&" must be escaped first, otherwise the entities produced by the later
  # substitutions would be escaped a second time. In an awk replacement
  # string a bare "&" means "the matched text", hence the "\\&".
  awk -v apos="'" '
    /(\/|\.html?)$/ {
      gsub(/&/, "\\&amp;")
      gsub(/</, "\\&lt;")
      gsub(/>/, "\\&gt;")
      gsub(/"/, "\\&quot;")
      gsub(apos, "\\&apos;")
      print "  <url><loc>" $0 "</loc></url>"
    }
  ' "$1"
  printf '%s\n' '</urlset>'
}

#######################################
# Crawl the site and write the URL list and the sitemap.
# Globals:   crawl_log (written) - temporary wget log; kept global so the
#            EXIT trap can still see it after this function has returned
# Arguments: $1 - domain (or URL) to crawl
# Returns:   0 on success, 2 on a usage error, 1 on any other failure
#######################################
main() {
  if [ "$#" -lt 1 ]; then
    usage
    return 2
  fi

  local sitedomain host url_list sitemap
  sitedomain=$1
  host=$(site_host "$sitedomain")
  if [ -z "$host" ]; then
    usage
    return 2
  fi
  url_list="urls_${host}.txt"
  sitemap="sitemap_${host}.xml"

  if ! command -v wget >/dev/null 2>&1; then
    printf 'error: wget is required but was not found\n' >&2
    return 1
  fi

  echo "Generating ..."
  echo

  crawl_log=$(mktemp) || {
    printf 'error: cannot create a temporary file\n' >&2
    return 1
  }
  trap 'rm -f -- "$crawl_log"' EXIT

  # wget can exit non-zero even when most of the crawl worked (for example
  # after a single broken link), so a failure is reported but not fatal;
  # the emptiness check below catches a crawl that found nothing.
  if ! wget --spider --recursive --level=inf --no-verbose \
    --output-file="$crawl_log" -- "$sitedomain"; then
    printf 'warning: wget reported errors while crawling %s\n' "$sitedomain" >&2
  fi

  extract_urls "$crawl_log" > "$url_list" || return 1
  if [ ! -s "$url_list" ]; then
    printf 'error: no URL found for %s\n' "$sitedomain" >&2
    return 1
  fi

  build_sitemap "$url_list" > "$sitemap" || return 1

  # Remove the directory tree named after the host that the recursive crawl
  # may leave in the working directory.
  if [ -d "$host" ]; then
    rm -r -- "$host"
  fi

  echo "Sitemap generated OK :"
  echo
  ls -larth -- "$url_list" "$sitemap"
}

main "$@"