You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

30 lines
1.0 KiB

#!/bin/bash
# sitemap.sh by heuzef.com (CC-BY-NC)
# Usage : sh sitemap.sh www.heuzef.com
#
# Crawls <domain> with `wget --spider --recursive` and writes, in the current
# directory:
#   urls_<domain>.txt     every unique URL found in the wget log
#   sitemap_<domain>.xml  a sitemap of the URLs ending in "/", ".html" or ".htm"
#
# Exit status: 0 on success, 2 on a missing/invalid argument, 1 on failure.
#
# Only POSIX sh constructs are used on purpose: the documented usage runs the
# script through `sh`, which is not bash on every system.

set -u

# Print the usage line on stderr.
usage() {
  printf 'Usage: %s <domain>   (e.g. www.heuzef.com)\n' "${0##*/}" >&2
}

# Read a `wget --no-verbose` log on stdin and print the unique URLs it
# mentions, one per line, sorted.
# For every line containing "URL:", the first whitespace-delimited token after
# that marker is printed (this covers both "URL:http://x" and "URL: http://x").
extract_urls() {
  awk -F 'URL:' 'NF > 1 { if (split($2, field, " ") > 0) print field[1] }' \
    | LC_ALL=C sort -u
}

# Read URLs on stdin (one per line) and print one <url> element for each URL
# ending in "/", ".html" or ".htm"; every other URL is dropped.
# The five XML special characters are entity-escaped ("&" first, so the
# entities produced by the later rules are not escaped again).
urls_to_entries() {
  grep -E '(/|\.html?)$' \
    | sed -e 's/&/\&amp;/g' \
      -e 's/</\&lt;/g' \
      -e 's/>/\&gt;/g' \
      -e 's/"/\&quot;/g' \
      -e "s/'/\\&apos;/g" \
      -e 's|.*|<url><loc>&</loc></url>|'
}

# Entry point.
# Arguments: $1 - bare domain to crawl (no "/" allowed: it is used verbatim in
#                 the output file names and as the name of the directory wget
#                 creates, which is removed afterwards).
main() {
  sitedomain=${1:-}
  case "$sitedomain" in
    '' | -* | */* | . | ..)
      usage
      return 2
      ;;
  esac

  if ! command -v wget >/dev/null 2>&1; then
    echo "sitemap.sh: wget is required but was not found" >&2
    return 1
  fi

  urls_file="urls_${sitedomain}.txt"
  sitemap_file="sitemap_${sitedomain}.xml"
  # Private log file, so an existing get_urls.txt in the current directory is
  # never overwritten.
  crawl_log=$(mktemp) || return 1

  echo "Generating ..."
  echo

  # The wget status is ignored on purpose: the URL list is taken from the log,
  # and an empty list is reported as an error just below.
  wget --spider --recursive --level=inf --no-verbose \
    --output-file="$crawl_log" "$sitedomain" || :

  extract_urls < "$crawl_log" > "$urls_file"
  rm -f -- "$crawl_log"

  # Remove the directory named after the domain that the recursive crawl
  # leaves in the current directory.
  if [ -d "$sitedomain" ]; then
    rm -r -- "./$sitedomain"
  fi

  if [ ! -s "$urls_file" ]; then
    rm -f -- "$urls_file"
    echo "sitemap.sh: no URL could be collected from $sitedomain" >&2
    return 1
  fi

  {
    # Header emitted as one line, its parts separated by single spaces.
    printf '%s %s %s %s %s\n' \
      '<?xml version="1.0" encoding="UTF-8"?><urlset' \
      'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' \
      'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' \
      'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9' \
      'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">'
    urls_to_entries < "$urls_file"
    echo "</urlset>"
  } > "$sitemap_file" || return 1

  echo "Sitemap generated OK :"
  echo
  ls -lh -- "$urls_file" "$sitemap_file"
}

main "$@"