You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
1.0 KiB
30 lines
1.0 KiB
#!/bin/bash
#
# sitemap.sh by heuzef.com (CC-BY-NC)
#
# Crawls a website with wget in spider mode and writes two files in the
# current directory:
#   urls_<host>.txt     every unique URL reported in the wget log
#   sitemap_<host>.xml  a sitemaps.org <urlset> listing the URLs that end
#                       in "/", ".html" or ".htm"
#
# Usage : sh sitemap.sh www.heuzef.com
#
# Arguments: $1 - domain (or http/https URL) to crawl
# Exit code: 0 on success, 1 on failure, 2 on missing argument
#
# The script sticks to POSIX sh constructs because the documented
# invocation runs it through "sh" rather than bash.

set -u

# Print an error message on stderr and abort.
die() {
  printf 'sitemap.sh: %s\n' "$*" >&2
  exit 1
}

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  printf 'Usage: %s <domain>\n' "${0##*/}" >&2
  exit 2
fi

sitedomain=$1

# Output files are named after the host only, so that passing a full URL
# (scheme and/or path) does not put slashes into the file names.
# For a bare domain, host is identical to the argument.
host=${sitedomain#http://}
host=${host#https://}
host=${host%%/*}
case "$host" in
  '' | -* | .*) die "invalid site: $sitedomain" ;;
esac

command -v wget >/dev/null 2>&1 || die "wget is required but was not found in PATH"

urls_file="urls_${host}.txt"
sitemap_file="sitemap_${host}.xml"

# The crawl log and any directory tree wget creates go to a private
# temporary directory, so nothing in the caller's working directory is
# overwritten or deleted during cleanup.
workdir=$(mktemp -d) || die "cannot create a temporary directory"
trap 'rm -rf -- "$workdir"' EXIT
trap 'exit 1' HUP INT TERM
crawl_log="$workdir/wget.log"

echo "Generating ..."
echo

# wget's exit status is recorded but not treated as fatal: a recursive
# spider run commonly ends non-zero (for instance when a link answers with
# an error) even though the log still lists the pages that were found.
wget --spider --recursive --level=inf --no-verbose \
  --directory-prefix="$workdir" --output-file="$crawl_log" \
  "$sitedomain"
wget_status=$?

[ -f "$crawl_log" ] || die "wget produced no log (exit status $wget_status)"

# Keep the first whitespace-delimited word that follows "URL:" on each log
# line, then de-duplicate.
awk -F 'URL:' 'NF > 1 && split($2, word, " ") > 0 { print word[1] }' "$crawl_log" \
  | sort -u > "$urls_file" || die "cannot write $urls_file"

[ -s "$urls_file" ] || die "no URL found for $sitedomain (wget exit status $wget_status)"

{
  cat <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<urlset
  xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
EOF
  # Only directory-style and HTML pages go into the sitemap. The five XML
  # special characters are entity-escaped ("&" first, so the entities
  # produced by the later rules are not escaped again) before each URL is
  # wrapped in <url><loc>...</loc></url>.
  grep -E '(/|\.html?)$' "$urls_file" \
    | sed -e 's/&/\&amp;/g' \
          -e 's/</\&lt;/g' \
          -e 's/>/\&gt;/g' \
          -e "s/'/\&apos;/g" \
          -e 's/"/\&quot;/g' \
          -e 's|.*|<url><loc>&</loc></url>|'
  echo "</urlset>"
} > "$sitemap_file" || die "cannot write $sitemap_file"

echo "Sitemap generated OK :"
echo
ls -lh -- "$urls_file" "$sitemap_file"