parent
f5e162e2e2
commit
a848422b94
1 changed files with 30 additions and 0 deletions
@ -0,0 +1,30 @@ |
||||
#!/bin/bash
# sitemap.sh by heuzef.com (CC-BY-NC)
#
# Crawls a website with wget in spider mode and writes two files in the
# current directory:
#   urls_<domain>.txt    - every distinct URL reported in the wget log
#   sitemap_<domain>.xml - sitemap listing the URLs that end in "/",
#                          ".html" or ".htm"
# All crawl artefacts (wget log, directories wget creates while spidering)
# go to a private temporary directory that is removed when the script exits.
#
# Usage : bash sitemap.sh www.heuzef.com

set -u

# Temporary working directory; global so the EXIT trap can still see it
# after main() has returned.
workdir=

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Entry point.
# Arguments: $1 - bare host name of the site to crawl (extra args ignored)
# Outputs:   urls_<domain>.txt and sitemap_<domain>.xml in the current dir
# Returns:   0 on success, 1 on error, 2 on bad usage
main() {
  if [ "$#" -lt 1 ]; then
    printf 'Usage : %s <domain>   (e.g. www.heuzef.com)\n' "${0##*/}" >&2
    exit 2
  fi

  local sitedomain=$1

  # The domain is embedded in the output file names, so it must be a single
  # path component and must not be mistaken for a command-line option.
  case "$sitedomain" in
    '' | -* | */* | . | ..)
      die "Invalid domain '$sitedomain': expected a bare host name such as www.heuzef.com"
      ;;
  esac

  command -v wget >/dev/null 2>&1 || die "wget is required but was not found in PATH"

  local urls_file="urls_${sitedomain}.txt"
  local sitemap_file="sitemap_${sitedomain}.xml"

  workdir=$(mktemp -d) || die "Cannot create a temporary directory"
  trap 'rm -rf -- "$workdir"' EXIT

  echo "Generating ..."
  echo

  # wget may exit non-zero for reasons that do not invalidate the crawl
  # (for instance when it meets a broken link), so the status is only
  # recorded here and reported if no URL could be extracted.
  local crawl_log="$workdir/get_urls.txt"
  local wget_status=0
  wget --spider --recursive --level=inf --no-verbose \
    --directory-prefix="$workdir" --output-file="$crawl_log" \
    "$sitedomain" || wget_status=$?

  [ -f "$crawl_log" ] || die "wget did not write a log (exit status $wget_status)"

  # For each log line containing "URL:", keep the first whitespace-delimited
  # token that follows it; then de-duplicate.
  awk -F 'URL:' 'NF > 1 { if (split($2, tok, " ") > 0) print tok[1] }' \
    "$crawl_log" \
    | sort -u > "$urls_file" \
    || die "Cannot write $urls_file"

  [ -s "$urls_file" ] \
    || die "No URL found for '$sitedomain' (wget exit status $wget_status)"

  # Emitted on a single line, as one <urlset> opening tag.
  local header='<?xml version="1.0" encoding="UTF-8"?><urlset'
  header+=' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
  header+=' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
  header+=' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9'
  header+=' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">'

  {
    printf '%s\n' "$header"
    # Keep only page-like URLs and entity-escape them so that characters
    # such as "&" cannot break the XML. "\\&" is a literal "&" in a gsub
    # replacement; \047 is the single quote.
    awk '
      /(\/|\.html|\.htm)$/ {
        gsub(/&/, "\\&amp;")
        gsub(/</, "\\&lt;")
        gsub(/>/, "\\&gt;")
        gsub(/"/, "\\&quot;")
        gsub(/\047/, "\\&apos;")
        print "<url><loc>" $0 "</loc></url>"
      }
    ' "$urls_file"
    printf '%s\n' '</urlset>'
  } > "$sitemap_file" || die "Cannot write $sitemap_file"

  echo "Sitemap generated OK :"
  echo
  ls -lh -- "$urls_file" "$sitemap_file"
}

main "$@"
Loading…
Reference in new issue