parent
f5e162e2e2
commit
a848422b94
1 changed files with 30 additions and 0 deletions
@ -0,0 +1,30 @@ |
|||||||
|
#!/bin/bash
# sitemap.sh by heuzef.com (CC-BY-NC)
#
# Crawls a website with `wget --spider` and writes, in the current directory:
#   urls_<host>.txt     every distinct URL found in the wget log
#   sitemap_<host>.xml  sitemap of the URLs ending in "/", ".html" or ".htm"
#
# Usage : sh sitemap.sh www.heuzef.com
#
# The script sticks to POSIX sh constructs because the documented usage runs
# it through `sh`, which bypasses the bash shebang.

set -u

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  printf 'Usage : sh %s www.heuzef.com\n' "${0##*/}" >&2
  exit 2
fi

sitedomain=$1

# Output file names (and the directory removed during cleanup) are keyed by
# host name, so drop an optional scheme and any path from the argument.
# A bare domain such as www.heuzef.com is left untouched.
case "$sitedomain" in
  http://* | https://*) sitehost=${sitedomain#*://} ;;
  *) sitehost=$sitedomain ;;
esac
sitehost=${sitehost%%/*}
[ -n "$sitehost" ] || die "Cannot extract a host name from '$sitedomain'"

urls_file="urls_${sitehost}.txt"
sitemap_file="sitemap_${sitehost}.xml"

command -v wget >/dev/null 2>&1 || die "wget is required but was not found in PATH"

# Remember whether something named after the host already exists here, so
# that cleanup never deletes data that predates this run.
if [ -e "$sitehost" ]; then
  crawl_dir_preexisting=1
else
  crawl_dir_preexisting=0
fi

log_file=$(mktemp) || die "mktemp failed"

# Remove the wget log and the directory wget may leave behind for the host.
cleanup() {
  rm -f -- "$log_file"
  if [ "$crawl_dir_preexisting" -eq 0 ] && [ -d "$sitehost" ]; then
    rm -r -- "$sitehost"
  fi
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

echo "Generating ..."
echo

# wget reports a non-zero status as soon as any request fails (for example a
# single broken link), so a failure here is reported but is not fatal.
wget --spider --recursive --level=inf --no-verbose \
  --output-file="$log_file" -- "$sitedomain" \
  || printf 'Warning: wget exited with status %s; using the URLs it logged.\n' "$?" >&2

# Each interesting log line contains "URL:" followed (possibly after blanks)
# by the address; keep the first whitespace-delimited word after the marker.
awk -F 'URL:' 'NF > 1 { if (split($2, word, " ") > 0) print word[1] }' "$log_file" \
  | sort -u > "$urls_file" \
  || die "Cannot write $urls_file"

[ -s "$urls_file" ] || die "No URL found while crawling '$sitedomain'; no sitemap written."

{
  # Header emitted on a single line, words separated by one space.
  printf '%s %s %s %s %s\n' \
    '<?xml version="1.0" encoding="UTF-8"?><urlset' \
    'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' \
    'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' \
    'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9' \
    'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">'

  # Keep only page-like URLs (trailing "/", ".html" or ".htm"), entity-escape
  # the XML special characters, then wrap each URL in <url><loc>...</loc></url>.
  # "&" must be escaped first so the entities added afterwards stay intact.
  grep -E '(/|\.html?)$' "$urls_file" \
    | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
      -e 's/"/\&quot;/g' -e "s/'/\&apos;/g" \
      -e 's|.*|<url><loc>&</loc></url>|'

  echo "</urlset>"
} > "$sitemap_file" || die "Cannot write $sitemap_file"

echo "Sitemap generated OK :"
echo
ls -larth -- "$urls_file" "$sitemap_file"
Loading…
Reference in new issue