edit slurp_url script

master
heuzef 6 years ago
parent 5318f697d4
commit f374264c88
  1. 30
      sitemap/sitemap.sh
  2. 3
      sitemap/sitemap_example.com.xml
  3. 3
      sitemap/sitemap_www.aoc-toiture.com.xml
  4. 12
      sitemap/sitemap_www.firewall-services.com.xml
  5. 11
      urls/slurp_urls.sh
  6. 107
      urls/urls_www.perl.org.txt

@ -1,30 +0,0 @@
#!/bin/bash
# sitemap.sh by heuzef.com (CC-BY-NC)
# Usage : sh sitemap.sh www.heuzef.com
#
# Crawls the given site with wget in spider mode, stores every URL seen in
# the crawl log in urls_<host>.txt, and writes sitemap_<host>.xml listing the
# URLs that end in "/", ".html" or ".htm". Both files are created in the
# current directory. A full URL (http://host/path) is accepted as well as a
# bare domain; only its host part is used to name the output files.
#
# The script deliberately sticks to constructs that plain sh understands,
# because the documented invocation is "sh sitemap.sh ...".

set -u

# Path of the temporary wget log; empty until main() has created it.
crawl_log=

# Print a one-line usage message on stdout.
usage() {
  printf 'Usage: sh %s <domain-or-url>\n' "${0##*/}"
}

# Remove the temporary crawl log, if one was created.
cleanup() {
  [ -z "$crawl_log" ] || rm -f -- "$crawl_log"
}

# Print the host part of $1, which may be a bare domain or a full URL.
# A leading "scheme://" is dropped, then everything from the first "/", "?"
# or "#" onwards.
site_host() {
  scheme=${1%%://*}
  case $scheme in
    "$1" | */* | '')
      # No "://" at all, or it only shows up later (inside a path/query).
      host=$1
      ;;
    *)
      host=${1#*://}
      ;;
  esac
  printf '%s\n' "${host%%[/?#]*}"
}

# Read a "wget --no-verbose" log on stdin and print every distinct URL that
# follows a "URL:" marker, one per line, sorted.
extract_urls() {
  awk -F 'URL:' 'NF > 1 {
    # The URL is the first whitespace-delimited word after the marker.
    if (split($2, words, " ") > 0) print words[1]
  }' | sort -u
}

# Read a URL list on stdin (one URL per line) and print a complete sitemap
# document on stdout. Only URLs ending in "/", ".html" or ".htm" are listed.
write_sitemap() {
  printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">'
  awk -v apos="'" '
    /(\/|\.html|\.htm)$/ {
      # The sitemap protocol requires entity-escaped values; "&" must be
      # handled first so the other entities are not escaped twice.
      gsub(/&/, "\\&amp;")
      gsub(/</, "\\&lt;")
      gsub(/>/, "\\&gt;")
      gsub(/"/, "\\&quot;")
      gsub(apos, "\\&apos;")
      print "<url><loc>" $0 "</loc></url>"
    }'
  printf '%s\n' '</urlset>'
}

# Entry point. $1 is the domain or URL to crawl.
# Returns 2 on a usage error, 1 on a runtime failure, 0 otherwise.
main() {
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    usage >&2
    return 2
  fi

  target=$1
  site=$(site_host "$target")
  if [ -z "$site" ]; then
    printf 'error: cannot determine a host name from "%s"\n' "$target" >&2
    return 1
  fi
  if ! command -v wget >/dev/null 2>&1; then
    printf 'error: wget is required but was not found\n' >&2
    return 1
  fi

  url_list="urls_${site}.txt"
  sitemap="sitemap_${site}.xml"

  echo "Generating ..."
  echo

  crawl_log=$(mktemp) || {
    printf 'error: cannot create a temporary file\n' >&2
    return 1
  }
  trap cleanup EXIT

  # wget signals server error responses (for instance a single broken link)
  # through a non-zero exit status, so that status is reported, not fatal:
  # the log still holds every URL that was visited.
  wget --spider --recursive --level=inf --no-verbose \
    --output-file="$crawl_log" -- "$target" ||
    printf 'warning: wget exited with status %s\n' "$?" >&2

  extract_urls <"$crawl_log" >"$url_list" || return 1
  write_sitemap <"$url_list" >"$sitemap" || return 1

  # The recursive crawl leaves a directory named after the host behind.
  rm -rf -- "${site:?}"

  echo "Sitemap generated OK :"
  echo
  ls -lrth -- "$url_list" "$sitemap"
}

main "$@"

@ -1,3 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
<url><loc>http://example.com/</loc></url>
</urlset>

@ -1,3 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
<url><loc>http://www.aoc-toiture.com/</loc></url>
</urlset>

@ -1,12 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
<url><loc>http://www.firewall-services.com/</loc></url>
<url><loc>http://www.firewall-services.com/clubs-entreprises.html</loc></url>
<url><loc>http://www.firewall-services.com/contact.html</loc></url>
<url><loc>http://www.firewall-services.com/espace-presse.html</loc></url>
<url><loc>http://www.firewall-services.com/formations.html</loc></url>
<url><loc>http://www.firewall-services.com/index.html</loc></url>
<url><loc>http://www.firewall-services.com/nos-competences.html</loc></url>
<url><loc>http://www.firewall-services.com/passeport.html</loc></url>
<url><loc>http://www.firewall-services.com/plan.html</loc></url>
<url><loc>http://www.firewall-services.com/prolibre.html</loc></url>
</urlset>

@ -0,0 +1,11 @@
#!/bin/bash
# slurp_urls.sh - crawl a site with wget in spider mode and store every URL
# seen in the crawl log in urls_<host>.txt (created in the current directory).
#
# Usage: slurp_urls.sh <domain-or-url>
#
# A full URL (http://host/path) is accepted as well as a bare domain; only
# its host part is used to name the output file.

set -u

# Path of the temporary wget log; empty until main() has created it.
crawl_log=

# Print a one-line usage message on stdout.
usage() {
  printf 'Usage: %s <domain-or-url>\n' "${0##*/}"
}

# Remove the temporary crawl log, if one was created.
cleanup() {
  [ -z "$crawl_log" ] || rm -f -- "$crawl_log"
}

# Print the host part of $1, which may be a bare domain or a full URL.
# A leading "scheme://" is dropped, then everything from the first "/", "?"
# or "#" onwards.
site_host() {
  scheme=${1%%://*}
  case $scheme in
    "$1" | */* | '')
      # No "://" at all, or it only shows up later (inside a path/query).
      host=$1
      ;;
    *)
      host=${1#*://}
      ;;
  esac
  printf '%s\n' "${host%%[/?#]*}"
}

# Read a "wget --no-verbose" log on stdin and print every distinct URL that
# follows a "URL:" marker, one per line, sorted.
extract_urls() {
  awk -F 'URL:' 'NF > 1 {
    # The URL is the first whitespace-delimited word after the marker.
    if (split($2, words, " ") > 0) print words[1]
  }' | sort -u
}

# Entry point. $1 is the domain or URL to crawl.
# Returns 2 on a usage error, 1 on a runtime failure, 0 otherwise.
main() {
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    usage >&2
    return 2
  fi

  target=$1
  site=$(site_host "$target")
  if [ -z "$site" ]; then
    printf 'error: cannot determine a host name from "%s"\n' "$target" >&2
    return 1
  fi
  if ! command -v wget >/dev/null 2>&1; then
    printf 'error: wget is required but was not found\n' >&2
    return 1
  fi

  url_list="urls_${site}.txt"

  echo "Slurping URLs ..."
  echo

  crawl_log=$(mktemp) || {
    printf 'error: cannot create a temporary file\n' >&2
    return 1
  }
  trap cleanup EXIT

  # wget signals server error responses (for instance a single broken link)
  # through a non-zero exit status, so that status is reported, not fatal:
  # the log still holds every URL that was visited.
  wget --spider --recursive --level=inf --no-verbose \
    --output-file="$crawl_log" -- "$target" ||
    printf 'warning: wget exited with status %s\n' "$?" >&2

  extract_urls <"$crawl_log" >"$url_list" || return 1

  # The recursive crawl leaves a directory named after the host behind.
  rm -rf -- "${site:?}"

  echo "Done !"
  echo
  ls -lh -- "$url_list"
}

main "$@"

@ -0,0 +1,107 @@
https://www.perl.org/
https://www.perl.org/about.html
https://www.perl.org/about/reduce-risks.html
https://www.perl.org/about/style-guide.html
https://www.perl.org/about/whitepapers/
https://www.perl.org/about/whitepapers/perl-cpan.html
https://www.perl.org/about/whitepapers/perl-database-orm.html
https://www.perl.org/about/whitepapers/perl-ipv6.html
https://www.perl.org/about/whitepapers/perl-loadbalancer.html
https://www.perl.org/about/whitepapers/perl-object-oriented.html
https://www.perl.org/about/whitepapers/perl-plack.html
https://www.perl.org/about/whitepapers/perl-profiling.html
https://www.perl.org/about/whitepapers/perl-testing.html
https://www.perl.org/about/whitepapers/perl-webframework.html
https://www.perl.org/advocacy/white_camel/
https://www.perl.org/advocacy/white_camel/1999.html
https://www.perl.org/advocacy/white_camel/2000.html
https://www.perl.org/advocacy/white_camel/2001.html
https://www.perl.org/advocacy/white_camel/2002.html
https://www.perl.org/advocacy/white_camel/2003.html
https://www.perl.org/advocacy/white_camel/2004.html
https://www.perl.org/advocacy/white_camel/2005.html
https://www.perl.org/advocacy/white_camel/2006.html
https://www.perl.org/advocacy/white_camel/2007.html
https://www.perl.org/advocacy/white_camel/2008.html
https://www.perl.org/advocacy/white_camel/2009.html
https://www.perl.org/advocacy/white_camel/2010.html
https://www.perl.org/advocacy/white_camel/2011.html
https://www.perl.org/advocacy/white_camel/2012.html
https://www.perl.org/advocacy/white_camel/2013.html
https://www.perl.org/advocacy/white_camel/2014.html
https://www.perl.org/advocacy/white_camel/2015.html
https://www.perl.org/advocacy/white_camel/2016.html
https://www.perl.org/advocacy/white_camel/2017.html
https://www.perl.org/app.html
https://www.perl.org/books/beginning-perl/
https://www.perl.org/books/impatient-perl/
https://www.perl.org/books/library.html
https://www.perl.org/camel.html
https://www.perl.org/community.html
https://www.perl.org/contribute.html
https://www.perl.org/cpan.html
https://www.perl.org/docs.html
https://www.perl.org/events.html
https://www.perl.org/get.html
https://www.perl.org/learn.html
https://www.perl.org/media.html
https://www.perl.org/robots.txt
https://www.perl.org/siteinfo.html
https://www.perl.org/wishlist.html
http://www.perl.org/
http://www.perl.org/about.html
http://www.perl.org/about/reduce-risks.html
http://www.perl.org/about/style-guide.html
http://www.perl.org/about/whitepapers/
http://www.perl.org/about/whitepapers/perl-cpan.html
http://www.perl.org/about/whitepapers/perl-database-orm.html
http://www.perl.org/about/whitepapers/perl-ipv6.html
http://www.perl.org/about/whitepapers/perl-loadbalancer.html
http://www.perl.org/about/whitepapers/perl-object-oriented.html
http://www.perl.org/about/whitepapers/perl-plack.html
http://www.perl.org/about/whitepapers/perl-profiling.html
http://www.perl.org/about/whitepapers/perl-testing.html
http://www.perl.org/about/whitepapers/perl-webframework.html
http://www.perl.org/advocacy/white_camel/
http://www.perl.org/advocacy/white_camel/1999.html
http://www.perl.org/advocacy/white_camel/2000.html
http://www.perl.org/advocacy/white_camel/2001.html
http://www.perl.org/advocacy/white_camel/2002.html
http://www.perl.org/advocacy/white_camel/2003.html
http://www.perl.org/advocacy/white_camel/2004.html
http://www.perl.org/advocacy/white_camel/2005.html
http://www.perl.org/advocacy/white_camel/2006.html
http://www.perl.org/advocacy/white_camel/2007.html
http://www.perl.org/advocacy/white_camel/2008.html
http://www.perl.org/advocacy/white_camel/2009.html
http://www.perl.org/advocacy/white_camel/2010.html
http://www.perl.org/advocacy/white_camel/2011.html
http://www.perl.org/advocacy/white_camel/2012.html
http://www.perl.org/advocacy/white_camel/2013.html
http://www.perl.org/advocacy/white_camel/2014.html
http://www.perl.org/advocacy/white_camel/2015.html
http://www.perl.org/advocacy/white_camel/2016.html
http://www.perl.org/advocacy/white_camel/2017.html
http://www.perl.org/app.html
http://www.perl.org/books/beginning-perl/
http://www.perl.org/books/impatient-perl/
http://www.perl.org/books/library.html
http://www.perl.org/camel.html
http://www.perl.org/community.html
http://www.perl.org/contribute.html
http://www.perl.org/cpan.html
http://www.perl.org/docs.html
http://www.perl.org/events.html
http://www.perl.org/get.html
http://www.perl.org/learn.html
http://www.perl.org/media.html
http://www.perl.org/robots.txt
http://www.perl.org/siteinfo.html
http://www.perl.org/wishlist.html
http://www.powells.com/book/beginning-perl-9781861003140?partnerid=25774
http://www.powells.com/book/embedding-perl-in-html-with-mason-9780596002251?partnerid=25774
http://www.powells.com/book/modern-perl-9781680500882?partnerid=25774
http://www.powells.com/book/modperl-developers-cookbook-9780672322402?partnerid=25774
http://www.powells.com/book/perl-lwp-9780596001780?partnerid=25774
http://www.powells.com/book/practical-mod-perl-9780596002275?partnerid=25774
http://www.powells.com/book/writing-apache-modules-with-perl-and-c-9781565925670X?partnerid=25774
Loading…
Cancel
Save