A simple Bash script that crawls your site, follows every link it finds, and generates a sitemap.

You can create multiple sitemaps with this. Note that I don't set a priority or anything fancy.

NOTE: I heavily modified a script from somewhere else, but I don't recall where it came from. If you are the author, feel free to get in contact for a link.


#######################################
# Crawl a site with wget and write an XML sitemap of the pages it finds.
#
# The site is spidered recursively (nothing is saved), every URL that wget
# logs is collected and de-duplicated, and the URLs ending in "/", ".html",
# ".htm" or ".php" are written as <url><loc> entries. URLs containing a "?"
# are rejected by the crawl. No <priority>, <lastmod> or <changefreq> is set.
#
# Arguments:
#   $1 - start URL of the site to crawl, e.g. https://www.example.com/
#   $2 - path of the sitemap file to (over)write
#   $3 - optional working directory for the intermediate files linklist.txt
#        (raw wget log) and sortedurls.txt (unique URLs);
#        defaults to /var/app/current/sitemaps
# Outputs:
#   "Done: <start URL>" on stdout; diagnostics on stderr
# Returns:
#   0 on success, 2 on missing arguments, 1 when the working directory,
#   the crawl log or the sitemap file cannot be written/read
#######################################
kit_sitemap() {
	local sitedomain="${1:-}"
	local sitemapfile="${2:-}"
	local workdir="${3:-/var/app/current/sitemaps}"
	local linklist="${workdir}/linklist.txt"
	local sortedurls="${workdir}/sortedurls.txt"
	local header

	if [[ -z "$sitedomain" || -z "$sitemapfile" ]]; then
		printf 'usage: kit_sitemap <site-url> <sitemap-file> [work-dir]\n' >&2
		return 2
	fi
	mkdir -p -- "$workdir" || return 1

	# Drop any log left by a previous run so a crawl that never starts
	# cannot silently reuse stale data.
	rm -f -- "$linklist"

	# wget reports a non-zero status as soon as one link in the crawl answers
	# with an error, so its exit code is deliberately not treated as fatal;
	# the presence of the log file is checked instead.
	wget --reject-regex "(.*)\?(.*)" --spider --no-verbose --recursive \
		--level=inf --output-file="$linklist" "$sitedomain"
	if [[ ! -r "$linklist" ]]; then
		printf 'kit_sitemap: no crawl log at %s\n' "$linklist" >&2
		return 1
	fi

	# Log lines look like "... URL: <url> 200 OK" or "... URL:<url> [size] ...";
	# keep the first whitespace-delimited token after "URL:" and de-duplicate.
	# LC_ALL=C keeps the ordering independent of the caller's locale.
	grep -i 'URL' -- "$linklist" \
		| awk -F 'URL:' '{ if (split($2, tok, " ") > 0) print tok[1] }' \
		| LC_ALL=C sort -u > "$sortedurls"

	# Namespace and schema locations as defined by the sitemaps.org protocol.
	header='<?xml version="1.0" encoding="UTF-8"?><urlset'
	header+=' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
	header+=' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
	header+=' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9'
	header+=' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">'

	{
		printf '%s\n' "$header"
		# Only page-like URLs go into the sitemap; the five XML special
		# characters are entity-escaped ("&" first so it is not re-escaped).
		awk -v apos="'" '
			/(\/|\.html|\.htm|\.php)$/ {
				gsub(/&/, "\\&amp;")
				gsub(/</, "\\&lt;")
				gsub(/>/, "\\&gt;")
				gsub(/"/, "\\&quot;")
				gsub(apos, "\\&apos;")
				print "<url><loc>" $0 "</loc></url>"
			}
		' "$sortedurls"
		printf '%s\n' '</urlset>'
	} > "$sitemapfile" || return 1

	printf 'Done: %s\n' "$sitedomain"
}

# Generate one sitemap per site. The example.com URLs, the output paths and
# the e-mail address below are placeholders: replace them with your own.
sitedomain='https://www.example.com/'
sitemapfile='/var/app/current/sitemaps/sitemap-www.xml'
kit_sitemap "$sitedomain" "$sitemapfile"

sitedomain='https://blog.example.com/'
sitemapfile='/var/app/current/sitemaps/sitemap-blog.xml'
kit_sitemap "$sitedomain" "$sitemapfile"

# etc....

# email admin
admin_email='admin@example.com'
printf '%s\n' 'completed from _cron/daily/' | mail -s 'sitemaps done' "$admin_email"

Published: Sun 24th December 2017
