#!/bin/bash
# EDArchivesGet - Build 120930 - http://camoy.sdf.org/
# Prereqs: BASH or equivalent shell, wget, sed, grep
#
# Download the archive file(s) linked from the Encyclopedia Dramatica
# "Archives" page and place them in ./archives/ (relative to the current
# working directory).
#
# Outline:
#   1. Fetch the archive index page.
#   2. Extract the static archive URL(s) from it.
#   3. Download every extracted URL.
#   4. Move the resulting *.xml.gz files into ./archives/.
#
# All scratch files live in a private temporary directory that is removed on
# exit, so nothing in the current directory is overwritten or deleted.

# Abort on the first failing command, on unset variables, and when any stage
# of a pipeline fails, instead of carrying on with missing/empty files.
set -euo pipefail

readonly INDEX_URL='https://encyclopediadramatica.se/Encyclopedia_Dramatica:Archives'
readonly LINK_PATTERN='http://static.encyclopediadramatica.se/archives/text-[^"]*'
readonly DEST_DIR='archives'

# Print an error message to stderr and terminate with a failure status.
die() {
  printf 'EDArchivesGet: %s\n' "$*" >&2
  exit 1
}

# Make sure the destination exists before anything is downloaded.
mkdir -p -- "$DEST_DIR" || die "cannot create directory '$DEST_DIR'"

# The scratch directory is created inside the destination so the final move
# is a rename within one filesystem rather than a copy of a large archive.
workdir=$(mktemp -d "${DEST_DIR}/.edarchivesget.XXXXXX") \
  || die 'cannot create temporary directory'
cleanup() { rm -rf -- "${workdir:?}"; }
trap cleanup EXIT

index_page="${workdir}/index.html"
url_list="${workdir}/urls.txt"

# First, we get the latest archive page from ED.
# -O pins the output name, so a stale copy from an earlier run can never be
# parsed by mistake.
# NOTE(review): --no-check-certificate disables TLS verification; it is kept
# from the original script -- confirm whether it is still required.
wget --no-check-certificate -O "$index_page" "$INDEX_URL" \
  || die "failed to download $INDEX_URL"

# Let's extract the URL...
# grep prints every match on its own line; sed then drops the second line,
# exactly as the original script did (grep alone "can't quite finish the
# job" -- presumably the second match is not a usable link; confirm against
# the live page).
if ! grep -o -- "$LINK_PATTERN" "$index_page" | sed '2d' > "$url_list"; then
  die 'no archive URL found on the index page'
fi
[[ -s "$url_list" ]] || die 'archive URL list is empty'

# Now get the archive file(s), saving them inside the scratch directory.
wget -P "$workdir" -i "$url_list" || die 'failed to download the archive'

# Send it to the archives directory.
# nullglob makes an unmatched pattern expand to nothing, so a missing download
# is reported here rather than handing a literal '*.xml.gz' to mv.
shopt -s nullglob
downloaded=("$workdir"/*.xml.gz)
shopt -u nullglob
(( ${#downloaded[@]} > 0 )) || die 'download produced no *.xml.gz file'
mv -- "${downloaded[@]}" "$DEST_DIR"/
