#!/usr/bin/perl

#version string of this script; PrintHelp() shows it in the footer of the help text
my $VERSION = "v0.5";

#Current version: v0.5
#	Changes since last version:
#		--Made it so codes like url/3name002?3name009?dirname don't make ezpicspl choke.
#		--Cleaned up the code a lot, made it work with 'use strict'
#		--The code to delete files <3KB was broken.. Here are my thoughts on how to manage the deletion. I could either go through the directory after downloading is done and delete everything below the cutoff, which could harm files that the user didn't actually want deleted.. or set wget's acceptance field to nothing but the extension of the files we're downloading. The extension acceptance filter still let it accept html 404s for some reason, though. Lastly I realized wget's -O option could write the returned data, of whatever type, to a specific file. Doing this, I think it is safe to assume the file was downloaded with the filename we expected, and we can check for existence and delete if it's below the cutoff immediately after that download.
#		--Added a commandline option for the cutoff size
#		--Added a commandline option to flag whether or not to download anything. This way a user can specify -s on the commandline and test their ezcodes without actually downloading and wasting bandwidth/time. This is not guaranteed to work, and is not displayed with '--help'.
#		--Added rename functionality on the command line.. user can now tell ezpicspl to rename the file according to the last ezcodes field, rather than insert them into a subdirectory of that name.
#
#CREDIT AND CONTACT:
#		This script written by derMoerder. You may contact me via ICQ @ 23115071, or just find me on the SomethinAwful forums(or DPPHv2) as derMoerder.
#
#NOTE:
#		This script will assume you have wget in your path, or in the same directory as this script. 
#		The filesize cutoff defaults to 10KB.. anything smaller will be deleted. Use the -c option on the command line to change it.
#

use strict;
use Getopt::Std;
use vars qw( $opt_c $opt_r $opt_t $opt_s ); #c is the cutoff filesize in kilobytes, r is a bool of whether to rename according to the last ezfield or not(as opposed to using subdirs), and t is the number of downloading threads to keep going concurrently.

#script entry point: hand control to main() (defined below)
main();

#main(): entry point of the script.
#	--prints the help (and exits) when there are no arguments or the last argument is "--help"
#	--parses the switches -c <KB>, -r, -t <n> and -s into the $opt_* globals via getopts()
#	--fills in the defaults for every switch that wasn't given
#	--prints the help (and exits) when a switch is invalid or no EZCode is left after the switches
#	--hands over to Operate(), which reads the EZCode from the last element of @ARGV
sub main {
	my %optDef = (	c => 10, #def cutoff is 10KB
					r => 0,	#def behaviour is to put downloads in a subdir named by the last field of the ezcode
					t => 5, #def number of simultaneous download threads
					s => 0 ); #def behavior is to actually download files

	if( $#ARGV < 0 || ( $ARGV[ $#ARGV ] eq "--help" ) ) { #check for version or help options
		PrintHelp();
	}#end if

		#getopts() returns false on an unknown switch or a missing switch argument.. don't start downloading on a mistyped command line
	if( !getopts( 'c:rt:s' ) ) {
		PrintHelp();
	}#end if

		#if the key wasn't given a value on the command-line, we'll set it to default
	if( !defined( $opt_c ) ) { $opt_c = $optDef{ 'c' }; }
	if( !defined( $opt_r ) ) { $opt_r = $optDef{ 'r' }; }
	if( !defined( $opt_t ) ) { $opt_t = $optDef{ 't' }; }
	if( !defined( $opt_s ) ) { $opt_s = $optDef{ 's' }; }

		#getopts() removed the switches from @ARGV.. if nothing is left, the user gave switches but no EZCode (ala "ezpicspl -r")
	if( $#ARGV < 0 ) {
		PrintHelp();
	}#end if

	Operate();
}

#Operate(): as in "operate on the ezcode".
#	Reads the EZCode from the last element of @ARGV. The expected shape is
#		scheme://host/path/startfile?endfile?name
#	where startfile and endfile differ in one set of digits. Every file from the start number to
#	the end number is fetched with wget, then handed to CleanUp() so undersized results get removed.
#
#	Globals read:	$opt_r (rename by <name> instead of using a subdir named <name>),
#					$opt_s (pass --spider to wget, don't create the subdir),
#					$opt_c (cutoff in KB, passed on to CleanUp()).
#	Calls Die() (which exits) when the EZCode can't be parsed, when no changing set of digits is
#	found, when the subdir can't be created, or when wget can't be started.
sub Operate {
	my $ezcode = $ARGV[ $#ARGV ];

	local $| = 1; #wget writes to the terminal itself now, so flush our own prints right away to keep the output in order



	##########
	##### This part is the parsing of the EZCode.
	##########



	my ( $rdir, $files ); #the url with path up to the filenames, and the ezcoded filename part(filestart.ext?fileend.ext?name)
	if( defined( $ezcode ) && $ezcode =~ m|^(\w+://.*)/([^/]+)$| ) {
		$rdir = "$1/";
		$files = $2;
	} else { #without this check a failed match would leave the captures empty or stale
		Die( "Couldn't find a URL with an EZCode on the command line. Quitting ezpicspl.\n" );
	}#end if

	my @splitCode = split( /\?/, $files ); #split the code into startfile(0) endfile(1) and name(2)
	if( @splitCode < 2 || $splitCode[ 0 ] eq '' || $splitCode[ 1 ] eq '' ) {
		Die( "The EZCode needs both a starting and an ending filename. Quitting ezpicspl.\n" );
	}#end if
	my $name = defined( $splitCode[ 2 ] ) ? $splitCode[ 2 ] : ''; #the name field is optional.. without it files go in the current dir under their remote names


	#now we'll need to check for how many sets of digits are in the filename, and see which set of digits changes..
		#splitting on a captured (\d+) keeps the digit sets in the list, so the filename can be glued back together later
	my @partsBeg = split( /(\d+)/, $splitCode[ 0 ] );
	my @partsEnd = split( /(\d+)/, $splitCode[ 1 ] );

	my $changingSet = -1; #will hold the index in @partsX of the set of digits that increments
	for my $i ( 0 .. $#partsBeg ) { #the last part has to be looked at too, or names without an extension(name002) never match
		next unless $partsBeg[ $i ] =~ /^\d+$/; #only a set of digits can be counted through
		next unless defined( $partsEnd[ $i ] ) && $partsEnd[ $i ] =~ /^\d+$/;
		if( $partsBeg[ $i ] ne $partsEnd[ $i ] ) { #if the number isn't the same between the begfile and endfile, this is the number we want to use with the looping
			$changingSet = $i; #no 'last' here: when several sets differ, the rightmost one wins
		}#end if
	}#end for
	if( $changingSet == -1 ) {
		Die( "No file pattern recognized, can't download. Quitting ezpicspl.\n" );
	}#end if

	#the portions of the filename before and after the changing set of digits, to be tacked on each side for the download
	my $fileBegR = join( '', @partsBeg[ 0 .. $changingSet - 1 ] );
	my $fileEnd = join( '', @partsBeg[ $changingSet + 1 .. $#partsBeg ] );
	my $startNum = $partsBeg[ $changingSet ];
	my $endNum = $partsEnd[ $changingSet ];

	my @splitFileName = split( /\./, $splitCode[ 0 ] );
	my $extension = $splitFileName[ $#splitFileName ]; #whatever follows the last '.', for wget's -A parameter



	my $fileBeg = $fileBegR; #local filename prefix.. only differs from the remote one when renaming
	my $dirPrefix = '';
	if( $name ne '' ) {
		if( $opt_r ) { #if user wants to rename rather than insert into subdir, according to the cmdline..
			$fileBeg = $name;
		} else {
			$dirPrefix = "$name/";
		}#end if
	}#end if




	##########
	##### From this point on is the ordered downloading. Before this was parsing the EZCode.
	##########



	#if there's padding in the start, every number gets padded to the width of the start number(008 -> 009, 010..).
	#the width has to come from the start number: going by the end number would turn 0..15 into 00, 01..
	my $padWidth = ( substr( $startNum, 0, 1 ) eq "0" ) ? length( $startNum ) : 0;

	#the subdir is only needed when files really get downloaded into it, so not when spidering or renaming
	if( !$opt_s && $dirPrefix ne '' && !-d $name ) {
		mkdir( $name ) || Die( "Couldn't create directory \"$name\". Quitting ezpicspl.\n" );
	}#end if

	for( my $i = $startNum + 0; $i <= $endNum; $i++ ) {	#the +0 is on the $startNum so that it's treated as an integer with any 0-padding chopped off.. without this ezpicspl won't work with sets numbered like 012-199
		my $number = sprintf( "%0*d", $padWidth, $i ); #a width of 0 means no padding at all
		my $fileName = "$fileBeg$number$fileEnd"; #the local filename
		my $fileNameR = "$fileBegR$number$fileEnd"; #the filename of the remote file.. different from the local one if the user sets $opt_r

		print "\nAttempting download of file \"$fileNameR\" out of \"$fileBegR$endNum$fileEnd\"\n";

		#list-form system() hands the arguments straight to wget without a shell, so characters like " ` $ ; in an EZCode pasted from somewhere can't run commands
		my @wget = ( 'wget', '-nv', "--output-document=$dirPrefix$fileName" );
		if( $opt_s ) { push( @wget, '--spider' ); }
		push( @wget,	'-nd', '-A', $extension,
						"--referer=$rdir$fileNameR", #the referer is a remote url, so it uses the remote name
						'--follow-ftp',
						'--user-agent=MSIE 6.66 (X11; I; Etch-A-Sketch 5.4)',
						"$rdir$fileNameR" );
		if( system( @wget ) == -1 ) { #-1 means wget couldn't be started at all.. a failed download is not fatal, the next file is still tried
			Die( "Couldn't run wget: $!. Quitting ezpicspl.\n" );
		}#end if

		CleanUp( $dirPrefix, $fileName, $opt_c );

	}#end for


}#end Operate()


#CleanUp(): removes a just-downloaded file when it is smaller than the cutoff.
#args: [0]directory prefix(may be ''), [1]filename, [2]cutoff in kilobytes(optional, falls back to $opt_c)
#	Does nothing when the file doesn't exist. Calls Die() (which exits) when the file is too small but can't be removed.
sub CleanUp {
	my ( $dir, $file, $cutoff ) = @_;
	my $path = "$dir$file";

	if( !defined( $cutoff ) ) { $cutoff = $opt_c; } #callers that only pass directory and filename keep working

	my @fileStat = stat( $path ); #comes back empty when the file isn't there, so this doubles as the existence check
	if( !@fileStat ) { return; } #nothing to clean up :) maybe wget fscked up

	print "Checking filesize..\n";
	if( $fileStat[ 7 ] < $cutoff * 1024 ) {	#element 7 is the size in bytes.. if it's smaller than the specified cutoff, kill it.
		unlink( $path ) || Die( "Couldn't remove file \"$path\". Quitting.\n" );
		print "\"$path\" was smaller than $cutoff Kilobytes, so it was removed.\n";
	} else {
		print "File of at least $cutoff Kilobytes, not removing.\n";
	}#end if
	return;
}#end sub CleanUp



#PrintHelp(): prints the usage text plus version/contact footer, then exits the script with status 0.
#	Takes no arguments and never returns.
#	(No prototype on purpose: the sub is called before it is defined, so a prototype is never applied to those calls.)
sub PrintHelp {
	print	"Usage: EZPicspl [OPTIONS]... [EZCODE]...\n",
			"Download from EZCodes via HTTP/FTP.\n\n",

			"  --help\tPrint this help and exit.\n",

			"  -c\t\tInt for cutoff size in downloads, in Kilobytes. Default is 10.\n",

			"  -r\t\tSwitch for whether to rename file by the last field in the\n",
			"\t\tEZCode. The default state is off. When this option is used,\n",
			"\t\tthe files are not put into a subdirectory.\n",

			"  -s\t\tSwitch for code testing, doesn't actually download. This is\n",
			"\t\tnot guaranteed to work in this version, so test if you want but\n",
			"\t\tbe careful.\n",

#			the -t switch is parsed but left out of the help text on purpose:
#			"  -t\t\tInt for the number of threads to use concurrently for\n",
#			"\t\tdownloading. The default value is 5\n",

			"\nEZPicspl by derMoerder, $VERSION. Report bugs to derM\@eden.rutgers.edu.\n",
			"Website: \thttp://www.geocities.com/ezpicspl\n";
	exit();
}


#Die(): prints every argument to the currently selected output handle, then ends the script with exit code -1.
#	Never returns to the caller.
sub Die {
	my @message = @_;

	print @message;
	exit( -1 );
}