# WEBSITE PARSING PROGRAM
#
# NOTE:  ALL CONTENT ON CREATED WEBPAGE IS OWNED BY  THE CORRESPONDING
#  WEBSITES.  CLICK ON LINK TO SEE ORIGINAL CONTENT
#
#Created by : Justin Bischoff
#
#Latest Revision : 4/6/03
#	-Bug Fixes and more comics
#
#DO NOT USE OR DISTRIBUTE THIS CODE
#
#There is no warranty, explicit or implied, on this code.
#
#Copyright 2003 Justin Bischoff
#
#Send me ideas for changes or improvements
#
#webpage content is owned by corresponding webpage owner,
#  see individual pages for details.
#
#publicly providing the content of the generated web page
#   without permission of the content owners could be 
#   breaking the law

#############################################
# Description
#
# This is a perl script that uses the LWP module to 
#   grab specific webpages off of the internet.
#
# The second half of the script goes through the grabbed
#	webpages and turns them into a summary page

#############################################
# Usage
#   - use perl to run. C:\perl l33t_grabber.pl
#
# Command Line Parameters - examples (in any order)
#	"force 3" - forces up to 3 attempts at a page
#   "proxy my.proxy:69" - uses my.proxy at port 69
#   "timeout 12" - sets the timeout to 12 seconds
#   "comicsoff" - only news sites grabbed - much faster
#
# ie.  "C:\perl l33t_grabber.pl proxy proxy.fm.intel.com:911 force 2 comicsoff"
# this line will use folsom's proxy, make two attempts at a webpage and it
#   has the comics turned off.


#I hate use strict, if you don't initialize your vars, then you're dumb.
use LWP::UserAgent;
use POSIX qw/strftime/;

###############################
#things to add:
# - Save a text file w/date of source html, only look through web if file dne
# - improve and add to current events page
# - handle more failure cases, or handle them better.
# - Create RSS compliant generic parser.
#
# - automatically spawn the webpage after the script is run. (Command line parameter?)
# - more stock?, e2, circuit?
# - Current Movie Listings or Showtimes?
# - Current Events - concerts, shows, plays, lectures, etc.
#
# - Horoscope? (theOnion?)


###########################################################
#USER VARIABLES  - change these per personal preference   #
#
# Everything in this box is a file-wide setting that the rest of the
# script reads by name.  Several of them can be overridden from the
# command line (see the argument processing further down).
                                                          #
#for news sites, this is the number of headlines per      #
# category to display.  this only kinda works: the        #
# individual parsers scale it differently (+1, *4, *7).   #
$Number_of_headlines = 3;                                 #
                                                          #
#What is the html result title going to be?               #
# (written into the <TITLE> of the generated page)        #
$Page_title = "Nerd's Web Summary";                       #
                                                          #
#What is the filename going to be?                        #
# (the generated page is written to this file)            #
$Output_file = "nerd_summary.html";                       #
                                                          #
#width of news columns must be forced                     #
# (Slashdot titles longer than this are cut and get "...")#
$column_width = 33;                                       #
                                                          #
#name and port of your proxy (can use command line instead)
# It is only installed on the user agent when the direct
# connection test fails.
#my $my_proxy = 'http://proxy.sc.intel.com:911';			  #
my $my_proxy = 'http://proxy.fm.intel.com:911';			  #

														  #
#timeout (seconds) before we quit trying to load a page.  #
# The "timeout N" command line argument overrides this.   #
$timeout = 13;      #in LWP timeouts don't really work    #
                
#By default generate logfiles?							  #
# (0 = no; the "logfile" argument sets this to 1)         #
$logfiles = 0;                                            #
#                                                         #
###########################################################

#ONLY 1337 P3rl Ninjas venture below this line!

#Take ONE snapshot of the clock and derive everything from it, so that
#the date string and the broken-down fields below always describe the
#same instant (two separate localtime() calls could straddle midnight).
my $run_time      = time();
my @run_localtime = localtime($run_time);

#get time in "Month-Day-Year", e.g. "April-06-2003"
$now = strftime( "%B-%d-%Y", @run_localtime );

#broken-down local time; $hour and $min are used for the page title
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @run_localtime;
#print "$mon-$mday-$year : $isdst\n";

#318 minus the zero-based day of the year; only referenced by the
#commented-out "Days until next season" output further down
my $daysleft = 318 - $yday;


###########################################################
# process user arguments
#
# Recognised keywords, in any order (a leading "-" or "--" is tolerated):
#   logfile            - enable the daily logfile (logfile_<date>.txt)
#   force <n>          - try each page up to <n> times (clamped to 1..5)
#   timeout <seconds>  - timeout handed to the user agent
#   proxy <host:port>  - proxy to use; "http://" is prepended if missing
#   comicsoff          - only news sites are grabbed
#
# A valid value is consumed together with its keyword, so it is never
# looked at again as if it were a keyword.  Before this, the documented
# example "proxy proxy.fm.intel.com:911" printed a bogus proxy error,
# because the value itself matched /^proxy/ on the next iteration.
# An invalid value is NOT consumed: it may be the next keyword.
# Unknown arguments are silently ignored, as before.
#
#DO NOT change these default values.
$this_param = 0; $comics_on = 1; $force = 1;
$proxy = "none";

while ($this_param < @ARGV) {
	$cl_param = $ARGV[$this_param];

	#the argument following this one ("" when there is none)
	my $cl_value = $ARGV[$this_param+1];
	$cl_value = "" unless defined $cl_value;

	#logfile - enables logfiling daily- creates lots of files.
	if($cl_param =~ /^-{0,2}logfile$/){
		$logfiles = 1;

		open(LOGFILE, ">logfile_$now.txt") || die "Couldn't open logfile_$now.txt\n" . $!;

		&print_error( "HTML PARSER LOGFILE\n\nDate: $now\n\n");
	}

	#force - "force 2" will try 2 times on each website.
	elsif($cl_param =~ /^-{0,2}force$/){
		if($cl_value =~ /^\d+$/){
			$force = $cl_value;
			$this_param++;		#the number belongs to "force"

			if($force>5){
				&print_error( "FOOL! Do you want the script to run forever?\n");
				&print_error( "I AM COMPUTER, SMARTER THAN THOU, USE A LOWER FORCE VALUE!\n");
				$force = 5;
			}
			elsif($force<=0){
				&print_error( "WHAT ARE YOU TRYING TO DO?\nBAD USER, BAD!\n");
				$force = 1;
			}
			&print_error( "Using a force of $force cycles\n");
		}
		else{
			$force=1;
			&print_error( "ERROR PARSING COMMAND LINE PARAMETERS!\n");
			&print_error( "\"force\" must be followed by a number.\nForce not used\n");
		}
	}

	#timeout - sets timeout in number of seconds, ie "timeout 10"
	elsif($cl_param =~ /^-{0,2}timeout$/){
		if($cl_value =~ /^\d+$/){
			$timeout = $cl_value;
			$this_param++;		#the number belongs to "timeout"
			&print_error( "Using a timeout of $timeout\n");
		}
		else{
			&print_error( "ERROR PARSING COMMAND LINE PARAMETERS!\n");
			&print_error( "\"timeout\" must be followed by a number.\nTimeout of $timeout sec. used\n");
		}
	}

	#proxy - defines your proxy.  It must be followed by a name, colon
	#		  and port number, e.g. "proxy.fm.intel.com:911".
	#		  The default is the $my_proxy user variable.
	elsif($cl_param =~ /^-{0,2}proxy$/){
		if($cl_value =~ /\w.*?\:\d+/){
			$proxy = $cl_value;
			$this_param++;		#the address belongs to "proxy"

			if($proxy =~ /http\:\/\// ){
				$my_proxy = $proxy;
			}
			else{
				$my_proxy = "http://".$proxy;
			}
		}
		else{
			&print_error( "ERROR PARSING COMMAND LINE PARAMETERS!\n");
			&print_error( "\"proxy\" must be followed by a valid proxy.\nProxy $my_proxy used\n");
		}
	}

	#comicsoff - script will grab only news pages, saves you time for the script
	#				running, and time for the webpage to load up.
	elsif($cl_param =~ /^-{0,2}comicsoff$/){
		$comics_on=0;
		&print_error( "COMICS WILL BE OFF!\n");
	}

	$this_param++;
}################  FINISHED PROCESSING COMMAND LINE PARAMETERS  #####################

#Ideas for further sources:
#can try Yahoo RSS http://rss.news.yahoo.com/rss/topstories
#Reuters Photos match "class="photoLink"" at http://www.reuters.com/newsPhotoGallery.jhtml?type=topNews
#http://www.kuro5hin.org/backend.rdf

#URLs to get - dont change these unless target url changes
#
# The index of each entry matters: the fetch loop stores page N in
# $all_data[N], and every parser below reads its page by that number
# (0 Google, 1 Slashdot, 2 Inquirer, 3 Dilbert, 7 stock, 8 SacBee,
# 9 Kirkwood, 10 Ace's Hardware, 12 word of the day, 17 weather,
# 18 Yahoo images).
$url[0]="http://news.google.com/news/gnmainlite.html";
#$url[0]="http://news.google.com/";
$url[1]="http://slashdot.org/slashdot.xml";
#http://www.theinquirer.net/inquirer.rss
$url[2]="http://www.theinquirer.net/index.html";
#Comic pages.  With comics off these slots are never assigned, which
#leaves entries 3-6 of @url undefined (20-22 simply do not exist then).
if($comics_on){
  $url[3]="http://www.dilbert.com/comics/dilbert/index.html";
  $url[4]="http://www.penny-arcade.com/view.php3";
  $url[5]="http://pvponline.com/";
  $url[6]="http://www.dieselsweeties.com/";
  $url[21]="http://www.megatokyo.com/";
  $url[20]="http://www.little-gamers.com/";
  $url[22]="http://www.ctrlaltdel-online.com/index.php?t=archives&date=last";
}
$url[7]="http://finance.yahoo.com/q?s=intc&d=v1";
#$url[8]="http://my.yahoo.com/?myHome";
$url[8]="http://www.sacbee.com/content/news/";
$url[9]="http://www.kirkwood.com/conditions.asp";
$url[10]="http://www.aceshardware.com/";
#NOTE: the first assignment to slot 11 has no effect, it is overwritten
#by the very next line.
$url[11]="http://www.cnn.com/";
$url[11]="http://edition.cnn.com/";
$url[12]="http://dictionary.reference.com/wordoftheday/";
$url[13]="http://www.qotd.org/";

#UPCOMING EVENTS
$url[14]="http://www.mondaviarts.org/index.lasso";
$url[15]="http://www.berkeley.edu/calendar/";
$url[16]="http://sfgate.com/eguide/epicks/";
#$url[17]="http://www.weather.com/weather/print/95816";
#sacramento
$url[17]="http://wwwa.accuweather.com/adcbin/public/local_index.asp?zipcode=95816&partner=accuweather";
#kirkwood
#$url[17]="http://wwwa.accuweather.com/adcbin/public/local_index.asp?zipcode=95646&partner=accuweather";
$url[18]="http://news.yahoo.com/news?tmpl=index2&cid=716";
$url[19]="http://www.reuters.com/newsPhotoGallery.jhtml?type=topNews";

#create a UserAgent
#LWP::UserAgent->new takes key => value options, so the agent name has
#to be passed as the "agent" option (a bare string is not an option list).
my $agent_name = "PPMOD-Agent/LWP";
my $ua = LWP::UserAgent->new(agent => $agent_name);

#enable cookies (in-memory jar)
$ua->cookie_jar( {} );

#set temporary, short timeout for the connection test
$ua->timeout(3);
#present ourselves as a browser; this replaces $agent_name set above
$ua->agent('MSIE/6.0');

#TODO: don't test for a proxy when one was given on the command line.
#      Note that the check has to be a string comparison:
#      if ($proxy eq "none"){ ...test... }

#test connection to see if proxy is in place.
&print_error( "Testing connection...\n");
my $test_request  = HTTP::Request->new(GET => "http://perl.com");
my $test_response = $ua->request($test_request);
#keep the raw response lines available in @test_page, as before
@test_page = split(/\n/,$test_response->as_string);

#set real timeout value
$ua->timeout($timeout);

#do we need to use the proxy?
#Ask the response object instead of string-matching "HTTP/1.1 200 OK":
#the old test wrongly switched to the proxy whenever a working direct
#connection answered with HTTP/1.0 or with another 2xx status.
if(!$test_response->is_success){
	$ua->proxy(['http'], $my_proxy);
	&print_error( "\nUsing proxy $my_proxy\n");
}
else{
	&print_error( "\nNot using a proxy\n");
}

#Fetch every page in @url and store its lines in $all_data[<index>].
#
# - Each page starts from an empty @this_page.  Previously the array was
#   never reset, so a skipped page silently received a copy of the page
#   fetched before it and was logged as "received successfully".
# - Slots that must not be fetched (undefined URL, or comics 3-6 while
#   comics are off) get an empty page and cause no request or log noise.
# - Success is decided by the response object.  The old test looked for
#   "(Internal Server Error)" / "404 Not Found" in the status line and
#   missed every other failure.
# - A failed page has its first three lines replaced by a small HTML
#   notice containing "(Internal Server Error)", which the parsers below
#   look for.

#initialize value
$current_page=0;

#loop through each url in above list
foreach $webpage (@url) {
	@this_page = ();

	#are we supposed to read this page?
	if( !defined($webpage) || !($comics_on || $current_page<3 || $current_page>6) ){
		$all_data[$current_page] = [ ];
		$current_page++;
		next;
	}

	&print_error( "Requesting $webpage...\n");

	#Continue to try if http request fails until we have tried $force number of times.
	for ($f_count=0;$f_count < $force ;$f_count++) {

		my $request = HTTP::Request->new(GET => $webpage);
		$request->header(Accept => 'text/html');

		#get the page and split it into an array of lines.
		my $response = $ua->request($request);
		@this_page = split(/\n/,$response->as_string);

		if($response->is_success){
			#leave the retry loop because we have the http data
			&print_error( "Data received successfully.\n");
			last;
		}

		#page not found / not reachable.
		&print_error( "ERROR! Could not connect to : $webpage\n");
		$this_page[0] = "<br><small>(Internal Server Error)<br>\n";
		$this_page[1] = "Error connecting to webpage.<br>\n";
		$this_page[2] = "<a href=\"$webpage\">Click Here to visit your webpage.</a></small><br>\n";

		if($f_count<$force-1){
			&print_error( "Trying again...\n");
		}

	}# for loop forcing re-reads


	#store a private copy of this page's lines under the page's index.
	$all_data[$current_page] = [ @this_page ];

	$current_page++;
}


######## DONE GETTING HTML #################
# Create Webpage Summary Now, parse pages.
#
# If you don't know regexps, this code will be gibberish.

&print_error( "\nCreating output file...\n");
#create output file (truncates an existing one)
open(NEW, ">$Output_file") || die "Couldn't open $Output_file: $!";


#always start html file with this junk.
#The title carries the date and the time of the run on a 12-hour clock.
#Hours 12-23 are "pm" (noon used to be printed as "am"), hour 0 is shown
#as 12 (midnight used to be printed as "0"), and the minutes are
#zero-padded ("3:05pm", not "3:5pm").
$american_time = $hour % 12;
$american_time = 12 if $american_time == 0;
my $am_or_pm   = ($hour >= 12) ? "pm" : "am";
my $clock_text = sprintf("%d:%02d", $american_time, $min);

print NEW "<HTML>\n<HEAD>\n<TITLE>$Page_title : $now $clock_text"."$am_or_pm</TITLE>\n";
#Client-side helpers for the generated page, followed by the end of the
#<HEAD> and the start of the <BODY>.  The text is emitted verbatim
#(non-interpolating here-document):
#  ToggleQOTD()       - shows/hides the "qotd" element and moves it
#  ToggleEvents()     - shows/hides the "UpcomingEvents" element
#  ToggleSlash(elem)  - switches elem between display "block" and "none"
print NEW <<'END_OF_HEAD';
<script type="text/javascript">
<!--
function ToggleQOTD()
{
	if(qotd.style.visibility == "visible"){
		qotd.style.visibility = "hidden";
		qotd.style.top -= 110;
	}else{
		qotd.style.visibility = "visible";
		qotd.style.top = 110;
	}
}

function ToggleEvents()
{
	if(UpcomingEvents.style.visibility == "visible"){
		UpcomingEvents.style.visibility = "hidden";
	}else{
		UpcomingEvents.style.visibility = "visible";
	}
}
function ToggleSlash(elem)
{
	if(elem.style.display == "none"){
		elem.style.display = "block";
	}else{
		elem.style.display = "none";
	}
}
-->
</script>

</HEAD>
<BODY>

END_OF_HEAD

&print_error( "\nParsing results into new html...\n");

#shared parser state, reset before the first section
$counter = 0;
$headline_limit = 0;

#Opening tags of the page layout, outermost first:
#  - the rightbar table (one column for news, one column for rightbar)
#  - a grey/white table pair that draws the border around the news section
#  - the news table; colspan="4" is how many news columns the title bar spans
print NEW <<'END_OUTER_TABLES';
<!--rightbar table, one column for news, one column for rightbar-->
<TABLE border="0" bgcolor="#ffffff"><TR><TD valign="top">

 <!--Border-1 around all news and header-->
 <TABLE border="0" cellpadding="2"><tr><td width="100%" bgcolor="#999999">
  <TABLE border="0" bgcolor="#ffffff" cellpadding="2" width="100%">
  <tr><td align="left" bgcolor="#ffffff">

   <TABLE border="0"><TR><TD colspan="4" valign="top">
END_OUTER_TABLES

#create border around header
&html_border();

#table holding the rows of the top header
print NEW <<'END_HEADER_TABLE';
    <TABLE border="0" width="100%">
    <!--Main Top Header row-->
END_HEADER_TABLE

#weather - forecast header row
#Reads the weather page ($all_data[17]) and writes one image cell plus
#one text cell per forecast day into a nested table.  Output stops once
#seven days have been written (see the $counter test below).
print NEW "\n\n<!-------- Weather SECTION -------------->\n";
print NEW "     <TR><TD align=\"center\" colspan=\"9\">\n";
#$data_flag: 1 while we are inside the forecast part of the page
#$counter:   number of forecast days completed so far
$data_flag=0; $counter=0;

#table for forecast only
print NEW "\n      <TABLE border=\"0\"> <!--forecast table-->\n";
print NEW "       <TR>\n";

foreach $line (@{$all_data[17]}) {
	
	#DEBUG PRINT ENTIRE FILE
	#print NEW "<!--$line-->\n";

	#the fetch loop marks failed pages with this text; show the notice
	if($line =~ /\(Internal Server Error\)/){
		print NEW $line;
	}
	#marker in the source page that starts the forecast data
	if($line =~ /BEGIN FIRST 7/){
		$data_flag=1;
	}
	#seven days written: ignore the rest of the page
	if($counter>=7){
		$data_flag=0;
	}

	#Look for images (lines whose text contains "common/i")
	if (($line =~ /common\/i/) && ($data_flag==1)){
		print NEW "       <TD align=\"right\">\n";

		#strip font, bold and link tags, keeping what is between them
		$line =~ s/\<\/?font.*?\>//g;
		$line =~ s/\<\/?b\>//g;
		$line =~ s/\<\/?a.*?\>//gi;

		#make picture smaller
		$line =~ s/\"31\"/\"24\"/g;
		
		#remove leading whitespace
		$line =~ s/^\s+//;
		#wrap the image in a link to the weather page and open the text cell
		print NEW "        <a href=\"$url[17]\" style=\"text-decoration:none\">" . $line;
		print NEW "</a>\n       </TD><TD align=\"left\" width=\"55\">\n";
	}
	#forecast text lines (marked "sevendaynew" in the source page)
	if (($line =~ /sevendaynew/) && ($data_flag==1)){
		#remove some unwanted text: m/d dates, font-to-<br> run,
		#the words High/Low and the " F" unit
		$line =~ s/\s\d?\d\/\d\d?//g;
		$line =~ s/\<font.*?\<br\>/\<br\>/;
		$line =~ s/High//g;
		$line =~ s/Low//g;
		$line =~ s/ F//g;

		#drop a ";" (and following whitespace) that precedes a "/"
		$line =~ s/\;\s*?\//\//;
		
		$line =~ s/\<\/?font.*?\>//g;
		$line =~ s/\<\/?b\>//g;
		$line =~ s/\<\/?a.*?\>//gi;
		
		$line =~ s/^\s+//;
		print NEW "        <small><small><a href=\"$url[17]\" style=\"text-decoration:none\">";
		print NEW "<font color=\"black\">".$line."</font></a></small></small>\n";
		
		#a line containing "deg" is the last text line of a day:
		#close the cell and count the day
		if($line =~ /deg/){
			print NEW "       </TD>\n";
			$counter++;
		}
	}
}
$data_flag=0;
print NEW "      </TD></TR></TABLE><!--end forecast table-->\n\n";

print NEW "     </TD></TR>\n";

#Left cell of the second header row: Kirkwood snowfall, read from the
#Kirkwood conditions page ($all_data[9]); followed by the centered
#page title cell.
print NEW "\n\n<!-------- Kirkwood Snow SECTION -------------->\n";
print NEW "     <TR><TD align=\"left\">\n";

#Daily Snowfall
#$data_flag is set by the "New Snow in the" line; the value is taken
#from the first following line that contains "font".
$data_flag=0;
foreach $line (@{$all_data[9]}) {
	#the fetch loop marks failed pages with this text; show the notice
	if($line =~ /\(Internal Server Error\)/){
		print NEW $line;
	}
	if(($data_flag) && ($line=~/font/)){
		#keep only the text between the first font tag and the next tag
		$line =~ s/.*?\<font.*?\>//;
		$line =~ s/\<.*//;
		print NEW "      <small>\n      <a href=\"http://www.kirkwood.com/conditions.asp\">";
		print NEW "Kirkwood</a> <a href=\"http://www.weather.com/outlook/recreation/ski/weather/tenday/209004\">";
		print NEW "snowfall</a>: $line\n      </small>\n";
		$data_flag=0;
	}
	#checked AFTER the block above, so the value is read from a later
	#line and never from the marker line itself
	if($line=~ /New Snow in the/) {
		$data_flag=1;
	}
}
$data_flag=0;

#temporary summer kirkwood section
#print NEW "      <small>\n      <a href=\"http://www.kirkwood.com/conditions.asp\">";
#print NEW "Days until next season</a> : ";
#print NEW "$daysleft\n      </small>\n";

#create column for middle of top header
print NEW "    </TD>\n";

print NEW "    <TD align=\"center\">\n";
print NEW "     <center>\n";
print NEW "       <font size=\"4\"><b>Justin's Website Parser</b></font><br>\n";
print NEW "     </center>\n";
print NEW "    </TD>\n";

#create column for rightside of top header
#Right cell of the second header row: the stock quote line taken from
#the finance page ($all_data[7]), shown as a link back to that page.
print NEW "\n\n<!-------- INTEL STOCK SECTION -------------->\n";
print NEW "    <TD align=\"right\">\n";

#Stock Ticker
foreach $line (@{$all_data[7]}) {
	#the fetch loop marks failed pages with this text; show the notice
	if($line =~ /\(Internal Server Error\)/){
		print NEW $line;
	}

	#the quote is on the line(s) containing "l>Reuters"
	if($line =~ /l\>Reuters/){
		#remove unwanted html tags
		$line =~ s/\<\/?td.*?\>//g;
		$line =~ s/\<\/?big.*?\>//g;
		$line =~ s/\<\/?small.*?\>//g;
	
		#remove leading whitespace
		$line =~ s/^\s+//;

		#remove everything after "Reuters".
		$line =~ s/Reuters.*/Reuters/;

		#make Reuters comment smaller
		$line =~ s/Reuters/\<small\>Reuters\<\/small\>/;



		#(disabled) colour the change in stock price green/red
		#grab the change in stock price
		#$line =~ /\>([+-][^<]*)/;
		#$temp_1 = $1;
		#if($temp_1=~/\+/){
		#	$temp_1=~s/(.*)/\<font color\=\"green\"\>$1\<\/font\>/;
		#}
		#else{
		#	$temp_1=~s/(.*)/\<font color\=\"red\"\>$1\<\/font\>/;
		#}

		#add finance.yahoo.com to url
		#$line =~ s/href\=\"\//href\=\"http\:\/\/finance\.yahoo\.com\//;
		#print NEW "    <small>\n     $line : $temp_1 \n    </small>\n    <br>\n";
		print NEW "      <small><a href=\"$url[7]\" style=\"text-decoration:none\">";
		print NEW "<font color=\"black\"> $line </font></a></small><br>\n";
	}
}
print NEW "    </TD></TR>\n";

#row inside header to display WOTD
#Third header row: the word of the day, read from $all_data[12].
#The word comes from the line containing "span style"; the definition
#is the line that directly follows the "<!-- WOTD" marker line.
print NEW "\n\n<!-------- Word of the Day SECTION -------------->\n";
print NEW "    <TR><TD colspan=\"3\">\n";
################## WORD OF THE DAY
$getNextLine=0;
foreach $line (@{$all_data[12]}) {
	#handle html failures. - don't want this for WOTD!
	#if($line =~ /\(Internal Server Error\)/){
	#	print NEW $line;
	#}

	if($line=~/span style/){
		#turn the first span pair into <b>...</b>
		$line =~ s/\<span.*?\>/\<b\>/;
		$line =~ s/\<\/span.*?\>/\<\/b\>/;
		#remove all remaining span tags
		$line =~ s/\<\/?span.*?\>//g;
		#remove all br tags
		$line =~ s/\<\/?br.*?\>//g;
		#remove excessive whitespace
		$line =~ s/\s+/ /g;

		#opens <center><small><a><font>; closed with the definition below
		print NEW "    <center>\n     <small>\n      <a href=\"$url[12]\" style=\"text-decoration:none\">";
		print NEW "<font color=\"black\">\n      $line";
	}
	if ($getNextLine==1) {
		#The closing tag has to be "</small>".  It used to be written as
		#"<\small>", which Perl prints as "<small>" (unknown escape), so
		#the <small> opened above was never closed.
		print NEW "      $line\n     </font></a>\n     </small>\n    </center>\n";
		$getNextLine=0;
	}
	#checked last so that the flag applies to the NEXT line
	if ($line =~ /\<\!\-\- WOTD/) {
		$getNextLine=1;
	}
}
################## end WORD OF THE DAY
print NEW "  </TD>\n </TR>\n</TABLE>\n";
#end WOTD Row


#right column might go here

print NEW "  </TD>\n </TR>\n ";

#close the border opened around the header
&end_border();


#First news column: Google News, read from $all_data[0].
#Every source line is split at "<br>"; a fragment is either a category
#heading (bold "Top", "World", "U.S", "Business", "Sci", "Sports") or,
#while $headline_limit allows it, a headline (fragment containing "class=y").
print NEW "\n\n<!-------- GOOGLE NEWS SECTION -------------->\n";
print NEW "  <TR>\n  <TD valign=\"top\" width=\"180\">\n";
#GOOGLE NEWS PARSER:
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"http://news.google.com/\"><FONT size=\"5\" color=\"red\"><b>Google News</b></FONT></A><BR>\n";
&end_border();

$errorOccured=0;
foreach $line (@{$all_data[0]}) {
	@temp = split(/\<br\>/,$line);	
	foreach $line2 (@temp) {
	
		&html_error($line2);
		
		#print NEW "<!--$line2-->\n";
		#get categories
		if( ($line2 =~ /\<b\>Top/)   || ($line2 =~ /\<b\>World/) || 
			($line2 =~ /\<b\>U\.S/)  || ($line2 =~ /\<b\>Business/) || 
			($line2 =~ /\<b\>Sci/)   || ($line2 =~ /\<b\>Sports/)){# || 
		#	($line2 =~ /\<b\>Enter/) || ($line2 =~ /\<b\>Health/)){
			#$line2=~ /\<b\>([a-zA-Z ]+)\<\/b\>/;
			#$result = $1;
			#print $1."\n";

			#remove HTML tags.
			$line2 =~ s/\<\/?IMG.*?\>//g;
			$line2 =~ s/\<\/?p.*?\>//g;
			#remove table tags
			$line2 =~ s/\<\/?tr.*?\>//g;
			$line2 =~ s/\<\/?td.*?\>//g;
			$line2 =~ s/\<\/?table.*?\>//g;
			#remove links
			$line2 =~ s/\<a.*?\/a\>//g;
			#remove extra characters
			$line2 =~ s/\|//g;
			#remove ALL whitespace (this also turns "<font size" into
			#"<fontsize", which the next substitution relies on)
			$line2 =~ s/\s+//g;
			
			#Remove JUMP TO feature
			$line2 =~ s/\<fontsize\=\-2.*//;
	
			#dont print </small> first time
			if($headline_limit >= 1){
				print NEW "</small>\n";	
			}
			print NEW $line2, "<BR>\n";
			print NEW "<small>\n";
			#allow $Number_of_headlines headlines for this category:
			#headlines are printed while the limit is greater than 1
			$headline_limit = $Number_of_headlines + 1;
		}
	
		#get headlines and put into file
		if(($line2 =~ /.*class\=y/) && ($headline_limit > 1)){
			#remove table tags
			$line2 =~ s/\<\/?tr.*?\>//g;
			$line2 =~ s/\<\/?td.*?\>//g;
			$line2 =~ s/\<\/?table.*?\>//g;
			#strip the redirect prefix, e.g. /url?ntc=0L0A0&q=
			$line2 =~ s/\/url\?.*?q\=//g;
			#$line2 =~ s/\<.*?\>//;  #removes html
			#drop the first "class=" plus the one character after it
			$line2 =~ s/class\=.//;
			#add a linebreak after a <br> tag
			$line2 =~ s/\<BR\>/\<BR\>\n/g;

			#change %3F into actual characters
			$line2 =~ s/\%3F/\?/g;
			$line2 =~ s/\%3D/\=/g;
			$line2 =~ s/\%26/\&/g;
			
			#shorten links that are too long
	#		$line2 = &shorten_line($line2);
	
			print NEW "*$line2<br>\n";
			&print_error( $line2);
			$headline_limit--;
		}
	}
}
print NEW "  </TD>\n";
#END GOOGLE NEWS PARSER:


#reset shared parser state for the next column
$headline_limit = 0;
$counter = 0;
$title = "";


#Second news column, first part: Slashdot, read from the XML headline
#feed in $all_data[1].
#A <title> line is remembered (shortened to $column_width characters for
#display); the following <url> line prints the headline as a clickable
#div and then FETCHES THE STORY PAGE to extract its first <i>...</i>
#text, which goes into a hidden div that ToggleSlash() shows and hides.
#Headlines are handled while $counter <= $Number_of_headlines*4.
print NEW "\n\n<!-------- SLASHDOT NEWS SECTION -------------->\n";
print NEW "  <TD valign=\"top\">\n";
#SLASHDOT NEWS PARSER:
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"http://slashdot.org/\"><FONT size=\"5\" color=\"green\"><b>Slashdot</b></FONT></A>\n";
&end_border();
print NEW "<P>\n<small>\n";

$errorOccured=0;
foreach $line (@{$all_data[1]}) {
	&html_error($line);

	#print NEW "<small><small> $line </small></small><br>\n";
	#the feed answers with this text when it refuses to serve us
	if($line =~ /Your Headline Reader Has Been Banned/){
		print NEW "<a href=\"http://www.slashdot.org\">Slashdot</a> is being a little b*!@#.<br>\n";
		print NEW "Whoever wrote their crummy xml/rss<br> server ";
		print NEW "needs a 133tness class<br> from yours truly.";
	}

	#bug fix to remove extra "amp;" due to xml weirdness
	$line =~ s/amp\;amp\;/amp\;/g;

	#parse XML for headlines
	if(($line =~ /\<title/) && ($counter <= ($Number_of_headlines*4))){
		$line =~ s/\<.*?title\>//g;
		$line =~ s/^\s+//g;
		$title = $line;
		$original_title = $title;
		#cut titles that do not fit the column
		if(length($title)>$column_width+3){
			$title=substr($title,0,$column_width);
			$title=$title."...";
		}
	}
	if(($line =~ /\<url\>(.*?)\</) && ($counter <= ($Number_of_headlines*4))){
		$headline_url = $1;
#		print NEW "<div id=\"sdlink$counter\" onMouseDown=\"sdtext$counter.style.display=\'block\';\">\n";
		print NEW "<div id=\"sdlink$counter\" onMouseDown=\"ToggleSlash(sdtext$counter);\">\n";

		print NEW "<font color=\"blue\"><u>$title</u></font>\n" . "</div>\n";
#		print NEW "<a title=\"$original_title\" href=\"$headline_url\">$title</a><br>\n\n";
		&print_error( "<a title=\"$original_title\" href=\"$headline_url\">$title</a><br>\n");
		
#		GRAB CONTENT FROM LINK
		my $slash_req = HTTP::Request->new(GET => $headline_url);
		@slash_page = split(/\n/,$ua->request($slash_req)->as_string);

		#Hidden box holding the story text.  The opening tag used to end in
		#style="display:none;""> - the stray second quote made it malformed.
		$div_header = "\t<div id=\"sdtext$counter\" style=\"display:none;\">\n";#onMouseDown=\"sdtext$counter.style.display ='none';
		$div_header = $div_header . "\t <TABLE border=\"0\" width=\"100%\"><tr><td bgcolor=\"#999999\">\n";
		$div_header = $div_header . "\t <TABLE border=\"0\" width=\"100%\"><tr><td bgcolor=\"#eeeeee\" align=\"left\" width=\"190\">\n";
		$div_header = $div_header . "\t <small>\n" . "<b><a href=\"$headline_url\">$original_title</a></b><br>\n\t";
		$div_tailer = " \t</small>\n\t </td></tr></table>\n\t </td></tr></table>\n\t</div>";

		#the story summary is taken to be the first <i>...</i> on one line
		$gotcha=0;
		foreach $slashline (@slash_page) {
			if($slashline =~ /\<i\>(.*?)\<\/i\>/i){
				print NEW "\n $div_header $1 $div_tailer\n\n";
				$gotcha=1;
				last;
			}
		}
		if($gotcha==0){
			print NEW "\n $div_header Couldn't find the story. $div_tailer\n\n";
		}
		$counter++;
	}
#
#	#parse XML for images
#	if( ($line =~ /\<image\>(\S+)\</) && ($counter<=($Number_of_headlines*4))){
#		$image_name = $1;
#		#http://images.slashdot.org/topics/
#		$align="right";
#		if($counter % 2 == 0){
#			$align="left";
#		}
#		print NEW "<img align=\"$align\" height=\"32\" width=\"40\" src=\"http://images.slashdot.org/topics/";
#		print NEW $image_name . "\">\n";
#		print NEW $saved_title;
#	}


}
print NEW "</small>\n</P>\n";
#END SLASHDOT NEWS PARSER:


#Second news column, second part: The Inquirer, read from $all_data[2].
#Parsing starts at the line containing "td_mainbody".  Each later line
#with a div of class "ht" holds several stories; it is cleaned up, split
#at "</div></div></div>", and at most $Number_of_headlines*4 pieces are
#printed, each shortened with shorten_line().
print NEW "\n\n<!-------- INQUIRER NEWS SECTION -------------->\n";
$valid_data = 0;
#INQUIRER NEWS PARSER:
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"http://www.theinquirer.net/\"><FONT size=\"5\" color=\"purple\"><b>The Inquirer</b></FONT></A>\n";
&end_border();

$errorOccured=0;
foreach $line (@{$all_data[2]}) {
	
	&html_error($line);


	#parse HTML for headlines
	if($line =~ /td_mainbody/){
		$valid_data=1;
	}

	if(($valid_data) && ($line =~ /div class\=\"ht\"/)){
	
		#remove all the following HTML tags.
		$line =~ s/\<\/?td.*?\>//g;
		$line =~ s/\<\/?tr.*?\>//g;
		$line =~ s/\<\/?img.*?\>//g;
		$line =~ s/\<\/?b.*?\>//g;
		$line =~ s/\<\/?embed.*?\>//g;
		$line =~ s/\<\/?hr.*?\>//g;
		$line =~ s/\<\/?s.*?\>//g;

		#put a <br> tag after every link.
		$line =~ s/\/a\>/\/a\>\<br\>/g;
		
		#insert theinquirer.net into a link url
		$line =~ s/\"\?/\"http\:\/\/www\.theinquirer\.net\/\?/g;
		
		print NEW "</small>\n<P>\n<small>\n";
		@temp = split(/\<\/div\>\<\/div\>\<\/div\>/,$line);		

		#Remember the last piece we looked at.  $newline cannot be used for
		#this after the loop: a foreach loop variable gets its old value
		#back when the loop ends, so the test below used to see an empty
		#value and the closing tags were never printed.
		my $last_piece = "";

		$count = 0;
		foreach $newline (@temp) {
			if(($count < ($Number_of_headlines*4)) && !($newline=~/2002 Breakthrough Publishing Ltd/) && !($newline=~/Advert/)) {
				$newline =~ s/\<\/?div.*?\>//g;
				
				#remove font tags and everything inside them
				$newline =~ s/\<font.*?\/font\>//g;
				$newline =~ s/\s+$//;
				$newline =~ s/\<br\>.*/\<br\>\n/;
				
				#remove everything between <a></a> links
				$newline =~ s/\<\/a.*?\>.*?\<a/\<\/a\>\<br\>\n\<a/g;		

				#shorten links that are too long
				$newline = &shorten_line($newline);

				#remove leading whitespace
				$newline =~ s/^\s+//;
				print NEW $newline;
				&print_error( $newline);
			}
			$last_piece = $newline;
			$count++;
		}
		#close the paragraph opened above when the line ended in real text
		if($last_piece=~/\w+/){
			print NEW "</small>\n</P>\n";
			print NEW "<BR>\n";
		}
	}
}
print NEW "  </TD>\n  <TD valign=\"top\">\n";
#END INQUIRER NEWS PARSER:


#!@#$%^&*()
#Third news column, first part: SacBee, read from $all_data[8].
#Lines marked "nheadLineSS" or "ntopStory" are treated as article links.
print NEW "\n\n<!-------- SACBEE NEWS SECTION -------------->\n";
#SacBee NEWS & INFO PARSER
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"http://sacbee.com/\"><FONT size=\"5\" color=\"orange\"><b>SacBee</b></FONT></A>";
&end_border();

print NEW "<BR>\n<small>\n";

#$data_flag counts down the remaining links.  The test below is ">= 0",
#so up to $Number_of_headlines*7 + 1 links are printed.
$data_flag=$Number_of_headlines * 7;
$errorOccured=0;
foreach $line (@{$all_data[8]}) {
	&html_error($line);


	#grab and print actual links to articles
	if( (($line =~ /nheadLineSS/) || ($line =~ /ntopStory/)) && ($data_flag>=0)){

		#normalise "<br />" to "<br>"
		$line =~ s/\<br \/\>/\<br\>/g;

		#make the link absolute (only the first href on the line is changed)
		$line =~ s/href\=\"/href\=\"http\:\/\/www\.sacbee\.com/;

		$line = &shorten_line($line);

		print NEW "$line\n";
		&print_error( "$line\n");

		$data_flag--;


	}

}
print NEW"</small>\n</P>\n";
#END SACBEE NEWS & INFO PARSER



#print NEW "\n\n<!-------- YAHOO/REUTERS NEWS SECTION -------------->\n";
##YAHOO NEWS & INFO PARSER
#&html_border();
#print NEW "<A STYLE=\"text-decoration:none\" href=\"http://my.yahoo.com/\"><FONT size=\"5\" color=\"orange\"><b>My Yahoo!</b></FONT></A><BR>\n<small>\n";
#&end_border();
#
#$data_flag=0; 	$errorOccured=0;
#foreach $line (@{$all_data[8]}) {
#	&html_error($line);
#
#
#	#grab and print actual links to articles
#	if(($data_flag) && ($line =~ /story\.news\.yahoo\.com/)){
#		$line =~ s/\<\/?li.*?\>/\n/g;
#		$line =~ s/\<\/?td.*?\>//g;
#		$line =~ s/\<\/?tr.*?\>//g;
#		$line =~ s/\<\/?font.*?\>//g;
#		$line =~ s/\<\/?ul.*?\>//g;
#		$line =~ s/\/a\>/\/a\>\<br\>/g;
#		$line =~ s/\<i.*?\/i\>//g;
#		if(!($line =~ /\<b\>/)){
#			
#			#do a split here.
#			@three_headlines = split(/\n/,$line);
#			foreach $line (@three_headlines) {
#
#				#shorten links that are too long
#				$line = &shorten_line($line);
#
#				print NEW "$line\n";
#				&print_error( "$line\n");
#
#				$data_flag--;
#			}
#		}
#	}
#	#grab and print headlines
#	if (($line =~ /Top Stories from  Reuters/) || ($line =~ /World  News from  Reuters/) || ($line =~ /Business  News from  Reuters/) || ($line=~/Politics  News from  Reuters/)){
#		#$line =~ s/\<[^a].*?\<\/.*?\>//g;
#		$line =~ s/.*\<b\>/\<b\>/g;
#		$line =~ s/\<\/?td.*?\>//g;
#		$line =~ s/\<\/?tr.*?\>//g;
#		$line =~ s/\<\/?br.*?\>//g;
#		$line =~ s/\<\/?font.*?\>//g;
#		$line =~ s/\<\/?table.*?\>//g;
#		$line =~ s/\<i.*?\/i\>//g;
#		$line =~ s/\<\!.*?\/\-\-\>//g;
#		
#		#may want to remove these
#		$line =~ s/from\s+Reuters//g;
#		$line =~ s/\<\/?a.*?\>//g;
#		print NEW "</small>\n$line\n<br>\n<small>";
#		$data_flag=2;
#	}
#
#}
#print NEW"</small>\n</P>\n";
##END YAHOO NEWS & INFO PARSER




#Third news column, second part: Ace's Hardware, read from $all_data[10].
#Between the "Latest News</b>" line and the next line containing "</td",
#every line with an "<a" tag is remembered as a link; the line FOLLOWING
#it is taken as the article title, appended to the link and printed.
print NEW "\n\n<!-------- ACESHARDWARE NEWS SECTION -------------->\n";
$valid_data = 0;
$second_line=0;
############ ACESHARDWARE NEWS PARSER: ################
print NEW "<br>\n";
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"$url[10]\"><FONT size=\"5\" color=\"blue\"><b>Ace's Hardware</b></FONT></A>\n";
&end_border();
print NEW "<P>\n<small>\n";

#$link_var carries the cleaned link line over to the next iteration
$errorOccured=0; $link_var = "";
foreach $line (@{$all_data[10]}) {
	&html_error($line);

	#PRINTS ARTICLE TITLE
	#(runs on the line right after a link line, see $second_line below)
	if ($second_line) {
		$line =~ s/\<\/?font.*?\>//g;
		$line =~ s/\<\/?b\>//g;
		#remove extra whitespace
		$line =~ s/\s*$//;
		$line =~ s/^\s*//;

		$line_new = $link_var . $line;

		$line_new = shorten_line($line_new);

		print NEW $line_new."\n";
		&print_error( "$line_new\n");
	}
	#the flag is valid for one line only
	$second_line=0;

	#Finds LINK
	if(($valid_data) && ($line=~/\<a/)){
		#print NEW "\n<!--$line-->\n";
		#make in-page "#..." links absolute
		$line =~ s/href\=\"\#/href=\"http\:\/\/www\.aceshardware\.com\/\#/g;
		$line =~ s/\<\/?font.*?\>//g;
		$line =~ s/\<\/?b\>//g;
		#insert breaks after <br> tags
		$line =~ s/\<br\>/\<br\>\n/g;
		#remove extra whitespace
		$line =~ s/\s*$//;
		$line =~ s/^\s*//;

		#insert title
		#$line =~s /\<a\s+h/\<a title\=\"$temp_line\" h/;
		$second_line=1;
		
		#print NEW $line;
		#&print_error( "$line");
		$link_var = $line;
	}
	#start of the news list
	if ($line =~ /Latest News\<\/b\>/){
		$valid_data = 1;
	}
	#any closing table cell ends the news list
	if ($line =~ /\<\/td/) {
		$valid_data = 0;
	}
}
print NEW "</small>\n</P>\n    </TD>\n    <TD valign=\"top\" width=\"180\">";


	########################################
	#Yahoo Top Images
	#
	# Fourth news column: photo thumbnails and their "topstory" captions,
	# read from $all_data[18].
	#  - a line containing "class=topstory" is printed as a caption and
	#    closes the <a> left open by the image before it
	#  - a line containing "alt=Photo" or "alt=Slideshow" is an image; it
	#    is scaled so that its smaller side is $picture_size pixels and
	#    aligned left/right alternately
#	print NEW "  <Table>\n <TR><TD valign=\"top\" width=\"180\">\n";
	print NEW "\n   <!-------- News Images from Yahoo-------------->\n";
	&html_border();
	#The heading links to the page that was parsed.  The two attributes
	#used to be printed without a space between them, and the hand-typed
	#copy of the URL carried a stray trailing "/".
	print NEW "<A STYLE=\"text-decoration:none\"";
	print NEW " href=\"$url[18]\">";
	print NEW "<FONT size=\"5\" color=\"red\"><b>Yahoo News</b></FONT></A><BR>\n";
	&end_border();

	print NEW "   <P>\n";

	$valid_data=0; $alternate=0;
	$errorOccured=0; $picture_size = 35;
	foreach $line (@{$all_data[18]}) {
		&html_error($line);

		if($line =~ /class\=topstory/){
			#drop everything up to and including the first ">"
			$line =~ s/.*?\>//;
			print NEW "       <small>$line</small></a><br clear=all>\n";
		}

		if(($line =~ /alt\=Photo/) or ($line =~ /alt\=Slideshow/)){
			#collapse every run of whitespace to one space
			$line =~ s/\s+/ /g;

			#Read the original size.  Either attribute may be missing; the
			#old code then reused a stale capture and could die with
			#"Illegal division by zero".
			$width  = ($line =~ /width\=(\d{2,3})/)  ? $1 : 0;
			$height = ($line =~ /height\=(\d{2,3})/) ? $1 : 0;

			if(($width > 0) && ($height > 0)){
				#keep original aspect ratio
				$ratio = $width / $height;
				if($width > $height){
					$height = $picture_size;
					$width = $picture_size * $ratio;
				}else{
					$width = $picture_size;
					$height = $picture_size / $ratio;
				}
				#pixel sizes have to be whole numbers
				$width  = int($width + 0.5);
				$height = int($height + 0.5);
			}else{
				#size unknown: fall back to a square thumbnail
				$width  = $picture_size;
				$height = $picture_size;
			}
			#write the new size (no-op for an attribute that is absent)
			$line =~ s/height\=\d{2,3}/height\=$height/;
			$line =~ s/width\=\d{2,3}/width\=$width/;
	
			#remove the first </a> tag; the caption line closes the link
			$line =~ s/\<\/a\>//;

			#edit <a> tags for no underline, <img> tags no border
		#	$line =~ s/\<a/\<a STYLE\=\"text\-decoration\:none\"/;
			$line =~ s/\<img/\<img border\=\"0\"/;

			#remove comments
			$line =~ s/\<\!\-\-.*?\-\-\>//g;

			#align alternating
			$line =~ s/align\=left//;
			$line =~ s/align\=right//;
			if($alternate==0){
				$line =~ s/\<img/\<img align\=\"left\"/;
				$alternate = 1;
			}else{
				$line =~ s/\<img/\<img align\=\"right\"/;
				$alternate = 0;
			}
			print NEW "     $line\n";
			$valid_data = 1;
		}

	}






#border for news section
&end_border();


#end news section
print NEW "  </TD>\n </TR>\n</TABLE>\n<BR>\n";


#start 4th Column
print NEW "  </TD>\n  <TD rowspan=\"9\" valign=\"top\">\n<!--START Righthand COLUMN 4-->\n";

####### SPECIAL RIGHT HAND COLUMN DATA
# Copy the contents of rightbar.txt verbatim into the page.  If the file
# cannot be opened, an HTML comment is written in its place.
# The handle is opened explicitly read-only and closed only when the open
# succeeded.
if (open(RB, "<", "rightbar.txt")) {
	while (<RB>){
		print NEW $_;
	}
	close(RB);
}
else{
	print NEW "\n\t<!-- CANNOT FIND rightbar.txt with HTML DATA-->\n";
}
print NEW "\t<!-- END OF RIGHTHAND COLUMN-->\n";
####### END SPECIAL RIGHT HAND COLUMN DATA

#close the right-hand cell, its row and the table
print NEW "\t </TD></TR></TABLE>\n\n";




#END OF NEWS TABLE, START COMICS TABLE
# Opens the comics table and, unless $comics_on is false, writes one <P>
# block per comic.  Every comic follows the same recipe: reset
# $errorOccured, walk the fetched lines in its $all_data slot, pass each
# line to html_error(), and when a line matches that comic's marker pattern
# rewrite it in place into an <img> tag wrapped in a link to the comic's site.
# The marker patterns and path prefixes are tied to each site's markup as it
# was when this was written.
print NEW "\n\n\n<!-------- END NEWS : START COMICS -------------->\n\n\n";
print NEW "<TABLE border=\"0\">\n <TR>\n  <TD colspan=\"8\" valign=\"top\">\n";

if (!($comics_on)) {
	# Comics disabled: emit a placeholder heading only.
	print NEW "\n\n\n<!--------- COMICS ARE OFF ------------------->\n\n\n";
	print NEW "<h3>Comics are off</h3>\n";
}
else{

	print NEW "\n   <!-------- COMICS:Dilbert SECTION -------------->\n";
	$comic = "";
	#DILBERT URL PARSER (data slot 3):
	print NEW "   <hr><FONT size=\"5\" color=\"blue\">Comics</FONT>\n   <P>\n";
	
	$errorOccured=0;
	foreach $line (@{$all_data[3]}) {
		&html_error($line);

		if($line =~ /Today\'s Dilbert Comic/){
			# Strip table cells, line breaks, anchors and HTML comments.
			$line =~ s/\<\/?TD.*?\>//g;
			$line =~ s/\<\/?BR.*?\>//g;
			$line =~ s/\<\/?a.*?\>//gi;
			$line =~ s/\<\!\-\-.*?\-\-\>//g;
			#<IMG SRC="/images/ffffff_dot.gif"
			$line =~ s/\<IMG SRC\=\"\/images\/fffff.*?\>//g;
			#$line =~ s/\<IMG SRC=\".*?mystery_artist.*?\>//;
			
			#<IMG SRC="/comics/dilbert/
			# Remove the tag opening and path prefix, then cut everything
			# after the closing quote of the .gif name; what remains is the
			# tail of the image path ending in gif".
			$line =~ s/\<IMG\s*SRC\=\"\/comics\/dilbert//g;
			$line =~ s/gif\".*/gif\"/;
			$line =~ s/^\s+//g;
			$comic = $line;
			# $comic supplies the closing quote of the SRC attribute.
			print NEW "    <A STYLE=\"text-decoration:none\" href=\"http://www.dilbert.com\"><IMG border=\"0\" SRC=\"http://www.dilbert.com/comics/dilbert$comic></A>\n";
		}
	
	}
	print NEW "   </P>\n";
	#END DILBERT URL PARSER:


	########################################
	#PENNY ARCADE URL PARSER (data slot 4):
	print NEW "\n   <!-------- COMICS:Penny Arcade SECTION -------------->\n";
	$comic = "";
	print NEW "   <P>\n";
	
	$errorOccured=0;
	foreach $line (@{$all_data[4]}) {
		&html_error($line);
		
		#Ack! Y2K10 bug!!!!
		# NOTE(review): the marker only matches paths beginning "images/200";
		# presumably that is a year prefix, so 2010+ would not match - confirm.
		if($line =~ /images\/200/){
			$line =~ s/\<\/?td.*?\>//g;
			$line =~ s/ALT\=\"\"//;
			# Prefix the first c=" (the end of src=") with the site root.
			$line =~ s/c\=\"/c\=\"http\:\/\/www\.penny\-arcade\.com\//;
			$line =~ s/\s+/ /g;
			$line =~ s/\<img/\<img border\=\"0\"/;
			print NEW "    <A STYLE=\"text-decoration:none\" href=\"http://www.penny-arcade.com\">";
			print NEW $line;
			print NEW "</A>\n";
		}
	
	}
	print NEW "   </P>\n";
	#END PENNY ARCADE URL PARSER:


	########################################
	#PVP SECTION (data slot 5)
	print NEW "\n   <!-------- COMICS:PVP Online SECTION -------------->\n";
	print NEW "   <P>\n";
	
	$errorOccured=0;
	foreach $line (@{$all_data[5]}) {
		&html_error($line);
	
		if($line =~ /IMG.*?archive\//){
			# Prefix the first C=" (the end of SRC=") with the site root.
			$line =~ s/C\=\"/C\=\"http\:\/\/www\.pvponline\.com\//;
			$line =~ s/\<\/?center.*?\>//gi;
			$line =~ s/\<\/?br.*?\>//g;
			$line =~ s/\<\/?td.*?\>//g;
			
			#remove extra <img> tag
			$line =~ s/\<img.*?images.*?\>//;

			$line =~ s/\<IMG/\<IMG align\=\"left\" border\=\"0\"/;

			#fix: remove align="left"
			# (this also removes the align="left" added just above, so only
			# border="0" survives from that substitution)
			$line =~ s/align\=\"left\"//g;

			print NEW "    <A STYLE=\"text-decoration:none\" href=\"http://www.pvponline.com/\">";
			print NEW $line;
			print NEW "</A>\n";
		}
	}
	print NEW "   </P>\n";
	#END PVP URL PARSER


	########################################
	#Diesel Sweeties (data slot 6)
	print NEW "\n   <!-------- COMICS:Diesel Sweeties SECTION -------------->\n";
	print NEW "   <P>\n";

	# $linecount is incremented per line but not otherwise used in this section.
	$errorOccured=0; $linecount=0;
	foreach $line (@{$all_data[6]}) {
		&html_error($line);
		$linecount++;
		if($line =~ /\*\*\*newest/){
			# Keep only the first <img ...> tag that follows the
			# "***newest" marker.
			$line =~ s/.*?\*\*\*newest//;
			$line =~ s/.*?\<img/\<img/i;
			$line =~ s/\>.*/\>/;

			$line =~ s/^\s*//;

			$line =~ s/src\=\"/src\=\"http\:\/\/www\.dieselsweeties\.com/;

#			$line =~ s/\<img/\<img align\=\"right\"/i;
			print NEW "    <A STYLE=\"text-decoration:none\" href=\"http://www.dieselsweeties.com/\">";
			print NEW $line ."\n";
			print NEW "   </A>\n";
		}
	}
	print NEW "   </P>\n";
	
	########################################
	#Little Gamers SECTION (data slot 20)
	print NEW "\n   <!-------- COMICS:Little Gamers Online SECTION -------------->\n";
	print NEW "   <P>\n";
	
	$errorOccured=0;
	foreach $line (@{$all_data[20]}) {
		&html_error($line);
	
		if($line =~ /show_strip/){
			# This site's markup uses single-quoted attributes.
			$line =~ s/src\=\'/src\=\'http\:\/\/www\.little\-gamers\.com/;
			$line =~ s/\' border\=0/\'/i;
			$line =~ s/\<\/?tr.*?\>//gi;
			$line =~ s/\<\/?td.*?\>//gi;
			
			$line =~ s/\<img/\<img align\=\"left\" border\=\"0\"/i;

			$line =~ s/^\s+//;
			$line =~ s/\s+$//;

			print NEW "    <A STYLE=\"text-decoration:none\" href=\"http://www.little-gamers.com/\">";
			print NEW $line;
			print NEW "</A>\n";
		}
	}
	print NEW "   </P>\n";
	#END LittleGamers URL PARSER

	########################################
	#MegaTokyo SECTION (data slot 21)
	print NEW "\n   <!-------- COMICS:MegaTokyo Online SECTION -------------->\n";
	print NEW "   <br clear=\"all\">\n";
	print NEW "   <P>\n";
	
	$errorOccured=0;
	foreach $line (@{$all_data[21]}) {
		&html_error($line);
	
		if($line =~ /\/strips/){
			$line =~ s/\<\/?tr.*?\>//gi;
			$line =~ s/\<\/?td.*?\>//gi;

			# Drop everything before the strip's <img>, make the src
			# absolute, and cut the tag off right after the .gif name.
			$line =~ s/.*?\<img src\=\"\/strips/\<img src\=\"\/strips/i;
			$line =~ s/src\=\"/src\=\"http\:\/\/www\.megatokyo\.com/i;
			$line =~ s/\.gif\".*/\.gif\"\>/i;

			
			$line =~ s/\<img/\<img align\=\"left\" border\=\"0\"/i;

			print NEW "    <A STYLE=\"text-decoration:none\" href=\"http://www.megatokyo.com/\">";
			print NEW $line;
			print NEW "</A>\n";
		}
	}
	print NEW "   </P>\n";
	#END MEGATOKYO URL PARSER

	########################################
	#CTRL+ALT+DEL SECTION (data slot 22)
	print NEW "\n   <!-------- COMICS:CTRL+ALT+DEL Online SECTION -------------->\n";
	print NEW "   <br clear=\"all\">\n";
	print NEW "   <P>\n";
	
	$errorOccured=0;
	foreach $line (@{$all_data[22]}) {
		&html_error($line);

		if($line =~ /\/comics/){
			# Cut the tag off right after the .jpg name, then make the src
			# absolute.
			$line =~ s/\.jpg\".*/\.jpg\"\>/i;
			$line =~ s/src\=\"/src\=\"http\:\/\/www\.ctrlaltdel\-online\.com/i;	
			$line =~ s/\<img/\<img align\=\"left\" border\=\"0\"/i;

			print NEW "    <A STYLE=\"text-decoration:none\" href=\"http://www.ctrlaltdel-online.com/\">";
			print NEW $line;
			print NEW "</A>\n";
		}
	}
	print NEW "   </P>\n";
	#END CTRL+ALT+DEL URL PARSER


   print NEW "\n\n";
   }#comicsoff if statement.
#
	########################################
	#Reuters Top Images
	#
	# Opens a new 240-pixel-wide table row, prints a bordered "Reuters News"
	# heading, then scans the lines stored in $all_data[19]:
	#   - lines containing "smPicBorder" are rewritten into plain links and
	#     printed;
	#   - lines containing "</b><br>" are printed as doubly-small captions.
	print NEW "\n   <!-------- News Images from Reuters-------------->\n";
	print NEW "<tr><td width=\"240\">\n<br clear=\"all\">\n";
	&html_border();
	# Trailing space keeps the STYLE and href attributes separated in the
	# emitted tag.
	print NEW "<A STYLE=\"text-decoration:none\" ";
	print NEW "href=\"http://www.reuters.com/newsPhotoGallery.jhtml?type=topNews\">";
	print NEW "<FONT size=\"5\" color=\"orange\"><b>Reuters News</b></FONT></A><BR>\n";
	&end_border();
	print NEW "   <P>\n";

	$valid_data=0;
	$errorOccured=0;
	foreach $line (@{$all_data[19]}) {
		&html_error($line);
		if($line =~ /smPicBorder/){
			# Drop class and mouse-over attributes.
			$line =~ s/class\=\".*?\"//gi;
			$line =~ s/onMouseOver\=\".*?\"//gi;
			$line =~ s/onMouseOut\=\".*?\"//gi;
			# Turn the javascript popup call into a direct reuters.com URL,
			# then replace everything from the first single quote up to the
			# next '>' with a closing double quote and '>'.
			$line =~ s/javascript\:commonPopup\(\'/http\:\/\/www\.reuters\.com\//i;
			$line =~ s/\'.*?\>/\"\>/;
			$line =~ s/align\=\"\"/align\=\"left\"/i;
			print NEW "\t$line\n";
		}
		if($line =~ /\<\/b\>\<br\>/){
			$line =~ s/^\s+//;
			print NEW "\t<small><small>$line</small></small>\n";
		}

	}
	# Close the paragraph opened before the loop.  The previous literal
	# "<\p>" contained an unrecognized escape, so Perl printed "<p>" and
	# opened a second paragraph instead of closing this one.
	print NEW "</P>\n";
	print NEW "</td></tr>\n\n";


	print NEW "   </TD>\n   </TR></TABLE>\n";


		
	

	
# Report completion through print_error(), then close one more table level.
&print_error( "\n\nFinished. Closing $Output_file with the results.\n");
print NEW "  </TD>\n </TR>\n</Table>\n";




########################################
#QOTD SECTION
# Writes a hidden, absolutely positioned <div id="qotd"> containing a
# two-table border and the quote of the day taken from $all_data[13].
# The quote is the line that immediately follows a line containing
# "randomly selected quote"; it is linked to $url[13].
print NEW "\n\n<!-------- Quote of the Day SECTION -------------->\n";

print NEW "<div id=\"qotd\" background=\"#555555\" style=\"visibility:hidden; position:absolute; left:270;\">\n";

# Hand-written border with fixed widths; end_border() below closes both
# of these tables.
print NEW "\t<TABLE border=\"0\" width=\"356\"><tr><td bgcolor=\"#999999\">\n";
print NEW "\t<TABLE border=\"0\" width=\"350\"><tr><td bgcolor=\"#eeeeee\" align=\"center\">\n";
print NEW "\t <font size=\"3\"><b>Quote of the Day</b></font><br>\n";

#QOTD

$data_flag=0;
foreach $line (@{$all_data[13]}) {
	# Show a failed fetch in place of the quote.
	if($line =~ /\(Internal Server Error\)/ ){
		print NEW "\t <small> $line </small>\n";
	}
	# $data_flag was set by the previous line, so this line is the quote.
	if ($data_flag==1){
		#remove the first embedded link
		$line =~ s/\<A.*?\<\/A\>//;
		print NEW "\t <small>\n\t  <a href=\"$url[13]\" style=\"text-decoration:none\"><font color=\"black\">";
		print NEW $line;
		# Close <font> before <a> so the tags nest correctly.
		print NEW "</font></a>\n\t </small>\n";
		$data_flag=0;
	}
	if($line=~/randomly selected quote/){
		$data_flag=1;
	}
}
&end_border();
print NEW "</div>\n";
#END QOTD SECTION



########################################
#Upcoming Events SECTION
# Opens a hidden, absolutely positioned <div id="UpcomingEvents"> with a
# border and a one-row table.  This part fills the first column (Mondavi
# Center) from $all_data[14].
print NEW "\n\n<!-------- EVENTS SECTION -------------->\n\n";
print NEW "<div id=\"UpcomingEvents\" background=\"white\" style=\"visibility:hidden; position:absolute; top:100; left:30;\">\n";
&html_border();

#events table
print NEW "<table><tr>\n";

print NEW "\n\n<!-------- Mondavi Arts Center -------------->\n";
print NEW "<td align=\"left\" valign=\"top\" width=\"30%\">\n";
#MONDAVI ARTS CENTER
print NEW "<small><b>Mondavi Center</b><br>\n";
$errorOccured=0;
$data_flag=0; $more_info=0;
foreach $line (@{$all_data[14]}) {
	# Within this section $line_counter is only used by the commented-out
	# debug print below; it is not reset here.
	$line_counter++;
	&html_error($line);

	#look for good data indicator:
	#if($line=~/Begin lasso content/){
	if($line=~/Featured Events\"/){
		$data_flag=1;

		#remove everything before our indicator.
		# The pattern must use the same text as the guard above
		# ("Featured Events"); it previously looked for "Featured Event"
		# followed directly by a quote, which the guard text cannot
		# satisfy, so nothing was ever removed.
		$line =~ s/.*?\<img.*?Featured Events\"/\<img Featured Events\"/;

		#print NEW "\n<!--\n$line_counter:$line\n-->\n";

		#data is all one line for some reason.
		# Split into chunks that each end where a </a> used to be.
		@lasso_data = split(/\<\/a>/,$line);
		foreach $line2 (@lasso_data) {
			if (($data_flag==1) || ($more_info==1)){
				# State changes driven by markers inside the chunk:
				#   announcements.gif / Post-Performance -> stop event data
				#   membership/                          -> stop extra info
				#   blackcopysmall                       -> start extra info
				if (($line2=~/announcements\.gif/) || ($line2 =~ /Post-Performance/)){
					$data_flag=0;
				}
				if ($line2=~/membership\//) {
					$more_info=0;
				}
				if ($line2 =~ /blackcopysmall/) {
					$more_info=1;
				}
				if (($data_flag==1) && ($line2 =~ /whitelink2/)){
					# Event link: strip layout markup but keep the <a>,
					# make its href absolute, and restore the </a> that
					# split() consumed.
					$line2 =~ s/\<\!\-\-.*?\-\-\>//g;
					$line2 =~ s/\&.*?\;//g;
					$line2 =~ s/\<\/?td.*?\>//g;
					$line2 =~ s/\<\/?tr.*?\>//g;
					$line2 =~ s/\<\/?span.*?\>//g;
					$line2 =~ s/\<\/?blockquote.*?\>//g;
					$line2 =~ s/\<\/?br.*?\>//g;
					$line2 =~ s/\<\/?p.*?\>//g;
					$line2 =~ s/\<\/?img.*?\>//g;
					$line2 =~ s/\<\/?div.*?\>//g; #########
					$line2 =~ s/\<\/?table.*?\>//g;
					$line2 =~ s/\<a href\=\"/\<a href\=\"http\:\/\/www\.mondaviarts\.org\//;
					$line2 =~ s/\s+/ /g;
					$line2 =~ s/^\s+//;
					print NEW "$line2</a><br>  ";
				}
				elsif (($data_flag==1) || ($more_info==1)){ 
					# Plain text chunk: strip all markup including links
					# and print it only if something printable remains.
					$line2 =~ s/\<\!\-\-.*?\-\-\>//g;
					#get rid of extra <a link
					$line2 =~ s/\<\/?a.*?\>//g;
					$line2 =~ s/\&.*?\;//g;
					$line2 =~ s/\<\/?td.*?\>//g;
					$line2 =~ s/\<\/?blockquote.*?\>//g;
					$line2 =~ s/\<\/?br.*?\>//g;
					$line2 =~ s/\<\/?p.*?\>//g;
					$line2 =~ s/\<\/?tr.*?\>//g;
					$line2 =~ s/\<\/?td.*?\>//g;
					$line2 =~ s/\<\/?span.*?\>//g;
					$line2 =~ s/\<\/?img.*?\>//g;
					$line2 =~ s/\<\/?div.*?\>//g; #########
					$line2 =~ s/\<\/?table.*?\>//g;
					$line2 =~ s/\s+/ /g;
					$line2 =~ s/Post-Performance.*//;
					if ($line2 =~ /[a-zA-Z<]/) {
						print NEW "$line2<BR>\n";
					}
				}

			}
		}
	}
}
print NEW "</small>\n";

#end mondavi column
print NEW "</td>\n";


#UC Berkeley
# Fills the second events column from $all_data[15].  Output starts at the
# first line containing </tr> on or after a line containing
# "Events this week", stops at a line containing "#top", and is capped at
# $limit printed lines.
print NEW "\n\n<!-------- UC Berkeley -------------->\n";
print NEW "<td align=\"left\" valign=\"top\" width=\"30%\">\n";
print NEW "<small><b>UC Berkeley</b><br>\n";
$errorOccured=0; $limit=34;
$pre_data_flag = 0; $data_flag=0; $a_tag_start=0;
foreach $line (@{$all_data[15]}) {
	&html_error($line);
	if ($line =~ /\#top/) {
		#get rid of everything after #top
		$line =~ s/\#top.*/\#top/;
		$data_flag=0;
		$pre_data_flag=0;
	}
	if ($line =~ /Events this week/) {
		$pre_data_flag=1;
		#print NEW "--$line--\n";
	}
	if (($pre_data_flag==1) && ($line =~ /\<\/tr\>/)) {
		$data_flag=1;
	}

	if (($data_flag==1) && ($limit > 0)){
		# Strip table and image markup, normalise whitespace.
		$line =~ s/\<\/?td.*?\>//g;
		$line =~ s/\<\/?img.*?\>//g;
		$line =~ s/\<\/?tr.*?\>//g;
		$line =~ s/\<\/?table.*?\>//g;
		$line =~ s/\s+/ /g;
		$line =~ s/^\s+//;
		# Put every link on its own line.
		$line =~ s/\<\/a\>/\<\/a\>\<br\>\n/g;
		# Make every in-page anchor absolute.  /g matters because a line
		# can hold several links (see the </a> rewrite above); without it
		# only the first link on the line was fixed.
		$line =~ s/\<a href\=\"\#/\<a href\=\"http\:\/\/www\.berkeley\.edu\/calendar\/\#/g;
		print NEW "$line";
		$limit--;
	}
}
print NEW "</small>\n";

print NEW "</td>\n";
#end Berkeley column


#EPICKS
# Fills the third events column (SF Gate E-Picks) from $all_data[16], then
# closes the events table, its border and the enclosing <div>.
# Output starts at a line containing "Movies:" and stops at the next line
# containing "TABLE".
print NEW "\n\n<!-------- SF E-Picks -------------->\n";
print NEW "<td align=\"left\" valign=\"top\" width=\"30%\">\n";
print NEW "<small><b>SF Gate</b><br>\n";
$errorOccured=0;
$data_flag=0;
foreach $line (@{$all_data[16]}) {
	&html_error($line);

	if ($line =~ /Movies:/) {
		$data_flag=1;
	}
	# Plain literal match.  The earlier /\TABLE/ relied on the unrecognized
	# escape \T being passed through as "T", which Perl warns about and may
	# redefine in a future version.
	if ($line =~ /TABLE/) {
		$data_flag=0;
	}

	if ($data_flag==1){
		# Strip layout markup (upper-case tags only, as in the source page).
		$line =~ s/\<\/?TD.*?\>//g;
		$line =~ s/\<\/?TR.*?\>//g;
		$line =~ s/\<\/?FONT.*?\>//g;
		$line =~ s/\<\/?P.*?\>//g;
		# Make every in-page anchor absolute (previously only the first
		# one on each line was rewritten).
		$line =~ s/HREF\=\"\#/href\=\"http\:\/\/sfgate\.com\/eguide\/epicks\/\#/g;
		$line =~ s/^\s+//;
		
		#skip lines that are now empty
		if ($line =~ /\S+/) {
			print NEW "$line\n";
		}
	}
}
print NEW "</small>\n";

print NEW "</td>\n";
#EPICKS


print NEW "  </tr>\n";


print NEW "</table>\n";
&end_border();
print NEW "</div>\n\n";
#END Events SECTION


# Page footer: close the outstanding cell/row, write the copyright notice
# and the closing BODY/HTML tags, then close the output file and, when
# logging is enabled, the log file.  Each chunk is written with its own
# print call, in order.
foreach my $footer_chunk (
	"      </TD></TR>\n",

	########################################
	#COPYRIGHT WARNING
	"\n<p>\n <center>\n  <small>\n",
	"   <!----COPYRIGHT WARNING---->\n   <b>COPYRIGHT WARNING</b><br>\n",
	"   Webpage content is owned by corresponding webpage owner,",
	" see individual pages for details.\n",
	"  </small>\n </center>\n</p>\n\n",
	"</BODY>\n</HTML>\n",
	"<!--END OF FILE-->\n",
) {
	print NEW $footer_chunk;
}
close(NEW);
close(LOGFILE) if $logfiles;
	
##########################################
#FUNCTIONS
# print_error(MESSAGE)
#
# When the global $logfiles is true, writes MESSAGE to the LOGFILE handle
# and to the currently selected output handle (STDOUT by default).  When
# $logfiles is false nothing is written anywhere.
#
# MESSAGE is held in a lexical so the call does not create or overwrite a
# package-global scratch variable.
sub print_error{
	my ($message) = @_;
	if ($logfiles){
		print LOGFILE $message;
		print $message;
	}
}
# shorten_line(HTML_LINE)
#
# Returns HTML_LINE with its first over-long run of link text truncated.
#
# A "run" is the text that follows a '>' up to the next '<'.  The allowed
# length comes from the global $column_width: a line with at least seven
# upper-case letters gets $column_width, any other line $column_width + 3.
# When a run reaches that length it is cut to (limit - 3) characters plus
# "...", and the full text is added as a title="..." attribute on the
# first <a tag.  Double quotes in both texts become &quot;.
# Lines with no over-long run are returned unchanged.
#
# All scratch values are lexical, so the call no longer overwrites the
# package globals $line2, $temp_1a, $column_temp and $long_string.
sub shorten_line{
	my ($long_string) = @_;

	#look for too many uppercase letters
	my $column_temp;
	if ($long_string =~ /[A-Z].*?[A-Z].*?[A-Z].*?[A-Z].*?[A-Z].*?[A-Z].*?[A-Z]/) {
		$column_temp = $column_width;
	}
	else{
		#set full length of string.
		$column_temp = $column_width+3;
	}

	if ($long_string =~ /\>([^<]{$column_temp,})/ ) {
		my $full_text  = $1;
		my $short_text = substr($full_text,0,$column_temp-3) . "...";

		#remove quotes and insert the HTML entity for a quote
		$short_text =~ s/\"/\&quot\;/g;
		$full_text  =~ s/\"/\&quot\;/g;

		# Replace exactly the run found above by reusing the detection
		# pattern.  Matching the first '>' regardless of what follows it
		# would, for input that starts with another tag (e.g. "<br><a ..."),
		# insert the short text after that tag and leave the long text in
		# place.
		$long_string =~ s/\>[^<]{$column_temp,}/\>$short_text/;

		# Add the tooltip afterwards so the (long) title text cannot
		# influence where the body replacement matches.
		$long_string =~ s/\<a/\<a title\=\"$full_text\"/;
	}
	return $long_string;
}

# html_border()
#
# Writes the opening half of the two-table "border" to NEW: an outer
# table with a #999999 cell containing an inner table with a centred
# #eeeeee cell, bracketed by BORDER-2 START comments.
# end_border() writes the matching closing half.
sub html_border{
	my $outer_table = "\t<TABLE border=\"0\" width=\"100%\"><tr><td bgcolor=\"#999999\">\n";
	my $inner_table = "\t<TABLE border=\"0\" width=\"100%\"><tr><td bgcolor=\"#eeeeee\" align=\"center\">\n";

	print NEW join('',
		"\n\t<!--BORDER-2 START-->\n",
		$outer_table,
		$inner_table,
		"\t<!--BORDER-2 START-->\n\n",
	);
}
# end_border()
#
# Writes the closing half of the two-table "border" to NEW: two
# </td></tr></table> lines (inner table, then outer table) bracketed by
# BORDER-2 END comments.
sub end_border{
	my $close_table = "\t</td></tr></table>\n";

	print NEW join('',
		"\n\t<!--BORDER-2 END-->\n",
		$close_table,
		$close_table,
		"\t<!--BORDER-2 END-->\n\n",
	);
}
# html_error(LINE)
#
# Copies failed-fetch diagnostics into the page.  LINE is written to NEW
# and the global counter $errorOccured is incremented when either
#   - LINE contains the text "(Internal Server Error)", or
#   - $errorOccured is currently 1 or 2.
# With the counter reset to 0 before a section (as the callers do), this
# prints the error line and the two lines that follow it.
#
# LINE is held in a lexical so the call does not create or overwrite a
# package-global scratch variable.
sub html_error{
	my ($candidate) = @_;

	#handle failed html lookups
	if( ($candidate =~ /\(Internal Server Error\)/) || ($errorOccured>0 && $errorOccured<3)){
		print NEW $candidate;
		$errorOccured++;
	}
}
#EOF#
# NOTE(review): "#EOF#" above is only a comment, so Perl does not stop
# here.  The statements below are ordinary top-level code placed after the
# sub definitions, and NEW (and LOGFILE, when logging) are closed earlier
# in the script.  This looks like retired code that was meant to be
# disabled - confirm, and if so move it below an __END__ marker.
###############OLD CNN NEWS SECTION ##############
# Prints a bordered "CNN News" heading linked to $url[11], then walks
# $all_data[11]: section-title lines start a new <small> group and reset
# $headline_limit from $Number_of_headlines; bullet lines are cleaned,
# made absolute, shortened with shorten_line() and printed while
# $headline_limit is above 1.
print NEW "\n\n<!-------- CNN NEWS SECTION -------------->\n";
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"$url[11]\"><FONT size=\"5\" color=\"red\"><b>CNN News</b></FONT></A><BR>\n";
&end_border();

$errorOccured=0;
foreach $line (@{$all_data[11]}) {
	&html_error($line);

	#get rid of opening <td> tags
	$line =~ s/\<td.*?\>//g;

	#get categories
	# A section-title line contains "cnnMainSections" plus one of the
	# section names listed here.
	if( ($line=~ /cnnMainSections/) && ( 
		  ($line =~ /Education/) || ($line =~ /World/) || 
		  ($line =~ /U\.S/) || ($line =~ /Business/) || 
		  ($line =~ /Scien/) || ($line =~ /Sports/) || 
		  ($line =~ /Entert/) || ($line =~ /Health/) ||
		  ($line =~ /Travel/) || ($line =~ /Asia/) ||
		  ($line =~ /Europe/) || ($line =~ /Technol/) 
		) ){
		$line =~ s/<div.*?\>//g;

		#get name of section
		# Replace everything up to alt=" with <b>, and everything from
		# the next  word: "  sequence onward with </b>.
		$line =~ s/.*?\"\s+alt\=\"/\<b\>/;
		$line =~ s/\s*\w+\:\s*\".*/\<\/b\>/;

		#dont print </small> first time
		if($headline_limit >= 1){
			print NEW "</small>\n";
		}
		print NEW $line, "<BR>\n";
		print NEW "<small>\n";
		$headline_limit = $Number_of_headlines + 1;
	}

	#get headlines and put into file
	# Headline lines carry a bullet entity (&#8226; or &#149;) followed by
	# another entity, and must not open in a new window.
	if( (($line =~ /\&\#8226\;\&/) || ($line =~ /\&\#149\;\&/)) && ($headline_limit > 1) && !($line=~ /target\=\"new\"/) ){
		#$line =~ s/\<.*?\>//;  #removes html
		
		$line =~ s/<img.*?\>//g;
		$line =~ s/<br.*?\>//g;
		$line =~ s/<span.*?\/span\>//g;
		$line =~ s/<div.*?\>//g;
		#remove all HTML entities
		$line =~ s/\&.*?\;//g;

		#add <br> tags after link
		$line =~ s/\/a\>/\/a\>\<br\>\n/g;
		
		#add cnn.com to links
		$line =~ s/\<a href\=\"/\<a href\=\"http\:\/\/www\.cnn\.com/g;

		#shorten links that are too long
		$line = &shorten_line($line);

		#remove leading whitespace
		$line =~ s/^\s+//;

		print NEW $line;
		&print_error( $line);
		$headline_limit--;
	}
}

############### END CNN SECTION ###############