<?php
// Upper bound on word length: FilterCommonAndDuplicateWords() only keeps
// words that are strictly shorter than this many characters.
$MAX_WORD_LENGTH = 50;

// COMMON WORD LIST
// Stop words that are never indexed. Stored as word => 1 so membership can be
// tested with a single array lookup. A word listed more than once is harmless:
// a repeated key in an array literal simply overwrites the earlier entry.
$COMMON_WORDS = array("a"=>1,"as"=>1,"any"=>1,"all"=>1,"ate"=>1,"after"=>1,"am"=>1,"an"=>1,"and"=>1,"are"=>1,"at"=>1,"away"=>1,"about"=>1,"ago"=>1,"almost"=>1,"along"=>1,"answer"=>1,"anybody"=>1,"anywhere"=>1,"arent"=>1,"around"=>1,"ask"=>1,"also"=>1,
"b"=>1,"be"=>1,"better"=>1,"black"=>1,"brown"=>1,"but"=>1,"both"=>1,"bring"=>1,"because"=>1,"been"=>1,"before"=>1,"big"=>1,"blue"=>1,"best"=>1,"by"=>1,"beg"=>1,"bad"=>1,"being"=>1,"best"=>1,"between"=>1,"based"=>1,
"c"=>1,"call"=>1,"can"=>1,"cut"=>1,"carry"=>1,"cold"=>1,"could"=>1,"clean"=>1,"cant"=>1,"come"=>1,"couldnt"=>1,"consider"=>1,"called"=>1,
"d"=>1,"did"=>1,"does"=>1,"do"=>1,"down"=>1,"dont"=>1,"day"=>1,"didnt"=>1,
"e"=>1,"eat"=>1,"every"=>1,"eve"=>1,"egg"=>1,"end"=>1,"eve"=>1,"era"=>1,"eye"=>1,"each"=>1,"either"=>1,"else"=>1,"even"=>1,"ever"=>1,"every"=>1,"everybody"=>1,"everyone"=>1,
"f"=>1,"for"=>1,"from"=>1,"full"=>1,"found"=>1,"far"=>1,"fly"=>1,"fall"=>1,"first"=>1,"fast"=>1,"five"=>1,"fall"=>1,"find"=>1,"four"=>1,"funny"=>1,
"g"=>1,"go"=>1,"get"=>1,"goes"=>1,"give"=>1,"gun"=>1,"good"=>1,"god"=>1,"give"=>1,"got"=>1,"green"=>1,"grow"=>1,"good"=>1,"green"=>1,"grow"=>1,"got"=>1,"gave"=>1,"going"=>1,"gone"=>1,"given"=>1,
"h"=>1,"hi"=>1,"hoo"=>1,"he"=>1,"his"=>1,"him"=>1,"her"=>1,"has"=>1,"how"=>1,"hold"=>1,"how"=>1,"hot"=>1,"had"=>1,"here"=>1,"help"=>1,"hurt"=>1,"have"=>1,"havent"=>1,"having"=>1,"hers"=>1,"home"=>1,"home"=>1,"href"=>1,
"i"=>1,"in"=>1,"is"=>1,"if"=>1,"its"=>1,"i"=>1,"it"=>1,"into"=>1,"im"=>1,"ill"=>1,"id"=>1,
"j"=>1,"just"=>1,"jump"=>1,"jet"=>1,"jaw"=>1,"jar"=>1,"jag"=>1,"jam"=>1,"job"=>1,"jog"=>1,"joy"=>1,"jot"=>1,
"k"=>1,"kind"=>1,"keep"=>1,"kiss"=>1,"kinder"=>1,"kind"=>1,"kid"=>1,"key"=>1,"kit"=>1,"ken"=>1,"know"=>1,
"l"=>1,"like"=>1,"little"=>1,"lust"=>1,"led"=>1,"lap"=>1,"let"=>1,"live"=>1,"long"=>1,"live"=>1,"let"=>1,"look"=>1,"law"=>1,"leg"=>1,"lie"=>1,"lid"=>1,"less"=>1,"look"=>1,"looking"=>1,
"m"=>1,"my"=>1,"may"=>1,"me"=>1,"many"=>1,"must"=>1,"much"=>1,"made"=>1,"my"=>1,"make"=>1,"met"=>1,"mix"=>1,"mom"=>1,"mud"=>1,"mug"=>1,"mum"=>1,"myself"=>1,"more"=>1,"most"=>1,"max"=>1,"maximum"=>1,
"n"=>1,"no"=>1,"nose"=>1,"not"=>1,"new"=>1,"now"=>1,"nor"=>1,"nod"=>1,"now"=>1,"nil"=>1,"nib"=>1,"nut"=>1,"nun"=>1,"never"=>1,"near"=>1,"news"=>1,"none"=>1,"nothing"=>1,"next"=>1,
"o"=>1,"of"=>1,"on"=>1,"or"=>1,"old"=>1,"open"=>1,"once"=>1,"only"=>1,"off"=>1,"our"=>1,"oops"=>1,"out"=>1,"oil"=>1,"old"=>1,"oak"=>1,"oak"=>1,"ohm"=>1,"oho"=>1,"ore"=>1,"owl"=>1,"often"=>1,"other"=>1,"ours"=>1,"out"=>1,"over"=>1,"one"=>1,
"p"=>1,"play"=>1,"pull"=>1,"pretty"=>1,"put"=>1,"push"=>1,"pad"=>1,"pop"=>1,"pan"=>1,"pap"=>1,"pay"=>1,"peg"=>1,"pet"=>1,"phi"=>1,"pie"=>1,"pig"=>1,"pet"=>1,"pub"=>1,"pin"=>1,"pit"=>1,"ply"=>1,"pod"=>1,"pus"=>1,"page"=>1,"please"=>1,
"q"=>1,"question"=>1,"quick"=>1,"quest"=>1,
"r"=>1,"ran"=>1,"red"=>1,"run"=>1,"ride"=>1,"read"=>1,"rag"=>1,"rat"=>1,"ran"=>1,"ram"=>1,"red"=>1,"ray"=>1,"rev"=>1,"rid"=>1,"rib"=>1,"rig"=>1,"rim"=>1,"rip"=>1,"rob"=>1,"rod"=>1,"roe"=>1,"row"=>1,"rum"=>1,"rug"=>1,"rut"=>1,"rather"=>1,"recent"=>1,
"s"=>1,"so"=>1,"some"=>1,"stop"=>1,"say"=>1,"sing"=>1,"say"=>1,"she"=>1,"stay"=>1,"said"=>1,"start"=>1,"soon"=>1,"six"=>1,"seven"=>1,"see"=>1,"sit"=>1,"sitting"=>1,"son"=>1,"soap"=>1,"spy"=>1,"sum"=>1,"say"=>1,"sea"=>1,"sex"=>1,"shy"=>1,"sib"=>1,"sic"=>1,"sin"=>1,"sip"=>1,"sir"=>1,"sky"=>1,"ski"=>1,"sly"=>1,"sob"=>1,"sow"=>1,"sod"=>1,"should"=>1,"something"=>1,"sometime"=>1,"somewhere"=>1,"set"=>1,"simple"=>1,"such"=>1,"side"=>1,
"t"=>1,"to"=>1,"the"=>1,"then"=>1,"that"=>1,"this"=>1,"those"=>1,"than"=>1,"these"=>1,"those"=>1,"they"=>1,"thank"=>1,"tank"=>1,"tell"=>1,"take"=>1,"together"=>1,"try"=>1,"today"=>1,"three"=>1,"tie"=>1,"thy"=>1,"tax"=>1,"tea"=>1,"tap"=>1,"taxi"=>1,"ten"=>1,"tin"=>1,"tip"=>1,"tit"=>1,"toe"=>1,"tog"=>1,"tom"=>1,"ton"=>1,"top"=>1,"tow"=>1,"toy"=>1,"two"=>1,"tub"=>1,"tug"=>1,"tun"=>1,"tux"=>1,"true"=>1,"thank"=>1,"theirs"=>1,"them"=>1,"there"=>1,"though"=>1,"through"=>1,"thus"=>1,"time"=>1,"times"=>1,"too"=>1,"type"=>1,
"u"=>1,"use"=>1,"us"=>1,"using"=>1,"usage"=>1,"useful"=>1,"up"=>1,"upon"=>1,"ups"=>1,"under"=>1,"until"=>1,"untrue"=>1,"users"=>1,
"v"=>1,"van"=>1,"vex"=>1,"via"=>1,"vow"=>1,"vat"=>1,"vim"=>1,"version"=>1,"very"=>1,
"w"=>1,"was"=>1,"waste"=>1,"why"=>1,"who"=>1,"whose"=>1,"well"=>1,"walk"=>1,"were"=>1,"which"=>1,"wish"=>1,"white"=>1,"with"=>1,"would"=>1,"write"=>1,"when"=>1,"what"=>1,"wash"=>1,"warm"=>1,"want"=>1,"went"=>1,"will"=>1,"won"=>1,"woe"=>1,"wow"=>1,"woo"=>1,"wins"=>1,"where"=>1,"web"=>1,"way"=>1,"were"=>1,"where"=>1,"whom"=>1,"wide"=>1,"within"=>1,"without"=>1,"world"=>1,"worse"=>1,"worst"=>1,"www"=>1,"we"=>1,"whether"=>1,
"y"=>1,"yes"=>1,"ya"=>1,"you"=>1,"yellow"=>1,"your"=>1,"yet"=>1,"yen"=>1,"year"=>1,"yep"=>1,"yon"=>1,"yours"=>1,
"z"=>1,"zoo"=>1,"zip"=>1,"zed"=>1,"zinc"=>1,"zoom"=>1,"zero"=>1,"zeal"=>1,"zone"=>1);
// Cache of keyword => key id for every keyword already stored in keytable.
// Filled by LoadCurrentWords() and extended by ProcessForm().
$allWords = array();

// Read the submitted fields from $_POST explicitly: the register_globals
// feature that used to create $submit/$title/$body automatically was removed
// in PHP 5.4. Non-string values (e.g. title[]=x) are treated as missing.
$submit = isset($_POST['submit']);
$title  = (isset($_POST['title']) && is_string($_POST['title'])) ? $_POST['title'] : '';
$body   = (isset($_POST['body']) && is_string($_POST['body'])) ? $_POST['body'] : '';
$err    = '';

if ($submit) {
    // NOTE(review): the mysql_* extension only exists up to PHP 5.6; moving to
    // mysqli/PDO has to be done for the whole file at once.
    mysql_connect("localhost", "root", "") or die("Unable to connect to database");
    mysql_select_db("test") or die("Unable to select database");
    LoadCurrentWords();
    if ($title !== '' && $body !== '') {
        ProcessForm($title, $body);
        echo "Successfully Finished Parsing and Uploading Content";
    } else {
        $err = "Please fill in the fields to upload\n";
        form($err);
    }
} else {
    // Nothing submitted yet: show the empty form without an error message.
    form($err);
}
// end of main
/**
 * Prints the "File Parser & Uploader" HTML form.
 *
 * The form posts back to the current script with the fields "title", "body"
 * and "submit".
 *
 * @param string $errmsg Plain-text message shown in bold above the form; it is
 *                       HTML-escaped before output. Pass '' for no message.
 * @return void
 */
function form($errmsg = '')
{
    // $PHP_SELF was never in scope inside this function, so the action used to
    // be empty. Use $_SERVER['PHP_SELF'] instead, and escape it because the
    // path portion of the URL is controlled by the client.
    $action = isset($_SERVER['PHP_SELF'])
        ? htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES)
        : '';
    ?>
<h4 align="center">File Parser &amp; Uploader</h4>
<div align="center"><b><?php echo htmlspecialchars($errmsg, ENT_QUOTES); ?></b></div>
<center>
<form method="POST" action="<?php echo $action; ?>">
Title: <input type="text" name="title" size="50" maxlength="100"><p>
Abstract: <textarea rows=20 cols=50 wrap="off" name="body"></textarea><p>
<input type="submit" name="submit" value="Start Parsing and Upload Content">
</form>
</center>
<?php
}
/**
 * Reads every (keyid, keyword) row of keytable and records it in the global
 * $allWords array as keyword => keyid.
 *
 * Terminates the script with an error message when the query fails.
 */
function LoadCurrentWords()
{
    global $allWords;

    $keywordRows = mysql_query("select keyid, keyword from keytable");
    if (!$keywordRows) {
        die("Error in
executing mysql query");
    }

    // Fetch row by row until mysql_fetch_array() reports that none are left.
    for ($entry = mysql_fetch_array($keywordRows); $entry; $entry = mysql_fetch_array($keywordRows)) {
        $keyword = $entry['keyword'];
        $allWords[$keyword] = $entry['keyid'];
    }
}
/**
 * Splits a text into its words.
 *
 * A word is a maximal run of characters for which ctype_alpha() is true; every
 * other character (digits, punctuation, whitespace, ...) acts as a separator.
 * The text is scanned byte by byte and each word is lower-cased with
 * strtolower().
 *
 * @param string $text Text to scan.
 * @return array Lower-cased words in order of appearance, duplicates included.
 */
function ExtractWords($text)
{
    $text = (string) $text;
    $wordList = array();
    $curWord = '';

    // Hoisted so strlen() is not re-evaluated on every iteration.
    $length = strlen($text);
    for ($i = 0; $i < $length; ++$i) {
        // Square-bracket offset: the $text{$i} form was removed in PHP 8.
        $ch = $text[$i];
        if (ctype_alpha($ch)) {
            // Inside a word: keep collecting letters.
            $curWord .= $ch;
        } elseif ($curWord !== '') {
            // First non-letter after a word: the word is complete.
            $wordList[] = strtolower($curWord);
            $curWord = '';
        }
    }

    // The text may end in the middle of a word.
    if ($curWord !== '') {
        $wordList[] = strtolower($curWord);
    }

    return $wordList;
}
/**
 * Reduces a word list to the distinct words worth indexing.
 *
 * A word is kept when it is longer than one character, shorter than the global
 * $MAX_WORD_LENGTH, and not flagged in the global $COMMON_WORDS map.
 *
 * @param array $wordList Words to filter (e.g. the result of ExtractWords()).
 * @return array Map of kept word => 1, in order of first appearance.
 */
function FilterCommonAndDuplicateWords( $wordList ) {
    global $COMMON_WORDS;
    global $MAX_WORD_LENGTH;

    $wordMap = array();
    foreach ( $wordList as $word ) {
        $len = strlen( $word );
        if ( ($len <= 1) || ($len >= $MAX_WORD_LENGTH) ) {
            continue;
        }
        // isset()/empty() test the keys without raising an "undefined index"
        // notice, which the plain $array[$word] reads did for every new word.
        if ( isset( $wordMap[$word] ) ) {
            continue;
        }
        if ( !empty( $COMMON_WORDS[$word] ) ) {
            continue;
        }
        $wordMap[$word] = 1;
    }
    return $wordMap;
}
/**
 * Stores one submitted document and indexes its keywords.
 *
 * Inserts the document into the content table, then for every distinct,
 * non-common word of $body makes sure a keytable row exists (creating it and
 * caching its id in the global $allWords when needed) and inserts a link row
 * joining the keyword to the new content row.
 *
 * Uses the MySQL connection the caller has already opened. Terminates the
 * script with an error message as soon as a query fails, because continuing
 * would attach links to an id that mysql_insert_id() did not get from the
 * intended INSERT.
 *
 * @param string $title Document title.
 * @param string $body  Document text; only this text is scanned for keywords.
 * @return void
 */
function ProcessForm($title ,$body){
    global $allWords;

    $tempWordList = ExtractWords( $body );
    $wordList = FilterCommonAndDuplicateWords( $tempWordList );

    // insert into content
    // mysql_real_escape_string() honours the connection character set, unlike
    // the deprecated mysql_escape_string().
    $sql = sprintf(
        "INSERT INTO content (title, abstract) VALUES ('%s', '%s')",
        mysql_real_escape_string( $title ),
        mysql_real_escape_string( $body )
    );
    mysql_query( $sql ) or die( "Error storing the content" );

    // store the newly generated content id in $contentId
    $contentId = mysql_insert_id();

    // insert all the new words and links
    // foreach replaces the each()/list() loop; each() was removed in PHP 8.
    foreach ( array_keys( $wordList ) as $word ) {
        if ( isset( $allWords[$word] ) && $allWords[$word] ) {
            // Keyword already known: reuse its id.
            $keyId = $allWords[$word];
        }
        else {
            // New keyword: create it and remember its id for later documents.
            $sql = sprintf(
                "INSERT INTO keytable ( keyword ) VALUES ( '%s' )",
                mysql_real_escape_string( $word )
            );
            mysql_query( $sql ) or die( "Error storing a keyword" );
            $keyId = mysql_insert_id();
            $allWords[$word] = $keyId;
        }

        // insert the link
        $sql = sprintf( "INSERT INTO link (keyid, contid) VALUES ( %d, %d )", $keyId, $contentId );
        mysql_query( $sql ) or die( "Error storing a keyword link" );
    }
    //End of Processing Form.
}
?>