projekty/Source/example1/src/Doc/html/search.php

Go to the documentation of this file.
00001 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
00002 <html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
00003 <title>Search</title>
00004 <link href="doxygen.css" rel="stylesheet" type="text/css">
00005 <link href="tabs.css" rel="stylesheet" type="text/css">
00006 </head><body>
00007 <!-- Generated by Doxygen 1.4.7 -->
00008 <div class="tabs">
00009   <ul>
00010     <li><a href="main.html"><span>Main&nbsp;Page</span></a></li>
00011     <li><a href="annotated.html"><span>Data&nbsp;Structures</span></a></li>
00012     <li><a href="files.html"><span>Files</span></a></li>
00013     <li>
00014       <form action="search.php" method="get">
00015         <table cellspacing="0" cellpadding="0" border="0">
00016           <tr>
00017             <td><label>&nbsp;<u>S</u>earch&nbsp;for&nbsp;</label></td>
00018 
00019 <?php
00020 
/**
 * Heading text shown above the list of search hits.
 *
 * @return string The heading label.
 */
function search_results()
{
  $heading = "Search Results";
  return $heading;
}
00025 
/**
 * Builds the summary sentence describing how many documents matched.
 *
 * @param int $num Number of matching documents.
 * @return string HTML fragment: an apology for 0, a singular sentence for 1,
 *                and a plural sentence (with the count in bold) otherwise.
 */
function matches_text($num)
{
  if ($num==0) return "Sorry, no documents matching your query.";
  if ($num==1) return "Found <b>1</b> document matching your query.";
  // anything else is treated as "more than one"
  return "Found <b>".$num."</b> documents matching your query. Showing best matches first.";
}
00041 
/**
 * Label printed in front of the per-document list of matched words.
 *
 * @return string The label, including its trailing space.
 */
function report_matches()
{
  $label = "Matches: ";
  return $label;
}
/**
 * Prints the query text field and closes the search form, table, list and
 * tab <div> that the surrounding HTML template opened.
 *
 * @param string $value Text to pre-fill into the query field.
 */
function end_form($value)
{
  // $value is echoed inside a double-quoted attribute; escape it so that
  // quotes or markup in the query cannot break out of the attribute.
  // The page declares charset iso-8859-1, so escape with that encoding.
  $safeValue = htmlspecialchars($value, ENT_QUOTES, 'ISO-8859-1');
  echo "            <td><input type=\"text\" name=\"query\" value=\"$safeValue\" size=\"20\" accesskey=\"s\"/></td>\n          </tr>\n        </table>\n      </form>\n    </li>\n  </ul>\n</div>\n";
}
00050 
/**
 * Reads four bytes from the current file position and combines them into
 * one integer, most significant byte first.
 *
 * @param resource $file Open file handle; its position advances by the bytes read.
 * @return int The combined value.
 */
function readInt($file)
{
  $value = 0;
  for ($n=0;$n<4;$n++)
  {
    // shift what we have so far up one byte and append the next one
    $value = ($value<<8) | ord(fgetc($file));
  }
  return $value;
}
00057 
/**
 * Reads characters from the current file position up to (and consuming)
 * the next zero byte, or up to the end of the file.
 *
 * @param resource $file Open file handle.
 * @return string The characters read, without the terminating zero byte.
 */
function readString($file)
{
  $text = "";
  for (;;)
  {
    $ch = fgetc($file);
    // fgetc() yields false at end of file; a zero byte ends the string
    if ($ch===false || ord($ch)==0) break;
    $text .= $ch;
  }
  return $text;
}
00064 
/**
 * Reads the next four characters from the file and returns them as a string.
 *
 * @param resource $file Open file handle.
 * @return string The characters read (the caller compares this to "DOXS").
 */
function readHeader($file)
{
  $magic = "";
  for ($n=0;$n<4;$n++)
  {
    $magic .= fgetc($file);
  }
  return $magic;
}
00071 
/**
 * Computes the hash key for a word from its first two bytes.
 *
 * Simple hashing that allows for substring search: every word starting
 * with the same two characters maps to the same key.
 *
 * @param string $word The word to hash.
 * @return int First byte * 256 + second byte, or -1 when the word is shorter
 *             than two bytes or either of the first two bytes is zero.
 */
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  // (square-bracket offsets: the {} offset syntax no longer parses on PHP 8)
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}
00095 
/**
 * Looks up one search word in the index file and appends an entry to
 * $statsList for every indexed word that starts with it.
 *
 * Steps, as read from the file:
 *  1. The word's hash key selects a 4-byte entry at offset key*4+4.
 *  2. A non-zero entry is the offset of a list of (zero-terminated word,
 *     4-byte stats offset) pairs, ended by an empty word.
 *  3. Each stats offset points at a document count followed by that many
 *     (document offset, frequency) pairs; the lowest bit of the frequency
 *     value flags a high priority document.
 *  4. Each document offset points at two zero-terminated strings: name, url.
 *
 * Every appended entry has the keys "word", "match", "index", "full" and
 * "docs"; each doc has "idx", "freq", "rank", "hi", "name" and "url".
 *
 * @param resource $file       Open index file.
 * @param string   $word       Word to look for (the caller passes it lowercased).
 * @param array    $statsList  Result list, extended in place.
 * @return array The updated $statsList.
 */
function search($file,$word,&$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // word cannot be hashed

  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset) return $statsList; // no words for this hash key

  $start = count($statsList);
  $count = $start;
  $wordLen = strlen($word);
  fseek($file,$listOffset);
  $w = readString($file);
  while ($w!=="") // the word list ends with an empty string
  {
    $statIdx = readInt($file);
    // Strict comparison: with == two different numeric-looking strings
    // (e.g. "00.0" and "00e5") would be treated as the same word.
    if (substr($w,0,$wordLen)===$word)
    { // found word that matches (as substring)
      $statsList[$count++]=array(
          "word"=>$word,
          "match"=>$w,
          "index"=>$statIdx,
          "full"=>strlen($w)==$wordLen,
          "docs"=>array()
          );
    }
    $w = readString($file);
  }

  $end = count($statsList);
  $totalHi=0;
  $totalFreqHi=0;
  $totalFreqLo=0;
  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    fseek($file,$statsList[$count]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      $docInfo[$i]=array("idx"  => $idx,
                         "freq" => $freq>>1,
                         "rank" => 0.0,
                         "hi"   => $freq&1
                        );
      // NOTE(review): the totals use the raw value (priority bit included),
      // while the per-doc "freq" is the value shifted right by one. Kept
      // as-is so ranking results do not change; confirm this is intended.
      if ($freq&1) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi+=$freq*$multiplier;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo+=$freq*$multiplier;
      }
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$count]["docs"]=$docInfo;
  }

  $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
  // All ranks stay 0.0 when nothing was counted; dividing by zero would
  // raise an error on PHP 8.
  if ($totalFreq==0) return $statsList;

  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    $numDocs = count($statsList[$count]["docs"]);
    for ($i=0;$i<$numDocs;$i++)
    {
      // compute frequency rank of the word in each doc
      $weight = $statsList[$count]["docs"][$i]["freq"]*$multiplier;
      if ($statsList[$count]["docs"][$i]["hi"])
      {
        // high priority docs are boosted by the total low priority weight
        $weight += $totalFreqLo;
      }
      $statsList[$count]["docs"][$i]["rank"]=(float)$weight/$totalFreq;
    }
  }
  return $statsList;
}
00193 
/**
 * Merges the per-word search results into one entry per document.
 *
 * Documents are keyed by their url. A document hit by several words gets
 * the sum of the individual ranks, and every hit is recorded in the
 * document's "words" list as an array with "word", "match" and "freq".
 *
 * @param array $results Word entries as produced by search(); each has
 *                       "word", "match" and a "docs" list.
 * @param array $docs    Map url => document entry ("url", "name", "rank",
 *                       "words"), extended in place.
 * @return array The updated $docs.
 */
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key=$di["url"];
      // direct key lookup instead of scanning array_keys() for every hit
      if (isset($docs[$key]))
      {
        $docs[$key]["rank"]+=$di["rank"];
      }
      else
      {
        $docs[$key] = array("url"=>$key,
            "name"=>$di["name"],
            "rank"=>$di["rank"]
            );
      }
      $docs[$key]["words"][] = array(
               "word"=>$wordInfo["word"],
               "match"=>$wordInfo["match"],
               "freq"=>$di["freq"]
               );
    }
  }
  return $docs;
}
00223 
/**
 * Drops documents that miss a required word or contain a forbidden word.
 *
 * A document is kept only when every entry of $requiredWords appears as the
 * "word" of one of its recorded hits, and none of its hits has a "word"
 * listed in $forbiddenWords.
 *
 * @param array $docs           Map key => document entry with a "words" list.
 * @param array $requiredWords  Words that must all be present.
 * @param array $forbiddenWords Words that must not be present.
 * @return array The kept documents, under their original keys.
 */
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  // foreach instead of each(): each() is no longer available on PHP 8
  foreach ($docs as $key => $doc)
  {
    $words = $doc["words"];
    $copy=1; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      $found=0;
      foreach ($words as $wordInfo)
      {
        if ($wordInfo["word"]===$reqWord)
        {
          $found=1;
          break;
        }
      }
      if (!$found)
      {
        $copy=0; // document lacks at least one of the required words
        break;
      }
    }
    if ($copy)
    {
      foreach ($words as $wordInfo)
      {
        if (in_array($wordInfo["word"],$forbiddenWords,true))
        {
          $copy=0; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}
00263 
/**
 * Comparison callback that orders entries by descending "rank".
 *
 * @param array $a First entry, with a "rank" key.
 * @param array $b Second entry, with a "rank" key.
 * @return int -1 when $a ranks higher, 1 when $b ranks higher, 0 when equal.
 */
function compare_rank($a,$b)
{
  $left  = $a["rank"];
  $right = $b["rank"];
  if ($left > $right) return -1; // higher rank sorts first
  if ($left == $right) return 0;
  return 1;
}
00272 
/**
 * Produces a copy of the documents ordered by descending rank.
 *
 * The ordering is done by usort() with compare_rank(), so the result is
 * re-indexed from 0 and the original keys are not kept.
 *
 * @param array $docs   Documents to order; not modified.
 * @param array $sorted Receives the ordered list.
 * @return array The same ordered list.
 */
function sort_results($docs,&$sorted)
{
  $ranked = $docs;
  usort($ranked,"compare_rank");
  $sorted = $ranked;
  return $sorted;
}
00279 
/**
 * Prints the search results as an HTML table.
 *
 * The table starts with a heading row and a summary row. Each document then
 * gets two rows: one with its position and a link to it, and one listing the
 * words that matched, each followed by its frequency in parentheses.
 *
 * @param array $docs List of document entries ("url", "name", "words"),
 *                    printed in the given order.
 */
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo "  <tr>\n";
  echo "    <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo "  </tr>\n";
  $numDocs = count($docs);
  if ($numDocs==0)
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo "  </tr>\n";
  }
  else
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo "    </td>\n";
    echo "  </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      // NOTE(review): url and name are printed exactly as stored in the
      // index; whether they are already HTML-safe cannot be told from here.
      echo "  <tr>\n";
      echo "    <td align=\"right\">$num.</td>";
      echo     "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      // close the link row before opening the row with the matched words
      echo "  </tr>\n";
      echo "  <tr>\n";
      echo "    <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        $rawWord = $wordInfo["word"];
        // the searched part is shown in bold, the rest of the matched word
        // after it; both are escaped before being placed into the page
        $word = htmlspecialchars($rawWord, ENT_QUOTES, 'ISO-8859-1');
        $matchRight = htmlspecialchars(
            substr($wordInfo["match"],strlen($rawWord)), ENT_QUOTES, 'ISO-8859-1');
        echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
      }
      echo "    </td>\n";
      echo "  </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}
00321 
/**
 * Entry point: runs the query from the request against search.idx and
 * prints the rest of the form followed by the result table.
 *
 * The query is split on spaces. A leading '+' marks a word as required and
 * a leading '-' marks it as forbidden. Every distinct word is searched once,
 * the hits are combined per document, filtered, sorted by rank and reported.
 * The script stops with an error message when the PHP version is too old,
 * the index file cannot be opened, or its header is not "DOXS".
 */
function main()
{
  // version_compare() orders versions numerically; a plain string compare
  // would wrongly reject e.g. "10.0.0" because "4" sorts after "1"
  if (version_compare(phpversion(), '4.1.0', '<'))
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  // ignore non-string values such as ?query[]=x, which cannot be tokenized
  if (isset($_GET["query"]) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  end_form($query);
  echo "&nbsp;\n<div class=\"searchresults\">\n";
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  while ($word!==false) // for each word in the search query ("0" included)
  {
    $prefix = substr($word,0,1);
    if ($prefix=='+' || $prefix=='-')
    {
      $word = substr($word,1);
    }
    // search() is given the lowercased word and records it in that form,
    // so the required/forbidden lists must hold the same form to be
    // matched by filter_results()
    $word = strtolower((string)$word);
    if ($word!=="") // a bare "+" or "-" leaves nothing to search for
    {
      if ($prefix=='+')
      {
        $requiredWords[]=$word;
      }
      else if ($prefix=='-')
      {
        $forbiddenWords[]=$word;
      }
      if (!in_array($word,$foundWords,true))
      {
        $foundWords[]=$word;
        search($file,$word,$results);
      }
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  echo "</div>\n";
  fclose($file);
}

main();
00374 
00375 
00376 ?>
00377 <hr size="1"><address style="align: right;"><small>Generated on Fri Sep 21 13:24:17 2007 for example1 by&nbsp;
00378 <a href="http://www.doxygen.org/index.html">
00379 <img src="doxygen.png" alt="doxygen" align="middle" border="0"></a> 1.4.7 </small></address>
00380 </body>
00381 </html>

Generated on Fri Sep 21 13:41:54 2007 for example1 by  doxygen 1.4.7