Author: sich
Date: 2006-04-11 16:53:12 +0000 (Tue, 11 Apr 2006)
New Revision: 8514

Added:
   trunk/apps/DarknetSpiderBot/bot.php
   trunk/apps/DarknetSpiderBot/config.php
Log:
Insert bot files

Added: trunk/apps/DarknetSpiderBot/bot.php
===================================================================
--- trunk/apps/DarknetSpiderBot/bot.php 2006-04-11 16:49:39 UTC (rev 8513)
+++ trunk/apps/DarknetSpiderBot/bot.php 2006-04-11 16:53:12 UTC (rev 8514)
@@ -0,0 +1,148 @@
+<?php
+
+require_once('config.php'); 
+
+
+/*
+$addresse_complete = "$addresse_fcp" . "$start_page";
+
+exec("c:\wget\wget.exe --timeout=$timeout $addresse_complete -O c:\serveur\www\freenetbot\local.html");
+
+
+
+
+$fich='local.html';
+$ouvre=fopen($fich,'r');
+$filesize = filesize("local.html");
+
+
+while(!feof($ouvre))
+{
+       $ligne=fgets($ouvre,$filesize);
+       
+       if (eregi("<title>(.*)</title>", $ligne, $titre) == TRUE) {
+               //echo $titre[1];
+       }
+       
+       if (eregi("<a(.*)>(.*)</a>", $ligne, $liens) == TRUE) {
+               $liens_complet = $liens[0];
+               $test = explode("href=",$liens_complet);
+               $testa = $test[1];
+               $test1 = explode("\"",$testa);
+               $testb = $test1[1];
+               
+               if (eregi("newbookmark",$testb) == TRUE) { }
+               elseif (eregi("@",$testb) == TRUE) {
+                       $cible = "$addresse_fcp" . "$testb";
+                       echo "externe : $cible<br>";
+               }
+               else { 
+                       $cible = "$addresse_complete" . "$testb";
+                       echo "interne : $cible<br>"; 
+               }
+               //exit();
+       }
+    break;
+}
+
+fclose($ouvre);
+*/
+
+
+// Address of the first page to fetch: node base URL followed by the start key
+// (both come from config.php).
+$url = $addresse_fcp.$start_page;
+
+// Local file wget writes the downloaded page to before it is parsed.
+$buffer_file = 'local.html';
+
+// Named $spider rather than $bot: config.php already assigns the MySQL link
+// to $bot, and reusing that name here would overwrite the connection handle.
+$spider = new bot();
+$spider->getDistantFile($url, $buffer_file);
+
+// The title is taken verbatim from a fetched remote page, so it is escaped
+// before being echoed (assumes the output is rendered as HTML -- TODO confirm).
+// extractTitle() returns nothing when no <title> is found; the cast turns
+// that into an empty string.
+$title = $spider->extractTitle();
+echo 'title: '.htmlspecialchars((string) $title, ENT_QUOTES);
+
+//echo $spider->buffer_contents;
+
+
+/**
+ * Page fetcher/parser used by the spider.
+ *
+ * Downloads one page through wget into a local file, keeps its contents in
+ * $buffer_contents, and extracts the <title> and a fixed set of <meta>
+ * values from that buffer with regular expressions.
+ */
+class bot {
+
+       /** Contents of the file written by the last getDistantFile() call. */
+       var $buffer_contents;
+
+       /**
+        * Download $url into the local file $dest with wget, then load that
+        * file into $this->buffer_contents.
+        *
+        * Reads two globals: $timeout (value passed to wget --timeout) and
+        * $wget_dir (prefix prepended to "wget.exe").
+        *
+        * @param string $url  address to fetch
+        * @param string $dest path of the local file wget writes to
+        * @return bool true when wget exited with status 0
+        */
+       function getDistantFile ($url, $dest)
+       {
+               global $timeout, $wget_dir;
+
+               // $url and $dest are quoted so shell metacharacters in them cannot
+               // alter the command. The executable path is deliberately left
+               // unquoted: on Windows a command line that starts with a quote and
+               // contains further quoted arguments can be mangled by cmd.exe.
+               // NOTE(review): on Windows escapeshellarg() replaces '%' with a
+               // space, so percent-encoded URLs need separate handling.
+               $command = $wget_dir.'wget.exe'
+                       .' --timeout='.(int) $timeout
+                       .' '.escapeshellarg($url)
+                       .' -O '.escapeshellarg($dest);
+
+               $output = array();
+               $status = 0;
+               exec($command, $output, $status);
+
+               // The buffer is loaded whatever the exit status, as before; the
+               // status is only reported to the caller.
+               $this->buffer_contents = $this->getFileContents($dest);
+
+               return $status === 0;
+       }
+
+       /**
+        * Return the whole contents of $file as a string.
+        *
+        * Terminates the script with an error message when the file cannot be
+        * read. An empty file yields an empty string.
+        *
+        * @param string $file path of the file to read
+        * @return string
+        */
+       function getFileContents ($file)
+       {
+               // file_get_contents() reads to end-of-file itself, so it neither
+               // depends on the cached filesize() of a file wget has just
+               // rewritten nor fails on a zero-length file the way
+               // fread($handle, 0) does.
+               $contents = file_get_contents($file);
+               if ($contents === false) {
+                       die('Erreur ? l\'ouverture du fichier'.$file);
+               }
+
+               return $contents;
+       }
+
+       /**
+        * Return the text of the first <title> element in the buffer.
+        *
+        * The tag name is matched case-insensitively and may carry attributes.
+        *
+        * @return string|null the title, or null when the buffer has none
+        */
+       function extractTitle ()
+       {
+               if ( preg_match('/<title[^>]*>(.*?)<\/title>/si', $this->buffer_contents, $title) ) {
+                       return $title[1];
+               }
+
+               return null;
+       }
+
+       /**
+        * Return the CONTENT value of the first
+        * <META NAME="$name" CONTENT="..."> tag in the buffer.
+        *
+        * Tag and attribute names are matched case-insensitively, any
+        * whitespace (including a line break) may separate the attributes, and
+        * the value stops at its closing double quote instead of running to the
+        * last '">' in the document.
+        *
+        * @param string $name value of the NAME attribute to look for
+        * @return string|null the CONTENT value, or null when no such tag exists
+        */
+       function _extractMeta ($name)
+       {
+               $pattern = '/<meta\s+name\s*=\s*"'.preg_quote($name, '/').'"'
+                       .'\s+content\s*=\s*"([^"]*)"\s*\/?>/i';
+
+               if ( preg_match($pattern, $this->buffer_contents, $match) ) {
+                       return $match[1];
+               }
+
+               return null;
+       }
+
+       /** @return string|null CONTENT of the "identifier-url" meta tag */
+       function extractidentifier_url ()
+       {
+               return $this->_extractMeta('identifier-url');
+       }
+
+       /** @return string|null CONTENT of the "revisit-after" meta tag */
+       function extractrevisit_after ()
+       {
+               return $this->_extractMeta('revisit-after');
+       }
+
+       /** @return string|null CONTENT of the "description" meta tag */
+       function extractdescription ()
+       {
+               return $this->_extractMeta('description');
+       }
+
+       /** @return string|null CONTENT of the "keywords" meta tag */
+       function extractkeywords ()
+       {
+               return $this->_extractMeta('keywords');
+       }
+
+       /** @return string|null CONTENT of the "date-creation-yyyymmdd" meta tag */
+       function extractdate_creation ()
+       {
+               return $this->_extractMeta('date-creation-yyyymmdd');
+       }
+
+       /** @return string|null CONTENT of the "date-revision-yyyymmdd" meta tag */
+       function extractdate_revision ()
+       {
+               return $this->_extractMeta('date-revision-yyyymmdd');
+       }
+
+       /** @return string|null CONTENT of the "category" meta tag */
+       function extractcategory ()
+       {
+               return $this->_extractMeta('category');
+       }
+
+       /** @return string|null CONTENT of the "publisher" meta tag */
+       function extractpublisher ()
+       {
+               return $this->_extractMeta('publisher');
+       }
+}
+?>
\ No newline at end of file

Added: trunk/apps/DarknetSpiderBot/config.php
===================================================================
--- trunk/apps/DarknetSpiderBot/config.php      2006-04-11 16:49:39 UTC (rev 8513)
+++ trunk/apps/DarknetSpiderBot/config.php      2006-04-11 16:53:12 UTC (rev 8514)
@@ -0,0 +1,13 @@
+<?php
+       // Key of the page the spider fetches first; bot.php appends it to
+       // $addresse_fcp to build the full URL.
+       $start_page = "/SSK@PFeLTa1si2Ml5sDeUy7eDhPso6TPdmw-2gWfQ4Jg02w,3ocfrqgUMVWA2PeorZx40TW0c-FiIOL-TWKQHoDbVdE,AQABAAE/Index-21/";
+       $start_file = "all.html";
+
+       // Value passed to wget's --timeout option.
+       $timeout = "20";
+       // Base URL of the local node the pages are requested through.
+       $addresse_fcp = "http://127.0.0.1:8888";
+       // Prefix prepended to "wget.exe" by bot::getDistantFile(); must end with
+       // a directory separator. Value taken from the wget path hard-coded in
+       // the commented-out prototype in bot.php -- adjust per install.
+       $wget_dir = "c:\\wget\\";
+
+       // MySQL connection settings.
+       // NOTE(review): credentials are stored in clear text in a versioned
+       // file; rotate them and load them from outside the repository.
+       $hostname_bot = "10.0.0.1";
+       $database_bot = "freenet";
+       $username_bot = "sich";
+       $password_bot = "19Geneve54";
+       // Persistent connection handle, exposed to includers as $bot.
+       // NOTE(review): the mysql_* extension was removed in PHP 7; moving to
+       // mysqli or PDO would change what $bot holds, so it is left as is here.
+       $bot = mysql_pconnect($hostname_bot, $username_bot, $password_bot) or trigger_error(mysql_error(),E_USER_ERROR);
+?>


Reply via email to