Author: dongy
Date: 2006-04-13 19:52:43 +0000 (Thu, 13 Apr 2006)
New Revision: 8544

Added:
   trunk/apps/DarknetSpiderBot/include/database_connection.inc.php
   trunk/apps/DarknetSpiderBot/setup/
   trunk/apps/DarknetSpiderBot/setup/darknetspiderbot.sql
Modified:
   trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php
   trunk/apps/DarknetSpiderBot/class/bot.class.php
   trunk/apps/DarknetSpiderBot/include/config.inc.php
Log:
DarknetSpiderBot:
#function constructURL added
#file database_connection.inc.php added
#MySQL dump added into setup directory

Modified: trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php
===================================================================
--- trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php    2006-04-13 17:19:47 UTC 
(rev 8543)
+++ trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php    2006-04-13 19:52:43 UTC 
(rev 8544)
@@ -3,38 +3,24 @@

 require_once('include/config.inc.php');
 require_once('class/bot.class.php');
+require_once('database_connection.inc.php');


-
 $buffer_file = 'local.html';
-
-$key_value = 
'PFeLTa1si2Ml5sDeUy7eDhPso6TPdmw-2gWfQ4Jg02w,3ocfrqgUMVWA2PeorZx40TW0c-FiIOL-TWKQHoDbVdE,AQABAAE';
-$site_name = 'Index';
-$last_know_edition = '20';
-
-
-
 $bot = new bot($fcp_host, $fcp_port, $buffer_file);

-//$url = $bot->getLastEdition($key_value, $site_name, $last_know_edition);
-//print_r($bot->splitURL($url.'/test/index.html'));
+$splitedURL['key_type'] = 'SSK';
+$splitedURL['key_value'] = 
'PFeLTa1si2Ml5sDeUy7eDhPso6TPdmw-2gWfQ4Jg02w,3ocfrqgUMVWA2PeorZx40TW0c-FiIOL-TWKQHoDbVdE,AQABAAE';
+$splitedURL['site_name'] = 'Index';
+$splitedURL['edition'] = '21';


-$sitepath = "/USK@$sitekey/$sitename";
-$bot->getDistantFile($fcp_host, $fcp_port, $sitepath.'/-1');
+$path = $bot->constructURL($splitedURL);
+$bot->getDistantFile($path);

+echo $bot->extractTitle();

-//$urls = $bot->extractURLs();
-//$bot->cleanURLs($urls, $sitekey, $sitename);

-//print_r($urls);
-//echo $bot->buffer;
-print_r($bot->extractTitle());

 echo "\r\nDarknetSpiderBot is closing...\r\n";
-
-
-
-
-
 ?>
\ No newline at end of file

Modified: trunk/apps/DarknetSpiderBot/class/bot.class.php
===================================================================
--- trunk/apps/DarknetSpiderBot/class/bot.class.php     2006-04-13 17:19:47 UTC 
(rev 8543)
+++ trunk/apps/DarknetSpiderBot/class/bot.class.php     2006-04-13 19:52:43 UTC 
(rev 8544)
@@ -53,10 +53,8 @@
                return $contents;
        }

-       function getLastEdition ($key_value, $site_name, $last_know_edition)
+       function getLastEdition ($path)
        {
-               //$path = "/USK@$key_value/$site_name/-$last_know_edition";
-               $path = "/USK@$key_value/$site_name/$last_know_edition";

                $this->getDistantFile($path, 60);

@@ -82,15 +80,48 @@
                $second_slashe_pos = strpos($url, '/', 5);
                $splitedURL['key_value'] = substr($url, 5, 
$second_slashe_pos-5);

-               preg_match('#^(.+)[/-]+([0-9]+)/*(.*)$#', substr($url, 
$second_slashe_pos+1), $matches );
-               $splitedURL['site_name'] = $matches[1];
-               $splitedURL['edition'] = $matches[2];
-               $splitedURL['path'] = $matches[3];
+               if ( $splitedURL['key_type'] == 'CHK' )
+               {
+                       $splitedURL['path'] = substr($url, 
$second_slashe_pos+1);
+               }
+               else
+               {
+                       preg_match('#^(.+)[/-]+([0-9]+)(.*)$#', substr($url, 
$second_slashe_pos+1), $matches );
+                       $splitedURL['site_name'] = $matches[1];
+                       $splitedURL['edition'] = $matches[2];
+                       $splitedURL['path'] = $matches[3];
+               }
+               
+               if ( substr($splitedURL['path'], 0, 1) == '/' )
+                       $splitedURL['path'] = substr($splitedURL['path'], 1);


                return $splitedURL;
        }

+       function constructURL ($splitedURL)
+       {
+               switch ($splitedURL['key_type'])
+               {
+                       case 'USK':
+                               $url = 
'/USK@'.$splitedURL['key_value'].'/'.$splitedURL['site_name'].'/-'.$splitedURL['edition'];
+                               break;
+                               
+                       case 'SSK':
+                               $url = 
'/SSK@'.$splitedURL['key_value'].'/'.$splitedURL['site_name'].'-'.$splitedURL['edition'].'/'.$splitedURL['path'];
+                               break;
+                               
+                       case 'CHK':
+                               $url = 
'/CHK@'.$splitedURL['key_value'].'/'.$splitedURL['path'];
+                               break;
+                               
+                       default:
+                               return false;
+               }
+               return $url;
+       
+       }
+       


        function cleanURLs (&$urls, $sitekey, $sitename)
@@ -131,14 +162,14 @@
        // Extraction processing functions
        function extractTitle ()
        {
-               if ( preg_match_all('/<title>(.+?)<\/title>/s', 
$this->buffer_contents, $title) ) {
+               if ( preg_match_all('/<title>(.+?)<\/title>/s', $this->buffer, 
$title) ) {
                        return $title[1][0];
                }
        }

        function extractMetas ()
        {
-               if (preg_match_all('/<meta(.+?)>/si', $this->buffer_contents, 
$matches))
+               if (preg_match_all('/<meta(.+?)>/si', $this->buffer, $matches))
                {
                        foreach ($matches[1] as $value) // contenu de chaque 
balise meta
                        {

Modified: trunk/apps/DarknetSpiderBot/include/config.inc.php
===================================================================
--- trunk/apps/DarknetSpiderBot/include/config.inc.php  2006-04-13 17:19:47 UTC 
(rev 8543)
+++ trunk/apps/DarknetSpiderBot/include/config.inc.php  2006-04-13 19:52:43 UTC 
(rev 8544)
@@ -1,15 +1,14 @@
 <?php

-$timeout = "20"; 
-$fcp_host = "127.0.0.1";
+// fcp
+$fcp_host = '127.0.0.1';
 $fcp_port = '8888';

-/*
-$hostname_bot = "";
-$database_bot = "";
-$username_bot = "";
-$password_bot = "";
-$bot = mysql_pconnect($hostname_bot, $username_bot, $password_bot) or 
trigger_error(mysql_error(),E_USER_ERROR); 
-*/
+// database
+$mysql_server = 'localhost';
+$mysql_username = 'root';
+$mysql_password = '';

+$mysql_db_name = 'darknetspiderbot';
+
 ?>
\ No newline at end of file

Added: trunk/apps/DarknetSpiderBot/include/database_connection.inc.php
===================================================================
--- trunk/apps/DarknetSpiderBot/include/database_connection.inc.php     
2006-04-13 17:19:47 UTC (rev 8543)
+++ trunk/apps/DarknetSpiderBot/include/database_connection.inc.php     
2006-04-13 19:52:43 UTC (rev 8544)
@@ -0,0 +1,7 @@
+<?php
+// Connection to mysql
+mysql_connect($mysql_server, $mysql_username, $mysql_password) or die( 
mysql_error() );
+
+// Select database
+mysql_select_db($mysql_db_name) or die( mysql_error() );
+?>
\ No newline at end of file

Added: trunk/apps/DarknetSpiderBot/setup/darknetspiderbot.sql
===================================================================
--- trunk/apps/DarknetSpiderBot/setup/darknetspiderbot.sql      2006-04-13 
17:19:47 UTC (rev 8543)
+++ trunk/apps/DarknetSpiderBot/setup/darknetspiderbot.sql      2006-04-13 
19:52:43 UTC (rev 8544)
@@ -0,0 +1,70 @@
+-- phpMyAdmin SQL Dump
+-- version 2.6.1-rc2
+-- http://www.phpmyadmin.net
+-- 
+-- Serveur: localhost
+-- Généré le : Jeudi 13 Avril 2006 à 21:48
+-- Version du serveur: 4.0.20
+-- Version de PHP: 4.3.4
+-- 
+-- Base de données: `darknetspiderbot`
+-- 
+
+-- --------------------------------------------------------
+
+-- 
+-- Structure de la table `freesites_informations`
+-- 
+
+CREATE TABLE `freesites_informations` (
+  `id_freesites` smallint(5) unsigned NOT NULL default '0',
+  `title` varchar(255) NOT NULL default '',
+  `meta_description` varchar(255) NOT NULL default '',
+  `meta_keywords` text NOT NULL,
+  UNIQUE KEY `id_freesites` (`id_freesites`)
+) TYPE=MyISAM;
+
+-- 
+-- Contenu de la table `freesites_informations`
+-- 
+
+
+-- --------------------------------------------------------
+
+-- 
+-- Structure de la table `freesites_keys`
+-- 
+
+CREATE TABLE `freesites_keys` (
+  `id` smallint(5) unsigned NOT NULL auto_increment,
+  `key_type` enum('CHK','SSK') NOT NULL default 'CHK',
+  `key_value` varchar(255) NOT NULL default '',
+  `created` timestamp(14) NOT NULL,
+  `last_update` timestamp(14) NOT NULL default '00000000000000',
+  PRIMARY KEY  (`id`),
+  UNIQUE KEY `key_value` (`key_value`),
+  KEY `last_update` (`last_update`)
+) TYPE=MyISAM AUTO_INCREMENT=1 ;
+
+-- 
+-- Contenu de la table `freesites_keys`
+-- 
+
+
+-- --------------------------------------------------------
+
+-- 
+-- Structure de la table `freesites_urls`
+-- 
+
+CREATE TABLE `freesites_urls` (
+  `id_freesites` smallint(6) NOT NULL default '0',
+  `url` varchar(255) NOT NULL default '',
+  `status` enum('standby','retrieving','retrieved','error') NOT NULL default 
'standby',
+  KEY `id_freesites` (`id_freesites`)
+) TYPE=MyISAM;
+
+-- 
+-- Contenu de la table `freesites_urls`
+-- 
+


Reply via email to