Author: dongy
Date: 2006-04-13 19:52:43 +0000 (Thu, 13 Apr 2006)
New Revision: 8544
Added:
trunk/apps/DarknetSpiderBot/include/database_connection.inc.php
trunk/apps/DarknetSpiderBot/setup/
trunk/apps/DarknetSpiderBot/setup/darknetspiderbot.sql
Modified:
trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php
trunk/apps/DarknetSpiderBot/class/bot.class.php
trunk/apps/DarknetSpiderBot/include/config.inc.php
Log:
DarknetSpiderBot:
#function constructURL added
#file database_connection.inc.php added
#MySQL dump added into setup directory
Modified: trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php
===================================================================
--- trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php 2006-04-13 17:19:47 UTC
(rev 8543)
+++ trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php 2006-04-13 19:52:43 UTC
(rev 8544)
@@ -3,38 +3,24 @@
require_once('include/config.inc.php');
require_once('class/bot.class.php');
+require_once('database_connection.inc.php');
-
$buffer_file = 'local.html';
-
-$key_value =
'PFeLTa1si2Ml5sDeUy7eDhPso6TPdmw-2gWfQ4Jg02w,3ocfrqgUMVWA2PeorZx40TW0c-FiIOL-TWKQHoDbVdE,AQABAAE';
-$site_name = 'Index';
-$last_know_edition = '20';
-
-
-
$bot = new bot($fcp_host, $fcp_port, $buffer_file);
-//$url = $bot->getLastEdition($key_value, $site_name, $last_know_edition);
-//print_r($bot->splitURL($url.'/test/index.html'));
+$splitedURL['key_type'] = 'SSK';
+$splitedURL['key_value'] =
'PFeLTa1si2Ml5sDeUy7eDhPso6TPdmw-2gWfQ4Jg02w,3ocfrqgUMVWA2PeorZx40TW0c-FiIOL-TWKQHoDbVdE,AQABAAE';
+$splitedURL['site_name'] = 'Index';
+$splitedURL['edition'] = '21';
-$sitepath = "/USK@$sitekey/$sitename";
-$bot->getDistantFile($fcp_host, $fcp_port, $sitepath.'/-1');
+$path = $bot->constructURL($splitedURL);
+$bot->getDistantFile($path);
+echo $bot->extractTitle();
-//$urls = $bot->extractURLs();
-//$bot->cleanURLs($urls, $sitekey, $sitename);
-//print_r($urls);
-//echo $bot->buffer;
-print_r($bot->extractTitle());
echo "\r\nDarknetSpiderBot is closing...\r\n";
-
-
-
-
-
?>
\ No newline at end of file
Modified: trunk/apps/DarknetSpiderBot/class/bot.class.php
===================================================================
--- trunk/apps/DarknetSpiderBot/class/bot.class.php 2006-04-13 17:19:47 UTC
(rev 8543)
+++ trunk/apps/DarknetSpiderBot/class/bot.class.php 2006-04-13 19:52:43 UTC
(rev 8544)
@@ -53,10 +53,8 @@
return $contents;
}
- function getLastEdition ($key_value, $site_name, $last_know_edition)
+ function getLastEdition ($path)
{
- //$path = "/USK@$key_value/$site_name/-$last_know_edition";
- $path = "/USK@$key_value/$site_name/$last_know_edition";
$this->getDistantFile($path, 60);
@@ -82,15 +80,48 @@
$second_slashe_pos = strpos($url, '/', 5);
$splitedURL['key_value'] = substr($url, 5,
$second_slashe_pos-5);
- preg_match('#^(.+)[/-]+([0-9]+)/*(.*)$#', substr($url,
$second_slashe_pos+1), $matches );
- $splitedURL['site_name'] = $matches[1];
- $splitedURL['edition'] = $matches[2];
- $splitedURL['path'] = $matches[3];
+ if ( $splitedURL['key_type'] == 'CHK' )
+ {
+ $splitedURL['path'] = substr($url,
$second_slashe_pos+1);
+ }
+ else
+ {
+ preg_match('#^(.+)[/-]+([0-9]+)(.*)$#', substr($url,
$second_slashe_pos+1), $matches );
+ $splitedURL['site_name'] = $matches[1];
+ $splitedURL['edition'] = $matches[2];
+ $splitedURL['path'] = $matches[3];
+ }
+
+ if ( substr($splitedURL['path'], 0, 1) == '/' )
+ $splitedURL['path'] = substr($splitedURL['path'], 1);
return $splitedURL;
}
+ function constructURL ($splitedURL)
+ {
+ switch ($splitedURL['key_type'])
+ {
+ case 'USK':
+ $url =
'/USK@'.$splitedURL['key_value'].'/'.$splitedURL['site_name'].'/-'.$splitedURL['edition'];
+ break;
+
+ case 'SSK':
+ $url =
'/SSK@'.$splitedURL['key_value'].'/'.$splitedURL['site_name'].'-'.$splitedURL['edition'].'/'.$splitedURL['path'];
+ break;
+
+ case 'CHK':
+ $url =
'/CHK@'.$splitedURL['key_value'].'/'.$splitedURL['path'];
+ break;
+
+ default:
+ return false;
+ }
+ return $url;
+
+ }
+
function cleanURLs (&$urls, $sitekey, $sitename)
@@ -131,14 +162,14 @@
// Extraction processing functions
function extractTitle ()
{
- if ( preg_match_all('/<title>(.+?)<\/title>/s',
$this->buffer_contents, $title) ) {
+ if ( preg_match_all('/<title>(.+?)<\/title>/s', $this->buffer,
$title) ) {
return $title[1][0];
}
}
function extractMetas ()
{
- if (preg_match_all('/<meta(.+?)>/si', $this->buffer_contents,
$matches))
+ if (preg_match_all('/<meta(.+?)>/si', $this->buffer, $matches))
{
foreach ($matches[1] as $value) // contenu de chaque
balise meta
{
Modified: trunk/apps/DarknetSpiderBot/include/config.inc.php
===================================================================
--- trunk/apps/DarknetSpiderBot/include/config.inc.php 2006-04-13 17:19:47 UTC
(rev 8543)
+++ trunk/apps/DarknetSpiderBot/include/config.inc.php 2006-04-13 19:52:43 UTC
(rev 8544)
@@ -1,15 +1,14 @@
<?php
-$timeout = "20";
-$fcp_host = "127.0.0.1";
+// fcp
+$fcp_host = '127.0.0.1';
$fcp_port = '8888';
-/*
-$hostname_bot = "";
-$database_bot = "";
-$username_bot = "";
-$password_bot = "";
-$bot = mysql_pconnect($hostname_bot, $username_bot, $password_bot) or
trigger_error(mysql_error(),E_USER_ERROR);
-*/
+// database
+$mysql_server = 'localhost';
+$mysql_username = 'root';
+$mysql_password = '';
+$mysql_db_name = 'darknetspiderbot';
+
?>
\ No newline at end of file
Added: trunk/apps/DarknetSpiderBot/include/database_connection.inc.php
===================================================================
--- trunk/apps/DarknetSpiderBot/include/database_connection.inc.php
2006-04-13 17:19:47 UTC (rev 8543)
+++ trunk/apps/DarknetSpiderBot/include/database_connection.inc.php
2006-04-13 19:52:43 UTC (rev 8544)
@@ -0,0 +1,7 @@
+<?php
+// Connection to mysql
+mysql_connect($mysql_server, $mysql_username, $mysql_password) or die(
mysql_error() );
+
+// Select database
+mysql_select_db($mysql_db_name) or die( mysql_error() );
+?>
\ No newline at end of file
Added: trunk/apps/DarknetSpiderBot/setup/darknetspiderbot.sql
===================================================================
--- trunk/apps/DarknetSpiderBot/setup/darknetspiderbot.sql 2006-04-13
17:19:47 UTC (rev 8543)
+++ trunk/apps/DarknetSpiderBot/setup/darknetspiderbot.sql 2006-04-13
19:52:43 UTC (rev 8544)
@@ -0,0 +1,70 @@
+-- phpMyAdmin SQL Dump
+-- version 2.6.1-rc2
+-- http://www.phpmyadmin.net
+--
+-- Serveur: localhost
+-- Généré le : Jeudi 13 Avril 2006 à 21:48
+-- Version du serveur: 4.0.20
+-- Version de PHP: 4.3.4
+--
+-- Base de données: `darknetspiderbot`
+--
+
+-- --------------------------------------------------------
+
+--
+-- Structure de la table `freesites_informations`
+--
+
+CREATE TABLE `freesites_informations` (
+ `id_freesites` smallint(5) unsigned NOT NULL default '0',
+ `title` varchar(255) NOT NULL default '',
+ `meta_description` varchar(255) NOT NULL default '',
+ `meta_keywords` text NOT NULL,
+ UNIQUE KEY `id_freesites` (`id_freesites`)
+) TYPE=MyISAM;
+
+--
+-- Contenu de la table `freesites_informations`
+--
+
+
+-- --------------------------------------------------------
+
+--
+-- Structure de la table `freesites_keys`
+--
+
+CREATE TABLE `freesites_keys` (
+ `id` smallint(5) unsigned NOT NULL auto_increment,
+ `key_type` enum('CHK','SSK') NOT NULL default 'CHK',
+ `key_value` varchar(255) NOT NULL default '',
+ `created` timestamp(14) NOT NULL,
+ `last_update` timestamp(14) NOT NULL default '00000000000000',
+ PRIMARY KEY (`id`),
+ UNIQUE KEY `key_value` (`key_value`),
+ KEY `last_update` (`last_update`)
+) TYPE=MyISAM AUTO_INCREMENT=1 ;
+
+--
+-- Contenu de la table `freesites_keys`
+--
+
+
+-- --------------------------------------------------------
+
+--
+-- Structure de la table `freesites_urls`
+--
+
+CREATE TABLE `freesites_urls` (
+ `id_freesites` smallint(6) NOT NULL default '0',
+ `url` varchar(255) NOT NULL default '',
+ `status` enum('standby','retrieving','retrieved','error') NOT NULL default
'standby',
+ KEY `id_freesites` (`id_freesites`)
+) TYPE=MyISAM;
+
+--
+-- Contenu de la table `freesites_urls`
+--
+