Author: dongy
Date: 2006-04-11 20:47:12 +0000 (Tue, 11 Apr 2006)
New Revision: 8524

Modified:
   trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php
Log:
problems with regex

Modified: trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php
===================================================================
--- trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php    2006-04-11 20:36:43 UTC 
(rev 8523)
+++ trunk/apps/DarknetSpiderBot/DarknetSpiderBot.php    2006-04-11 20:47:12 UTC 
(rev 8524)
@@ -6,7 +6,7 @@

 $buffer_file = 'local.html';

-$site_key = 
'PFeLTa1si2Ml5sDeUy7eDhPso6TPdmw-2gWfQ4Jg02w,3ocfrqgUMVWA2PeorZx40TW0c-FiIOL-TWKQHoDbVdE,AQABAAE';
+$key_value = 
'PFeLTa1si2Ml5sDeUy7eDhPso6TPdmw-2gWfQ4Jg02w,3ocfrqgUMVWA2PeorZx40TW0c-FiIOL-TWKQHoDbVdE,AQABAAE';
 $site_name = 'Index';
 $last_know_edition = '20';

@@ -14,8 +14,10 @@

 $bot = new bot($fcp_host, $fcp_port, $buffer_file);

-echo $bot->getLastEdition($site_key, $site_name, $last_know_edition);
+$url = $bot->getLastEdition($key_value, $site_name, $last_know_edition);
+print_r($bot->splitURL($url.'/test/index.html'));

+
 //$sitepath = "/USK@$sitekey/$sitename";
 //$bot->getDistantFile($fcp_host, $fcp_port, $sitepath.'/-1');

@@ -81,10 +83,10 @@
                return $contents;
        }

-       function getLastEdition ($site_key, $site_name, $last_know_edition)
+       function getLastEdition ($key_value, $site_name, $last_know_edition)
        {
-               $path = "/USK@$site_key/$site_name/-$last_know_edition";
-               //$path = "/USK@$site_key/$site_name/$last_know_edition";
+               //$path = "/USK@$key_value/$site_name/-$last_know_edition";
+               $path = "/USK@$key_value/$site_name/$last_know_edition";

                $this->getDistantFile($path, 60);

@@ -94,6 +96,35 @@
                return false;
        }

+       /**
+        * Split a Freenet URL such as /[freenet:]KEY@value/site-edition/path.
+        *
+        * @param string $url URL beginning with "/KEY@" or "/freenet:KEY@"
+        * @return array 'key_type' and 'key_value', plus 'site_name' when the
+        *               remainder looks like name-edition/path or name/edition/path
+        */
+       function splitURL ($url)
+       {
+               // strip the optional "freenet:" scheme so the offsets below are fixed
+               if ( substr($url, 0, 9) == '/freenet:')
+                       $url = '/'.substr($url, 9);
+               $splitedURL = array('key_type' => substr($url, 1, 3));
+               $rest = (string) substr($url, 5); // everything after "/KEY@"
+
+               // no further slash: the whole remainder is the key value
+               $slash_pos = strpos($rest, '/');
+               if ( $slash_pos === false ) {
+                       $splitedURL['key_value'] = $rest;
+                       return $splitedURL;
+               }
+               $splitedURL['key_value'] = substr($rest, 0, $slash_pos);
+
+               // '#' delimiters because the pattern contains '/'; group 1 is the site name
+               if ( preg_match('#^([a-zA-Z0-9]+)[/-]+[0-9]+/[a-zA-Z0-9._/-]+$#', substr($rest, $slash_pos+1), $matches) )
+                       $splitedURL['site_name'] = $matches[1];
+               return $splitedURL;
+       }
+       
        function extractTitle ()
        {
                if ( preg_match_all('/<title>(.+?)<\/title>/s', 
$this->buffer_contents, $title) ) {


Reply via email to