Hello!

I couldn't find any pointers on where to send contributions, I hope this
is the right place.

Anyway, I was stumped when I couldn't find an easy way to escape strings
for use when printing URLs. So I added two new functions: str_uri_escape
and the corresponding str_uri_unescape to the collection of string
functions.

Manual:

string str_uri_escape (string str)

Returns a string escaped according to RFC 2396 for use in URLs and URIs.
All reserved characters are encoded as %xx where xx is the hexadecimal
representation of the corresponding octet.


string str_uri_unescape (string str)

Decodes an escaped string. See str_uri_escape.


More info can be found at http://www.faqs.org/rfcs/rfc2396.html

Since this is a pretty fundamental operation for web applications I
wouldn't be surprised if similar code already exists somewhere in the PHP
source, but I didn't find anything.

Please CC any comments directly to me, I'm not subscribed to the list due
to the huge amount of mails from the bugtracking system.

Regards,
Fredrik

-- 
Do fish get thirsty?

Fredrik Öhrn                               Chalmers University of Technology
[EMAIL PROTECTED]                                                  Sweden



*** basic_functions.c.orig      Sat Mar 17 00:51:25 2001
--- basic_functions.c   Sat Mar 17 00:53:16 2001
***************
*** 140,145 ****
--- 140,147 ----
        PHP_FE(php_sapi_name,                                                   NULL)
        PHP_FE(php_uname,                                                              
 NULL)

+       PHP_FE(str_uri_escape,                                                         
+ NULL)
+       PHP_FE(str_uri_unescape,                                                       
+ NULL)
        PHP_FE(strnatcmp,                                                              
 NULL)
        PHP_FE(strnatcasecmp,                                                   NULL)
        PHP_FE(substr_count,                                                    NULL)
*** php_string.h.orig   Sat Mar 17 00:51:48 2001
--- php_string.h        Sat Mar 17 01:51:10 2001
***************
*** 78,83 ****
--- 78,85 ----
  PHP_FUNCTION(substr_count);
  PHP_FUNCTION(str_pad);
  PHP_FUNCTION(sscanf);
+ PHP_FUNCTION(str_uri_escape);
+ PHP_FUNCTION(str_uri_unescape);

  #define strnatcmp(a, b) \
        strnatcmp_ex(a, strlen(a), b, strlen(b), 0)
***************
*** 126,131 ****
--- 128,136 ----
  PHPAPI char *php_strerror(int errnum);
  #define strerror php_strerror
  #endif
+
+ PHPAPI char *php_str_uri_escape(char *str, int length, int *new_length, int 
+should_free);
+ PHPAPI void php_uri_unescape(char *str, int *len);

  void register_string_constants(INIT_FUNC_ARGS);

*** quot_print.h.orig   Sat Mar 17 02:13:31 2001
--- quot_print.h        Sat Mar 17 02:02:04 2001
***************
*** 23,26 ****
--- 23,28 ----

  PHP_FUNCTION(quoted_printable_decode);

+ PHPAPI char php_hex2int(int c);
+
  #endif /* QUOT_PRINT_H */
*** quot_print.c.orig   Sat Mar 17 01:45:54 2001
--- quot_print.c        Sat Mar 17 02:02:14 2001
***************
*** 34,40 ****
  /*
  *  Converting HEX char to INT value
  */
! static char php_hex2int(int c)
  {
        if ( isdigit(c) )
        {
--- 34,40 ----
  /*
  *  Converting HEX char to INT value
  */
! PHPAPI char php_hex2int(int c)
  {
        if ( isdigit(c) )
        {
***************
*** 43,48 ****
--- 43,52 ----
        else if ( c >= 'A' && c <= 'F' )
        {
                return c - 'A' + 10;
+       }
+       else if ( c >= 'a' && c <= 'f' )
+       {
+               return c - 'a' + 10;
        }
        else
        {
*** string.c.orig       Sat Mar 17 00:52:04 2001
--- string.c    Sat Mar 17 02:02:37 2001
***************
*** 36,41 ****
--- 36,42 ----
  #include "php_globals.h"
  #include "basic_functions.h"
  #include "php_smart_str.h"
+ #include "quot_print.h"

  #define STR_PAD_LEFT                  0
  #define STR_PAD_RIGHT                 1
***************
*** 2973,2978 ****
--- 2974,3083 ----
  }
  /* }}} */

+ /* {{{ proto string str_uri_escape(string str)
+    Return str with characters percent-encoded (%xx) as described in RFC 2396. */
+ PHP_FUNCTION(str_uri_escape)
+ {
+       zval **arg;
+       char *escaped;
+       int escaped_len;
+
+       if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) {
+               WRONG_PARAM_COUNT;
+       }
+       convert_to_string_ex(arg);
+
+       /* Nothing to encode: hand back an empty string without calling the helper. */
+       if (Z_STRLEN_PP(arg) == 0) {
+               RETURN_EMPTY_STRING();
+       }
+
+       /* should_free is 0: the argument's buffer is left untouched. */
+       escaped = php_str_uri_escape(Z_STRVAL_PP(arg), Z_STRLEN_PP(arg), &escaped_len, 0);
+
+       /* Third argument 0: return the helper's buffer itself, not a copy. */
+       RETURN_STRINGL(escaped, escaped_len, 0);
+ }
+ /* }}} */
+
+ /* Percent-encode a buffer for use in a URI (RFC 2396, section 2).
+  *
+  * Every byte outside the unreserved set  A-Z a-z 0-9 - _ . ! ~ * ' ( )
+  * is written as '%' followed by two hex digits taken from hexconvtab;
+  * unreserved bytes are copied unchanged.  Exactly `length` bytes are read,
+  * so an embedded NUL byte is encoded as %00 rather than ending the input.
+  *
+  * str          input buffer; if NULL, *new_length is set to 0 and NULL is
+  *              returned
+  * length       number of bytes to read from str
+  * new_length   receives the encoded length, not counting the trailing NUL
+  * should_free  if non-zero, str is released with STR_FREE() before returning
+  *
+  * Returns a NUL-terminated buffer allocated with emalloc().
+  */
+ PHPAPI char *php_str_uri_escape(char *str, int length, int *new_length, int should_free)
+ {
+       /* Indexed by (byte - 32) for bytes 32..127:
+        *   1 = unreserved, copied as-is;  0 = must be escaped.
+        * '@' (byte 64, first entry of the third row) is a reserved character
+        * in RFC 2396 and therefore has to be 0. */
+       static const char ok[96] = {
+               0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,
+               1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+               0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+               1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,
+               0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+               1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0
+       };
+
+       char *new_str;
+       char *target;
+       unsigned char c;
+       int i;
+       int count = 0;
+
+       if (!str) {
+               *new_length = 0;
+               return str;
+       }
+
+       /* First pass: count the bytes that need escaping.  The byte is read as
+        * unsigned char so the table index is never negative and never past
+        * the end, whatever the signedness of plain char. */
+       for (i = 0; i < length; i++) {
+               c = (unsigned char) str[i];
+               if (c < 32 || c > 127 || !ok[c - 32]) {
+                       count++;
+               }
+       }
+
+       /* Each escaped byte grows from one character to three. */
+       *new_length = length + 2 * count;
+
+       /* One extra byte for the NUL terminator written below. */
+       new_str = (char *) emalloc(*new_length + 1);
+
+       /* Second pass: emit the encoded string. */
+       target = new_str;
+       for (i = 0; i < length; i++) {
+               c = (unsigned char) str[i];
+               if (c >= 32 && c <= 127 && ok[c - 32]) {
+                       *target++ = (char) c;
+               } else {
+                       *target++ = '%';
+                       *target++ = hexconvtab[c >> 4];
+                       *target++ = hexconvtab[c & 15];
+               }
+       }
+
+       *target = 0;
+       if (should_free) {
+               STR_FREE(str);
+       }
+       return new_str;
+ }
+
+ /* {{{ proto string str_uri_unescape(string str)
+    Decode the %xx escapes (RFC 2396 style) found in str. */
+ PHP_FUNCTION(str_uri_unescape)
+ {
+       zval **arg;
+
+       if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) {
+               WRONG_PARAM_COUNT;
+       }
+       convert_to_string_ex(arg);
+
+       /* php_uri_unescape() edits its buffer in place, so copy the argument
+        * into return_value first and decode that copy. */
+       *return_value = **arg;
+       zval_copy_ctor(return_value);
+       php_uri_unescape(Z_STRVAL_P(return_value), &Z_STRLEN_P(return_value));
+ }
+ /* }}} */
+
+ /* Returns non-zero if c is an ASCII hexadecimal digit (0-9, A-F, a-f). */
+ static int php_uri_is_hex(int c)
+ {
+       return (c >= '0' && c <= '9')
+               || (c >= 'A' && c <= 'F')
+               || (c >= 'a' && c <= 'f');
+ }
+
+ /* Decode %xx escapes.  Be careful, this edits the string in-place.
+  *
+  * str  NUL-terminated, writable buffer; scanning stops at the first NUL.
+  *      The decoded text is never longer than the input.
+  * len  if non-NULL, receives the decoded length (not counting the
+  *      trailing NUL).  Its incoming value is not read.
+  *
+  * A '%' that is not followed by two hexadecimal digits is copied through
+  * unchanged instead of being dropped or decoded into a garbage byte.
+  */
+ PHPAPI void php_uri_unescape(char *str, int *len)
+ {
+       char *s = str, *t = str;
+
+       if (!str) {
+               if (len) {
+                       *len = 0;
+               }
+               return;
+       }
+
+       while (*s) {
+               /* && short-circuits, so s[2] is only read when s[1] is a hex
+                * digit, i.e. not the terminating NUL. */
+               if (*s == '%'
+                       && php_uri_is_hex((unsigned char) s[1])
+                       && php_uri_is_hex((unsigned char) s[2])) {
+                       *t++ = (char) ((php_hex2int(s[1]) << 4) | php_hex2int(s[2]));
+                       s += 3;                         /* skip %xx */
+               } else {
+                       *t++ = *s++;
+               }
+       }
+       *t = '\0';
+
+       /* Report the new length: decoding shortens the string, and a decoded
+        * %00 means callers cannot recover the length with strlen(). */
+       if (len) {
+               *len = (int) (t - str);
+       }
+ }

  /*
   * Local variables:

uri_escape.tgz

-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
To contact the list administrators, e-mail: [EMAIL PROTECTED]

Reply via email to