jeroen          Wed May 23 15:13:58 2001 EDT

  Modified files:              
    /phpdoc/en/language types.xml 
  Log:
  updated the strings section a bit.
  
  
Index: phpdoc/en/language/types.xml
diff -u phpdoc/en/language/types.xml:1.27 phpdoc/en/language/types.xml:1.28
--- phpdoc/en/language/types.xml:1.27   Tue May 22 16:18:54 2001
+++ phpdoc/en/language/types.xml        Wed May 23 15:13:58 2001
@@ -478,107 +478,216 @@
   <sect1 id="language.types.string">
    <title>Strings</title>
    <para>
-    Strings can be specified using one of two sets of delimiters.
+    A <type>string</type> is a series of characters. In PHP,
+    a character is the same as a byte, that is, there are exactly
+    256 different characters possible. This also implies that PHP
+    has no native support of Unicode. 
+    <!-- how about unicode? will we support that eventually? Are
+    there currently any ways to work with unicode?
+    -->
    </para>
-   <para>
-    If the string is enclosed in double-quotes ("), variables within
-    the string will be expanded (subject to some parsing
-    limitations). As in C and Perl, the backslash ("\") character can
-    be used in specifying special characters:
-    <table>
-     <title>Escaped characters</title>
-     <tgroup cols="2">
-      <thead>
-       <row>
-        <entry>sequence</entry>
-        <entry>meaning</entry>
-       </row>
-      </thead>
-      <tbody>
-       <row>
-        <entry><literal>\n</literal></entry>
-        <entry>linefeed (LF or 0x0A (10) in ASCII)</entry>
-       </row>
-       <row>
-        <entry><literal>\r</literal></entry>
-        <entry>carriage return (CR or 0x0D (13) in ASCII)</entry>
-       </row>
-       <row>
-        <entry><literal>\t</literal></entry>
-        <entry>horizontal tab (HT or 0x09 (9) in ASCII)</entry>
-       </row>
-       <row>
-        <entry><literal>\\</literal></entry>
-        <entry>backslash</entry>
-       </row>
-       <row>
-        <entry><literal>\$</literal></entry>
-        <entry>dollar sign</entry>
-       </row>
-       <row>
-        <entry><literal>\"</literal></entry>
-        <entry>double-quote</entry>
-       </row>
-       <row>
-        <entry><literal>\[0-7]{1,3}</literal></entry>
-        <entry>
-         the sequence of characters matching the regular
-         expression is a character in octal notation
-        </entry>
-       </row>
-       <row>
-        <entry><literal>\x[0-9A-Fa-f]{1,2}</literal></entry>
-        <entry>
-         the sequence of characters matching the regular
-         expression is a character in hexadecimal notation
-        </entry>
-       </row>
-      </tbody>
-     </tgroup>
-    </table>
-   </para>
-
-   <para>
-    If you attempt to escape any other character, both the backslash
-    and the character will be output. In PHP 3, a warning will
-    be issued at the <literal>E_NOTICE</literal> level when this
-    happens. In PHP 4, no warning is generated.
-   </para>
+   <note>
+    <simpara>
+     It is no problem for a string to become very large. 
+     There is no practical bound to the size
+     of strings imposed by PHP, so there is no reason at all
+     to worry about long strings.
+    </simpara>
+   </note>
+   <sect2 id="language.types.string.syntax">
+    <title>Syntax</title>
+    <para>
+     A string literal can be specified in three different
+     ways.
+     <itemizedlist>
+
+      <listitem>
+       <simpara>
+        <link linkend="language.types.string.syntax.single">single quoted</link>
+       </simpara>
+      </listitem>
+      <listitem>
+       <simpara>
+        <link linkend="language.types.string.syntax.double">double quoted</link>
+       </simpara>
+      </listitem>
+      <listitem>
+       <simpara>
+        <link linkend="language.types.string.syntax.heredoc">heredoc syntax</link>
+       </simpara>
+      </listitem>
 
-   <para>
-    The second way to delimit a string uses the single-quote ("'")
-    character. When a string is enclosed in single quotes, the only
-    escapes that will be understood are "\\" and "\'". This is for
-    convenience, so that you can have single-quotes and backslashes in
-    a single-quoted string. Variables will <emphasis>not</emphasis> be
-    expanded inside a single-quoted string.
-   </para>
+     </itemizedlist>
+    </para>
+    <sect3 id="language.types.string.syntax.single">
+     <title>Single quoted</title>
+     <para>
+      The easiest way to specify a simple string is to
+      enclose it in single quotes (the character <literal>'</literal>). 
+     </para>
+     <para>
+      To specify a literal single
+      quote, you will need to escape it with a backslash
+      (<literal>\</literal>), as in many other languages.
+      If a backslash needs to occur before a single quote or at 
+      the end of the string, you need to double it.
+      Note that if you try to escape any
+      other character, the backslash will be printed too! So
+      usually there is no need to escape the backslash itself.
+      <note>
+       <simpara>
+        In PHP 3, a warning will
+        be issued at the <literal>E_NOTICE</literal> level when this
+        happens.        
+       </simpara>
+      </note>
+      <note>
+       <simpara>
+        Unlike the two other syntaxes, variables will <emphasis>not</emphasis> 
+        be expanded when they occur in single quoted strings.
+       </simpara>
+      </note>
+      <informalexample>
+       <programlisting role="php">
+echo 'this is a simple string';
+echo 'You can also have embedded newlines in strings,
+like this way.';
+echo 'Arnold once said: "I\'ll be back"';
+// output: ... "I'll be back"
+echo 'Are you sure you want to delete C:\\*.*?';
+// output: ... delete C:\*.*?
+echo 'Are you sure you want to delete C:\*.*?';
+// output: ... delete C:\*.*?
+echo 'I am trying to include at this point: \n a newline';
+// output: ... this point: \n a newline
+       </programlisting>
+      </informalexample>
+     </para>
+    </sect3>
+    <sect3 id="language.types.string.syntax.double">
+     <title>Double quoted</title>
+     <para>
+      If the string is enclosed in double-quotes ("),
+      PHP understands more escape sequences for special
+      characters:
+     </para>
+     <table>
+      <title>Escaped characters</title>
+      <tgroup cols="2">
+       <thead>
+        <row>
+         <entry>sequence</entry>
+         <entry>meaning</entry>
+        </row>
+       </thead>
+       <tbody>
+        <row>
+         <entry><literal>\n</literal></entry>
+         <entry>linefeed (LF or 0x0A (10) in ASCII)</entry>
+        </row>
+        <row>
+         <entry><literal>\r</literal></entry>
+         <entry>carriage return (CR or 0x0D (13) in ASCII)</entry>
+        </row>
+        <row>
+         <entry><literal>\t</literal></entry>
+         <entry>horizontal tab (HT or 0x09 (9) in ASCII)</entry>
+        </row>
+        <row>
+         <entry><literal>\\</literal></entry>
+         <entry>backslash</entry>
+        </row>
+        <row>
+         <entry><literal>\$</literal></entry>
+         <entry>dollar sign</entry>
+        </row>
+        <row>
+         <entry><literal>\"</literal></entry>
+         <entry>double-quote</entry>
+        </row>
+        <row>
+         <entry><literal>\[0-7]{1,3}</literal></entry>
+         <entry>
+          the sequence of characters matching the regular
+          expression is a character in octal notation
+         </entry>
+        </row>
+        <row>
+         <entry><literal>\x[0-9A-Fa-f]{1,2}</literal></entry>
+         <entry>
+          the sequence of characters matching the regular
+          expression is a character in hexadecimal notation
+         </entry>
+        </row>
+       </tbody>
+      </tgroup>
+     </table>
+     <para>
+      Again, if you try to escape any other character, the
+      backslash will be printed too!
+     </para>
+     <para>
+      But the most important feature of double-quoted strings
+      is the fact that variable names will be expanded.
+      See <link linkend="language.types.string.parsing">string 
+      parsing</link> for details.
+     </para>
+    </sect3>
+    
+    <sect3 id="language.types.string.syntax.heredoc">
+     <title>Heredoc</title>
+     <simpara>
+      Another way to delimit strings is by using here doc syntax
+      ("&lt;&lt;&lt;").  One should provide an identifier after
+      <literal>&lt;&lt;&lt;</literal>, then the string, and then the
+      same identifier to close the quotation. 
+     </simpara>     
+     <simpara>
+      The closing identifier <emphasis>must</emphasis> begin in the
+      first column of the line.  Also, the identifier used must follow
+      the same naming rules as any other label in PHP: it must contain
+      only alphanumeric characters and underscores, and must start with
+      a non-digit character or underscore.
+     </simpara>
+     
+     <warning>
+      <simpara>
+       It is very important to note that the line with the closing
+       identifier must contain no other characters, except 
+       <emphasis>possibly</emphasis> a <literal>;</literal>.
+       That means especially that the identifier 
+       <emphasis>may not be indented</emphasis>, and there
+       may not be any spaces or tabs after or before the <literal>;</literal>.
+      </simpara>
+      <simpara>
+       Probably the nastiest gotcha is that there may also
+       not be a carriage return (<literal>\r</literal>) at the end of 
+       the line, only 
+       a line feed, a.k.a. newline (<literal>\n</literal>).
+       Since Microsoft Windows uses the sequence 
+       <literal>\r\n</literal> as a line
+       terminator, your heredoc may not work if you write your
+       script in a Windows editor. However, most programming
+       editors provide a way to save your files with UNIX
+       line terminators.
+       <!--
+       FTP will sometimes automatically convert \r\n to \n while
+       transferring your files to your webserver (which
+       is *nix, of course)
+       -->
+      </simpara>
+     </warning>
 
-   <simpara>
-    Another way to delimit strings is by using here doc syntax
-    ("&lt;&lt;&lt;").  One should provide an identifier after
-    <literal>&lt;&lt;&lt;</literal>, then the string, and then the
-    same identifier to close the quotation. 
-   </simpara>
-   
-   <simpara>
-    The closing identifier <emphasis>must</emphasis> begin in the
-    first column of the line.  Also, the identifier used must follow
-    the same naming rules as any other label in PHP: it must contain
-    only alphanumeric characters and underscores, and must start with
-    a non-digit character or underscore.
-   </simpara>
-
-   <para>
-    Here doc text behaves just like a double-quoted string, without
-    the double-quotes. This means that you do not need to escape quotes
-    in your here docs, but you can still use the escape codes listed
-    above. Variables are expanded, but the same care must be taken
-    when expressing complex variables inside a here doc as with
-    strings.
-    <example> 
-     <title>Here doc string quoting example</title>
-     <programlisting>
+     <para>
+      Here doc text behaves just like a double-quoted string, without
+      the double-quotes. This means that you do not need to escape quotes
+      in your here docs, but you can still use the escape codes listed
+      above. Variables are expanded, but the same care must be taken
+      when expressing complex variables inside a here doc as with
+      strings.
+      <example> 
+       <title>Here doc string quoting example</title>
+       <programlisting>
 &lt;?php
 $str = &lt;&lt;&lt;EOD
 Example of string
@@ -606,68 +715,22 @@
 This should print a capital 'A': \x41
 EOT;
 ?>
-     </programlisting>
-    </example>
-   </para>
-
-   <note>
-    <para>
-     Here doc support was added in PHP 4.
-    </para>
-   </note>
-   <para>
-    Strings may be concatenated using the '.' (dot) operator. Note
-    that the '+' (addition) operator will not work for this. Please
-    see <link linkend="language.operators.string">String
-    operators</link> for more information.
-   </para>
-   <para>
-    Characters within strings may be accessed by treating the string
-    as a numerically-indexed array of characters, using C-like
-    syntax. See below for examples.
-   </para>
-   <para>
-    <example>
-     <title>Some string examples</title>
-     <programlisting role="php">
-&lt;?php
-/* Assigning a string. */
-$str = "This is a string";
-
-/* Appending to it. */
-$str = $str . " with some more text";
-
-/* Another way to append, includes an escaped newline. */
-$str .= " and a newline at the end.\n";
-
-/* This string will end up being '&lt;p&gt;Number: 9&lt;/p&gt;' */
-$num = 9;
-$str = "&lt;p&gt;Number: $num&lt;/p&gt;";
-
-/* This one will be '&lt;p&gt;Number: $num&lt;/p&gt;' */
-$num = 9;
-$str = '&lt;p&gt;Number: $num&lt;/p&gt;';
-
-/* Get the first character of a string  */
-$str = 'This is a test.';
-$first = $str[0];
-
-/* Get the last character of a string. */
-$str = 'This is still a test.';
-$last = $str[strlen($str)-1];
-?&gt;    
-     </programlisting>
-    </example>
-   </para>
-    <sect2 id="language.types.string.parsing">
-     <title>String parsing</title>
-     <!-- 
-     I used simpara all over, because I don't know when
-     to use para. There will also probably some typo's
-     and misspellings.
-     -->
+       </programlisting>
+      </example>
+     </para>
+  
+     <note>
+      <para>
+       Here doc support was added in PHP 4.
+      </para>
+     </note>
+  
+    </sect3>
+    <sect3 id="language.types.string.parsing">
+     <title>Variable parsing</title>
      <simpara>
-      When a string is specified in double quotes, variables are
+      When a string is specified in double quotes or with
+      heredoc, variables are
       parsed within it. 
      </simpara>
      <simpara>
@@ -685,10 +748,10 @@
       and can by recognised
       by the curly braces surrounding the expression.
      </simpara>
-     <sect3 id="language.types.string.parsing.simple">
+     <sect4 id="language.types.string.parsing.simple">
       <title>Simple syntax</title>
       <simpara>
-       If a $ is encoutered, the parser will
+       If a <literal>$</literal> is encountered, the parser will
        greedily take as much tokens as possible to form a valid
        variable name. Enclose the the variable name in curly
        braces if you want to explicitely specify the end of the
@@ -696,10 +759,10 @@
       </simpara>
       <informalexample>
        <programlisting role="php">
- $beer = 'Heineken';
- echo "$beer's taste is great"; // works, "'" is an invalid character for varnames
- echo "He drunk some $beers"; // won't work, 's' is a valid character for varnames
- echo "He drunk some ${beer}s"; // works
+$beer = 'Heineken';
+echo "$beer's taste is great"; // works, "'" is an invalid character for varnames
+echo "He drunk some $beers"; // won't work, 's' is a valid character for varnames
+echo "He drunk some ${beer}s"; // works
        </programlisting>
       </informalexample>
       <simpara>
@@ -720,29 +783,29 @@
       </simpara>
       <informalexample>
        <programlisting role="php">
- $fruits = array( 'strawberry' =&gt; 'red' , 'banana' =&gt; 'yellow' );
- echo "A banana is $fruits[banana].";
- echo "This square is $square-&gt;width meters broad.";
- echo "This square is $square-&gt;width00 centimeters broad."; // won't work,
-    // for a solution, see the <link 
linkend="language.types.string.parsing.complex">complex syntax</link>.
- 
- <!-- XXX this won't work:
- echo "This square is $square->{width}00 centimeters broad."; 
- // XXX: php developers: it would be consequent to make this work.
- // XXX: like the $obj->{expr} syntax outside a string works, 
- // XXX: analogously to the ${expr} syntax for variable var's.
- -->
- 
+$fruits = array( 'strawberry' =&gt; 'red' , 'banana' =&gt; 'yellow' );
+echo "A banana is $fruits[banana].";
+echo "This square is $square-&gt;width meters broad.";
+echo "This square is $square-&gt;width00 centimeters broad."; // won't work,
+   // for a solution, see the <link 
+linkend="language.types.string.parsing.complex">complex syntax</link>.
+
+<!-- XXX this won't work:
+echo "This square is $square->{width}00 centimeters broad."; 
+// XXX: php developers: it would be consequent to make this work.
+// XXX: like the $obj->{expr} syntax outside a string works, 
+// XXX: analogously to the ${expr} syntax for variable var's.
+-->
+
        </programlisting>
       </informalexample>
       <simpara>
        For anything more complex, you should use the complex syntax.
       </simpara>
-     </sect3>
-     <sect3 id="language.types.string.parsing.complex">
+     </sect4>
+     <sect4 id="language.types.string.parsing.complex">
       <title>Complex (curly) syntax</title>
       <simpara>
-       I didn't call this complex because the syntax is complex,
+       This isn't called complex because the syntax is complex,
        but because you can include complex expressions this way.
      </simpara>
      <simpara>
@@ -756,29 +819,110 @@
      </simpara>
      <informalexample>
       <programlisting role="php">
- $great = 'fantastic';
- echo "This is { $great}"; // won't work, outputs: This is { fantastic}
- echo "This is {$great}";  // works, outputs: This is fantastic
- echo "This square is {$square-&gt;width}00 centimeters broad."; 
- echo "This works: {$arr[4][3]}";     
- echo "This is wrong: {$arr[foo][3]}"; // for the same reason 
-    // as $foo[bar] is wrong outside a string. 
- <!-- XXX see the still-to-write explaination in the arrays-section. -->
- echo "You should do it this way: {$arr['foo'][3]}";
- echo "You can even write {$obj-&gt;values[3]-&gt;name}";
- echo "This is the value of the var named $name: {${$name}}";
- 
- <!-- <xxx> maybe it's better to leave this out?? -->
- // this works, but i disencourage its use, since this is NOT 
- // involving functions, rather than mere variables, arrays and objects.
- $beer = 'Heineken';
- echo "I'd like to have another {${ strrev('reeb') }}, hips";
- <!-- </xxx> -->
- 
+$great = 'fantastic';
+echo "This is { $great}"; // won't work, outputs: This is { fantastic}
+echo "This is {$great}";  // works, outputs: This is fantastic
+echo "This square is {$square-&gt;width}00 centimeters broad."; 
+echo "This works: {$arr[4][3]}";     
+echo "This is wrong: {$arr[foo][3]}"; // for the same reason 
+   // as <link linkend="language.types.array.foo-bar">$foo[bar]</link
+   > is wrong outside a string. 
+echo "You should do it this way: {$arr['foo'][3]}";
+echo "You can even write {$obj-&gt;values[3]-&gt;name}";
+echo "This is the value of the var named $name: {${$name}}";
+<!-- <xxx> maybe it's better to leave this out?? 
+// this works, but i disencourage its use, since this is NOT 
+// involving functions, rather than mere variables, arrays and objects.
+$beer = 'Heineken';
+echo "I'd like to have another {${ strrev('reeb') }}, hips";
+ </xxx> -->
        </programlisting>
       </informalexample>
-     </sect3>
-    </sect2>
+     </sect4>
+    </sect3>
+    
+    <sect3 id="language.types.string.substr">
+     <title>String access by character</title>
+     <para>
+      Characters within strings may be accessed by specifying the
+      zero-based offset of the desired character after the string 
+      in curly braces.
+     </para>
+     <note>
+      <simpara>
+       For backwards compatibility, you can still use the array-style
+       square brackets. However, this syntax is deprecated as of PHP 4.
+      </simpara>
+     </note>
+     <para>
+      <example>
+       <title>Some string examples</title>
+       <programlisting role="php">
+<!-- TODO: either move these examples to a example section,
+as with arrays, or distribute them under the applicable
+sections. -->
+&lt;?php
+/* Assigning a string. */
+$str = "This is a string";
+
+/* Appending to it. */
+$str = $str . " with some more text";
+
+/* Another way to append, includes an escaped newline. */
+$str .= " and a newline at the end.\n";
+
+/* This string will end up being '&lt;p&gt;Number: 9&lt;/p&gt;' */
+$num = 9;
+$str = "&lt;p&gt;Number: $num&lt;/p&gt;";
+
+/* This one will be '&lt;p&gt;Number: $num&lt;/p&gt;' */
+$num = 9;
+$str = '&lt;p&gt;Number: $num&lt;/p&gt;';
+
+/* Get the first character of a string  */
+$str = 'This is a test.';
+$first = $str{0};
+
+/* Get the last character of a string. */
+$str = 'This is still a test.';
+$last = $str{strlen($str)-1};
+?&gt;    
+       </programlisting>
+      </example>
+     </para>
+    </sect3>
+
+   </sect2><!-- end syntax -->
+
+   <sect2 id="language.types.string.useful-funcs">
+    <title>Useful functions</title><!-- and operators -->
+    <para>
+     Strings may be concatenated using the '.' (dot) operator. Note
+     that the '+' (addition) operator will not work for this. Please
+     see <link linkend="language.operators.string">String
+     operators</link> for more information.
+    </para>
+    <para>
+     There are a lot of useful functions for string modification.
+    </para>
+    <simpara>
+     See the <link linkend="ref.strings">string functions section</link> 
+     for general functions, and the regular expression functions for
+     advanced find-and-replace operations (in two flavors: 
+     <link linkend="ref.pcre">Perl</link> and 
+     <link linkend="ref.regex">POSIX extended</link>).
+    </simpara>
+    <simpara>
+     There are also <link linkend="ref.url">functions for URL-strings</link>,
+     and functions to encrypt/decrypt strings 
+     (<link linkend="ref.mcrypt">mcrypt</link> and 
+     <link linkend="ref.mhash">mhash</link>).
+    </simpara>
+    <simpara>
+     Finally, if you still haven't found what you're looking for,
+     see also the <link linkend="ref.ctype">character type functions</link>.
+    </simpara>
+   </sect2>
    <sect2 id="language.types.string.conversion">
     <title>String conversion</title>
 
@@ -832,7 +976,7 @@
     </para>
 
    </sect2>
-  </sect1>
+  </sect1><!-- end string -->
 
   <sect1 id="language.types.array">
    <title>Arrays</title>

Reply via email to