Author: pawelz Date: Wed Apr 7 11:34:08 2010 GMT Module: packages Tag: HEAD ---- Log message: - MagpieRSS 0.8a
---- Files affected: packages/wordpress: rss-functions.php (NONE -> 1.1) (NEW), rss.php (NONE -> 1.1) (NEW) ---- Diffs: ================================================================ Index: packages/wordpress/rss-functions.php diff -u /dev/null packages/wordpress/rss-functions.php:1.1 --- /dev/null Wed Apr 7 13:34:08 2010 +++ packages/wordpress/rss-functions.php Wed Apr 7 13:34:03 2010 @@ -0,0 +1,4 @@ +<?php +// Deprecated. Use rss.php instead. +require_once (ABSPATH . WPINC . '/rss.php'); +?> ================================================================ Index: packages/wordpress/rss.php diff -u /dev/null packages/wordpress/rss.php:1.1 --- /dev/null Wed Apr 7 13:34:08 2010 +++ packages/wordpress/rss.php Wed Apr 7 13:34:03 2010 @@ -0,0 +1,2045 @@ +<?php +/* Project: MagpieRSS: a simple RSS integration tool + * File: A compiled file for RSS syndication + * Author: Kellan Elliot-McCrea <[email protected]> + * WordPress development team <http://www.wordpress.org/> + * Charles Johnson <[email protected]> + * Version: 2010.0122 + * License: GPL + * + * Provenance: + * + * This is a drop-in replacement for the `rss-functions.php` provided with the + * WordPress 1.5 distribution, which upgrades the version of MagpieRSS from 0.51 + * to 0.8a. The update improves handling of character encoding, supports + * multiple categories for posts (using <dc:subject> or <category>), supports + * Atom 1.0, and implements many other useful features. The file is derived from + * a combination of (1) the WordPress development team's modifications to + * MagpieRSS 0.51 and (2) the latest bleeding-edge updates to the "official" + * MagpieRSS software, including Kellan's original work and some substantial + * updates by Charles Johnson. All possible through the magic of the GPL. Yay + * for free software! + * + * Differences from the main branch of MagpieRSS: + * + * 1. Everything in rss_parse.inc, rss_fetch.inc, rss_cache.inc, and + * rss_utils.inc is included in one file. + * + * 2. 
MagpieRSS returns the WordPress version as the user agent, rather than + * Magpie + * + * 3. class RSSCache is a modified version by WordPress developers, which + * caches feeds in the WordPress database (in the options table), rather + * than writing external files directly. + * + * 4. There are two WordPress-specific functions, get_rss() and wp_rss() + * + * Differences from the version of MagpieRSS packaged with WordPress: + * + * 1. Support for translation between multiple character encodings. Under + * PHP 5 this is very nicely handled by the XML parsing library. Under PHP + * 4 we need to do a little bit of work ourselves, using either iconv or + * mb_convert_encoding if it is not one of the (extremely limited) number + * of character sets that PHP 4's XML module can handle natively. + * + * 2. Numerous bug fixes. + * + * 3. The parser class MagpieRSS has been substantially revised to better + * support popular features such as enclosures and multiple categories, + * and to support the new Atom 1.0 IETF standard. (Atom feeds are + * normalized so as to make the data available using terminology from + * either Atom 0.3 or Atom 1.0. Atom 0.3 backward-compatibility is provided + * to allow existing software to easily begin accepting Atom 1.0 data; new + * software SHOULD NOT depend on the 0.3 terminology, but rather use the + * normalization as a convenient way to keep supporting 0.3 feeds while + * they linger in the world.) + * + * The upgraded MagpieRSS can also now handle some content constructs that + * had not been handled well by previous versions of Magpie (such as the + * use of namespaced XHTML in <xhtml:body> or <xhtml:div> elements to + * provide the full content of posts in RSS 2.0 feeds). + * + * Unlike previous versions of MagpieRSS, this version can parse multiple + * instances of the same child element in item/entry and channel/feed + * containers. 
This is done using simple counters next to the element + * names: the first <category> element on an RSS item, for example, can be + * found in $item['category'] (thus preserving backward compatibility); the + * second in $item['category#2'], the third in $item['category#3'], and so + * on. The number of categories applied to the item can be found in + * $item['category#'] + * + * Also unlike previous versions of MagpieRSS, this version allows you to + * access the values of elements' attributes as well as the content they + * contain. This can be done using a simple syntax inspired by XPath: to + * access the type attribute of an RSS 2.0 enclosure, for example, you + * need only access `$item['enclosure@type']`. A comma-separated list of + * attributes for the enclosure element is stored in `$item['enclosure@']`. + * (This syntax interacts easily with the syntax for multiple categories; + * for example, the value of the `scheme` attribute for the fourth category + * element on a particular item is stored in `$item['category#4@scheme']`.) + * + * Note also that this implementation IS NOT backward-compatible with the + * kludges that were used to hack in support for multiple categories and + * for enclosures in upgraded versions of MagpieRSS distributed with + * previous versions of FeedWordPress. If your hacks or filter plugins + * depended on the old way of doing things... well, I warned you that they + * might not be permanent. Sorry! + */ + +define('RSS', 'RSS'); +define('ATOM', 'Atom'); + +################################################################################ +## WordPress: make some settings WordPress-appropriate ######################### +################################################################################ + +define('MAGPIE_USER_AGENT', 'WordPress/' . $wp_version . 
'(+http://www.wordpress.org)'); + +$wp_encoding = get_option('blog_charset', /*default=*/ 'ISO-8859-1'); +define('MAGPIE_OUTPUT_ENCODING', ($wp_encoding?$wp_encoding:'ISO-8859-1')); + +################################################################################ +## rss_parse.inc: from MagpieRSS 0.85 ########################################## +################################################################################ + +/** +* Hybrid parser, and object, takes RSS as a string and returns a simple object. +* +* see: rss_fetch.inc for a simpler interface with integrated caching support +* +*/ +class MagpieRSS { + var $parser; + + var $current_item = array(); // item currently being parsed + var $items = array(); // collection of parsed items + var $channel = array(); // hash of channel fields + var $textinput = array(); + var $image = array(); + var $feed_type; + var $feed_version; + var $encoding = ''; // output encoding of parsed rss + + var $_source_encoding = ''; // only set if we have to parse xml prolog + + var $ERROR = ""; + var $WARNING = ""; + + // define some constants + var $_XMLNS_FAMILIAR = array ( + 'http://www.w3.org/2005/Atom' => 'atom' /* 1.0 */, + 'http://purl.org/atom/ns#' => 'atom' /* pre-1.0 */, + 'http://purl.org/rss/1.0/' => 'rss' /* 1.0 */, + 'http://backend.userland.com/RSS2' => 'rss' /* 2.0 */, + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf', + 'http://www.w3.org/1999/xhtml' => 'xhtml', + 'http://purl.org/dc/elements/1.1/' => 'dc', + 'http://purl.org/dc/terms/' => 'dcterms', + 'http://purl.org/rss/1.0/modules/content/' => 'content', + 'http://purl.org/rss/1.0/modules/syndication/' => 'sy', + 'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo', + 'http://purl.org/rss/1.0/modules/dc/' => 'dc', + 'http://wellformedweb.org/CommentAPI/' => 'wfw', + 'http://webns.net/mvcb/' => 'admin', + 'http://purl.org/rss/1.0/modules/annotate/' => 'annotate', + 'http://xmlns.com/foaf/0.1/' => 'foaf', + 
'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback', + 'http://web.resource.org/cc/' => 'cc', + 'http://search.yahoo.com/mrss' => 'media', + 'http://search.yahoo.com/mrss/' => 'media', + 'http://video.search.yahoo.com/mrss' => 'media', + 'http://video.search.yahoo.com/mrss/' => 'media', + ); + + var $_XMLBASE_RESOLVE = array ( + // Atom 0.3 and 1.0 xml:base support + 'atom' => array ( + 'link' => array ('href' => true), + 'content' => array ('src' => true, '*xml' => true, '*html' => true), + 'summary' => array ('*xml' => true, '*html' => true), + 'title' => array ('*xml' => true, '*html' => true), + 'rights' => array ('*xml' => true, '*html' => true), + 'subtitle' => array ('*xml' => true, '*html' => true), + 'info' => array('*xml' => true, '*html' => true), + 'tagline' => array('*xml' => true, '*html' => true), + 'copyright' => array ('*xml' => true, '*html' => true), + 'generator' => array ('uri' => true, 'url' => true), + 'uri' => array ('*content' => true), + 'url' => array ('*content' => true), + 'icon' => array ('*content' => true), + 'logo' => array ('*content' => true), + ), + + // for inline namespaced XHTML + 'xhtml' => array ( + 'a' => array ('href' => true), + 'applet' => array('codebase' => true), + 'area' => array('href' => true), + 'blockquote' => array('cite' => true), + 'body' => array('background' => true), + 'del' => array('cite' => true), + 'form' => array('action' => true), + 'frame' => array('longdesc' => true, 'src' => true), + 'iframe' => array('longdesc' => true, 'iframe' => true, 'src' => true), + 'head' => array('profile' => true), + 'img' => array('longdesc' => true, 'src' => true, 'usemap' => true), + 'input' => array('src' => true, 'usemap' => true), + 'ins' => array('cite' => true), + 'link' => array('href' => true), + 'object' => array('classid' => true, 'codebase' => true, 'data' => true, 'usemap' => true), + 'q' => array('cite' => true), + 'script' => array('src' => true), + ), + ); + + var 
$_ATOM_CONTENT_CONSTRUCTS = array( + 'content', 'summary', 'title', /* common */ + 'info', 'tagline', 'copyright', /* Atom 0.3 */ + 'rights', 'subtitle', /* Atom 1.0 */ + ); + var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div'); + var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1'); + + // parser variables, useless if you're not a parser, treat as private + var $stack = array('element' => array (), 'ns' => array (), 'xmlns' => array (), 'xml:base' => array ()); // stack of XML data + + var $inchannel = false; + var $initem = false; + + var $incontent = array(); // non-empty if in namespaced XML content field + var $xml_escape = false; // true when accepting namespaced XML + var $exclude_top = false; // true when Atom 1.0 type="xhtml" + + var $intextinput = false; + var $inimage = false; + var $root_namespaces = array(); + var $current_namespace = false; + var $working_namespace_table = array(); + + /** + * Set up XML parser, parse source, and return populated RSS object.. + * + * @param string $source string containing the RSS to be parsed + * + * NOTE: Probably a good idea to leave the encoding options alone unless + * you know what you're doing as PHP's character set support is + * a little weird. + * + * NOTE: A lot of this is unnecessary but harmless with PHP5 + * + * + * @param string $output_encoding output the parsed RSS in this character + * set defaults to ISO-8859-1 as this is PHP's + * default. + * + * NOTE: might be changed to UTF-8 in future + * versions. + * + * @param string $input_encoding the character set of the incoming RSS source. + * Leave blank and Magpie will try to figure it + * out. + * + * + * @param bool $detect_encoding if false Magpie won't attempt to detect + * source encoding. 
(caveat emptor) + * + */ + function MagpieRSS ($source, $output_encoding='ISO-8859-1', + $input_encoding=null, $detect_encoding=true, $base_uri=null) + { + # if PHP xml isn't compiled in, die + # + if (!function_exists('xml_parser_create')) { + $this->error( "Failed to load PHP's XML Extension. " . + "http://www.php.net/manual/en/ref.xml.php", + E_USER_ERROR ); + } + + list($parser, $source) = $this->create_parser($source, + $output_encoding, $input_encoding, $detect_encoding); + + + if (!is_resource($parser)) { + $this->error( "Failed to create an instance of PHP's XML parser. " . + "http://www.php.net/manual/en/ref.xml.php", + E_USER_ERROR ); + } + + + $this->parser = $parser; + + # pass in parser, and a reference to this object + # setup handlers + # + xml_set_object( $this->parser, $this ); + xml_set_element_handler($this->parser, + 'feed_start_element', 'feed_end_element' ); + + xml_set_character_data_handler( $this->parser, 'feed_cdata' ); + + $this->stack['xml:base'] = array($base_uri); + + $status = xml_parse( $this->parser, $source ); + + if (! $status ) { + $errorcode = xml_get_error_code( $this->parser ); + if ( $errorcode != XML_ERROR_NONE ) { + $xml_error = xml_error_string( $errorcode ); + $error_line = xml_get_current_line_number($this->parser); + $error_col = xml_get_current_column_number($this->parser); + $errormsg = "$xml_error at line $error_line, column $error_col"; + + $this->error( $errormsg ); + } + } + + xml_parser_free( $this->parser ); + + $this->normalize(); + } + + function feed_start_element($p, $element, &$attributes) { + $el = strtolower($element); + + $namespaces = end($this->stack['xmlns']); + $baseuri = end($this->stack['xml:base']); + + if (isset($attributes['xml:base'])) { + $baseuri = Relative_URI::resolve($attributes['xml:base'], $baseuri); + } + array_push($this->stack['xml:base'], $baseuri); + + // scan for xml namespace declarations. ugly ugly ugly. 
+ // theoretically we could use xml_set_start_namespace_decl_handler and + // xml_set_end_namespace_decl_handler to handle this more elegantly, but + // support for these is buggy + foreach ($attributes as $attr => $value) { + if ( preg_match('/^xmlns(\:([A-Z_a-z].*))?$/', $attr, $match) ) { + $ns = (isset($match[2]) ? $match[2] : ''); + $namespaces[$ns] = $value; + } + } + + array_push($this->stack['xmlns'], $namespaces); + + // check for a namespace, and split if found + // Don't munge content tags + $ns = $this->xmlns($element); + if ( empty($this->incontent) ) { + $el = strtolower($ns['element']); + $this->current_namespace = $ns['effective']; + array_push($this->stack['ns'], $ns['effective']); + } + + $nsc = $ns['canonical']; $nse = $ns['element']; + if ( isset($this->_XMLBASE_RESOLVE[$nsc][$nse]) ) { + if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*xml'])) { + $attributes['xml:base'] = $baseuri; + } + foreach ($attributes as $key => $value) { + if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse][strtolower($key)])) { + $attributes[$key] = Relative_URI::resolve($attributes[$key], $baseuri); + } + } + } + + $attrs = array_change_key_case($attributes, CASE_LOWER); + + # if feed type isn't set, then this is first element of feed + # identify feed from root element + # + if (!isset($this->feed_type) ) { + if ( $el == 'rdf' ) { + $this->feed_type = RSS; + $this->root_namespaces = array('rss', 'rdf'); + $this->feed_version = '1.0'; + } + elseif ( $el == 'rss' ) { + $this->feed_type = RSS; + $this->root_namespaces = array('rss'); + $this->feed_version = $attrs['version']; + } + elseif ( $el == 'feed' ) { + $this->feed_type = ATOM; + $this->root_namespaces = array('atom'); + if ($ns['uri'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0 + $this->feed_version = '1.0'; + } + else { // Atom 0.3, probably. 
+ $this->feed_version = $attrs['version']; + } + $this->inchannel = true; + } + return; + } + + // if we're inside a namespaced content construct, treat tags as text + if ( !empty($this->incontent) ) + { + if ((count($this->incontent) > 1) or !$this->exclude_top) { + if ($ns['effective']=='xhtml') { + $tag = $ns['element']; + } + else { + $tag = $element; + $xmlns = 'xmlns'; + if (strlen($ns['prefix'])>0) { + $xmlns = $xmlns . ':' . $ns['prefix']; + } + $attributes[$xmlns] = $ns['uri']; // make sure it's visible + } + + // if tags are inlined, then flatten + $attrs_str = join(' ', + array_map(array($this, 'map_attrs'), + array_keys($attributes), + array_values($attributes) ) + ); + + if (strlen($attrs_str) > 0) { $attrs_str = ' '.$attrs_str; } + $this->append_content( "<{$tag}{$attrs_str}>" ); + } + array_push($this->incontent, $ns); // stack for parsing content XML + } + + elseif ( $el == 'channel' ) { + $this->inchannel = true; + } + + elseif ($el == 'item' or $el == 'entry' ) + { + $this->initem = true; + if ( isset($attrs['rdf:about']) ) { + $this->current_item['about'] = $attrs['rdf:about']; + } + } + + // if we're in the default namespace of an RSS feed, + // record textinput or image fields + elseif ( + $this->feed_type == RSS and + $this->current_namespace == '' and + $el == 'textinput' ) + { + $this->intextinput = true; + } + + elseif ( + $this->feed_type == RSS and + $this->current_namespace == '' and + $el == 'image' ) + { + $this->inimage = true; + } + + // set stack[0] to current element + else { + // Atom support many links per containing element. + // Magpie treats link elements of type rel='alternate' + // as being equivalent to RSS's simple link element. + + $atom_link = false; + if ( ($ns['canonical']=='atom') and $el == 'link') { + $atom_link = true; + if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') { + $el = $el . "_" . 
$attrs['rel']; // pseudo-element names for Atom link elements + } + } + # handle atom content constructs + elseif ( ($ns['canonical']=='atom') and in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) ) + { + // avoid clashing w/ RSS mod_content + if ($el == 'content' ) { + $el = 'atom_content'; + } + + // assume that everything accepts namespaced XML + // (that will pass through some non-validating feeds; + // but so what? this isn't a validating parser) + $this->incontent = array(); + array_push($this->incontent, $ns); // start a stack + + $this->xml_escape = $this->accepts_namespaced_xml($attrs); + + if ( isset($attrs['type']) and trim(strtolower($attrs['type']))=='xhtml') { + $this->exclude_top = true; + } else { + $this->exclude_top = false; + } + } + # Handle inline XHTML body elements --CWJ + elseif ($ns['effective']=='xhtml' and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS)) { + $this->current_namespace = 'xhtml'; + $this->incontent = array(); + array_push($this->incontent, $ns); // start a stack + + $this->xml_escape = true; + $this->exclude_top = false; + } + + array_unshift($this->stack['element'], $el); + $elpath = join('_', array_reverse($this->stack['element'])); + + $n = $this->element_count($elpath); + $this->element_count($elpath, $n+1); + + if ($n > 0) { + array_shift($this->stack['element']); + array_unshift($this->stack['element'], $el.'#'.($n+1)); + $elpath = join('_', array_reverse($this->stack['element'])); + } + + // this makes the baby Jesus cry, but we can't do it in normalize() + // because we've made the element name for Atom links unpredictable + // by tacking on the relation to the end. 
-CWJ + if ($atom_link and isset($attrs['href'])) { + $this->append($elpath, $attrs['href']); + } + + // add attributes + if (count($attrs) > 0) { + $this->append($elpath.'@', join(',', array_keys($attrs))); + foreach ($attrs as $attr => $value) { + $this->append($elpath.'@'.$attr, $value); + } + } + } + } + + function feed_cdata ($p, $text) { + if ($this->incontent) { + if ($this->xml_escape) { $text = htmlspecialchars($text, ENT_COMPAT, $this->encoding); } + $this->append_content( $text ); + } else { + $current_el = join('_', array_reverse($this->stack['element'])); + $this->append($current_el, $text); + } + } + + function feed_end_element ($p, $el) { + $closer = $this->xmlns($el); + + if ( $this->incontent ) { + $opener = array_pop($this->incontent); + + // balance tags properly + // note: i don't think this is actually neccessary + if ($opener != $closer) { + array_push($this->incontent, $opener); + $this->append_content("<$el />"); + } elseif ($this->incontent) { // are we in the content construct still? 
+ if ((count($this->incontent) > 1) or !$this->exclude_top) { + if ($closer['effective']=='xhtml') { + $tag = $closer['element']; + } + else { + $tag = $el; + } + $this->append_content("</$tag>"); + } + } else { // if we're done with the content construct, shift the opening of the content construct off the normal stack + array_shift( $this->stack['element'] ); + } + } + elseif ($closer['effective'] == '') { + $el = strtolower($closer['element']); + if ( $el == 'item' or $el == 'entry' ) { + $this->items[] = $this->current_item; + $this->current_item = array(); + $this->initem = false; + $this->current_category = 0; + } + elseif ($this->feed_type == RSS and $el == 'textinput' ) { + $this->intextinput = false; + } + elseif ($this->feed_type == RSS and $el == 'image' ) { + $this->inimage = false; + } + elseif ($el == 'channel' or $el == 'feed' ) { + $this->inchannel = false; + } else { + $nsc = $closer['canonical']; $nse = $closer['element']; + if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) { + // Resolve relative URI in content of tag + $this->dereference_current_element(); + } + array_shift( $this->stack['element'] ); + } + } else { + $nsc = $closer['canonical']; $nse = strtolower($closer['element']); + if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) { + // Resolve relative URI in content of tag + $this->dereference_current_element(); + } + array_shift( $this->stack['element'] ); + } + + if ( !$this->incontent ) { // Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ + $this->current_namespace = array_pop($this->stack['ns']); + } + array_pop($this->stack['xmlns']); + array_pop($this->stack['xml:base']); <<Diff was trimmed, longer than 597 lines>> _______________________________________________ pld-cvs-commit mailing list [email protected] http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit
