packages: wordpress/rss-functions.php (NEW), wordpress/rss.php (NEW) - Magp...
pawelz
pawelz at pld-linux.org
Wed Apr 7 13:34:08 CEST 2010
Author: pawelz Date: Wed Apr 7 11:34:08 2010 GMT
Module: packages Tag: HEAD
---- Log message:
- MagpieRSS 0.8a
---- Files affected:
packages/wordpress:
rss-functions.php (NONE -> 1.1) (NEW), rss.php (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: packages/wordpress/rss-functions.php
diff -u /dev/null packages/wordpress/rss-functions.php:1.1
--- /dev/null Wed Apr 7 13:34:08 2010
+++ packages/wordpress/rss-functions.php Wed Apr 7 13:34:03 2010
@@ -0,0 +1,4 @@
+<?php
+// Deprecated: this file does nothing except load rss.php. Include rss.php directly instead.
+require_once (ABSPATH . WPINC . '/rss.php');
+?>
================================================================
Index: packages/wordpress/rss.php
diff -u /dev/null packages/wordpress/rss.php:1.1
--- /dev/null Wed Apr 7 13:34:08 2010
+++ packages/wordpress/rss.php Wed Apr 7 13:34:03 2010
@@ -0,0 +1,2045 @@
+<?php
+/* Project: MagpieRSS: a simple RSS integration tool
+ * File: A compiled file for RSS syndication
+ * Author: Kellan Elliot-McCrea <kellan at protest.net>
+ * WordPress development team <http://www.wordpress.org/>
+ * Charles Johnson <technophilia at radgeek.com>
+ * Version: 2010.0122
+ * License: GPL
+ *
+ * Provenance:
+ *
+ * This is a drop-in replacement for the `rss-functions.php` provided with the
+ * WordPress 1.5 distribution, which upgrades the version of MagpieRSS from 0.51
+ * to 0.8a. The update improves handling of character encoding, supports
+ * multiple categories for posts (using <dc:subject> or <category>), supports
+ * Atom 1.0, and implements many other useful features. The file is derived from
+ * a combination of (1) the WordPress development team's modifications to
+ * MagpieRSS 0.51 and (2) the latest bleeding-edge updates to the "official"
+ * MagpieRSS software, including Kellan's original work and some substantial
+ * updates by Charles Johnson. All possible through the magic of the GPL. Yay
+ * for free software!
+ *
+ * Differences from the main branch of MagpieRSS:
+ *
+ * 1. Everything in rss_parse.inc, rss_fetch.inc, rss_cache.inc, and
+ * rss_utils.inc is included in one file.
+ *
+ * 2. MagpieRSS returns the WordPress version as the user agent, rather than
+ * Magpie
+ *
+ * 3. class RSSCache is a modified version by WordPress developers, which
+ * caches feeds in the WordPress database (in the options table), rather
+ * than writing external files directly.
+ *
+ * 4. There are two WordPress-specific functions, get_rss() and wp_rss()
+ *
+ * Differences from the version of MagpieRSS packaged with WordPress:
+ *
+ * 1. Support for translation between multiple character encodings. Under
+ * PHP 5 this is very nicely handled by the XML parsing library. Under PHP
+ * 4 we need to do a little bit of work ourselves, using either iconv or
+ * mb_convert_encoding if it is not one of the (extremely limited) number
+ * of character sets that PHP 4's XML module can handle natively.
+ *
+ * 2. Numerous bug fixes.
+ *
+ * 3. The parser class MagpieRSS has been substantially revised to better
+ * support popular features such as enclosures and multiple categories,
+ * and to support the new Atom 1.0 IETF standard. (Atom feeds are
+ * normalized so as to make the data available using terminology from
+ * either Atom 0.3 or Atom 1.0. Atom 0.3 backward-compatibility is provided
+ * to allow existing software to easily begin accepting Atom 1.0 data; new
+ * software SHOULD NOT depend on the 0.3 terminology, but rather use the
+ * normalization as a convenient way to keep supporting 0.3 feeds while
+ * they linger in the world.)
+ *
+ * The upgraded MagpieRSS can also now handle some content constructs that
+ * had not been handled well by previous versions of Magpie (such as the
+ * use of namespaced XHTML in <xhtml:body> or <xhtml:div> elements to
+ * provide the full content of posts in RSS 2.0 feeds).
+ *
+ * Unlike previous versions of MagpieRSS, this version can parse multiple
+ * instances of the same child element in item/entry and channel/feed
+ * containers. This is done using simple counters next to the element
+ * names: the first <category> element on an RSS item, for example, can be
+ * found in $item['category'] (thus preserving backward compatibility); the
+ * second in $item['category#2'], the third in $item['category#3'], and so
+ * on. The number of categories applied to the item can be found in
+ * $item['category#']
+ *
+ * Also unlike previous versions of MagpieRSS, this version allows you to
+ * access the values of elements' attributes as well as the content they
+ * contain. This can be done using a simple syntax inspired by XPath: to
+ * access the type attribute of an RSS 2.0 enclosure, for example, you
+ * need only access `$item['enclosure@type']`. A comma-separated list of
+ * attributes for the enclosure element is stored in `$item['enclosure@']`.
+ * (This syntax interacts easily with the syntax for multiple categories;
+ * for example, the value of the `scheme` attribute for the fourth category
+ * element on a particular item is stored in `$item['category#4@scheme']`.)
+ *
+ * Note also that this implementation IS NOT backward-compatible with the
+ * kludges that were used to hack in support for multiple categories and
+ * for enclosures in upgraded versions of MagpieRSS distributed with
+ * previous versions of FeedWordPress. If your hacks or filter plugins
+ * depended on the old way of doing things... well, I warned you that they
+ * might not be permanent. Sorry!
+ */
+
+// Feed-type identifiers; MagpieRSS stores one of these in $feed_type.
+// Each constant is only defined when nothing has defined it already, so a
+// pre-existing definition no longer triggers a "constant already defined" notice.
+if (!defined('RSS')) {
+    define('RSS', 'RSS');
+}
+if (!defined('ATOM')) {
+    define('ATOM', 'Atom');
+}
+
+################################################################################
+## WordPress: make some settings WordPress-appropriate #########################
+################################################################################
+
+// User agent string built from the WordPress version.
+if (!defined('MAGPIE_USER_AGENT')) {
+    define('MAGPIE_USER_AGENT', 'WordPress/' . $wp_version . '(+http://www.wordpress.org)');
+}
+
+// Output encoding follows the blog_charset option, falling back to ISO-8859-1
+// when the option is missing or empty. $wp_encoding is still assigned
+// unconditionally because code outside this view may read it.
+$wp_encoding = get_option('blog_charset', /*default=*/ 'ISO-8859-1');
+if (!defined('MAGPIE_OUTPUT_ENCODING')) {
+    define('MAGPIE_OUTPUT_ENCODING', ($wp_encoding?$wp_encoding:'ISO-8859-1'));
+}
+
+################################################################################
+## rss_parse.inc: from MagpieRSS 0.85 ##########################################
+################################################################################
+
+/**
+* Hybrid parser, and object, takes RSS as a string and returns a simple object.
+*
+* see: rss_fetch.inc for a simpler interface with integrated caching support
+*
+*/
+class MagpieRSS {
+ var $parser;
+
+ var $current_item = array(); // item currently being parsed
+ var $items = array(); // collection of parsed items
+ var $channel = array(); // hash of channel fields
+ var $textinput = array();
+ var $image = array();
+ var $feed_type;
+ var $feed_version;
+ var $encoding = ''; // output encoding of parsed rss
+
+ var $_source_encoding = ''; // only set if we have to parse xml prolog
+
+ var $ERROR = "";
+ var $WARNING = "";
+
+ // define some constants
+ var $_XMLNS_FAMILIAR = array (
+ 'http://www.w3.org/2005/Atom' => 'atom' /* 1.0 */,
+ 'http://purl.org/atom/ns#' => 'atom' /* pre-1.0 */,
+ 'http://purl.org/rss/1.0/' => 'rss' /* 1.0 */,
+ 'http://backend.userland.com/RSS2' => 'rss' /* 2.0 */,
+ 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
+ 'http://www.w3.org/1999/xhtml' => 'xhtml',
+ 'http://purl.org/dc/elements/1.1/' => 'dc',
+ 'http://purl.org/dc/terms/' => 'dcterms',
+ 'http://purl.org/rss/1.0/modules/content/' => 'content',
+ 'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
+ 'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
+ 'http://purl.org/rss/1.0/modules/dc/' => 'dc',
+ 'http://wellformedweb.org/CommentAPI/' => 'wfw',
+ 'http://webns.net/mvcb/' => 'admin',
+ 'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
+ 'http://xmlns.com/foaf/0.1/' => 'foaf',
+ 'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
+ 'http://web.resource.org/cc/' => 'cc',
+ 'http://search.yahoo.com/mrss' => 'media',
+ 'http://search.yahoo.com/mrss/' => 'media',
+ 'http://video.search.yahoo.com/mrss' => 'media',
+ 'http://video.search.yahoo.com/mrss/' => 'media',
+ );
+
+ var $_XMLBASE_RESOLVE = array (
+ // Atom 0.3 and 1.0 xml:base support
+ 'atom' => array (
+ 'link' => array ('href' => true),
+ 'content' => array ('src' => true, '*xml' => true, '*html' => true),
+ 'summary' => array ('*xml' => true, '*html' => true),
+ 'title' => array ('*xml' => true, '*html' => true),
+ 'rights' => array ('*xml' => true, '*html' => true),
+ 'subtitle' => array ('*xml' => true, '*html' => true),
+ 'info' => array('*xml' => true, '*html' => true),
+ 'tagline' => array('*xml' => true, '*html' => true),
+ 'copyright' => array ('*xml' => true, '*html' => true),
+ 'generator' => array ('uri' => true, 'url' => true),
+ 'uri' => array ('*content' => true),
+ 'url' => array ('*content' => true),
+ 'icon' => array ('*content' => true),
+ 'logo' => array ('*content' => true),
+ ),
+
+ // for inline namespaced XHTML
+ 'xhtml' => array (
+ 'a' => array ('href' => true),
+ 'applet' => array('codebase' => true),
+ 'area' => array('href' => true),
+ 'blockquote' => array('cite' => true),
+ 'body' => array('background' => true),
+ 'del' => array('cite' => true),
+ 'form' => array('action' => true),
+ 'frame' => array('longdesc' => true, 'src' => true),
+ 'iframe' => array('longdesc' => true, 'iframe' => true, 'src' => true),
+ 'head' => array('profile' => true),
+ 'img' => array('longdesc' => true, 'src' => true, 'usemap' => true),
+ 'input' => array('src' => true, 'usemap' => true),
+ 'ins' => array('cite' => true),
+ 'link' => array('href' => true),
+ 'object' => array('classid' => true, 'codebase' => true, 'data' => true, 'usemap' => true),
+ 'q' => array('cite' => true),
+ 'script' => array('src' => true),
+ ),
+ );
+
+ var $_ATOM_CONTENT_CONSTRUCTS = array(
+ 'content', 'summary', 'title', /* common */
+ 'info', 'tagline', 'copyright', /* Atom 0.3 */
+ 'rights', 'subtitle', /* Atom 1.0 */
+ );
+ var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div');
+ var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');
+
+ // parser variables, useless if you're not a parser, treat as private
+ var $stack = array('element' => array (), 'ns' => array (), 'xmlns' => array (), 'xml:base' => array ()); // stack of XML data
+
+ var $inchannel = false;
+ var $initem = false;
+
+ var $incontent = array(); // non-empty if in namespaced XML content field
+ var $xml_escape = false; // true when accepting namespaced XML
+ var $exclude_top = false; // true when Atom 1.0 type="xhtml"
+
+ var $intextinput = false;
+ var $inimage = false;
+ var $root_namespaces = array();
+ var $current_namespace = false;
+ var $working_namespace_table = array();
+
+    /**
+     * Constructor for PHP 5 and later; forwards every argument to MagpieRSS().
+     *
+     * PHP 8 no longer treats a method named after its class as a constructor,
+     * so without this method `new MagpieRSS(...)` would return an object that
+     * never parsed anything. Under PHP 4 this is an ordinary method that is
+     * never called automatically, and MagpieRSS() remains the constructor.
+     *
+     * @see MagpieRSS::MagpieRSS() for the meaning of the parameters.
+     */
+    function __construct ($source, $output_encoding='ISO-8859-1',
+                          $input_encoding=null, $detect_encoding=true, $base_uri=null)
+    {
+        $this->MagpieRSS($source, $output_encoding, $input_encoding,
+            $detect_encoding, $base_uri);
+    }
+
+    /**
+     * Set up the XML parser, parse the source, and populate this object.
+     *
+     * NOTE: Probably a good idea to leave the encoding options alone unless
+     *       you know what you're doing, as PHP's character set support is
+     *       a little weird.
+     *
+     * NOTE: A lot of this is unnecessary but harmless with PHP 5.
+     *
+     * @param string $source          string containing the RSS to be parsed
+     *
+     * @param string $output_encoding output the parsed RSS in this character
+     *                                set; defaults to ISO-8859-1 as this is
+     *                                PHP's default.
+     *
+     *                                NOTE: might be changed to UTF-8 in future
+     *                                versions.
+     *
+     * @param string $input_encoding  the character set of the incoming RSS
+     *                                source. Leave blank and Magpie will try
+     *                                to figure it out.
+     *
+     * @param bool   $detect_encoding if false Magpie won't attempt to detect
+     *                                source encoding. (caveat emptor)
+     *
+     * @param string $base_uri        initial entry of the xml:base stack,
+     *                                against which relative URIs in the feed
+     *                                are resolved; may be null.
+     */
+    function MagpieRSS ($source, $output_encoding='ISO-8859-1',
+                        $input_encoding=null, $detect_encoding=true, $base_uri=null)
+    {
+        # if PHP xml isn't compiled in, die
+        #
+        if (!function_exists('xml_parser_create')) {
+            $this->error( "Failed to load PHP's XML Extension. " .
+                          "http://www.php.net/manual/en/ref.xml.php",
+                          E_USER_ERROR );
+            // error() is defined outside this view; in case it does not halt
+            // the script, stop here rather than calling missing xml_* functions.
+            return;
+        }
+
+        list($parser, $source) = $this->create_parser($source,
+            $output_encoding, $input_encoding, $detect_encoding);
+
+        # xml_parser_create() yields a resource before PHP 8 and an XMLParser
+        # object from PHP 8 on; accept either, reject anything else.
+        if (!is_resource($parser) and !is_object($parser)) {
+            $this->error( "Failed to create an instance of PHP's XML parser. " .
+                          "http://www.php.net/manual/en/ref.xml.php",
+                          E_USER_ERROR );
+            // without a usable parser none of the calls below can succeed
+            return;
+        }
+
+        $this->parser = $parser;
+
+        # pass in parser, and a reference to this object
+        # setup handlers
+        #
+        xml_set_object( $this->parser, $this );
+        xml_set_element_handler($this->parser,
+            'feed_start_element', 'feed_end_element' );
+
+        xml_set_character_data_handler( $this->parser, 'feed_cdata' );
+
+        # seed the xml:base stack with the caller-supplied base URI
+        $this->stack['xml:base'] = array($base_uri);
+
+        $status = xml_parse( $this->parser, $source );
+
+        if (! $status ) {
+            $errorcode = xml_get_error_code( $this->parser );
+            if ( $errorcode != XML_ERROR_NONE ) {
+                $xml_error = xml_error_string( $errorcode );
+                $error_line = xml_get_current_line_number($this->parser);
+                $error_col = xml_get_current_column_number($this->parser);
+                $errormsg = "$xml_error at line $error_line, column $error_col";
+
+                $this->error( $errormsg );
+            }
+        }
+
+        xml_parser_free( $this->parser );
+
+        $this->normalize();
+    }
+
+    /**
+     * Start-tag callback for the XML parser.
+     *
+     * Pushes the element's xml:base and namespace declarations onto their
+     * stacks, resolves relative URIs in the attributes listed in
+     * $_XMLBASE_RESOLVE, and identifies the feed type while it is still
+     * unknown. After that it either re-serializes the tag as text (when
+     * inside a namespaced content construct) or records the element and its
+     * attributes under a path-style key.
+     *
+     * @param mixed  $p          parser handle passed by the XML extension (unused)
+     * @param string $element    tag name as delivered by the parser
+     * @param array  $attributes attribute name => value pairs; taken by
+     *                           reference and modified in place
+     */
+    function feed_start_element($p, $element, &$attributes) {
+        $el = strtolower($element);
+
+        // inherit namespace table and base URI from the enclosing element
+        $namespaces = end($this->stack['xmlns']);
+        $baseuri = end($this->stack['xml:base']);
+
+        if (isset($attributes['xml:base'])) {
+            $baseuri = Relative_URI::resolve($attributes['xml:base'], $baseuri);
+        }
+        array_push($this->stack['xml:base'], $baseuri);
+
+        // scan for xml namespace declarations. ugly ugly ugly.
+        // theoretically we could use xml_set_start_namespace_decl_handler and
+        // xml_set_end_namespace_decl_handler to handle this more elegantly, but
+        // support for these is buggy
+        foreach ($attributes as $attr => $value) {
+            if ( preg_match('/^xmlns(\:([A-Z_a-z].*))?$/', $attr, $match) ) {
+                // '' is the key of the default (unprefixed) namespace
+                $ns = (isset($match[2]) ? $match[2] : '');
+                $namespaces[$ns] = $value;
+            }
+        }
+
+        array_push($this->stack['xmlns'], $namespaces);
+
+        // check for a namespace, and split if found
+        // Don't munge content tags
+        $ns = $this->xmlns($element);
+        if ( empty($this->incontent) ) {
+            $el = strtolower($ns['element']);
+            $this->current_namespace = $ns['effective'];
+            array_push($this->stack['ns'], $ns['effective']);
+        }
+
+        // resolve relative URIs in attributes known to carry them
+        $nsc = $ns['canonical']; $nse = $ns['element'];
+        if ( isset($this->_XMLBASE_RESOLVE[$nsc][$nse]) ) {
+            if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*xml'])) {
+                $attributes['xml:base'] = $baseuri;
+            }
+            foreach ($attributes as $key => $value) {
+                if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse][strtolower($key)])) {
+                    $attributes[$key] = Relative_URI::resolve($attributes[$key], $baseuri);
+                }
+            }
+        }
+
+        $attrs = array_change_key_case($attributes, CASE_LOWER);
+
+        # if feed type isn't set, then this is first element of feed
+        # identify feed from root element
+        #
+        if (!isset($this->feed_type) ) {
+            if ( $el == 'rdf' ) {
+                $this->feed_type = RSS;
+                $this->root_namespaces = array('rss', 'rdf');
+                $this->feed_version = '1.0';
+            }
+            elseif ( $el == 'rss' ) {
+                $this->feed_type = RSS;
+                $this->root_namespaces = array('rss');
+                // the version attribute may be absent; store null rather than
+                // reading an undefined index
+                $this->feed_version = (isset($attrs['version']) ? $attrs['version'] : null);
+            }
+            elseif ( $el == 'feed' ) {
+                $this->feed_type = ATOM;
+                $this->root_namespaces = array('atom');
+                if ($ns['uri'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0
+                    $this->feed_version = '1.0';
+                }
+                else { // Atom 0.3, probably.
+                    $this->feed_version = (isset($attrs['version']) ? $attrs['version'] : null);
+                }
+                $this->inchannel = true;
+            }
+            return;
+        }
+
+        // if we're inside a namespaced content construct, treat tags as text
+        if ( !empty($this->incontent) )
+        {
+            if ((count($this->incontent) > 1) or !$this->exclude_top) {
+                if ($ns['effective']=='xhtml') {
+                    $tag = $ns['element'];
+                }
+                else {
+                    $tag = $element;
+                    $xmlns = 'xmlns';
+                    if (strlen($ns['prefix'])>0) {
+                        $xmlns = $xmlns . ':' . $ns['prefix'];
+                    }
+                    $attributes[$xmlns] = $ns['uri']; // make sure it's visible
+                }
+
+                // if tags are inlined, then flatten
+                $attrs_str = join(' ',
+                    array_map(array($this, 'map_attrs'),
+                        array_keys($attributes),
+                        array_values($attributes) )
+                );
+
+                if (strlen($attrs_str) > 0) { $attrs_str = ' '.$attrs_str; }
+                $this->append_content( "<{$tag}{$attrs_str}>" );
+            }
+            array_push($this->incontent, $ns); // stack for parsing content XML
+        }
+
+        elseif ( $el == 'channel' ) {
+            $this->inchannel = true;
+        }
+
+        elseif ($el == 'item' or $el == 'entry' )
+        {
+            $this->initem = true;
+            if ( isset($attrs['rdf:about']) ) {
+                $this->current_item['about'] = $attrs['rdf:about'];
+            }
+        }
+
+        // if we're in the default namespace of an RSS feed,
+        // record textinput or image fields
+        elseif (
+            $this->feed_type == RSS and
+            $this->current_namespace == '' and
+            $el == 'textinput' )
+        {
+            $this->intextinput = true;
+        }
+
+        elseif (
+            $this->feed_type == RSS and
+            $this->current_namespace == '' and
+            $el == 'image' )
+        {
+            $this->inimage = true;
+        }
+
+        // set stack[0] to current element
+        else {
+            // Atom support many links per containing element.
+            // Magpie treats link elements of type rel='alternate'
+            // as being equivalent to RSS's simple link element.
+
+            $atom_link = false;
+            if ( ($ns['canonical']=='atom') and $el == 'link') {
+                $atom_link = true;
+                if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') {
+                    $el = $el . "_" . $attrs['rel']; // pseudo-element names for Atom link elements
+                }
+            }
+            # handle atom content constructs
+            elseif ( ($ns['canonical']=='atom') and in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
+            {
+                // avoid clashing w/ RSS mod_content
+                if ($el == 'content' ) {
+                    $el = 'atom_content';
+                }
+
+                // assume that everything accepts namespaced XML
+                // (that will pass through some non-validating feeds;
+                // but so what? this isn't a validating parser)
+                $this->incontent = array();
+                array_push($this->incontent, $ns); // start a stack
+
+                $this->xml_escape = $this->accepts_namespaced_xml($attrs);
+
+                if ( isset($attrs['type']) and trim(strtolower($attrs['type']))=='xhtml') {
+                    $this->exclude_top = true;
+                } else {
+                    $this->exclude_top = false;
+                }
+            }
+            # Handle inline XHTML body elements --CWJ
+            elseif ($ns['effective']=='xhtml' and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS)) {
+                $this->current_namespace = 'xhtml';
+                $this->incontent = array();
+                array_push($this->incontent, $ns); // start a stack
+
+                $this->xml_escape = true;
+                $this->exclude_top = false;
+            }
+
+            array_unshift($this->stack['element'], $el);
+            $elpath = join('_', array_reverse($this->stack['element']));
+
+            // count occurrences of this path; repeats get a '#N' suffix
+            $n = $this->element_count($elpath);
+            $this->element_count($elpath, $n+1);
+
+            if ($n > 0) {
+                array_shift($this->stack['element']);
+                array_unshift($this->stack['element'], $el.'#'.($n+1));
+                $elpath = join('_', array_reverse($this->stack['element']));
+            }
+
+            // this makes the baby Jesus cry, but we can't do it in normalize()
+            // because we've made the element name for Atom links unpredictable
+            // by tacking on the relation to the end. -CWJ
+            if ($atom_link and isset($attrs['href'])) {
+                $this->append($elpath, $attrs['href']);
+            }
+
+            // add attributes: '<path>@' holds the comma-separated attribute
+            // names, '<path>@<name>' holds each attribute's value
+            if (count($attrs) > 0) {
+                $this->append($elpath.'@', join(',', array_keys($attrs)));
+                foreach ($attrs as $attr => $value) {
+                    $this->append($elpath.'@'.$attr, $value);
+                }
+            }
+        }
+    }
+
+    /**
+     * Character-data callback for the XML parser.
+     *
+     * Outside a content construct the text is appended under the key built
+     * from the current element stack. Inside one it is appended to the
+     * content being collected, HTML-escaped first when $xml_escape is set.
+     *
+     * @param mixed  $p    parser handle passed by the XML extension (unused)
+     * @param string $text chunk of character data
+     */
+    function feed_cdata ($p, $text) {
+        if (!$this->incontent) {
+            // plain element text: key is the element path, outermost first
+            $path = implode('_', array_reverse($this->stack['element']));
+            $this->append($path, $text);
+            return;
+        }
+
+        $chunk = $this->xml_escape
+            ? htmlspecialchars($text, ENT_COMPAT, $this->encoding)
+            : $text;
+        $this->append_content($chunk);
+    }
+
+ function feed_end_element ($p, $el) {
+ $closer = $this->xmlns($el);
+
+ if ( $this->incontent ) {
+ $opener = array_pop($this->incontent);
+
+ // balance tags properly
+ // note: i don't think this is actually necessary
+ if ($opener != $closer) {
+ array_push($this->incontent, $opener);
+ $this->append_content("<$el />");
+ } elseif ($this->incontent) { // are we in the content construct still?
+ if ((count($this->incontent) > 1) or !$this->exclude_top) {
+ if ($closer['effective']=='xhtml') {
+ $tag = $closer['element'];
+ }
+ else {
+ $tag = $el;
+ }
+ $this->append_content("</$tag>");
+ }
+ } else { // if we're done with the content construct, shift the opening of the content construct off the normal stack
+ array_shift( $this->stack['element'] );
+ }
+ }
+ elseif ($closer['effective'] == '') {
+ $el = strtolower($closer['element']);
+ if ( $el == 'item' or $el == 'entry' ) {
+ $this->items[] = $this->current_item;
+ $this->current_item = array();
+ $this->initem = false;
+ $this->current_category = 0;
+ }
+ elseif ($this->feed_type == RSS and $el == 'textinput' ) {
+ $this->intextinput = false;
+ }
+ elseif ($this->feed_type == RSS and $el == 'image' ) {
+ $this->inimage = false;
+ }
+ elseif ($el == 'channel' or $el == 'feed' ) {
+ $this->inchannel = false;
+ } else {
+ $nsc = $closer['canonical']; $nse = $closer['element'];
+ if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) {
+ // Resolve relative URI in content of tag
+ $this->dereference_current_element();
+ }
+ array_shift( $this->stack['element'] );
+ }
+ } else {
+ $nsc = $closer['canonical']; $nse = strtolower($closer['element']);
+ if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) {
+ // Resolve relative URI in content of tag
+ $this->dereference_current_element();
+ }
+ array_shift( $this->stack['element'] );
+ }
+
+ if ( !$this->incontent ) { // Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ
+ $this->current_namespace = array_pop($this->stack['ns']);
+ }
+ array_pop($this->stack['xmlns']);
+ array_pop($this->stack['xml:base']);
<<Diff was trimmed, longer than 597 lines>>
More information about the pld-cvs-commit
mailing list