Public Member Functions | Data Fields

MagpieRSS Class Reference

Public Member Functions

 MagpieRSS ($source, $output_encoding='ISO-8859-1', $input_encoding=null, $detect_encoding=true)
 feed_start_element ($p, $element, &$attrs)
 feed_cdata ($p, $text)
 feed_end_element ($p, $el)
 concat (&$str1, $str2="")
 append_content ($text)
 append ($el, $text)
 normalize ()
 is_rss ()
 is_atom ()
 create_parser ($source, $out_enc, $in_enc, $detect)
 php5_create_parser ($in_enc, $detect)
 php4_create_parser ($source, $in_enc, $detect)
 known_encoding ($enc)
 error ($errormsg, $lvl=E_USER_WARNING)

Data Fields

 $parser
 $current_item = array()
 $items = array()
 $channel = array()
 $textinput = array()
 $image = array()
 $feed_type
 $feed_version
 $encoding = ''
 $_source_encoding = ''
 $ERROR = ""
 $WARNING = ""
 $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright')
 $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1')
 $stack = array()
 $inchannel = false
 $initem = false
 $incontent = false
 $intextinput = false
 $inimage = false
 $current_namespace = false

Detailed Description

Hybrid parser and object: takes an RSS feed as a string and returns a simple object.

see: rss_fetch.inc for a simpler interface with integrated caching support

Definition at line 34 of file rss_parse.inc.


Member Function Documentation

append ( el,
text 
)

Definition at line 335 of file rss_parse.inc.

References $text, concat(), elseif, and image.

Referenced by feed_cdata(), and feed_start_element().

                                {
        // Without an element name there is no key to store the text under.
        if (!$el) {
            return;
        }

        $ns = $this->current_namespace;

        // Namespaced element: text is filed under [namespace][element].
        // Container precedence here is item, channel, textinput, image.
        if ( $ns ) {
            if ( $this->initem ) {
                $this->concat( $this->current_item[ $ns ][ $el ], $text );
            }
            elseif ( $this->inchannel ) {
                $this->concat( $this->channel[ $ns ][ $el ], $text );
            }
            elseif ( $this->intextinput ) {
                $this->concat( $this->textinput[ $ns ][ $el ], $text );
            }
            elseif ( $this->inimage ) {
                $this->concat( $this->image[ $ns ][ $el ], $text );
            }
            return;
        }

        // No namespace: text is filed directly under the element name.
        // Container precedence here is item, textinput, image, channel.
        if ( $this->initem ) {
            $this->concat( $this->current_item[ $el ], $text );
        }
        elseif ( $this->intextinput ) {
            $this->concat( $this->textinput[ $el ], $text );
        }
        elseif ( $this->inimage ) {
            $this->concat( $this->image[ $el ], $text );
        }
        elseif ( $this->inchannel ) {
            $this->concat( $this->channel[ $el ], $text );
        }
    }

append_content ( text  ) 

Definition at line 325 of file rss_parse.inc.

References $text, concat(), and elseif.

Referenced by feed_cdata(), feed_end_element(), and feed_start_element().

                                   {
        // $this->incontent holds the name of the content construct being read;
        // it is used as the key the text is appended to.
        $field = $this->incontent;

        if ( $this->initem ) {
            $this->concat( $this->current_item[ $field ], $text );
            return;
        }

        if ( $this->inchannel ) {
            $this->concat( $this->channel[ $field ], $text );
        }
    }

concat ( &$  str1,
str2 = "" 
)

Definition at line 316 of file rss_parse.inc.

Referenced by append(), and append_content().

                                       {
        // $str1 arrives by reference and may not be set yet; start from an
        // empty string in that case, then append $str2.
        $str1 = ( isset($str1) ? $str1 : "" ) . $str2;
    }

create_parser ( source,
out_enc,
in_enc,
detect 
)

Returns the XML parser and the (possibly re-encoded) source.

Definition at line 451 of file rss_parse.inc.

References $parser, php4_create_parser(), and php5_create_parser().

Referenced by MagpieRSS().

                                                                {
        // Pick the parser factory by PHP major version.
        // The previous test compared only the first character of phpversion()
        // against 5, so PHP 7 and later were wrongly routed to the PHP4 path.
        if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
            $parser = $this->php5_create_parser($in_enc, $detect);
        }
        else {
            // the PHP4 factory may also hand back a re-encoded copy of the source
            list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
        }

        // Record the requested output encoding and tell the parser to emit it.
        if ($out_enc) {
            $this->encoding = $out_enc;
            xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
        }

        // array(parser, source) -- the caller unpacks this with list()
        return array($parser, $source);
    }

error ( errormsg,
lvl = E_USER_WARNING 
)

Definition at line 559 of file rss_parse.inc.

Referenced by MagpieRSS(), and php4_create_parser().

                                                    {
        // append PHP's own error message when it is available in this scope
        // (see the track_errors ini setting)
        if ( isset($php_errormsg) ) {
            $errormsg = $errormsg . " ($php_errormsg)";
        }

        // In debug mode raise a PHP error at the requested level;
        // otherwise just write the message to the error log.
        if ( MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl );
        }
        else {
            error_log( $errormsg, 0 );
        }

        // Notice-level messages are remembered as warnings,
        // everything else as errors.
        if ( $lvl & (E_USER_NOTICE|E_NOTICE) ) {
            $this->WARNING = $errormsg;
            return;
        }

        $this->ERROR = $errormsg;
    }

feed_cdata ( p,
text 
)

Definition at line 260 of file rss_parse.inc.

References $text, append(), and append_content().

                                    {
        // Inside an Atom content construct the text belongs to that construct.
        if ( $this->feed_type == ATOM and $this->incontent ) {
            $this->append_content( $text );
            return;
        }

        // Otherwise the storage key is the element stack, reversed and
        // joined with underscores.
        $path = array_reverse($this->stack);
        $this->append( join('_', $path), $text );
    }

feed_end_element ( p,
el 
)

Definition at line 271 of file rss_parse.inc.

References append_content(), elseif, and items.

                                        {
        // End-element handler registered with the XML parser.
        // $p  - the parser (unused here)
        // $el - name of the element being closed; only lower-cased here,
        //       no namespace prefix is split off
        // The branches below are tested in order; the first match wins.
        $el = strtolower($el);
        
        if ( $el == 'item' or $el == 'entry' ) 
        {
            // item/entry finished: store it and start a fresh one
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' ) 
        {
            // leaving an un-namespaced RSS <textinput>
            $this->intextinput = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' ) 
        {
            // leaving an un-namespaced RSS <image>
            $this->inimage = false;
        }
        elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {   
            // leaving an Atom content construct (names listed in $_CONTENT_CONSTRUCTS)
            $this->incontent = false;
        }
        elseif ($el == 'channel' or $el == 'feed' ) 
        {
            // leaving the channel (RSS) / feed (Atom) container
            $this->inchannel = false;
        }
        elseif ($this->feed_type == ATOM and $this->incontent  ) {
            // balance tags properly
            // note:  i don't think this is actually necessary
            // If the closing element matches the top of the stack emit a
            // closing tag, otherwise emit a self-closing tag, as text.
            if ( $this->stack[0] == $el ) 
            {
                $this->append_content("</$el>");
            }
            else {
                $this->append_content("<$el />");
            }

            array_shift( $this->stack );
        }
        else {
            // ordinary element: pop it off the element stack
            array_shift( $this->stack );
        }
        
        // every closing tag resets the namespace tracked for the current element
        $this->current_namespace = false;
    }

feed_start_element ( p,
element,
&$  attrs 
)

Definition at line 146 of file rss_parse.inc.

References append(), append_content(), and elseif.

                                                       {
        // Start-element handler registered with the XML parser.
        // $p       - the parser (unused here)
        // $element - element name as reported by the parser
        // $attrs   - attribute map (taken by reference; keys are lower-cased below)
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);
        
        // check for a namespace, and split if found
        // (explode() replaces split(), which was removed in PHP 7; for a
        //  literal ':' delimiter and a limit of 2 the result is the same)
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }
            
        # if feed type isn't set, then this is first element of feed
        # identify feed from root element
        #
        if (!isset($this->feed_type) ) {
            // the version attribute is optional; fall back to null instead of
            // reading an undefined index
            $version = isset($attrs['version']) ? $attrs['version'] : null;

            if ( $el == 'rdf' ) {
                $this->feed_type = RSS;
                $this->feed_version = '1.0';
            }
            elseif ( $el == 'rss' ) {
                $this->feed_type = RSS;
                $this->feed_version = $version;
            }
            elseif ( $el == 'feed' ) {
                $this->feed_type = ATOM;
                $this->feed_version = $version;
                $this->inchannel = true;
            }
            return;
        }
    
        if ( $el == 'channel' ) 
        {
            $this->inchannel = true;
        }
        elseif ($el == 'item' or $el == 'entry' ) 
        {
            $this->initem = true;
            if ( isset($attrs['rdf:about']) ) {
                $this->current_item['about'] = $attrs['rdf:about']; 
            }
        }
        
        // if we're in the default namespace of an RSS feed,
        //  record textinput or image fields
        elseif ( 
            $this->feed_type == RSS and 
            $this->current_namespace == '' and 
            $el == 'textinput' ) 
        {
            $this->intextinput = true;
        }
        
        elseif (
            $this->feed_type == RSS and 
            $this->current_namespace == '' and 
            $el == 'image' ) 
        {
            $this->inimage = true;
        }
        
        # handle atom content constructs
        elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }
            
            // remember which construct subsequent text belongs to
            $this->incontent = $el;
        }
        
        // if inside an Atom content construct (e.g. content or summary) field treat tags as text
        elseif ($this->feed_type == ATOM and $this->incontent ) 
        {
            // if tags are inlined, then flatten
            // (map_attrs is a helper defined outside this class)
            $attrs_str = join(' ', 
                    array_map('map_attrs', 
                    array_keys($attrs), 
                    array_values($attrs) ) );
            
            $this->append_content( "<$element $attrs_str>"  );
                    
            array_unshift( $this->stack, $el );
        }
        
        // Atom supports many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.
        //
        elseif ($this->feed_type == ATOM and $el == 'link' ) 
        {
            // rel and href are both optional; default to '' so the resulting
            // key ('link_') and value match what the unguarded reads produced,
            // without touching undefined indexes
            $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
            $href = isset($attrs['href']) ? $attrs['href'] : '';

            if ( $rel == 'alternate' ) 
            {
                $link_el = 'link';
            }
            else {
                $link_el = 'link_' . $rel;
            }
            
            $this->append($link_el, $href);
        }
        // set stack[0] to current element
        else {
            array_unshift($this->stack, $el);
        }
    }

is_atom (  ) 

Definition at line 438 of file rss_parse.inc.

Referenced by normalize().

                       {
        // Returns the feed version when the feed was identified as Atom,
        // false otherwise.
        return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
    }

is_rss (  ) 

Definition at line 429 of file rss_parse.inc.

Referenced by normalize().

                       {
        // Returns the feed version when the feed was identified as RSS,
        // false otherwise.
        return ( $this->feed_type == RSS ) ? $this->feed_version : false;
    }

known_encoding ( enc  ) 

Definition at line 549 of file rss_parse.inc.

Referenced by php4_create_parser().

                                  {
        // Compare case-insensitively by upper-casing first; on a match the
        // upper-cased name is returned, otherwise false.
        $normalized = strtoupper($enc);

        return in_array($normalized, $this->_KNOWN_ENCODINGS) ? $normalized : false;
    }

MagpieRSS ( source,
output_encoding = 'ISO-8859-1',
input_encoding = null,
detect_encoding = true 
)

Set up the XML parser, parse the source, and return a populated RSS object.

Parameters:
string $source string containing the RSS to be parsed

NOTE: Probably a good idea to leave the encoding options alone unless you know what you're doing as PHP's character set support is a little weird.

NOTE: A lot of this is unnecessary but harmless with PHP5

Parameters:
string $output_encoding output the parsed RSS in this character set defaults to ISO-8859-1 as this is PHP's default.

NOTE: might be changed to UTF-8 in future versions.

Parameters:
string $input_encoding the character set of the incoming RSS source. Leave blank and Magpie will try to figure it out.
bool $detect_encoding if false Magpie won't attempt to detect source encoding. (caveat emptor)

Definition at line 94 of file rss_parse.inc.

References $parser, $status, create_parser(), E_USER_ERROR, error(), and normalize().

    {
        # if PHP xml isn't compiled in, report it and stop:
        # nothing below can work without the extension
        #
        if (!function_exists('xml_parser_create')) {
            $this->error( "Failed to load PHP's XML Extension. " . 
                          "http://www.php.net/manual/en/ref.xml.php",
                           E_USER_ERROR );
            return;
        }
        
        list($parser, $source) = $this->create_parser($source, 
                $output_encoding, $input_encoding, $detect_encoding);
        
        # xml_parser_create() returns a resource on older PHP versions and an
        # XMLParser object on PHP 8+, so accept either; on failure record the
        # error and stop rather than handing an invalid parser to the xml_* calls
        #
        if (!is_resource($parser) && !is_object($parser)) {
            $this->error( "Failed to create an instance of PHP's XML parser. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        $this->parser = $parser;
        
        # pass in parser, and a reference to this object
        # setup handlers
        #
        xml_set_object( $this->parser, $this );
        xml_set_element_handler($this->parser, 
                'feed_start_element', 'feed_end_element' );
                        
        xml_set_character_data_handler( $this->parser, 'feed_cdata' ); 
    
        $status = xml_parse( $this->parser, $source );
        
        # on a parse failure, record the parser's message with its position
        if (! $status ) {
            $errorcode = xml_get_error_code( $this->parser );
            if ( $errorcode != XML_ERROR_NONE ) {
                $xml_error = xml_error_string( $errorcode );
                $error_line = xml_get_current_line_number($this->parser);
                $error_col = xml_get_current_column_number($this->parser);
                $errormsg = "$xml_error at line $error_line, column $error_col";

                $this->error( $errormsg );
            }
        }
        
        xml_parser_free( $this->parser );

        # fill in the cross-format (RSS <-> Atom) fields for whatever was parsed
        $this->normalize();
    }

normalize (  ) 

Definition at line 379 of file rss_parse.inc.

References count, elseif, is_atom(), is_rss(), items, and parse_w3cdtf().

Referenced by MagpieRSS().

                          {
        // Copies fields between their Atom and RSS names so callers can use
        // either vocabulary, and derives $item['date_timestamp'] where a
        // date can be parsed. Optional fields are read through isset() so a
        // feed that omits them does not trigger undefined-index notices.

        // if atom populate rss fields
        if ( $this->is_atom() ) {
            // key is always written; value is null when the feed has no tagline
            $this->channel['description'] =
                isset($this->channel['tagline']) ? $this->channel['tagline'] : null;

            $item_count = count($this->items);
            for ( $i = 0; $i < $item_count; $i++) {
                $item = $this->items[$i];
                if ( isset($item['summary']) )
                    $item['description'] = $item['summary'];
                if ( isset($item['atom_content']))
                    $item['content']['encoded'] = $item['atom_content'];
                
                // prefer 'issued', fall back to 'modified', else no date
                $atom_date = null;
                if ( isset($item['issued']) ) {
                    $atom_date = $item['issued'];
                }
                elseif ( isset($item['modified']) ) {
                    $atom_date = $item['modified'];
                }

                if ( $atom_date ) {
                    // parse_w3cdtf is a helper defined outside this class
                    $epoch = @parse_w3cdtf($atom_date);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }
                
                $this->items[$i] = $item;
            }       
        }
        // if rss populate atom fields
        elseif ( $this->is_rss() ) {
            // key is always written; value is null when the feed has no description
            $this->channel['tagline'] =
                isset($this->channel['description']) ? $this->channel['description'] : null;

            $item_count = count($this->items);
            for ( $i = 0; $i < $item_count; $i++) {
                $item = $this->items[$i];
                if ( isset($item['description']))
                    $item['summary'] = $item['description'];
                if ( isset($item['content']['encoded'] ) )
                    $item['atom_content'] = $item['content']['encoded'];
                
                // RSS 1.0 carries the date in dc:date, otherwise use pubdate
                if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                    $epoch = @parse_w3cdtf($item['dc']['date']);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }
                elseif ( isset($item['pubdate']) ) {
                    $epoch = @strtotime($item['pubdate']);
                    if ($epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }
                
                $this->items[$i] = $item;
            }
        }
    }

php4_create_parser ( source,
in_enc,
detect 
)

Instantiate an XML parser under PHP4

Unfortunately PHP4's support for character encodings and especially XML and character encodings sucks. As long as the documents you parse only contain characters from the ISO-8859-1 character set (a superset of ASCII, and a subset of UTF-8) you're fine. However once you step out of that comfy little world things get mad, bad, and dangerous to know.

The following code is based on SJM's work with FoF

See also:
http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss

Definition at line 500 of file rss_parse.inc.

References error(), and known_encoding().

Referenced by create_parser().

                                                           {
        // Returns array(parser, source); source may be a re-encoded copy.

        // detection disabled: trust the caller's encoding as given
        if ( !$detect ) {
            return array(xml_parser_create($in_enc), $source);
        }
        
        if (!$in_enc) {
            // Look for the encoding named in the XML declaration.
            // The '?' of '<?xml' is escaped so it is matched literally
            // instead of making the '<' optional.
            if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
                $in_enc = strtoupper($m[1]);
                // store in the declared $_source_encoding field
                $this->_source_encoding = $in_enc;
            }
            else {
                $in_enc = 'UTF-8';
            }
        }
        
        if ($this->known_encoding($in_enc)) {
            return array(xml_parser_create($in_enc), $source);
        }
        
        // the detected encoding is not one of the simple encodings PHP knows
        
        // attempt to use the iconv extension to
        // cast the XML to a known encoding
        // @see http://php.net/iconv
       
        if (function_exists('iconv'))  {
            $encoded_source = iconv($in_enc,'UTF-8', $source);
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }
        
        // iconv didn't work, try mb_convert_encoding
        // @see http://php.net/mbstring
        if(function_exists('mb_convert_encoding')) {
            $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }
        
        // else: no conversion available -- warn and parse the source untouched
        $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                     "You may see strange artifacts, and mangled characters.",
                     E_USER_NOTICE);
            
        return array(xml_parser_create(), $source);
    }

php5_create_parser ( in_enc,
detect 
)

Instantiate an XML parser under PHP5

PHP5 will do a fine job of detecting input encoding if passed an empty string as the encoding.

All hail libxml2!

Definition at line 475 of file rss_parse.inc.

Referenced by create_parser().

                                                  {
        // by default php5 does a fine job of detecting input encodings, so an
        // explicit encoding is only used when detection is switched off and
        // the caller actually supplied one; otherwise pass the empty string
        $encoding = ( !$detect && $in_enc ) ? $in_enc : '';

        return xml_parser_create($encoding);
    }


Field Documentation

$_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright')

Definition at line 53 of file rss_parse.inc.

$_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1')

Definition at line 54 of file rss_parse.inc.

$_source_encoding = ''

Definition at line 46 of file rss_parse.inc.

$channel = array()

Definition at line 39 of file rss_parse.inc.

$current_item = array()

Definition at line 37 of file rss_parse.inc.

$current_namespace = false

Definition at line 63 of file rss_parse.inc.

$encoding = ''

Definition at line 44 of file rss_parse.inc.

$ERROR = ""

Definition at line 48 of file rss_parse.inc.

$feed_type

Definition at line 42 of file rss_parse.inc.

$feed_version

Definition at line 43 of file rss_parse.inc.

$image = array()

Definition at line 41 of file rss_parse.inc.

$inchannel = false

Definition at line 58 of file rss_parse.inc.

$incontent = false

Definition at line 60 of file rss_parse.inc.

$inimage = false

Definition at line 62 of file rss_parse.inc.

$initem = false

Definition at line 59 of file rss_parse.inc.

$intextinput = false

Definition at line 61 of file rss_parse.inc.

$items = array()

Definition at line 38 of file rss_parse.inc.

$parser

Definition at line 35 of file rss_parse.inc.

Referenced by create_parser(), and MagpieRSS().

$stack = array()

Definition at line 57 of file rss_parse.inc.

$textinput = array()

Definition at line 40 of file rss_parse.inc.

$WARNING = ""

Definition at line 49 of file rss_parse.inc.


The documentation for this class was generated from the following file: rss_parse.inc

For more help developing with SiT! see http://sitracker.org/wiki/DevelopmentHowTo

© 2008-2011 Support Incident Tracker

Tsohost Logo