Public Member Functions | |
| MagpieRSS ($source, $output_encoding='ISO-8859-1', $input_encoding=null, $detect_encoding=true) | |
| feed_start_element ($p, $element, &$attrs) | |
| feed_cdata ($p, $text) | |
| feed_end_element ($p, $el) | |
| concat (&$str1, $str2="") | |
| append_content ($text) | |
| append ($el, $text) | |
| normalize () | |
| is_rss () | |
| is_atom () | |
| create_parser ($source, $out_enc, $in_enc, $detect) | |
| php5_create_parser ($in_enc, $detect) | |
| php4_create_parser ($source, $in_enc, $detect) | |
| known_encoding ($enc) | |
| error ($errormsg, $lvl=E_USER_WARNING) | |
Data Fields | |
| $parser | |
| $current_item = array() | |
| $items = array() | |
| $channel = array() | |
| $textinput = array() | |
| $image = array() | |
| $feed_type | |
| $feed_version | |
| $encoding = '' | |
| $_source_encoding = '' | |
| $ERROR = "" | |
| $WARNING = "" | |
| $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright') | |
| $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1') | |
| $stack = array() | |
| $inchannel = false | |
| $initem = false | |
| $incontent = false | |
| $intextinput = false | |
| $inimage = false | |
| $current_namespace = false | |
Hybrid parser and object: takes RSS as a string and returns a simple object.
see: rss_fetch.inc for a simpler interface with integrated caching support
Definition at line 34 of file rss_parse.inc.
| append | ( | $ | el, | |
| $ | text | |||
| ) |
Definition at line 335 of file rss_parse.inc.
References $text, concat(), elseif, and image.
Referenced by feed_cdata(), and feed_start_element().
{
    // Append $text to the entry named $el in whichever container is
    // currently open (item, channel, textinput or image). When a
    // namespace prefix is active the entry is stored one level deeper,
    // as container[namespace][element].
    if (!$el) {
        // no element name to key the text on
        return;
    }

    $ns = $this->current_namespace;

    if ($ns) {
        // namespaced element: item wins, then channel, textinput, image
        if ($this->initem) {
            $this->concat($this->current_item[$ns][$el], $text);
            return;
        }
        if ($this->inchannel) {
            $this->concat($this->channel[$ns][$el], $text);
            return;
        }
        if ($this->intextinput) {
            $this->concat($this->textinput[$ns][$el], $text);
            return;
        }
        if ($this->inimage) {
            $this->concat($this->image[$ns][$el], $text);
        }
        return;
    }

    // default namespace: item wins, then textinput, image, and only
    // then the channel (note the order differs from the branch above)
    if ($this->initem) {
        $this->concat($this->current_item[$el], $text);
        return;
    }
    if ($this->intextinput) {
        $this->concat($this->textinput[$el], $text);
        return;
    }
    if ($this->inimage) {
        $this->concat($this->image[$el], $text);
        return;
    }
    if ($this->inchannel) {
        $this->concat($this->channel[$el], $text);
    }
}
| append_content | ( | $ | text | ) |
Definition at line 325 of file rss_parse.inc.
References $text, concat(), and elseif.
Referenced by feed_cdata(), feed_end_element(), and feed_start_element().
| concat | ( | &$ | str1, | |
| $ | str2 = "" | |||
| ) |
Definition at line 316 of file rss_parse.inc.
Referenced by append(), and append_content().
{
    // Append $str2 to $str1 in place. A $str1 for which isset() is
    // false is treated as the empty string before appending.
    $existing = isset($str1) ? $str1 : "";
    $str1 = $existing . $str2;
}
| create_parser | ( | $ | source, | |
| $ | out_enc, | |||
| $ | in_enc, | |||
| $ | detect | |||
| ) |
return XML parser, and possibly re-encoded source
Definition at line 451 of file rss_parse.inc.
References $parser, php4_create_parser(), and php5_create_parser().
Referenced by MagpieRSS().
{
    // Build the XML parser for this PHP version and return
    // array($parser, $source). $source comes back unchanged from the
    // PHP5+ path; the PHP4 path may hand back a re-encoded copy.
    //
    // The version test uses version_compare() rather than comparing the
    // first character of phpversion() with 5: that comparison is false
    // on PHP 7 and later, which wrongly selected the PHP4 code path.
    if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }

    // remember the requested output encoding and ask the parser to
    // deliver text in it
    if ($out_enc) {
        $this->encoding = $out_enc;
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
    }

    return array($parser, $source);
}
| error | ( | $ | errormsg, | |
| $ | lvl = E_USER_WARNING | |||
| ) |
Definition at line 559 of file rss_parse.inc.
Referenced by MagpieRSS(), and php4_create_parser().
{
    // Report $errormsg (via trigger_error() when MAGPIE_DEBUG is truthy,
    // otherwise via error_log()) and store it on the object: in WARNING
    // for notice-level messages, in ERROR for everything else.

    // append PHP's error message if track_errors enabled
    // NOTE(review): $php_errormsg is never assigned inside this block, so
    // this only fires if PHP itself populates it in this scope - confirm.
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    if ( !MAGPIE_DEBUG ) {
        error_log( $errormsg, 0);
    }
    else {
        trigger_error( $errormsg, $lvl);
    }

    // only E_USER_NOTICE / E_NOTICE count as warnings
    $notice_mask = E_USER_NOTICE|E_NOTICE;
    if ( !($lvl & $notice_mask) ) {
        $this->ERROR = $errormsg;
    }
    else {
        $this->WARNING = $errormsg;
    }
}
| feed_cdata | ( | $ | p, | |
| $ | text | |||
| ) |
Definition at line 260 of file rss_parse.inc.
References $text, append(), and append_content().
{
    // Character-data handler: inside an Atom content construct the text
    // goes to append_content(); otherwise it is appended under a key
    // built from the element stack.
    if ( $this->feed_type == ATOM && $this->incontent ) {
        $this->append_content( $text );
        return;
    }

    // the stack is reversed and joined with '_' to form the element key
    $element_key = implode('_', array_reverse($this->stack));
    $this->append($element_key, $text);
}
| feed_end_element | ( | $ | p, | |
| $ | el | |||
| ) |
Definition at line 271 of file rss_parse.inc.
References append_content(), elseif, and items.
{
    // End-tag handler: closes whichever container or content construct
    // the tag belongs to, or pops the element stack for ordinary
    // elements, then clears the current namespace.
    $el = strtolower($el);

    $is_rss     = ( $this->feed_type == RSS );
    $is_atom    = ( $this->feed_type == ATOM );
    $default_ns = ( $this->current_namespace == '' );

    if ( $el == 'item' || $el == 'entry' ) {
        // item finished: store it and start a fresh one
        $this->items[] = $this->current_item;
        $this->current_item = array();
        $this->initem = false;
    }
    elseif ( $is_rss && $default_ns && $el == 'textinput' ) {
        $this->intextinput = false;
    }
    elseif ( $is_rss && $default_ns && $el == 'image' ) {
        $this->inimage = false;
    }
    elseif ( $is_atom && in_array($el, $this->_CONTENT_CONSTRUCTS) ) {
        $this->incontent = false;
    }
    elseif ( $el == 'channel' || $el == 'feed' ) {
        $this->inchannel = false;
    }
    elseif ( $is_atom && $this->incontent ) {
        // balance tags properly
        // note: i don't think this is actually necessary
        if ( $this->stack[0] == $el ) {
            $this->append_content("</$el>");
        }
        else {
            $this->append_content("<$el />");
        }
        array_shift( $this->stack );
    }
    else {
        array_shift( $this->stack );
    }

    $this->current_namespace = false;
}
| feed_start_element | ( | $ | p, | |
| $ | element, | |||
| &$ | attrs | |||
| ) |
Definition at line 146 of file rss_parse.inc.
References append(), append_content(), and elseif.
{
    // Start-tag handler. The first element seen identifies the feed type
    // and version; every later element either opens a container
    // (channel, item, textinput, image), opens or extends an Atom content
    // construct, records an Atom link, or is pushed on the element stack.
    $el = $element = strtolower($element);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    // check for a namespace, and split if found
    // (strpos() is 0, i.e. falsy, for a leading ':' so that is not split)
    $ns = false;
    if ( strpos( $element, ':' ) ) {
        // explode() replaces split(), which was removed in PHP 7; the
        // delimiter is a literal ':' so the result is the same
        list($ns, $el) = explode( ':', $element, 2);
    }
    if ( $ns and $ns != 'rdf' ) {
        $this->current_namespace = $ns;
    }

    // if feed type isn't set, then this is first element of feed
    // identify feed from root element
    if (!isset($this->feed_type) ) {
        // a missing version attribute yields null, without an
        // undefined-index notice
        $version = isset($attrs['version']) ? $attrs['version'] : null;

        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            $this->feed_version = $version;
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;
            $this->feed_version = $version;
            $this->inchannel = true;
        }
        return;
    }

    if ( $el == 'channel' )
    {
        $this->inchannel = true;
    }
    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }
    // if we're in the default namespace of an RSS feed,
    // record textinput or image fields
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        $this->intextinput = true;
    }
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }
    // handle atom content constructs
    elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
    {
        // avoid clashing w/ RSS mod_content
        if ($el == 'content' ) {
            $el = 'atom_content';
        }
        // incontent holds the construct's name, which also makes it truthy
        $this->incontent = $el;
    }
    // if inside an Atom content construct (e.g. content or summary) field treat tags as text
    elseif ($this->feed_type == ATOM and $this->incontent )
    {
        // if tags are inlined, then flatten
        $attrs_str = join(' ',
            array_map('map_attrs',
                array_keys($attrs),
                array_values($attrs) ) );
        $this->append_content( "<$element $attrs_str>" );
        array_unshift( $this->stack, $el );
    }
    // Atom supports many links per containing element.
    // Magpie treats link elements of type rel='alternate'
    // as being equivalent to RSS's simple link element.
    elseif ($this->feed_type == ATOM and $el == 'link' )
    {
        // missing rel/href attributes are read as empty strings, which
        // gives the same keys and values as before without the notices
        $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
        $href = isset($attrs['href']) ? $attrs['href'] : '';

        if ( $rel == 'alternate' ) {
            $link_el = 'link';
        }
        else {
            $link_el = 'link_' . $rel;
        }
        $this->append($link_el, $href);
    }
    // set stack[0] to current element
    else {
        array_unshift($this->stack, $el);
    }
}
| is_atom | ( | ) |
Definition at line 438 of file rss_parse.inc.
Referenced by normalize().
{
    // Returns the feed version when the feed type is ATOM, else false.
    return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
}
| is_rss | ( | ) |
Definition at line 429 of file rss_parse.inc.
Referenced by normalize().
{
    // Returns the feed version when the feed type is RSS, else false.
    return ( $this->feed_type == RSS ) ? $this->feed_version : false;
}
| known_encoding | ( | $ | enc | ) |
Definition at line 549 of file rss_parse.inc.
Referenced by php4_create_parser().
{
    // Returns the upper-cased encoding name when it is listed in
    // $this->_KNOWN_ENCODINGS, otherwise false.
    $normalized = strtoupper($enc);
    $is_known = in_array($normalized, $this->_KNOWN_ENCODINGS);

    return $is_known ? $normalized : false;
}
| MagpieRSS | ( | $ | source, | |
| $ | output_encoding = 'ISO-8859-1', |
|||
| $ | input_encoding = null, |
|||
| $ | detect_encoding = true | |||
| ) |
Set up XML parser, parse source, and return populated RSS object.
| string | $source string containing the RSS to be parsed |
NOTE: Probably a good idea to leave the encoding options alone unless you know what you're doing as PHP's character set support is a little weird.
NOTE: A lot of this is unnecessary but harmless with PHP5
| string | $output_encoding output the parsed RSS in this character set defaults to ISO-8859-1 as this is PHP's default. |
NOTE: might be changed to UTF-8 in future versions.
| string | $input_encoding the character set of the incoming RSS source. Leave blank and Magpie will try to figure it out. | |
| bool | $detect_encoding if false Magpie won't attempt to detect source encoding. (caveat emptor) |
Definition at line 94 of file rss_parse.inc.
References $parser, $status, create_parser(), E_USER_ERROR, error(), and normalize().
{
    // Constructor: creates the XML parser, registers this object's
    // handlers, parses $source, records any parse error through
    // error(), frees the parser and finally normalizes the result.
    // When the XML extension or the parser itself is unavailable the
    // failure is reported and construction stops there, instead of
    // carrying on with an unusable parser.

    // if PHP xml isn't compiled in, report it and stop
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
            "http://www.php.net/manual/en/ref.xml.php",
            E_USER_ERROR );
        return;
    }

    list($parser, $source) = $this->create_parser($source,
        $output_encoding, $input_encoding, $detect_encoding);

    // xml_parser_create() returns a resource on older PHP versions and
    // an XMLParser object on PHP 8, so both must be accepted
    if (!is_resource($parser) && !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
            "http://www.php.net/manual/en/ref.xml.php",
            E_USER_ERROR );
        return;
    }

    $this->parser = $parser;

    // pass in parser, and a reference to this object
    // setup handlers
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
        'feed_start_element', 'feed_end_element' );
    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    $status = xml_parse( $this->parser, $source );

    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            // describe what went wrong and where
            $xml_error = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col = xml_get_current_column_number($this->parser);
            $errormsg = "$xml_error at line $error_line, column $error_col";
            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );

    $this->normalize();
}
| normalize | ( | ) |
Definition at line 379 of file rss_parse.inc.
References count, elseif, is_atom(), is_rss(), items, and parse_w3cdtf().
Referenced by MagpieRSS().
{
    // Mirror Atom fields onto their RSS names (and vice versa) so
    // callers can read either vocabulary, and add a 'date_timestamp'
    // entry to each item whose date can be parsed.
    // Optional fields are read through isset() so feeds that omit them
    // no longer raise undefined-index notices; the stored values are the
    // same as before (null where the source field is missing).

    // if atom populate rss fields
    if ( $this->is_atom() ) {
        $this->channel['description'] =
            isset($this->channel['tagline']) ? $this->channel['tagline'] : null;

        $item_count = count($this->items);
        for ( $i = 0; $i < $item_count; $i++) {
            $item = $this->items[$i];

            if ( isset($item['summary']) ) {
                $item['description'] = $item['summary'];
            }
            if ( isset($item['atom_content']) ) {
                $item['content']['encoded'] = $item['atom_content'];
            }

            // prefer 'issued', fall back to 'modified'
            if ( isset($item['issued']) ) {
                $atom_date = $item['issued'];
            }
            elseif ( isset($item['modified']) ) {
                $atom_date = $item['modified'];
            }
            else {
                $atom_date = null;
            }

            if ( $atom_date ) {
                $epoch = @parse_w3cdtf($atom_date);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
    elseif ( $this->is_rss() ) {
        $this->channel['tagline'] =
            isset($this->channel['description']) ? $this->channel['description'] : null;

        $item_count = count($this->items);
        for ( $i = 0; $i < $item_count; $i++) {
            $item = $this->items[$i];

            if ( isset($item['description']) ) {
                $item['summary'] = $item['description'];
            }
            if ( isset($item['content']['encoded']) ) {
                $item['atom_content'] = $item['content']['encoded'];
            }

            // RSS 1.0 items with dc:date go through parse_w3cdtf();
            // otherwise pubdate is handed to strtotime()
            if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                $epoch = @parse_w3cdtf($item['dc']['date']);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }
            elseif ( isset($item['pubdate']) ) {
                $epoch = @strtotime($item['pubdate']);
                if ($epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
}
| php4_create_parser | ( | $ | source, | |
| $ | in_enc, | |||
| $ | detect | |||
| ) |
Instantiate an XML parser under PHP4
Unfortunately PHP4's support for character encodings and especially XML and character encodings sucks. As long as the documents you parse only contain characters from the ISO-8859-1 character set (a superset of ASCII, and a subset of UTF-8) you're fine. However once you step out of that comfy little world things get mad, bad, and dangerous to know.
The following code is based on SJM's work with FoF
Definition at line 500 of file rss_parse.inc.
References error(), and known_encoding().
Referenced by create_parser().
{
    // Returns array($parser, $source). With detection enabled the input
    // encoding is taken from $in_enc or sniffed from the XML declaration
    // (defaulting to UTF-8); sources in an encoding outside
    // _KNOWN_ENCODINGS are converted to UTF-8 with iconv or mbstring
    // when either is available.

    if ( !$detect ) {
        return array(xml_parser_create($in_enc), $source);
    }

    if (!$in_enc) {
        // The '?' of '<?xml' must be escaped: unescaped it made the '<'
        // optional, so any text containing "xml ... encoding=" matched.
        // [^>]* keeps the match inside a single tag and also allows the
        // declaration to span more than one line.
        if (preg_match('/<\?xml[^>]*encoding=[\'"](.*?)[\'"][^>]*>/', $source, $m)) {
            $in_enc = strtoupper($m[1]);
            // record in the declared field; the undeclared
            // 'source_encoding' property the code used to write is kept
            // as well in case existing callers read it
            $this->_source_encoding = $in_enc;
            $this->source_encoding = $in_enc;
        }
        else {
            $in_enc = 'UTF-8';
        }
    }

    if ($this->known_encoding($in_enc)) {
        return array(xml_parser_create($in_enc), $source);
    }

    // the detected encoding is not one of the simple encodings PHP knows
    // attempt to use the iconv extension to
    // cast the XML to a known encoding
    // @see http://php.net/iconv
    if (function_exists('iconv')) {
        $encoded_source = iconv($in_enc,'UTF-8', $source);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // iconv didn't work, try mb_convert_encoding
    // @see http://php.net/mbstring
    if (function_exists('mb_convert_encoding')) {
        $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // nothing could convert the source: warn and parse it as it is
    $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
        "You may see strange artifacts, and mangled characters.",
        E_USER_NOTICE);

    return array(xml_parser_create(), $source);
}
| php5_create_parser | ( | $ | in_enc, | |
| $ | detect | |||
| ) |
Instantiate an XML parser under PHP5
PHP5 will do a fine job of detecting input encoding if passed an empty string as the encoding.
All hail libxml2!
Definition at line 475 of file rss_parse.inc.
Referenced by create_parser().
{
    // by default php5 does a fine job of detecting input encodings, so
    // the caller's encoding is only used when detection is switched off
    // and an encoding was actually given; otherwise '' is passed
    $parser_encoding = ( !$detect && $in_enc ) ? $in_enc : '';

    return xml_parser_create($parser_encoding);
}
Definition at line 53 of file rss_parse.inc.
| $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1') |
Definition at line 54 of file rss_parse.inc.
| $_source_encoding = '' |
Definition at line 46 of file rss_parse.inc.
| $channel = array() |
Definition at line 39 of file rss_parse.inc.
| $current_item = array() |
Definition at line 37 of file rss_parse.inc.
| $current_namespace = false |
Definition at line 63 of file rss_parse.inc.
| $encoding = '' |
Definition at line 44 of file rss_parse.inc.
| $ERROR = "" |
Definition at line 48 of file rss_parse.inc.
| $feed_type |
Definition at line 42 of file rss_parse.inc.
| $feed_version |
Definition at line 43 of file rss_parse.inc.
| $image = array() |
Definition at line 41 of file rss_parse.inc.
| $inchannel = false |
Definition at line 58 of file rss_parse.inc.
| $incontent = false |
Definition at line 60 of file rss_parse.inc.
| $inimage = false |
Definition at line 62 of file rss_parse.inc.
| $initem = false |
Definition at line 59 of file rss_parse.inc.
| $intextinput = false |
Definition at line 61 of file rss_parse.inc.
| $items = array() |
Definition at line 38 of file rss_parse.inc.
| $parser |
Definition at line 35 of file rss_parse.inc.
Referenced by create_parser(), and MagpieRSS().
| $stack = array() |
Definition at line 57 of file rss_parse.inc.
| $textinput = array() |
Definition at line 40 of file rss_parse.inc.
| $WARNING = "" |
Definition at line 49 of file rss_parse.inc.
For more help developing with SiT! see http://sitracker.org/wiki/DevelopmentHowTo