奶瓶
(NP博士)
版主
  
老仙
UID 52707
精华
4
积分 5573
帖子 6252
金钱 5523 喜悦币
威望 50
人脉 0
阅读权限 100
注册 2004-11-22 来自 北大中文系
状态 离线
|
[推荐阅读] 迪士尼中国招聘通知
RSS你可以选择解析成数组,也可以直接用CSS、XSLT什么的把它格式化了
解析RSS的东西很多,phpclasses上搜能搜到,直接用PHP的XML Parser扩展(xml_parser_create)解析一下也可以凑合用
RSS有三种常用标准,RSS0.91/RSS1.0/RSS2.0,其中2.0是0.9x上发展来的,和1.0其实无继承关系,是两伙人搞的,所以结构有点区别
这是简单的拆解过程:
<?php
/** BSMLITE PROJECT
 * Category      : library
 * Filename      : feed_parser.lib.php
 * Author        : Dr.NP <bssoft@263.net>
 * Copyright (c) : 2006-2008 BS.Group, all rights reserved
 * Version       : 1.0.1
LICENSE
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
DESCRIPTION
* Feed parser - RSS 0.9 / RSS 1.0 / RSS 2.0 / ATOM feeds
CHANGELOG
 * 05/08/2008 (1.0.0) - Library creation
 * 05/15/2008 (1.0.1) - ATOM supported
PACKAGE * @package bsm
*/
/**
 * Event-driven feed parser that flattens an RSS or ATOM document into a
 * nested array.
 *
 * Shape of the array returned by parse():
 *   - 'VERSION'      => 'RSS' or 'ATOM' (set when the first <item>/<entry> opens)
 *   - '<TAG>'        => text of each element seen outside any item/entry
 *   - 'IMAGE'        => array of '<TAG>' => text for children of a feed-level <image>
 *   - 'FEED'         => list of items, each an array of '<TAG>' => text plus
 *                       'PUBTIMESTAMP' (Unix time) and 'DESCRIPTION'
 *
 * Tag names are upper-cased. Element attributes are ignored.
 */
class BsmFeedParser
{
    public $classname = 'BsmFeedParser';

    // Format detected during parsing: 'RSS' or 'ATOM'
    public $version;

    // Raw XML text to parse
    public $content = '';

    // Declared for callers; no method of this class writes to these
    public $title;
    public $ttl;
    public $date;
    public $image;

    // Return value of parse()
    public $feed = array();

    // Declared for callers; no method of this class writes to it
    public $depth = array();

    // XML parser handle; only set while parse() is running
    public $xml_parser;

    // Parsing status
    public $in_item = false;
    public $in_image = false;
    public $in_entry = false;
    public $current_element = '';
    public $current_item = 0;

    /**
     * @param string|null $content Raw feed XML; may also be supplied later via set_content()
     */
    public function __construct($content = null)
    {
        if ($content) {
            $this->content = $content;
        }
    }

    /**
     * Set the feed content. Empty values are ignored and leave the previous
     * content in place.
     *
     * @param string $content Raw feed XML
     */
    public function set_content($content)
    {
        if ($content) {
            $this->content = $content;
        }
    }

    /**
     * Parse the current content.
     *
     * Parsing is best-effort: a malformed document yields whatever was
     * collected before the parser stopped.
     *
     * @return array The feed array described on the class
     */
    public function parse()
    {
        // Start from a clean slate so a reused instance does not append to
        // the results of an earlier parse() call.
        $this->feed = array();
        $this->version = null;
        $this->in_item = false;
        $this->in_image = false;
        $this->in_entry = false;
        $this->current_element = '';
        $this->current_item = 0;

        $this->xml_parser = xml_parser_create();

        // Array callables replace xml_set_object() + method-name strings,
        // which newer PHP versions deprecate.
        xml_set_element_handler(
            $this->xml_parser,
            array($this, '_tag_open'),
            array($this, '_tag_close')
        );
        xml_set_character_data_handler($this->xml_parser, array($this, '_tag_data'));

        // The whole document is passed in one call, so mark it as the final chunk.
        xml_parse($this->xml_parser, $this->content, true);

        // Drop the handle: the parser holds callbacks pointing back at $this,
        // so keeping it would keep both alive.
        $this->xml_parser = null;

        return $this->_rebuild_feed();
    }

    /**
     * Handler: tag open. Tracks the current element and the item/image/entry state.
     */
    private function _tag_open($parser, $tag_name, $tag_attrib)
    {
        $tag_name = strtoupper($tag_name);
        $this->current_element = $tag_name;

        if ('ITEM' === $tag_name) {
            $this->in_item = true;
            $this->version = 'RSS';
            $this->feed['VERSION'] = 'RSS';
        }
        if ('IMAGE' === $tag_name) {
            $this->in_image = true;
        }
        if ('ENTRY' === $tag_name) {
            $this->in_entry = true;
            $this->version = 'ATOM';
            $this->feed['VERSION'] = 'ATOM';
        }
    }

    /**
     * Handler: tag close. Text that follows a closing tag is ignored until
     * the next element opens.
     */
    private function _tag_close($parser, $tag_name)
    {
        $tag_name = strtoupper($tag_name);
        $this->current_element = '';

        if ('ITEM' === $tag_name) {
            $this->in_item = false;
            $this->current_item++;
        }
        if ('IMAGE' === $tag_name) {
            $this->in_image = false;
        }
        if ('ENTRY' === $tag_name) {
            $this->in_entry = false;
            $this->current_item++;
        }
    }

    /**
     * Handler: character data.
     *
     * The XML parser may deliver one text node in several chunks (for example
     * around entities), so chunks are appended untrimmed here and trimmed
     * once in _rebuild_feed(); trimming each chunk would glue words together.
     */
    private function _tag_data($parser, $data)
    {
        $tag_name = $this->current_element;
        if ('' === $tag_name) {
            return;
        }

        // Strip line breaks only.
        $data = str_replace(array("\r", "\n"), '', $data);

        if (!$this->in_item && !$this->in_entry) {
            // Feed-level attribute.
            if ('FEED' === $tag_name) {
                // The ATOM root element is <feed>; its text must not
                // overwrite the 'FEED' key that holds the item list.
                return;
            }
            if (!$this->in_image) {
                $this->_append($this->feed, $tag_name, $data);
            } else if ('IMAGE' !== $tag_name) {
                $this->_append($this->feed['IMAGE'], $tag_name, $data);
            }
            return;
        }

        // Inside an item/entry.
        if ('ITEM' === $tag_name || 'ENTRY' === $tag_name) {
            return;
        }
        if (!$this->in_image) {
            $this->_append($this->feed['FEED'][$this->current_item], $tag_name, $data);
        } else if ('IMAGE' !== $tag_name) {
            $this->_append($this->feed['FEED'][$this->current_item]['IMAGE'], $tag_name, $data);
        }
    }

    /**
     * Append text to $target[$key], creating the slot when it is missing.
     * Does nothing when the target or the slot already holds a non-string.
     */
    private function _append(&$target, $key, $data)
    {
        if (null !== $target && !is_array($target)) {
            return;
        }
        if (!isset($target[$key])) {
            $target[$key] = '';
        }
        if (is_string($target[$key])) {
            $target[$key] .= $data;
        }
    }

    /**
     * Return $value with every string in it trimmed, descending into arrays.
     */
    private function _trim_values($value)
    {
        if (is_array($value)) {
            foreach ($value as $key => $child) {
                $value[$key] = $this->_trim_values($child);
            }
            return $value;
        }

        return is_string($value) ? trim($value) : $value;
    }

    /**
     * Post-process the collected data: trim all text, then give every item
     * a 'PUBTIMESTAMP' and a 'DESCRIPTION'.
     *
     * @return array The finished feed array
     */
    private function _rebuild_feed()
    {
        $this->feed = $this->_trim_values($this->feed);

        if (empty($this->feed['FEED']) || !is_array($this->feed['FEED'])) {
            return $this->feed;
        }

        // Checked in order: RSS 1.0, RSS 2.0, then the two ATOM fields.
        $date_fields = array('DC:DATE', 'PUBDATE', 'PUBLISHED', 'UPDATED');

        foreach ($this->feed['FEED'] as $index => $item) {
            foreach ($date_fields as $field) {
                if (!empty($item[$field])) {
                    $item['PUBTIMESTAMP'] = strtotime($item[$field]);
                    break;
                }
            }

            // RSS 0.9x carries no date (and strtotime() may have failed):
            // fall back to the current time.
            if (empty($item['PUBTIMESTAMP'])) {
                $item['PUBTIMESTAMP'] = time();
            }

            // ATOM uses <summary> where RSS uses <description>.
            if (empty($item['DESCRIPTION'])) {
                $item['DESCRIPTION'] = !empty($item['SUMMARY']) ? $item['SUMMARY'] : 'NO DESCRIPTION';
            }

            $this->feed['FEED'][$index] = $item;
        }

        return $this->feed;
    }
}
// Current class name, exposed as a global variable for code that includes this file
$classname = 'BsmFeedParser';
?>
| 

图片包子,注册送100包子! |
|