daiermimi
注册会员

UID 72374
精华
0
积分 84
帖子 54
金钱 84 喜悦币
威望 0
人脉 0
阅读权限 20
注册 2006-4-25
状态 离线
|
[广告]: q
m
简单地写了一个采集类,可以采集 www.picsea.net!
<?php
/**
 * Simple image scraper for www.picsea.net.
 *
 * Typical use: construct, call setUrl() with a listing URL containing the
 * "[pagenum]" placeholder, then call grab($pages). For every item link found
 * on each listing page, the item's numbered sub-pages are fetched and the
 * image found on each is copied into the local "Thumbs/" directory.
 */
class girl_import
{
    /** Listing URL template; "[pagenum]" is replaced by the page number. */
    public $_url;
    /** Concrete listing URL currently being parsed. */
    public $url;
    /** Marker string where the interesting part of a listing page starts. */
    public $begin;
    /** Marker string where the interesting part of a listing page ends. */
    public $end;
    /** Marker string where the interesting part of an item page starts. */
    public $item_begin;
    /** Marker string where the interesting part of an item page ends. */
    public $item_end;
    /** Scheme + host that is prefixed to site-relative links. */
    public $site_root;
    /** Value passed to the constructor; not used by this class itself. */
    public $date;
    // Regular expressions
    public $regExp_pic;
    public $regExp_link;
    // Reserved: source and target character sets used by text_convert()
    public $lang = "GB2312";
    public $lang2 = "UTF-8";

    /**
     * Set up the page markers and regular expressions.
     *
     * Declared as __construct(): a method merely named after the class is
     * not treated as a constructor on PHP 8, which would leave every
     * property below unset.
     *
     * @param string $p_date Stored in $this->date.
     */
    public function __construct($p_date = "")
    {
        $this->begin = "<img src=\"/img06/iCon_2.gif\" width=\"73\" height=\"19\" />";
        $this->end = "<td width=\"7\" background=\"/img06/Frame_Rightbj.jpg\"> </td>";
        $this->item_begin = "你的手机";
        $this->item_end = "最前页";
        $this->site_root = "http://www.picsea.net";
        $this->date = $p_date;
        $this->regExp_link = "|<a href=(.*) target=_blank><img src=\"/(.*)\" border=0 width=95 height=75 alt=\"(.*)\"></a></td></tr></table></td></tr></table></td></tr><tr><td height=20 align=center><a href=.* target=_blank>(.*)</a>|Uis";
        $this->regExp_pic = "|<IMG.*src=\"(.*)\".*border=0.*></a>.*|Uis";
    }

    /**
     * Set the listing URL template used by grab().
     *
     * @param string $p_url URL containing the "[pagenum]" placeholder.
     */
    public function setUrl($p_url)
    {
        $this->_url = $p_url;
    }

    /**
     * Read a file or URL and normalise its line endings to "\n".
     *
     * @param string $p_url Path or URL readable by file_get_contents().
     * @return string|false The content, or false when it cannot be read or is empty.
     */
    public function getFile($p_url)
    {
        // Best-effort fetch: a failure is reported through the return value,
        // so the warning is deliberately suppressed.
        $importdata = @file_get_contents($p_url);
        if ($importdata === false || $importdata === '') {
            return false;
        }

        return str_replace(array("\r\n", "\r"), "\n", $importdata);
    }

    /**
     * Fetch a page, convert its charset and cut out the part between two markers.
     *
     * The result starts at $p_begin (inclusive) and stops right before the
     * first $p_end that follows it. When $p_begin is absent the region starts
     * at the beginning of the document.
     *
     * @param string $p_url   Page to fetch.
     * @param string $p_begin Start marker.
     * @param string $p_end   End marker.
     * @return string|false The region, or false when the page cannot be
     *                      read/converted or the end marker is missing.
     */
    public function getValidRegin($p_url, $p_begin, $p_end)
    {
        $importdata = $this->getFile($p_url);
        if ($importdata === false) {
            return false;
        }

        $importdata = $this->text_convert($importdata);
        if ($importdata === false) {
            // iconv() could not convert the page.
            return false;
        }

        $start = strpos($importdata, $p_begin);
        if ($start !== false) {
            $importdata = substr($importdata, $start);
        }

        $stop = strpos($importdata, $p_end);
        if ($stop === false) {
            return false;
        }

        return substr($importdata, 0, $stop);
    }

    /**
     * Parse listing pages 1..$pagenum of the URL template set via setUrl().
     *
     * @param int $pagenum Number of listing pages to walk.
     */
    public function grab($pagenum)
    {
        if (!$this->_url) {
            return;
        }

        for ($i = 1; $i <= $pagenum; $i++) {
            $this->url = str_replace("[pagenum]", $i, $this->_url);
            $this->parse();
        }
    }

    /**
     * Parse the current listing page ($this->url) and save the pictures of every item on it.
     *
     * Capture groups of $regExp_link: 1 = item link, 2 = thumbnail path,
     * 3 = alt text, 4 = title. Only the item link is used.
     */
    public function parse()
    {
        $data = $this->getValidRegin($this->url, $this->begin, $this->end);
        if ($data === false || $data === '') {
            return;
        }

        if (!preg_match_all($this->regExp_link, $data, $items, PREG_SET_ORDER)) {
            return;
        }

        foreach ($items as $item) {
            $this->save_item_pic($item[1]);
        }
    }

    /**
     * Walk the numbered sub-pages of one item and copy each page's picture into "Thumbs/".
     *
     * The last five characters of the item URL are replaced by "<n>.htm" for
     * n = 1, 2, ...; the walk stops at the first page that yields no region.
     *
     * @param string $url Site-relative item link taken from a listing page.
     */
    public function save_item_pic($url)
    {
        $item_url = $this->site_root . trim($url);
        $base = substr($item_url, 0, -5);

        // Without the target directory every copy() below fails silently.
        if (!is_dir('Thumbs')) {
            @mkdir('Thumbs', 0777, true);
        }

        for ($i = 1; ; $i++) {
            $page_url = $base . $i . ".htm";
            $data = $this->getValidRegin($page_url, $this->item_begin, $this->item_end);
            if (!$data) {
                break;
            }

            // A page without a recognisable picture is skipped instead of
            // copying the site root into a ".jpg" file.
            if (!preg_match($this->regExp_pic, $data, $items)) {
                continue;
            }

            // Already-absolute image URLs must not be prefixed again.
            $target_url = preg_match('#^https?://#i', $items[1])
                ? $items[1]
                : $this->site_root . $items[1];

            // time() alone repeats within a second; retry until the name is
            // unused so an earlier download is never overwritten.
            do {
                $filename = "Thumbs/" . time() . mt_rand(10, 99) . ".jpg";
            } while (file_exists($filename));

            // Best-effort download: a failed copy just means one picture less.
            @copy($target_url, $filename);
        }
    }

    /**
     * Convert text from $this->lang to $this->lang2.
     *
     * @param string $s Text to convert.
     * @return string|false $s unchanged when no source charset is set or both
     *                      charsets are equal; otherwise the iconv() result.
     */
    public function text_convert($s)
    {
        if ((!$this->lang) || ($this->lang == $this->lang2)) {
            return $s;
        }

        return iconv($this->lang, $this->lang2, $s);
    }

    /**
     * Replace named HTML entities by their characters (from php.net, for PHP < 4.3 compatibility).
     *
     * @param string $string Text containing HTML entities.
     * @return string Decoded text.
     */
    public function unhtmlentities($string)
    {
        $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES));

        return strtr($string, $trans_tbl);
    }

    /**
     * Check whether a URL answers with a non-error HTTP status.
     *
     * @param string $url URL to probe.
     * @return bool True when the final response status is 2xx or 3xx.
     */
    public function url_exists($url)
    {
        $head = @get_headers($url);
        if (!is_array($head)) {
            return false;
        }

        // When redirects are followed several status lines are returned;
        // the last one belongs to the final response.
        $status = 0;
        foreach ($head as $line) {
            if (is_string($line) && preg_match('#^HTTP/\S+\s+(\d{3})#i', $line, $m)) {
                $status = (int) $m[1];
            }
        }

        return $status >= 200 && $status < 400;
    }
}
?>
图片保存到本地,已经测试过了,可以保存,还在完善中。
|  QQ:11050714 |
|