songlv
(超级版主)
金牌会员
 
UID 73828
精华
0
积分 2854
帖子 685
金钱 2850 喜悦币
威望 0
人脉 4
阅读权限 70
注册 2006-5-22
状态 在线
|
抓取百度知道的内容到discuz论坛的类
<?php
// Import driver: connects to the local Discuz! database, then for every forum
// row whose fup is greater than 0 imports the first $page listing pages
// through discuz::getList().
//
// NOTE(review): the mysql_* extension used here was removed in PHP 7.0; this
// script needs PHP 5.x or a port to mysqli/PDO.
// NOTE(review): database credentials are hard-coded below; move them to
// configuration before running this anywhere shared.

// Scraping sleeps one second per question, so lift the execution time limit.
set_time_limit(0);

mysql_connect("localhost", "root", "123456") or die("Could not connect: " . mysql_error());
mysql_select_db("discuz") or die("Could not select database: " . mysql_error());

// Keep the connection in GBK while sending client bytes unconverted.
mysql_query("SET character_set_connection=gbk, character_set_results=gbk, character_set_client=binary");

$discuz=new discuz();

// Number of listing pages to import per forum.
$page=5;

$sql="select fid from cdb_forums where fup >0";
$result = mysql_query($sql);
if(!$result){
    // Without this check a failed query would hand false to mysql_fetch_assoc().
    die("Could not list forums: " . mysql_error());
}

while ($row = mysql_fetch_assoc($result)) {
    for($i=1;$i<=$page;$i++){
        $discuz->getList($row['fid'],$i);
    }
}
?>
<?php
/**
 * Scrapes questions and answers from Baidu Zhidao and writes them into the
 * tables of a Discuz! board as threads and replies.
 *
 * All database access goes through the default mysql_* connection, which the
 * calling script must have opened already.
 *
 * NOTE(review): the mysql_* extension was removed in PHP 7.0; this class needs
 * PHP 5.x or a port to mysqli/PDO.
 *
 * String arguments of the insert* methods are interpolated into SQL as-is, so
 * callers must escape them first (getOne() does so with addslashes()).
 */
class discuz{
    /** Table-name prefix of the Discuz! installation. */
    var $tblPre='cdb_';

    /**
     * Look up a member by name and create it, with randomised activity
     * figures, when it does not exist yet.
     *
     * @access public
     * @param string $username Member name, already escaped for SQL.
     * @return int User ID
     */
    function insertMember($username){
        $sql="select uid from ".$this->tblPre."members where username ='$username'";
        $result = mysql_query($sql);
        $row = $result ? mysql_fetch_assoc($result) : false;
        if($row){
            return $row['uid'];
        }

        // Fabricate a plausible history so imported members do not all look
        // as if they registered at the moment of the import.
        $regdate =intval(time()-rand(8640000,28640000));
        $lastvisit =intval(time()-rand(86400,864000));
        $lastpost =$lastvisit+rand(120,1200);
        $posts=rand(2,20);
        $oltime=rand(5,245);
        $extcredits1=rand(50,845);
        $sql="insert into ".$this->tblPre."members set username ='$username',regdate=$regdate,lastvisit=$lastvisit,lastpost=$lastpost,lastactivity =$lastvisit,posts=$posts,oltime=$oltime,extcredits1=$extcredits1,groupid =12";
        mysql_query($sql);
        $uid=mysql_insert_id();

        // Pick one of the stock avatars 01.gif .. 09.gif.
        $avatar=rand(1,9);
        $sql="replace into ".$this->tblPre."memberfields set nickname ='$username',uid=$uid,avatar='images/avatars/0$avatar.gif',avatarwidth=83,avatarheight=94";
        mysql_query($sql);

        $thismonth = rand(200,3000);
        $total=$thismonth+rand(200,3000);
        $sql="insert into ".$this->tblPre."onlinetime set thismonth ='$thismonth',total=$total, uid=$uid";
        mysql_query($sql);

        return $uid;
    }

    /**
     * Insert (or replace) a thread together with its first post, and update
     * the owning forum's counters and lastpost summary.
     *
     * @access public
     * @param int    $fid      Forum ID.
     * @param int    $tid      Thread ID to store the thread under.
     * @param string $subject  Thread title, already escaped for SQL.
     * @param string $message  Body of the first post, already escaped for SQL.
     * @param string $author   Author name, already escaped for SQL.
     * @param int    $dateline Unix timestamp of the thread.
     * @return int Thread ID
     */
    function insertThread($fid,$tid,$subject,$message,$author,$dateline){
        // The ids originate from scraped pages; force them to integers before
        // they are interpolated into SQL.
        $fid=intval($fid);
        $tid=intval($tid);
        $dateline=intval($dateline);

        // Make sure the author exists.
        $authorid=$this->insertMember($author);

        // Insert the thread row.
        $sql="replace into ".$this->tblPre."threads set tid=$tid,fid=$fid,subject='$subject',authorid=$authorid,author='$author',dateline=$dateline";
        mysql_query($sql);

        // The forum's lastpost column is a tab-separated record, so tabs must
        // not appear inside the subject itself.
        $subject = str_replace("\t", ' ', $subject);
        $lastpost = "$tid\t$subject\t$dateline\t$author";
        $todayposts=rand(0,1);
        $sql="UPDATE ".$this->tblPre."forums SET lastpost='$lastpost', threads=threads+1, posts=posts+1, todayposts=todayposts+$todayposts WHERE fid='$fid'";
        mysql_query($sql);

        // Drop any posts left over from an earlier import of the same thread.
        $sql="delete from ".$this->tblPre."posts where tid=$tid";
        mysql_query($sql);

        // Insert the first post of the thread.
        $sql="insert into ".$this->tblPre."posts set fid=$fid,tid=$tid,subject ='$subject',message='$message',authorid=$authorid,author='$author',dateline=$dateline,first =1";
        mysql_query($sql);

        return $tid;
    }

    /**
     * Insert a reply into an existing thread.
     *
     * @access public
     * @param int    $fid      Forum ID.
     * @param int    $tid      Thread ID.
     * @param string $message  Reply body, already escaped for SQL.
     * @param string $author   Author name, already escaped for SQL.
     * @param int    $dateline Unix timestamp of the reply.
     * @return int Post ID as reported by mysql_insert_id()
     */
    function insertPost($fid,$tid,$message,$author,$dateline){
        $fid=intval($fid);
        $tid=intval($tid);
        $dateline=intval($dateline);

        // Make sure the author exists.
        $authorid=$this->insertMember($author);

        // Insert the reply.
        $sql="insert into ".$this->tblPre."posts set fid=$fid,tid=$tid,message='$message',authorid=$authorid,author='$author',dateline=$dateline,first =0";
        mysql_query($sql);

        return mysql_insert_id();
    }

    /**
     * Turn the date text shown next to a reply (which carries no year) into a
     * Unix timestamp.
     *
     * The current year is tried first; when that lands in the future the reply
     * must date from an earlier year, so the previous year is used instead.
     * Unparseable text falls back to the current time so that callers never
     * receive false.
     *
     * @param string $text Date text without a year.
     * @return int Unix timestamp
     */
    function replyTime($text){
        $now=time();
        $text=trim($text);
        $stamp=strtotime(date('Y').'-'.$text);
        if($stamp!==false && $stamp>$now){
            $stamp=strtotime((date('Y')-1).'-'.$text);
        }
        if($stamp===false){
            return $now;
        }
        return $stamp;
    }

    /**
     * Extract the question and its answers from the HTML of a question page.
     * Performs no I/O.
     *
     * @param string $data Raw page HTML.
     * @return array|false array('topic'=>array(title,content,username,dateline),
     *                     'posts'=>list of array(title,username,dateline)),
     *                     or false when the question markup is not found.
     */
    function parseQuestion($data){
        // Question title, body and asker.
        if(!preg_match('|<cq>(.*)</cq>(.*)<cd>(.*)</cd>(.*)提问者:(.*)-|isU',$data,$found)){
            return false;
        }
        $topic=array();
        $topic['title']=trim($found[1]);
        $topic['content']=trim(strip_tags($found[3],'<br>'));
        $topic['username']=trim(strip_tags($found[5]));
        $topic['dateline']=time();
        if(strpos($topic['username'],'匿名')!==false){
            $topic['username']='匿名';
        }

        $posts=array();

        // Answers from named users: group 1 is the answer body, group 3 the
        // user name and group 6 the date text.
        preg_match_all('/<div class="f14 p90 pl10">(.*)<\/div>(.*)回答者:(.*)-(.*)target=_blank>(.*)<\/a>(.*)<\/div>/isU',$data,$named);
        foreach($named[1] as $k=>$body){
            $reply=array();
            $reply['title'] = trim(strip_tags($body,'<br>'));
            $reply['username'] = trim(strip_tags($named[3][$k]));
            $reply['dateline'] = $this->replyTime($named[6][$k]);
            // The page shows no question date here; approximate it as being
            // no later than the earliest answer.
            $topic['dateline']=min($reply['dateline'],$topic['dateline']);
            $posts[]=$reply;
        }

        // Anonymous answers: group 1 is the answer body and group 3 the date
        // text (this pattern has only three groups).
        preg_match_all('/<div class="f14 p90 pl10">(.*)<\/div>(.*)回答者:匿名(.*)<\/div>/isU',$data,$anon);
        foreach($anon[1] as $k=>$body){
            $reply=array();
            $reply['title'] = trim(strip_tags($body,'<br>'));
            $reply['username'] = '无名';
            $reply['dateline'] = $this->replyTime($anon[3][$k]);
            $topic['dateline']=min($reply['dateline'],$topic['dateline']);
            $posts[]=$reply;
        }

        // Place the question slightly before its earliest answer.
        $topic['dateline']-=1000;

        return array('topic'=>$topic,'posts'=>$posts);
    }

    /**
     * Download one question page and import it as a thread with its replies.
     *
     * @param int        $fid Forum ID to import into.
     * @param int|string $tid Question id; also used as the thread ID.
     * @return bool false when the page cannot be fetched or parsed, true otherwise.
     */
    function getOne($fid,$tid){
        $fid=intval($fid);
        $tid=intval($tid);

        mysql_query("SET character_set_connection=gbk, character_set_results=gbk, character_set_client=binary");

        $data=file_get_contents('http://zhidao.baidu.com/question/'.$tid.'.html');
        if(!$data)return false;

        $parsed=$this->parseQuestion($data);
        if(!$parsed)return false;
        $topic=$parsed['topic'];
        $posts=$parsed['posts'];

        $this->insertThread($fid,$tid,addslashes($topic['title']),addslashes($topic['content']),addslashes($topic['username']),$topic['dateline']);

        if($posts){
            foreach($posts as $post){
                $this->insertPost($fid,$tid,addslashes($post['title']),addslashes($post['username']),$post['dateline']);
            }

            // The thread summary is taken from the last reply in list order.
            $last=end($posts);
            $replies = count($posts);
            $views = rand(80,1000);
            $sql="UPDATE ".$this->tblPre."threads SET lastpost=".intval($last['dateline']).",replies=$replies,views=$views,lastposter ='".addslashes($last['username'])."' WHERE tid='$tid'";
            mysql_query($sql);
        }

        return true;
    }

    /**
     * Download one listing page and import every question linked from it.
     *
     * The forum ID doubles as the category id in the listing URL.
     *
     * @param int $fid  Forum ID / listing category id.
     * @param int $page 1-based page number; each page holds 25 entries.
     * @return bool|null false when the listing cannot be fetched, otherwise nothing.
     */
    function getList($fid,$page){
        $cons=file_get_contents("http://zhidao.baidu.com/browse/$fid?lm=0&pn=".($page-1)*25);
        if(!$cons)return false;

        // Collect the question ids from the listing links.
        preg_match_all('|<span class="f14"><a href="/question/(.*)\.html" target="_blank">|isU',$cons,$urls);

        foreach($urls[1] as $url){
            $this->getOne($fid,$url);
            // Pause between requests to avoid hammering the remote site.
            sleep(1);
        }
    }
}
?> [ 本帖最后由 songlv 于 2008-3-8 10:07 PM 编辑 ]
|  倾城倾国 |
|