|
<?php
// Prefer the locally cached listing; fall back to fetching it from the remote endpoint.
$text = open('gettemp.php');
if (!$text) {
    $url = 'http://iask.sina.com.cn/browse/i_questionList.php?cid=3&start=140&status=R&num=20&t=1203908584540&_=';
    $text = geturl($url);
}

// Turn every backslash into '%' before unescape() runs (unescape() decodes "%uXXXX"
// sequences), then strip the double quotes from the decoded listing.
$text = unescape(str_replace(chr(92), '%', $text));
$text = str_replace(chr(34), '', $text);

// Collect whatever sits between "qid:" and ",qclsname" for every entry.
// NOTE: $url is reused here and from now on holds the match array, not a URL string.
preg_match_all("/qid:(.*?),qclsname/s", $text, $url);
$text = '';

foreach ($url[1] as $s) {
    $file2 = 'http://iask.sina.com.cn/b/' . $s . '.html';
    echo $file2;

    // Download the question page, drop its quote characters and keep only the
    // region between the two markers.
    $text = clear(geturl($file2));
    $text = cut('href=/ class=a05>知识人', '<b>我来回答</b></legend>', $text);

    $quarr = getquestion($text);
    $anarr = getanser($text);
    print_r($anarr);
    $userarr = getuser($text);

    // Only the first entry is processed; nothing has been written for the rest yet.
    break;
}
//取出问
/**
 * Pull the question title and question body out of a page fragment.
 *
 * Both pieces are located with cut() using fixed, quote-less HTML markers.
 *
 * @param string $str Page markup to search.
 * @return array|false Array with the keys 'qtitle' and 'qcontent', or false
 *                     when $str is empty.
 */
function getquestion($str)
{
    if (empty($str)) {
        return false;
    }

    return array(
        'qtitle'   => cut('<b class=f14 c3>', '</b> </td>', $str),
        'qcontent' => cut('<tr><td class=f14 lh13 style=padding-right:65px;>', '<table border=0 width=100%', $str),
    );
}
//取出答案
/**
 * Collect the contents of every answer cell found in a page fragment.
 *
 * @param string $str Page markup to search.
 * @return array|false List of captured cell contents (empty when nothing
 *                     matches), or false when $str is empty.
 */
function getanser($str)
{
    if (empty($str)) {
        return false;
    }

    $matches = array();
    preg_match_all("/<td class=f14 lh15 style=padding-right:65px;>(.*?)<\/td><\/tr><\/table>/s", $str, $matches);

    // Index 1 holds the text captured by the single group of the pattern.
    return $matches[1];
}
//取出用户
/**
 * Placeholder for extracting user information from a page fragment.
 *
 * @param string $str Page markup.
 * @return null|false false when $str is empty; null otherwise, because no
 *                    extraction logic exists yet.
 */
function getuser($str)
{
    if (empty($str)) {
        return false;
    }

    // Nothing is extracted for non-empty input yet.
    return null;
}
// Remove the two troublesome characters: " and '
/**
 * Strip every double quote and single quote from a string.
 *
 * @param string $str Text to clean.
 * @return string|false The text without " and ' characters, or false when
 *                      $str is empty.
 */
function clear($str)
{
    if (empty($str)) {
        return false;
    }

    // chr(34) is the double quote, chr(39) the single quote.
    $quotes = array(chr(34), chr(39));

    return str_replace($quotes, '', $str);
}
//--------------截取
/**
 * Return the text that follows the first occurrence of $from, up to the next
 * occurrence of $from or the first occurrence of $end, whichever comes first.
 *
 * @param string $from Start marker.
 * @param string $end  End marker.
 * @param string $file Text to search.
 * @return string The text between the markers; the remainder after $from when
 *                $end is missing; '' when $from does not occur at all.
 */
function cut($from, $end, $file)
{
    // Three pieces are enough: index 1 is the segment right after the first $from.
    $message = explode($from, $file, 3);

    // Without the start marker there is no index 1; return '' directly instead
    // of reading an undefined offset and handing null to explode().
    if (!isset($message[1])) {
        return '';
    }

    // Only the part in front of the first $end is needed.
    $message = explode($end, $message[1], 2);

    return $message[0];
}
//取数
/**
 * Fetch the contents of a URL (or any path the stream wrappers accept).
 *
 * Warnings from the underlying call are suppressed on purpose; failure is
 * reported only through the return value.
 *
 * @param string $url Location to read.
 * @return string|false The contents, or false when they could not be read.
 */
function geturl($url)
{
    // Start from the failure value so every path returns something defined.
    $text = false;

    if (function_exists('file_get_contents')) {
        $text = @file_get_contents($url);
    } else {
        // Fallback: read the resource line by line and glue the lines back together.
        $file = @file($url);
        if (!empty($file) && is_array($file)) {
            $text = implode('', $file);
        }
    }

    return $text;
}
//读入
/**
 * Read a whole regular file into a string.
 *
 * @param string $file Path of the file to read.
 * @return string|false The file contents ('' for an empty file), or false when
 *                      $file is not a regular file or cannot be read.
 */
function open($file)
{
    if (!is_file($file)) {
        return false;
    }

    // file_get_contents() copes with a zero-length file and reports a failed
    // open as false, whereas fread() rejects a length of 0 and cannot be
    // called on a failed fopen() result.
    return file_get_contents($file);
}
//写入
/**
 * Overwrite the cache file 'gettemp.php' (relative to the working directory)
 * with the given text.
 *
 * @param string $str Text to store.
 * @return bool true when the text was written; false when $str is empty or
 *              the file could not be opened or written.
 */
function write($str)
{
    if (empty($str)) {
        return false;
    }

    $file = 'gettemp.php';
    $dh = fopen($file, 'wb');
    if ($dh === false) {
        // Could not open the file for writing; report failure instead of
        // passing a non-resource on to fwrite().
        return false;
    }

    $written = fwrite($dh, $str);
    fclose($dh);

    return $written !== false;
}
//---------------------------------unescape
/**
 * Decode JavaScript-escape()-style text into GBK bytes.
 *
 * Handles "%XX" (through rawurldecode), "%uXXXX", "&#xXXXX;" and decimal
 * "&#NNN;" / "&#NNN" sequences; each 16-bit code unit is converted to GBK.
 *
 * NOTE(review): the pattern has no "s" modifier, so "." never matches a line
 * break; line breaks are therefore not captured and do not appear in the
 * result.
 *
 * @param string $str Escaped text.
 * @return string The decoded text, with converted characters encoded as GBK.
 */
function unescape($str) {
    $str = rawurldecode($str);

    // The U modifier inverts greediness, so the final ".+" consumes a single
    // character at a time: every token is either one escape sequence or one
    // plain character.
    preg_match_all("/%u.{4}|&#x.{4};|&#\d+;|&#\d+?|.+/U",$str,$r);
    $ar = $r[0];

    foreach ($ar as $k => $v) {
        // pack("H4") and pack("n") both emit the code unit high byte first, so
        // the source encoding is named explicitly as big-endian; plain "UCS-2"
        // leaves the byte order to the iconv implementation.
        if (substr($v, 0, 2) === "%u") {
            $ar[$k] = iconv("UCS-2BE", "GBK", pack("H4", substr($v, -4)));
        } elseif (substr($v, 0, 3) === "&#x") {
            $ar[$k] = iconv("UCS-2BE", "GBK", pack("H4", substr($v, 3, -1)));
        } elseif (substr($v, 0, 2) === "&#") {
            // Keep only the digits of the decimal reference.
            $ar[$k] = iconv("UCS-2BE", "GBK", pack("n", preg_replace("/[^\d]/", "", $v)));
        }
    }

    return join("", $ar);
}
?> |
|