Incomplete data when scraping web pages with PHP (file_get_contents / curl)

When scraping web pages directly with file_get_contents or curl in PHP, the returned data is sometimes incomplete. This article presents a fix.

Problem

Fetching pages directly with file_get_contents or curl returns incomplete data.

A bare file_get_contents call returns incomplete data when scraping pages in bulk, so it was first improved to:

function eyz_file_get_contents($collecturl, $timeout = 10){
    // Set request method, timeout and headers
    $opts = array(
        "http" => array(
            "method"  => "GET",
            "timeout" => $timeout,
            // Tell the server we can handle gzip-compressed responses
            'header'  => "Accept-Encoding: gzip, deflate, sdch\r\n"
        ),
    );
    // Create the stream context
    $context = stream_context_create($opts);
    // The compress.zlib:// wrapper transparently decompresses the gzip response
    return @file_get_contents("compress.zlib://" . $collecturl, false, $context);
}
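A minimal usage sketch of this wrapper (the URL below is only a placeholder for the page you actually scrape):

// Hypothetical example: fetch a page with a 15-second timeout
$html = eyz_file_get_contents('http://example.com/list.html', 15);
if ($html === false) {
    echo "request failed or timed out\n";
} else {
    echo strlen($html) . " bytes received\n";
}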

This improved matters somewhat, but incomplete responses still occurred.

Solution

The following code resolves the issue:

function req_curl($url, &$status = null, $options = array())
{
    $res = '';
    $options = array_merge(array(
        'follow_local' => true,
        'timeout' => 30,
        'max_redirects' => 4,
        'binary_transfer' => false,
        'include_header' => false,
        'no_body' => false,
        'cookie_location' => dirname(__FILE__) . '/cookie',
        'useragent' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'post' => array(),
        'referer' => null,
        'ssl_verifypeer' => 0,
        'ssl_verifyhost' => 0,
        'headers' => array(
            'Expect:'
        ),
        'auth_name' => '',
        'auth_pass' => '',
        'session' => false
    ), $options);
    $options['url'] = $url;
    $s = curl_init();
    if (!$s) return false;
    curl_setopt($s, CURLOPT_URL, $options['url']);
    curl_setopt($s, CURLOPT_HTTPHEADER, $options['headers']);
    curl_setopt($s, CURLOPT_SSL_VERIFYPEER, $options['ssl_verifypeer']);
    curl_setopt($s, CURLOPT_SSL_VERIFYHOST, $options['ssl_verifyhost']);
    curl_setopt($s, CURLOPT_TIMEOUT, $options['timeout']);
    curl_setopt($s, CURLOPT_MAXREDIRS, $options['max_redirects']);
    curl_setopt($s, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($s, CURLOPT_FOLLOWLOCATION, $options['follow_local']);
    curl_setopt($s, CURLOPT_COOKIEJAR, $options['cookie_location']);
    curl_setopt($s, CURLOPT_COOKIEFILE, $options['cookie_location']);
    if (!empty($options['auth_name']) && is_string($options['auth_name']))
    {
        // HTTP basic authentication
        curl_setopt($s, CURLOPT_USERPWD, $options['auth_name'] . ':' . $options['auth_pass']);
    }
    if (!empty($options['post']))
    {
        // Switch to a POST request when post fields are supplied
        curl_setopt($s, CURLOPT_POST, true);
        curl_setopt($s, CURLOPT_POSTFIELDS, $options['post']);
        //curl_setopt($s, CURLOPT_POSTFIELDS, array('username' => 'aeon', 'password' => '111111'));
    }
    if ($options['include_header'])
    {
        // Include the response headers in the returned string
        curl_setopt($s, CURLOPT_HEADER, true);
    }
    if ($options['no_body'])
    {
        // HEAD-style request: fetch headers only, no body
        curl_setopt($s, CURLOPT_NOBODY, true);
    }
    if ($options['session'])
    {
        // Start a new cookie session and send the supplied cookie string
        curl_setopt($s, CURLOPT_COOKIESESSION, true);
        curl_setopt($s, CURLOPT_COOKIE, $options['session']);
    }
    curl_setopt($s, CURLOPT_USERAGENT, $options['useragent']);
    curl_setopt($s, CURLOPT_REFERER, $options['referer']);
    $res = curl_exec($s);
    // Pass the HTTP status code back to the caller by reference
    $status = curl_getinfo($s, CURLINFO_HTTP_CODE);
    curl_close($s);
    return $res;
}

The above is a more complete curl wrapper and retrieves page content much more reliably: when the response is large, curl reads it in chunks and the pieces are reassembled locally, so the full page is returned.
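A minimal usage sketch, assuming the signature reconstructed above ($url, &$status by reference, $options); the URL and option values here are placeholders only:

// Hypothetical call: fetch a page and check the HTTP status code
$status = 0;
$html = req_curl('http://example.com/page.html', $status, array(
    'timeout' => 60,
    'referer' => 'http://example.com/',
));
if ($html !== false && $status == 200) {
    echo strlen($html) . " bytes received\n";
}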

With that, the problem is solved.

