当前位置:首页 > PHP教程 > php类库 > 列表

PHP抓取、分析国内视频网站的视频信息工具类

发布:smiling 来源: PHP粉丝网  添加日期:2020-11-05 11:46:59 浏览: 评论:0 

VideoUrlParser是一款基于PHP根据视频URL抓取视频信息的工具,支持优酷、土豆、酷六、56、乐视、搜狐、腾讯、新浪。

使用方法:

  1. require_once "VideoUrlParser.class.php";
  2. $url = "http://v.youku.com/v_show/id_XMjkwMzc0Njg4.html";
  3. $info = VideoUrlParser::parse($url);
  4. print_r($info);

说明:先引入工具类文件VideoUrlParser.class.php,$url变量的值为视频页的地址;parse()解析成功时返回数组、失败时返回false,因此应使用print_r(或按键名访问,例如$info['swf'])查看结果,直接echo数组只会输出"Array"。

附:$info数组含有的几个值,分别是img(视频缩略图)、title(视频标题)、url(视频页地址)、swf(视频swf播放地址);当parse()的第二个参数$createObject为true(默认)时,还会包含object(可直接输出的<embed>播放器代码)。我只用到了img和swf地址,具体的可以根据自己的需要进行调整。

VideoUrlParser类源码:

  1. <?php 
  2. /** 
  3.  * Video  
  4.  *  
  5.  * @package  
  6.  * @version 1.2 
  7.  * @copyright 2005-2011 HDJ.ME  
  8.  * @author Dijia Huang <huangdijia@gmail.com>  
  9.  * @license PHP Version 3.0 {@link http://www.php.net/license/3_0.txt} 
  10.  * 
  11.  * Usage 
  12.  * require_once "VideoUrlParser.class.php"; 
  13.  * $urls[] = "http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html"; 
  14.  * $urls[] = "http://www.tudou.com/playlist/p/l13087099.html"; 
  15.  * $urls[] = "http://www.tudou.com/programs/view/ufg-A3tlcxk/"; 
  16.  * $urls[] = "http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html"; 
  17.  * $urls[] = "http://www.56.com/u68/v_NjI2NTkxMzc.html"; 
  18.  * $urls[] = "http://www.letv.com/ptv/vplay/1168109.html"; 
  19.  * $urls[] = "http://video.sina.com.cn/v/b/46909166-1290055681.html"; 
  20.  * 
  21.  * foreach($urls as $url){ 
  22.  *     $info = VideoUrlParser::parse($url); 
  23.  *     //var_dump($info); 
  24.  *     echo "<a href='{$info['url']}' target='_new'>{$info['title']}</a>"; 
  25.  *     echo "<br />"; 
  26.  *     echo $info['object']; 
  27.  *     echo "<br />"; 
  28.  * } 
  29.  * 
  30.  * 
  31.  * 
  32.  * //优酷 
  33.  * http://v.youku.com/v_show/id_XMjU0NjY4OTEy.html 
  34.  * <embed src="http://player.youku.com/player.php/sid/XMjU0NjY4OTEy/v.swf" quality="high" width="480" height="400" align="middle" allowScriptAccess="sameDomain" type="application/x-shockwave-flash"></embed> 
  35.  *  
  36.  * //酷六 
  37.  * http://v.ku6.com/special/show_3917484/x0BMXAbgZdQS6FqN.html 
  38.  * <embed src="http://player.ku6.com/refer/x0BMXAbgZdQS6FqN/v.swf" quality="high" width="480" height="400" align="middle" allowScriptAccess="always" allowfullscreen="true" type="application/x-shockwave-flash"></embed> 
  39.  *  
  40.  * //土豆 
  41.  * http://www.tudou.com/playlist/p/a65929.html?iid=74905844 
  42.  * <embed src="http://www.tudou.com/l/A_0urj-Geec/&iid=74905844/v.swf" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" wmode="opaque" width="480" height="400"></embed> 
  43.  *  
  44.  * //56 
  45.  * http://www.56.com/u98/v_NTkyODY2NTU.html 
  46.  * <embed src="http://player.56.com/v_NTkyODY2NTU.swf"  type="application/x-shockwave-flash" width="480" height="405" allowNetworking="all" allowScriptAccess="always"></embed> 
  47.  *  
  48.  * //新浪播客 
  49.  * http://video.sina.com.cn/v/b/46909166-1290055681.html 
  50.  * <embed src="http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=46909166_1290055681_b0K1GHEwDWbK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdZNQH6SjQBtkEqDhAQJ42dfcn0Rs/s.swf" pluginspage="http://www.macromedia.com/go/getflashplayer" type="application/x-shockwave-flash" name="ssss" allowFullScreen="true" allowScriptAccess="always" width="480" height="370"></embed> 
  51.  *  
  52.  * //乐视 
  53.  * http://www.letv.com/ptv/vplay/1168109.html 
  54.  * <embed src="http://i3.imgs.letv.com/player/swfPlayer.swf?id=1168109&host=app.letv.com&vstatus=1&AP=1&logoMask=0&isShowP2p=0&autoplay=true" quality="high" scale="NO_SCALE" wmode="opaque" bgcolor="#000000" width="480" height="388" name="FLV_player" align="middle" allowscriptaccess="always" allowfullscreen="true" type="application/x-shockwave-flash" pluginspage="http://www.macromedia.com/go/getflashplayer"> 
  55.  */ 
  56.  
  class VideoUrlParser
  {
      /**
       * User-Agent sent by the socket and cURL fetchers.
       * Must stay on a single line: a line break here would end up inside the HTTP header.
       */
      const USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

      /** Pattern whose first capture group is the supported host found in a lower-cased URL. */
      const CHECK_URL_VALID = "/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/";

      /**
       * Resolve a video page URL into its metadata.
       *
       * @param string $url          Address of the video page.
       * @param mixed  $createObject When truthy, an 'object' key holding ready-to-print
       *                             <embed> markup is added to the result.
       * @static
       * @access public
       * @return array|false Array with the keys img, title, url, swf (and optionally object),
       *                     or false when the host is unsupported or parsing failed.
       */
      public static function parse($url = '', $createObject = true)
      {
          $lowerurl = strtolower($url);
          if (!preg_match(self::CHECK_URL_VALID, $lowerurl, $matches)) {
              return false;
          }

          switch ($matches[1]) {
              case 'youku.com':
                  $data = self::_parseYouku($url);
                  break;
              case 'tudou.com':
                  $data = self::_parseTudou($url);
                  break;
              case 'ku6.com':
                  $data = self::_parseKu6($url);
                  break;
              case '56.com':
                  $data = self::_parse56($url);
                  break;
              case 'letv.com':
                  $data = self::_parseLetv($url);
                  break;
              case 'video.sina.com.cn':
                  $data = self::_parseSina($url);
                  break;
              case 'my.tv.sohu.com':
              case 'tv.sohu.com':
              case 'sohu.com':
                  $data = self::_parseSohu($url);
                  break;
              case 'v.qq.com':
                  $data = self::_parseQq($url);
                  break;
              default:
                  $data = false;
          }

          if ($data && $createObject) {
              // The swf address is scraped from a remote page, so escape it before it goes
              // into an attribute; double_encode=false keeps already-encoded "&amp;" intact.
              $swf = htmlspecialchars((string) $data['swf'], ENT_QUOTES, 'UTF-8', false);
              $data['object'] = "<embed src=\"{$swf}\" quality=\"high\" width=\"480\" height=\"400\" align=\"middle\" allowNetworking=\"all\" allowScriptAccess=\"always\" type=\"application/x-shockwave-flash\"></embed>";
          }
          return $data;
      }

      /**
       * Read $array[$key], falling back to $default when the key (or the array) is missing.
       */
      private static function _pick($array, $key, $default = null)
      {
          return (is_array($array) && isset($array[$key])) ? $array[$key] : $default;
      }

      /**
       * Tencent Video
       * http://v.qq.com/cover/o/o9tab7nuu0q3esh.html?vid=97abu74o4w3_0
       * http://v.qq.com/play/97abu74o4w3.html
       * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o.html
       * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o/9SfqULsrtSb.html
       * http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?_v=20110829&vid=97abu74o4w3&autoplay=1&list=2&showcfg=1&tpid=23&title=%E7%AC%AC%E4%B8%80%E7%8E%B0%E5%9C%BA&adplay=1&cid=o9tab7nuu0q3esh
       *
       * @param string $url Video page address.
       * @return array|false
       */
      private static function _parseQq($url)
      {
          if (preg_match("/\/play\//", $url)) {
              // "/play/" pages only carry a "url=<target>" pointer; follow it.
              $html = self::_fget($url);
              if (!$html || !preg_match("/url=([^\"]+)/", $html, $matches)) {
                  return false;
              }
              // Use the captured target only: keeping the "url=" prefix would make
              // the next fetch fail.
              $url = $matches[1];
          }

          $vid = preg_match("/vid=([^\_]+)/", $url, $matches) ? $matches[1] : '';

          $html = self::_fget($url);
          if (!$html || !preg_match("/flashvars\s=\s\"([^;]+)/s", $html, $matches)) {
              return false;
          }
          $query = $matches[1];

          // No vid in the URL: fall back to the default the page script assigns.
          if (!$vid && preg_match("/vid\s?=\s?vid\s?\|\|\s?\"(\w+)\";/i", $html, $matches)) {
              $vid = $matches[1];
          }

          $query = str_replace('"+vid+"', $vid, $query);
          parse_str($query, $output);

          $data = array();
          $data['img'] = "http://vpic.video.qq.com/" . self::_pick($output, 'cid', '') . "/{$vid}_1.jpg";
          $data['url'] = $url;
          $data['title'] = self::_pick($output, 'title');
          $data['swf'] = "http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?" . $query;
          return $data;
      }

      /**
       * Youku
       * http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html
       * http://player.youku.com/player.php/sid/XMjU0NjI2Njg4/v.swf
       *
       * @param string $url Video page address.
       * @return array|false
       */
      private static function _parseYouku($url)
      {
          if (!preg_match("#id\_(\w+)#", $url, $matches)) {
              // Playlist pages do not carry the id in the URL; read it from the page script.
              if (!preg_match("#v_playlist\/#", $url)) {
                  return false;
              }
              $html = self::_fget($url);
              if (!$html || !preg_match("#videoId2\s*=\s*\'(\w+)\'#", $html, $matches)) {
                  return false;
              }
          }

          $link = "http://v.youku.com/player/getPlayList/VideoIDS/{$matches[1]}/timezone/+08/version/5/source/out?password=&ran=2513&n=3";

          $retval = self::_cget($link);
          if (!$retval) {
              return false;
          }

          $json = json_decode($retval, true);
          $item = (is_array($json) && isset($json['data'][0])) ? $json['data'][0] : array();

          $data = array();
          $data['img'] = self::_pick($item, 'logo');
          $data['title'] = self::_pick($item, 'title');
          $data['url'] = $url;
          $data['swf'] = "http://player.youku.com/player.php/sid/{$matches[1]}/v.swf";
          return $data;
      }

      /**
       * Tudou
       * http://www.tudou.com/programs/view/Wtt3FjiDxEE/
       * http://www.tudou.com/v/Wtt3FjiDxEE/v.swf
       *
       * http://www.tudou.com/playlist/p/a65718.html?iid=74909603
       * http://www.tudou.com/l/G5BzgI4lAb8/&iid=74909603/v.swf
       *
       * @param string $url Video page address.
       * @return array|false
       */
      private static function _parseTudou($url)
      {
          if (!preg_match("#view/([-\w]+)/#", $url, $matches)) {
              return self::_parseTudouPlaylist($url);
          }

          // Single video: the player URL answers with a redirect whose query string
          // carries the thumbnail and the title.
          $ret = self::_fsget("/v/{$matches[1]}/v.swf", "www.tudou.com");
          if (!$ret || !preg_match("#\nLocation: ([^\r\n]*)#", $ret, $mat)) {
              return false;
          }
          parse_str((string) parse_url(urldecode($mat[1]), PHP_URL_QUERY), $query);

          $data = array();
          $data['img'] = self::_pick($query, 'snap_pic');
          $data['title'] = self::_pick($query, 'title');
          $data['url'] = $url;
          $data['swf'] = "http://www.tudou.com/v/{$matches[1]}/v.swf";
          return $data;
      }

      /**
       * Handle Tudou "/playlist/" pages by reading the list data embedded in the page.
       *
       * @param string $url Playlist page address.
       * @return array|false
       */
      private static function _parseTudouPlaylist($url)
      {
          if (strpos($url, "/playlist/") === false) {
              return false;
          }

          $lowerurl = strtolower($url);
          if (strpos($url, 'iid=') !== false) {
              $quarr = explode("iid=", $lowerurl);
              if (empty($quarr[1])) {
                  return false;
              }
          } elseif (preg_match("#p\/l(\d+).#", $lowerurl, $quarr)) {
              if (empty($quarr[1])) {
                  return false;
              }
          }

          $html = self::_fget($url);
          if (!$html) {
              return false;
          }
          $html = iconv("GB2312", "UTF-8", $html);
          if ($html === false) {
              return false;
          }

          if (!preg_match("/lid_code\s=\slcode\s=\s[\'\"]([^\'\"]+)/s", $html, $m)) {
              return false;
          }
          $icode = $m[1];

          if (!preg_match("/iid\s=\s.*?\|\|\s(\d+)/sx", $html, $m)) {
              return false;
          }
          $iid = $m[1];

          if (!preg_match("/listData\s=\s(\[\{.*\}\])/sx", $html, $m)) {
              return false;
          }

          // Turn the JavaScript literal into JSON: drop whitespace, blank out
          // non-literal values and quote the bare keys.
          $find = array("/\n/", '/\s/', "/:[^\d\"]\w+[^\,]*,/i", "/(\{|,)(\w+):/");
          $replace = array("", "", ':"",', '\\1"\\2":');
          $json = json_decode(preg_replace($find, $replace, $m[1]));
          if (!is_array($json) && !is_object($json)) {
              return false;
          }

          // Pick the entry matching the iid; when none matches, the last entry is used.
          $item = null;
          foreach ($json as $val) {
              $item = $val;
              // Loose comparison on purpose: the decoded iid may be an int or a string.
              if (isset($val->iid) && $val->iid == $iid) {
                  break;
              }
          }
          if (!is_object($item)) {
              return false;
          }

          $data = array();
          $data['img'] = isset($item->pic) ? $item->pic : null;
          $data['title'] = isset($item->title) ? $item->title : null;
          $data['url'] = $url;
          $data['swf'] = "http://www.tudou.com/l/{$icode}/&iid={$iid}/v.swf";
          return $data;
      }

      /**
       * Ku6
       * http://v.ku6.com/film/show_520/3X93vo4tIS7uotHg.html
       * http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html
       * http://v.ku6.com/show/7US-kDXjyKyIInDevhpwHg...html
       * http://player.ku6.com/refer/3X93vo4tIS7uotHg/v.swf
       *
       * @param string $url Video page address.
       * @return array|false
       */
      private static function _parseKu6($url)
      {
          if (preg_match("/show\_/", $url)) {
              if (!preg_match("#/([-\w]+)\.html#", $url, $matches)) {
                  return false;
              }
              // Keep the API address in its own variable so 'url' still reports the page.
              $api = "http://v.ku6.com/fetchVideo4Player/{$matches[1]}.html";
              $html = self::_fget($api);
              if (!$html) {
                  return false;
              }
              $json = json_decode($html, true);
              if (!$json) {
                  return false;
              }
              $info = self::_pick($json, 'data', array());

              $data = array();
              $data['img'] = self::_pick($info, 'picpath');
              $data['title'] = self::_pick($info, 't');
              $data['url'] = $url;
              $data['swf'] = "http://player.ku6.com/refer/{$matches[1]}/v.swf";
              return $data;
          }

          if (preg_match("/show\//", $url)) {
              $html = self::_fget($url);
              if (!$html || !preg_match("/ObjectInfo\s?=\s?([^\n]*)};/si", $html, $matches)) {
                  return false;
              }
              $str = $matches[1];

              $data = array();
              // img
              $data['img'] = preg_match("/cover\s?:\s?\"([^\"]+)\"/", $str, $matches) ? $matches[1] : null;
              // title: round-trip through JSON so escape sequences in the page are decoded
              $data['title'] = null;
              if (preg_match("/title\"?\s?:\s?\"([^\"]+)\"/", $str, $matches)) {
                  $json = json_decode("{\"title\":\"{$matches[1]}\"}", true);
                  $data['title'] = self::_pick($json, 'title');
              }
              // url
              $data['url'] = $url;
              // query
              if (!preg_match("/\"(vid=[^\"]+)\"\sname=\"flashVars\"/s", $html, $matches)) {
                  return false;
              }
              $query = str_replace("&amp;", '&', $matches[1]);
              if (!preg_match("/\/\/player\.ku6cdn\.com[^\"\']+/", $html, $matches)) {
                  return false;
              }
              $data['swf'] = 'http:' . $matches[0] . '?' . $query;
              return $data;
          }

          return false;
      }

      /**
       * 56.com
       * http://www.56.com/u73/v_NTkzMDcwNDY.html
       * http://player.56.com/v_NTkzMDcwNDY.swf
       *
       * @param string $url Video page address.
       * @return array|false
       */
      private static function _parse56($url)
      {
          if (!preg_match("#/v_(\w+)\.html#", $url, $matches)) {
              return false;
          }

          $link = "http://vxml.56.com/json/{$matches[1]}/?src=out";
          $retval = self::_cget($link);
          if (!$retval) {
              return false;
          }

          $json = json_decode($retval, true);
          $info = self::_pick($json, 'info', array());

          $data = array();
          $data['img'] = self::_pick($info, 'img');
          $data['title'] = self::_pick($info, 'Subject');
          $data['url'] = $url;
          $data['swf'] = "http://player.56.com/v_{$matches[1]}.swf";
          return $data;
      }

      /**
       * LeTV
       * http://www.letv.com/ptv/vplay/1168109.html
       * http://www.letv.com/player/x1168109.swf
       *
       * @param string $url Video page address.
       * @return array|false
       */
      private static function _parseLetv($url)
      {
          if (!preg_match("#vplay/(\d+)#", $url, $matches)) {
              return false;
          }

          // Thumbnail and title are read from the query string of the share link on the page.
          $query = array();
          $html = self::_fget($url);
          if ($html && preg_match("#http://v.t.sina.com.cn/([^'\"]*)#", $html, $share)) {
              parse_str((string) parse_url(urldecode($share[0]), PHP_URL_QUERY), $query);
          }

          $data = array();
          $data['img'] = self::_pick($query, 'pic');
          $data['title'] = self::_pick($query, 'title');
          $data['url'] = $url;
          $data['swf'] = "http://www.letv.com/player/x{$matches[1]}.swf";
          return $data;
      }

      /**
       * Sohu TV
       * http://my.tv.sohu.com/u/vw/5101536
       *
       * @param string $url Video page address.
       * @return array|false
       */
      private static function _parseSohu($url)
      {
          $html = self::_fget($url);
          if (!$html) {
              return false;
          }
          $html = iconv("GB2312", "UTF-8", $html);
          if ($html === false) {
              return false;
          }

          // Matches are used in page order: index 0 as title, 1 as image, 2 as videosrc.
          preg_match_all("/og:(?:title|image|videosrc)\"\scontent=\"([^\"]+)\"/s", $html, $matches);
          $found = $matches[1];

          $data = array();
          $data['img'] = self::_pick($found, 1);
          $data['title'] = self::_pick($found, 0);
          $data['url'] = $url;
          $data['swf'] = self::_pick($found, 2);
          return $data;
      }

      /**
       * Sina Video
       * http://video.sina.com.cn/v/b/48717043-1290055681.html
       * http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=48717043_1290055681_PUzkSndrDzXK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdatoG5ynSA9kEqDhAQJA4dPkm0x4/s.swf
       *
       * @param string $url Video page address.
       * @return array|false
       */
      private static function _parseSina($url)
      {
          if (!preg_match("/(\d+)(?:\-|\_)(\d+)/", $url, $matches)) {
              return false;
          }
          $url = "http://video.sina.com.cn/v/b/{$matches[1]}-{$matches[2]}.html";

          $html = self::_fget($url);
          if (!$html || !preg_match("/video\s?:\s?([^<]+)}/", $html, $matches)) {
              return false;
          }

          // Turn the JavaScript object literal into JSON before decoding it.
          $find = array("/\n/", "/\s*/", "/\'/", "/\{([^:,]+):/", "/,([^:]+):/", "/:[^\d\"]\w+[^\,]*,/i");
          $replace = array('', '', '"', '{"\\1":', ',"\\1":', ':"",');
          $arr = json_decode(preg_replace($find, $replace, $matches[1]), true);

          $data = array();
          $data['img'] = self::_pick($arr, 'pic');
          $data['title'] = self::_pick($arr, 'title');
          $data['url'] = $url;
          $data['swf'] = self::_pick($arr, 'swfOutsideUrl');
          return $data;
      }

      /**
       * Fetch a URL with file_get_contents(), transparently un-gzipping the body.
       *
       * @param string $url
       * @return string|false Body, or false when the URL is empty or the fetch failed.
       */
      private static function _fget($url = '')
      {
          if (!$url) {
              return false;
          }
          $html = file_get_contents($url);
          if ($html === false) {
              return false;
          }
          // Use the inflated payload when the body is gzip-compressed.
          $dehtml = self::_gzdecode($html);
          return $dehtml ? $dehtml : $html;
      }

      /**
       * Fetch a path from a host over a raw socket (port 80) and return the whole
       * response, status line and headers included.
       *
       * @param string $path
       * @param string $host
       * @param string $user_agent Defaults to self::USER_AGENT.
       * @return string|false
       */
      private static function _fsget($path = '/', $host = '', $user_agent = '')
      {
          if (!$path || !$host) {
              return false;
          }
          $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

          // "Connection: close" makes the server end the stream, so the feof() loop
          // below terminates instead of waiting on a keep-alive connection.
          $out = implode("\r\n", array(
              "GET {$path} HTTP/1.1",
              "Host: {$host}",
              "User-Agent: {$user_agent}",
              "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
              "Accept-Language: zh-cn,zh;q=0.5",
              "Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7",
              "Connection: close",
          )) . "\r\n\r\n";

          $fp = @fsockopen($host, 80, $errno, $errstr, 10);
          if (!$fp) {
              return false;
          }
          if (!fputs($fp, $out)) {
              fclose($fp);
              return false;
          }

          $html = '';
          while (!feof($fp)) {
              $chunk = fgets($fp, 1024);
              if ($chunk === false) {
                  break;
              }
              $html .= $chunk;
          }
          fclose($fp);

          // Use the inflated payload when the data is gzip-compressed.
          $dehtml = self::_gzdecode($html);
          return $dehtml ? $dehtml : $html;
      }

      /**
       * Fetch a URL with cURL, transparently un-gzipping the body.
       *
       * @param string $url
       * @param string $user_agent Defaults to self::USER_AGENT.
       * @return string|false
       */
      private static function _cget($url = '', $user_agent = '')
      {
          if (!$url) {
              return false;
          }
          $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

          $ch = curl_init();
          curl_setopt($ch, CURLOPT_URL, $url);
          curl_setopt($ch, CURLOPT_HEADER, 0);
          // Return the body instead of printing it, so no output buffering is needed.
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
          if (strlen($user_agent)) {
              curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
          }

          $html = curl_exec($ch);
          $failed = curl_errno($ch) !== 0;
          curl_close($ch);

          if ($failed || !is_string($html) || !strlen($html)) {
              return false;
          }

          // Use the inflated payload when the body is gzip-compressed.
          $dehtml = self::_gzdecode($html);
          return $dehtml ? $dehtml : $html;
      }

      /**
       * Decode a gzip (RFC 1952) member.
       *
       * @param string $data Raw bytes.
       * @return string|null|false Decoded payload; null when $data is not gzip;
       *                           false when the gzip container is malformed.
       */
      private static function _gzdecode($data)
      {
          $len = strlen($data);
          if ($len < 18 || substr($data, 0, 2) !== "\x1f\x8b") {
              return null; // Not GZIP format (see RFC 1952)
          }
          $method = ord($data[2]); // Compression method
          $flags = ord($data[3]);  // Flags
          if (($flags & 31) !== $flags) {
              // Reserved bits are set -- not allowed by RFC 1952
              return null;
          }

          // Fixed part: magic(2) method(1) flags(1) mtime(4) xfl(1) os(1)
          $headerlen = 10;

          if ($flags & 4) {
              // FEXTRA: 2-byte length prefix followed by that many bytes
              if ($len - $headerlen - 2 < 8) {
                  return false; // Invalid format
              }
              $extralen = unpack("v", substr($data, $headerlen, 2));
              $extralen = $extralen[1];
              if ($len - $headerlen - 2 - $extralen < 8) {
                  return false; // Invalid format
              }
              $headerlen += 2 + $extralen;
          }

          // FNAME (8) and FCOMMENT (16): zero-terminated strings, in this order
          foreach (array(8, 16) as $bit) {
              if ($flags & $bit) {
                  if ($len - $headerlen - 1 < 8) {
                      return false; // Invalid format
                  }
                  $end = strpos($data, "\0", $headerlen);
                  if ($end === false || $len - $end - 1 < 8) {
                      return false; // Invalid format
                  }
                  $headerlen = $end + 1;
              }
          }

          if ($flags & 2) {
              // FHCRC: low 16 bits of the CRC32 over the header so far
              if ($len - $headerlen - 2 < 8) {
                  return false; // Invalid format
              }
              $calccrc = crc32(substr($data, 0, $headerlen)) & 0xffff;
              $headercrc = unpack("v", substr($data, $headerlen, 2));
              if ($headercrc[1] != $calccrc) {
                  return false; // Bad header CRC
              }
              $headerlen += 2;
          }

          // Footer: CRC32 and size (mod 2^32) of the uncompressed data
          $datacrc = unpack("V", substr($data, -8, 4));
          $datacrc = $datacrc[1];
          $isize = unpack("V", substr($data, -4));
          $isize = $isize[1];

          $bodylen = $len - $headerlen - 8;
          if ($bodylen < 1) {
              return null;
          }
          if ($method !== 8) {
              return false; // Deflate is the only supported compression method
          }

          $out = gzinflate(substr($data, $headerlen, $bodylen));
          if ($out === false) {
              return false;
          }

          // Compare as unsigned decimal strings so the check also holds on 32-bit builds.
          if (sprintf('%u', $isize) !== sprintf('%u', strlen($out))
              || sprintf('%u', $datacrc) !== sprintf('%u', crc32($out))) {
              return false; // Length or CRC does not match
          }
          return $out;
      }
  }

Tags: PHP抓取 PHP视频网站

分享到: