新年
快乐

垃圾堆中的精品

垃圾堆中的精品

PHP视频抓取类,不含抖音快手

PHP视频抓取类,传入视频页面地址即可解析为视频播放地址

I know no such things as genius,it is nothing but labor and diligence.

一个视频解析抓取类,包含优酷、酷六、土豆、56、新浪播客、乐视、搜狐TV等,当然也可以根据需求自行添加。类里面没有写关于APP(抖音、快手等)的抓取,留着有时间再更新吧。

<?php
/**
 * I know no such things as genius,it is nothing but labor and diligence.
 */
// namespace Common\Util;

/**
 * 视频地址抓取
 *
 * @package
 * @version 1.2
 * @copyright 2005-2011 HDJ.ME
 * @author Dijia Huang <huangdijia@gmail.com>
 * @license PHP Version 3.0 {@link http://www.php.net/license/3_0.txt}
 * Usage
 * require_once "VideoUrlParser.class.php";
 * $urls[] = "http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html";
 * $urls[] = "http://www.tudou.com/playlist/p/l13087099.html";
 * $urls[] = "http://www.tudou.com/programs/view/ufg-A3tlcxk/";
 * $urls[] = "http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html";
 * $urls[] = "http://www.56.com/u68/v_NjI2NTkxMzc.html";
 * $urls[] = "http://www.letv.com/ptv/vplay/1168109.html";
 * $urls[] = "http://video.sina.com.cn/v/b/46909166-1290055681.html";
 * foreach($urls as $url){
 *     try{
 *         // parse() returns a URL string; sites whose parser is disabled throw \Exception
 *         $playUrl = VideoUrlUtil::parse($url);
 *         //var_dump($playUrl);
 *         echo "<a href='{$playUrl}' target='_new'>{$playUrl}</a>";
 *     }catch(\Exception $e){
 *         echo $e->getMessage();
 *     }
 *     echo "<br />";
 * }
 * //优酷
 * http://v.youku.com/v_show/id_XMjU0NjY4OTEy.html
 * <embed src="http://player.youku.com/player.php/sid/XMjU0NjY4OTEy/v.swf" quality="high" width="480" height="400"
 *     align="middle" allowScriptAccess="sameDomain" type="application/x-shockwave-flash"></embed>
 * //酷六
 * http://v.ku6.com/special/show_3917484/x0BMXAbgZdQS6FqN.html
 * <embed src="http://player.ku6.com/refer/x0BMXAbgZdQS6FqN/v.swf" quality="high" width="480" height="400"
 *     align="middle" allowScriptAccess="always" allowfullscreen="true"
 *     type="application/x-shockwave-flash"></embed>
 * //土豆
 * http://www.tudou.com/playlist/p/a65929.html?iid=74905844
 * <embed src="http://www.tudou.com/l/A_0urj-Geec/&iid=74905844/v.swf" type="application/x-shockwave-flash"
 *     allowscriptaccess="always" allowfullscreen="true" wmode="opaque" width="480" height="400"></embed>
 * //56
 * http://www.56.com/u98/v_NTkyODY2NTU.html
 * <embed src="http://player.56.com/v_NTkyODY2NTU.swf"  type="application/x-shockwave-flash" width="480"
 *     height="405" allowNetworking="all" allowScriptAccess="always"></embed>
 * //新浪播客
 * http://video.sina.com.cn/v/b/46909166-1290055681.html
 * <embed
 *     src="http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=46909166_1290055681_b0K1GHEwDWbK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdZNQH6SjQBtkEqDhAQJ42dfcn0Rs/s.swf"
 *     pluginspage="http://www.macromedia.com/go/getflashplayer" type="application/x-shockwave-flash" name="ssss"
 *     allowFullScreen="true" allowScriptAccess="always" width="480" height="370"></embed>
 * //乐视
 * http://www.letv.com/ptv/vplay/1168109.html
 * <embed
 *     src="http://i3.imgs.letv.com/player/swfPlayer.swf?id=1168109&host=app.letv.com&vstatus=1&AP=1&logoMask=0&isShowP2p=0&autoplay=true"
 *     quality="high" scale="NO_SCALE" wmode="opaque" bgcolor="#000000" width="480" height="388" name="FLV_player"
 *     align="middle" allowscriptaccess="always" allowfullscreen="true" type="application/x-shockwave-flash"
 *     pluginspage="http://www.macromedia.com/go/getflashplayer">
 */
class VideoUrlUtil{

	/** User-Agent header sent with every outgoing request. */
	const USER_AGENT      = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1";
	/** Pattern of recognised video hosts; capture group 1 holds the host that matched. */
	const CHECK_URL_VALID = "/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/";

	/**
	 * Resolve a video page URL by dispatching to the parser of the site it belongs to.
	 *
	 * The host is detected on a lower-cased copy of the URL; the parser itself
	 * receives the URL exactly as it was passed in.
	 *
	 * @param string $url video page URL
	 * @return string resolved URL, or $url unchanged when no known host is found in it
	 * @throws \Exception when the site's parser fails or is currently disabled
	 */
	public static function parse($url = ''){
		if(!preg_match(self::CHECK_URL_VALID, strtolower((string)$url), $matches)){
			return $url;
		}

		switch($matches[1]){
			case 'v.qq.com':
				return self::_parseQQ($url);
			case 'youku.com':
				return self::_parseYouKu($url);
			case 'tudou.com':
				return self::_parseTuDou($url);
			case 'ku6.com':
				return self::_parseKu6($url);
			case '56.com':
				return self::_parse56($url);
			case 'letv.com':
				return self::_parseLeTV($url);
			case 'video.sina.com.cn':
				return self::_parseSina($url);
			// CHECK_URL_VALID can only capture these two sohu hosts.
			case 'my.tv.sohu.com':
			case 'tv.sohu.com':
				return self::_parseSoHu($url);
		}

		return $url;
	}

	/**
	 * Tencent Video (v.qq.com).
	 *
	 * Example inputs:
	 * http://v.qq.com/cover/o/o9tab7nuu0q3esh.html?vid=97abu74o4w3_0
	 * http://v.qq.com/play/97abu74o4w3.html
	 * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o.html
	 * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o/9SfqULsrtSb.html
	 * http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?_v=20110829&vid=97abu74o4w3&autoplay=1&list=2&showcfg=1&tpid=23&title=%E7%AC%AC%E4%B8%80%E7%8E%B0%E5%9C%BA&adplay=1&cid=o9tab7nuu0q3esh
	 *
	 * The vid is taken from the query string when present; otherwise the page is
	 * downloaded and the vid is read from its "var VIDEO_INFO = {...}" script block.
	 * The vid is then sent to the getinfo endpoint and the reply is turned into a URL.
	 *
	 * @param string $url
	 * @return string media URL assembled from the getinfo reply
	 * @throws \Exception when the vid cannot be found, a request fails, or the reply is unusable
	 */
	private static function _parseQQ($url){
		$vid = self::_extractQqVid($url);

		if('' === $vid){
			$html = self::_httpGet($url);
			if(!preg_match('/var VIDEO_INFO = ({.*?})\s/', $html, $found))
				throw new \Exception('not found video info');

			$pageInfo = json_decode($found[1], true);
			if(is_array($pageInfo) && isset($pageInfo['vid'])) $vid = (string)$pageInfo['vid'];
		}
		if('' === $vid) throw new \Exception('vid not found');

		$response = self::_httpPost("http://vv.video.qq.com/getinfo", array(
			'otype' => 'json',
			'vid' => $vid,
			'format' => 'auto',
			'platform' => '11001',
			'sdtfrom' => 'v1103',
		));

		return self::_buildQqPlayUrl(self::_decodeQqJsonp($response));
	}

	/**
	 * Read the "vid" query parameter from a URL.
	 *
	 * The value ends at the next "&", at a "#", or at the end of the string, so a
	 * vid that is the last parameter is found as well.
	 *
	 * @param string $url
	 * @return string the vid, or '' when the URL carries none
	 */
	private static function _extractQqVid($url){
		if(preg_match('/[?&]vid=([^&#]+)/', (string)$url, $found)){
			return $found[1];
		}
		return '';
	}

	/**
	 * Unwrap a "name={...};" style reply and decode the JSON object inside it.
	 *
	 * Everything before the first "{" and after the last "}" is discarded.
	 *
	 * @param string $payload raw response body
	 * @return array decoded JSON object
	 * @throws \Exception when the payload holds no JSON object or it does not decode to an array
	 */
	private static function _decodeQqJsonp($payload){
		$payload = (string)$payload;
		$start = strpos($payload, '{');
		$end = strrpos($payload, '}');
		if(false === $start || false === $end || $end < $start)
			throw new \Exception('video info response is not JSON');

		$decoded = json_decode(substr($payload, $start, $end - $start + 1), true);
		if(!is_array($decoded))
			throw new \Exception('video info json parse fail');

		return $decoded;
	}

	/**
	 * Assemble the media URL from a decoded getinfo reply.
	 *
	 * Uses the first entry of fl.fi (format), the first entry of vl.vi (file) and
	 * the first entry of that file's ul.ui (base address).
	 *
	 * @param array $videoInfo decoded getinfo reply
	 * @return string "{base url}{file name}?type=mp4&vkey=...&br=...&fmt=..."
	 * @throws \Exception when a part needed to build the URL is missing
	 */
	private static function _buildQqPlayUrl(array $videoInfo){
		if(empty($videoInfo['fl']['fi']) || !is_array($videoInfo['fl']['fi']))
			throw new \Exception('video format list not found');
		if(empty($videoInfo['vl']['vi'][0]) || !is_array($videoInfo['vl']['vi'][0]))
			throw new \Exception('video file info not found');

		$format = reset($videoInfo['fl']['fi']);
		$fileInfo = $videoInfo['vl']['vi'][0];

		if(empty($fileInfo['ul']['ui']) || !is_array($fileInfo['ul']['ui']))
			throw new \Exception('video address list not found');
		$addrInfo = reset($fileInfo['ul']['ui']);

		if(empty($addrInfo['url']) || empty($fileInfo['fn']) || empty($fileInfo['fvkey']))
			throw new \Exception('video address incomplete');

		// Bitrate and format name are informational; a missing value becomes an empty string.
		$bitrate = (is_array($format) && isset($format['br'])) ? $format['br'] : '';
		$formatName = (is_array($format) && isset($format['name'])) ? $format['name'] : '';

		return "{$addrInfo['url']}{$fileInfo['fn']}?type=mp4&vkey={$fileInfo['fvkey']}&br={$bitrate}&fmt={$formatName}";
	}

	/**
	 * Youku - parsing is currently disabled.
	 * http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html
	 * http://player.youku.com/player.php/sid/XMjU0NjI2Njg4/v.swf
	 *
	 * The implementation that used to follow the throw could never run and has been removed.
	 *
	 * @param string $url
	 * @return void never returns
	 * @throws \Exception always
	 */
	private static function _parseYouKu($url){
		throw new \Exception('暂不支持解析优酷!');
	}

	/**
	 * Tudou - parsing is currently disabled.
	 * http://www.tudou.com/programs/view/Wtt3FjiDxEE/
	 * http://www.tudou.com/v/Wtt3FjiDxEE/v.swf
	 * http://www.tudou.com/playlist/p/a65718.html?iid=74909603
	 * http://www.tudou.com/l/G5BzgI4lAb8/&iid=74909603/v.swf
	 *
	 * The implementation that used to follow the throw could never run and called
	 * helpers this class does not define (_fget, _fsget); it has been removed.
	 *
	 * @param string $url
	 * @return void never returns
	 * @throws \Exception always
	 */
	private static function _parseTuDou($url){
		throw new \Exception('暂不支持解析土豆!');
	}

	/**
	 * Ku6 - parsing is currently disabled.
	 * http://v.ku6.com/film/show_520/3X93vo4tIS7uotHg.html
	 * http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html
	 * http://v.ku6.com/show/7US-kDXjyKyIInDevhpwHg...html
	 * http://player.ku6.com/refer/3X93vo4tIS7uotHg/v.swf
	 *
	 * The implementation that used to follow the throw could never run and called
	 * a helper this class does not define (_fget); it has been removed.
	 *
	 * @param string $url
	 * @return void never returns
	 * @throws \Exception always
	 */
	private static function _parseKu6($url){
		throw new \Exception('暂不支持解析酷6!');
	}

	/**
	 * 56.com - parsing is currently disabled.
	 * http://www.56.com/u73/v_NTkzMDcwNDY.html
	 * http://player.56.com/v_NTkzMDcwNDY.swf
	 *
	 * The implementation that used to follow the throw could never run and called
	 * a helper this class does not define (_cget); it has been removed.
	 *
	 * @param string $url
	 * @return void never returns
	 * @throws \Exception always
	 */
	private static function _parse56($url){
		throw new \Exception('暂不支持解析56!');
	}

	/**
	 * LeTV - parsing is currently disabled.
	 * http://www.letv.com/ptv/vplay/1168109.html
	 * http://www.letv.com/player/x1168109.swf
	 *
	 * The implementation that used to follow the throw could never run and called
	 * a helper this class does not define (_fget); it has been removed.
	 *
	 * @param string $url
	 * @return void never returns
	 * @throws \Exception always
	 */
	private static function _parseLeTV($url){
		throw new \Exception('暂不支持解析乐视!');
	}

	/**
	 * Sohu TV - parsing is currently disabled.
	 * http://my.tv.sohu.com/u/vw/5101536
	 *
	 * The implementation that used to follow the throw could never run and read a
	 * page body that was never fetched; it has been removed.
	 *
	 * @param string $url
	 * @return void never returns
	 * @throws \Exception always
	 */
	private static function _parseSoHu($url){
		throw new \Exception('暂不支持解析搜狐!');
	}

	/**
	 * Sina video - parsing is currently disabled.
	 * http://video.sina.com.cn/v/b/48717043-1290055681.html
	 * http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=48717043_1290055681_PUzkSndrDzXK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdatoG5ynSA9kEqDhAQJA4dPkm0x4/s.swf
	 *
	 * The implementation that used to follow the throw could never run and called
	 * a helper this class does not define (_fget); it has been removed.
	 *
	 * @param string $url
	 * @return void never returns
	 * @throws \Exception always
	 */
	private static function _parseSina($url){
		throw new \Exception('暂不支持解析新浪播客!');
	}

	/**
	 * Fetch a remote resource with HTTP GET.
	 *
	 * @param string $url
	 * @param bool   $isJsonParse decode the body as JSON before returning it
	 * @return mixed response body, or the decoded JSON when $isJsonParse is true
	 * @throws \Exception on transport failure or when JSON decoding yields nothing
	 */
	private static function _httpGet($url, $isJsonParse = false){
		// Must be 'get': passing 'post' here turned every page fetch into an empty POST.
		return self::_http($url, 'get', array(), $isJsonParse);
	}

	/**
	 * Send data to a remote resource with HTTP POST.
	 *
	 * @param string $url
	 * @param array  $data        POST fields
	 * @param bool   $isJsonParse decode the body as JSON before returning it
	 * @return mixed response body, or the decoded JSON when $isJsonParse is true
	 * @throws \Exception on transport failure or when JSON decoding yields nothing
	 */
	private static function _httpPost($url, $data = array(), $isJsonParse = false){
		return self::_http($url, 'post', $data, $isJsonParse);
	}

	/**
	 * Perform an HTTP request through cURL.
	 *
	 * Redirects are followed and USER_AGENT is sent. Any method other than "post"
	 * (compared case-insensitively) results in cURL's default GET request.
	 *
	 * NOTE(review): CURLOPT_SSL_VERIFYPEER is disabled, as in the original code, so
	 * HTTPS certificates are not validated; and no timeout is configured, so a
	 * stalled server can block the caller. Both are left unchanged here - confirm
	 * whether they are still wanted.
	 *
	 * @param string $url
	 * @param string $method      "post" or anything else for GET
	 * @param array  $data        POST fields, used only for "post"
	 * @param bool   $isJsonParse decode the body as JSON before returning it
	 * @return mixed response body, or the decoded JSON when $isJsonParse is true
	 * @throws \Exception on transport failure (code = cURL errno) or when JSON decoding yields an empty result
	 */
	private static function _http($url, $method, $data = array(), $isJsonParse = false){
		$curl = curl_init();
		if(false === $curl) throw new \Exception("网络请求失败[url:{$url},errMsg:curl_init failed]");

		curl_setopt_array($curl, array(
			CURLOPT_URL            => $url,
			CURLOPT_HEADER         => 0,
			CURLOPT_RETURNTRANSFER => 1,
			CURLOPT_SSL_VERIFYPEER => false, // certificate chain is not verified (see NOTE above)
			CURLOPT_SSL_VERIFYHOST => 2,
			CURLOPT_FOLLOWLOCATION => 1,
			CURLOPT_USERAGENT      => self::USER_AGENT,
		));

		if('post' === strtolower((string)$method)){
			curl_setopt($curl, CURLOPT_POST, 1);
			curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
		}

		$res = curl_exec($curl);
		if(false === $res){
			// Read the error details first, then release the handle before throwing.
			$error = curl_error($curl);
			$code = curl_errno($curl);
			curl_close($curl);
			throw new \Exception("网络请求失败[url:{$url},errMsg:{$error}]", $code);
		}
		curl_close($curl);

		if($isJsonParse){
			$res = json_decode($res, true);
			if(!$res) throw new \Exception('remote data json parse fail');
		}
		return $res;
	}

	/**
	 * Build a random alphanumeric key.
	 *
	 * Not called by any reachable code in this class at present. The generator is
	 * not cryptographically secure, so do not use the result as a secret.
	 *
	 * @param int $num number of characters to generate
	 * @return string key of $num characters taken from 0-9, a-z and A-Z ('' when $num <= 0)
	 */
	private static function buildKey($num){
		$codes = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
		$last = strlen($codes) - 1;
		$key = '';
		for($i = 0; $i < $num; $i++){
			$key .= $codes[mt_rand(0, $last)];
		}
		return $key;
	}

	/**
	 * Determine the client IP address.
	 *
	 * Checks the environment variables HTTP_CLIENT_IP, HTTP_X_FORWARDED_FOR and
	 * REMOTE_ADDR in that order, then $_SERVER['REMOTE_ADDR'], and returns the
	 * first value that is non-empty and not "unknown" (case-insensitive).
	 * Not called by any reachable code in this class at present.
	 *
	 * NOTE(review): the two HTTP_* values come from request headers and can be set
	 * by the client, so the result must not be trusted for access control.
	 *
	 * @return string the address found, or "" when none is available
	 */
	private static function getIp(){
		foreach(array("HTTP_CLIENT_IP", "HTTP_X_FORWARDED_FOR", "REMOTE_ADDR") as $name){
			$value = getenv($name);
			if($value && strcasecmp($value, "unknown")) return $value;
		}

		if(isset($_SERVER['REMOTE_ADDR']) && $_SERVER['REMOTE_ADDR'] && strcasecmp($_SERVER['REMOTE_ADDR'], "unknown"))
			return $_SERVER['REMOTE_ADDR'];

		return "";
	}
}

// 评论回复

// 回到顶部