小红书无水印视频/图集解析实现思路(PHP)
一、开门见山
直接贴代码,有兴趣往下看,没兴趣 Ctrl + C
/**
 * Resolves Xiaohongshu share links into direct media URLs.
 *
 * The note page is fetched with browser-like headers, the inline
 * `window.__INITIAL_STATE__` object is extracted from the HTML, and the image
 * list or the video key is read out of it.
 */
class Video {
    /**
     * Parse a Xiaohongshu note link.
     *
     * Supported hosts: `www.xiaohongshu.com` (link copied from the browser) and
     * `xhslink.com` (short share link, resolved through its `Location` header).
     *
     * @param string $url Note URL or short share URL.
     * @return array On success: `code` 300 (image note) or 200 (video note),
     *               `msg`, and a `data` map. When the page cannot be fetched or
     *               parsed: `['code' => -1, 'msg' => <reason>]`.
     *
     * NOTE: for an unsupported host this method terminates the script with a
     * JSON error body (kept for backward compatibility with existing callers).
     */
    public function redbook($url) {
        $ip = $this->get_randip();
        $parts = parse_url($url);
        $host = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';

        switch ($host) {
            case 'www.xiaohongshu.com':
                $id = $this->extract_note_id(isset($parts['path']) ? $parts['path'] : '');
                $loc = 'https://www.xiaohongshu.com/discovery/item/' . $id;
                break;
            case 'xhslink.com':
                // With the associative flag, `Location` is a string for a single
                // redirect and an array for a redirect chain; indexing it with
                // [0] directly would return just the first character of a string.
                $loc = $this->first_header_value(get_headers($url, true), 'Location');
                if ($loc === null) {
                    return $this->failure('短链接解析失败');
                }
                $target = parse_url($loc);
                $id = $this->extract_note_id(
                    (is_array($target) && isset($target['path'])) ? $target['path'] : ''
                );
                break;
            default:
                // Same flags as the former magic number 480.
                exit(json_encode(
                    ['code' => -1, 'msg' => "视频链接不正确"],
                    JSON_NUMERIC_CHECK | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE
                ));
        }

        if ($id === '') {
            return $this->failure('笔记ID提取失败');
        }

        // Browser-like request headers. The Cookie is a captured web session and
        // will eventually need to be replaced with a fresh one.
        $header = [
            'User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0',
            'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Referer: https://www.xiaohongshu.com/',
            'Cookie: abRequestId=9b9a13ea-1720-51e9-a6de-331abacaa69e; webBuild=4.9.0; a1=18eb6b4d5afmq46r71nf5oo7195et621myj77skjs50000382501; webId=3c8dab9561d74f401b898f922ccbc720; gid=yYdDKD4fSy6iyYdDKD4f2vMT0i6A4KFWyIi2VVMllWyj2W28d9KJyW888qYJ28y8DJ00dYYj; unread={%22ub%22:%2266037cba000000000d00f96c%22%2C%22ue%22:%2265f38c00000000000d00cbc9%22%2C%22uc%22:29}; xsecappid=xhs-pc-web; websectiga=7750c37de43b7be9de8ed9ff8ea0e576519e8cd2157322eb972ecb429a7735d4; sec_poison_id=abc0abe9-c5e3-4933-a48b-33aeacb1b480; web_session=0400698d0903920b8e59620d27344be55719de',
            'Connection: keep-alive',
            'CLIENT-IP:' . $ip,
            'X-FORWARDED-FOR:' . $ip,
        ];

        $html = $this->curl_general($loc, $header);
        if (!is_string($html) || $html === '') {
            return $this->failure('页面请求失败');
        }

        // The page state is matched from `window.__INITIAL_STATE__` up to the
        // closing `redmojiMap":{}}}}` marker.
        if (!preg_match('/window\.__INITIAL_STATE__.+redmojiMap\"\:\{\}\}\}\}/', $html, $matches)) {
            return $this->failure('页面数据解析失败');
        }
        $raw = str_replace('window.__INITIAL_STATE__=', "", $matches[0]);
        // The blob is a JavaScript literal: `undefined` is not valid JSON.
        $json = json_decode(str_replace('undefined', 'null', $raw), true);

        if (!is_array($json)
            || !isset($json['note']['noteDetailMap'][$id]['note'])
            || !is_array($json['note']['noteDetailMap'][$id]['note'])) {
            return $this->failure('未找到笔记数据');
        }
        $note = $json['note']['noteDetailMap'][$id]['note'];

        // Collect the URL stored at infoList[1] of every image entry; entries
        // without that slot are skipped instead of raising warnings.
        $images = [];
        $imageList = (isset($note['imageList']) && is_array($note['imageList'])) ? $note['imageList'] : [];
        foreach ($imageList as $image) {
            if (isset($image['infoList'][1]['url'])) {
                $images[] = $image['infoList'][1]['url'];
            }
        }

        $user = $this->field($note, 'user');
        $data = [
            'author' => $this->field($user, 'nickname'),
            'uid' => $this->field($user, 'userId'),
            'avatar' => $this->field($user, 'avatar'),
            'cover' => isset($images[0]) ? $images[0] : null,
            'like' => $this->field($this->field($note, 'interactInfo'), 'likedCount'),
        ];

        if (empty($note['video'])) {
            // Image note.
            $data['title'] = $this->field($note, 'title');
            $data['images_num'] = count($images);
            $data['images'] = $images;
            return ['code' => 300, 'msg' => '解析成功', 'data' => $data];
        }

        // Video note: the file URL is the CDN prefix plus originVideoKey.
        if (!isset($note['video']['consumer']['originVideoKey'])) {
            return $this->failure('未找到视频地址');
        }
        $data['title'] = $this->field($note, 'desc');
        $data['url'] = 'https://sns-video-bd.xhscdn.com/' . $note['video']['consumer']['originVideoKey'];
        return ['code' => 200, 'msg' => '解析成功', 'data' => $data];
    }

    /**
     * Build the error payload returned when a note cannot be resolved.
     *
     * @param string $msg Reason shown to the caller.
     * @return array
     */
    private function failure($msg) {
        return ['code' => -1, 'msg' => $msg];
    }

    /**
     * Read one key from an array, yielding null when it is absent.
     *
     * @param mixed $data Array to read from (anything else yields null).
     * @param string $key
     * @return mixed|null
     */
    private function field($data, $key) {
        return (is_array($data) && isset($data[$key])) ? $data[$key] : null;
    }

    /**
     * Extract the note id from a URL path such as `/explore/<id>` or
     * `/discovery/item/<id>`.
     *
     * @param string $path Path component of the note URL.
     * @return string Note id, or '' when the path is empty.
     */
    private function extract_note_id($path) {
        return trim(str_replace(['/explore/', '/discovery/item/'], '', (string) $path), '/');
    }

    /**
     * Return the first value of a header from a get_headers() result.
     *
     * @param array|false $headers Result of get_headers($url, true).
     * @param string $name Header name, matched case-insensitively.
     * @return string|null First value, or null when missing or unusable.
     */
    private function first_header_value($headers, $name) {
        if (!is_array($headers)) {
            return null;
        }
        $wanted = strtolower($name);
        foreach ($headers as $key => $value) {
            if (!is_string($key) || strtolower($key) !== $wanted) {
                continue;
            }
            if (is_array($value)) {
                $value = reset($value);
            }
            return (is_string($value) && $value !== '') ? $value : null;
        }
        return null;
    }

    /**
     * Pick a random IPv4 address from a fixed table of address ranges; the
     * result is sent in the CLIENT-IP / X-FORWARDED-FOR request headers.
     *
     * @return string Dotted-quad IPv4 address.
     */
    private function get_randip() {
        // Range bounds as signed 32-bit integers, as accepted by long2ip().
        $ranges = [
            [607649792, 608174079],     // 36.56.0.0-36.63.255.255
            [1038614528, 1039007743],   // 61.232.0.0-61.237.255.255
            [1783627776, 1784676351],   // 106.80.0.0-106.95.255.255
            [2035023872, 2035154943],   // 121.76.0.0-121.77.255.255
            [2078801920, 2079064063],   // 123.232.0.0-123.235.255.255
            [-1950089216, -1948778497], // 139.196.0.0-139.215.255.255
            [-1425539072, -1425014785], // 171.8.0.0-171.15.255.255
            [-1236271104, -1235419137], // 182.80.0.0-182.92.255.255
            [-770113536, -768606209],   // 210.25.0.0-210.47.255.255
            [-569376768, -564133889],   // 222.16.0.0-222.95.255.255
        ];
        // Derive the index bound from the table so it cannot drift out of sync.
        $range = $ranges[mt_rand(0, count($ranges) - 1)];
        return long2ip(mt_rand($range[0], $range[1]));
    }

    /**
     * Perform a GET request, following redirects, and return the body.
     *
     * @param string $url Address to fetch.
     * @param array $header Raw request header lines.
     * @return string|false Response body, or false when the transfer fails.
     */
    private function curl_general($url, $header) {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => '',
            CURLOPT_MAXREDIRS => 100,
            // Bounded timeouts: a value of 0 would wait indefinitely.
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_FOLLOWLOCATION => true,
            // NOTE(review): TLS verification is disabled, so the session cookie
            // can be exposed to a man-in-the-middle. Enable both options once
            // the host has a working CA bundle.
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_HTTPHEADER => $header,
        ]);
        $output = curl_exec($ch);
        curl_close($ch);
        return $output;
    }
}
二、实现方法
1、id提取
- www.xiaohongshu.com 浏览器直接复制过来的
- xhslink.com 分享的短链地址
首先拿到笔记的 id:浏览器直接复制的链接从链接路径中提取;短链接则先从响应 header 的 Location(即重定向地址)中拿到完整链接,再从中提取。
这个id后面提取json中还会用到
拼接小红书视频/图集地址:https://www.xiaohongshu.com/discovery/item/{小红书id}
2、请求地址
请求这个地址,请求头(header)中需包含 Cookie 和 Accept,这两个值可以在登录小红书网页版后从浏览器的请求头中直接复制。
请求中的内容包含 json
格式文本,正则提取 json
格式文本,json
文本位于 window.__INITIAL_STATE__=
后
正则表达式如下:/window\.__INITIAL_STATE__.+redmojiMap\"\:\{\}\}\}\}/
,php示例如下:
// Match the inline page state, from "window.__INITIAL_STATE__" up to the closing `redmojiMap":{}}}}` marker.
preg_match('/window\.__INITIAL_STATE__.+redmojiMap\"\:\{\}\}\}\}/',$html,$arr);
// Drop the JavaScript assignment prefix so only the object literal remains.
$arr = str_replace('window.__INITIAL_STATE__=' , "", $arr[0]);
// Replace "undefined" (not valid JSON) with null, then decode into an associative array.
$json = json_decode(str_replace('undefined' ,'null', $arr),true);
3、获取图集地址
图集地址位于json文件中下面路径,id为第一步获取到的id,默认解析到的为无水印的地址,通过for循环存进数组。
$json['note']['noteDetailMap'][$id]['note']['imageList']
注意:请求头的 Accept 中需包含 image/webp,缺少它时请求到的图片会带水印,建议直接复制小红书网页请求中的 Accept。
#示例代码
// Number of entries in the note's image list.
$images_num=count($json['note']['noteDetailMap'][$id]['note']['imageList']);
// Copy the URL stored at infoList[1] of each image into $ch, keeping the original order.
for($i = 0; $i < $images_num; $i++){
$ch[$i] = $json['note']['noteDetailMap'][$id]['note']['imageList'][$i]['infoList'][1]['url'];
}
4、获取视频地址
通过拼接 originVideoKey
参数得到无水印的地址
originVideoKey
参数位于:$json['note']['noteDetailMap'][$id]['note']['video']['consumer']['originVideoKey']
拼接方法:'https://sns-video-bd.xhscdn.com/' . {originVideoKey}
#示例代码
// Response payload for a video note (code 200).
$arr = array(
'code' => 200,
'msg' => '解析成功',
'data' => [
// Author profile fields are read from the note's "user" entry.
'author' => $json['note']['noteDetailMap'][$id]['note']['user']['nickname'],
'uid' => $json['note']['noteDetailMap'][$id]['note']['user']['userId'],
'avatar'=>$json['note']['noteDetailMap'][$id]['note']['user']['avatar'],
// The first collected image URL is used as the cover.
'cover' => $ch[0],
'like' => $json['note']['noteDetailMap'][$id]['note']['interactInfo']['likedCount'],
// The title field is filled from the note's "desc" value.
'title' => $json['note']['noteDetailMap'][$id]['note']['desc'],
// Video URL = CDN prefix + originVideoKey.
'url' => 'https://sns-video-bd.xhscdn.com/' . $json['note']['noteDetailMap'][$id]['note']['video']['consumer']['originVideoKey'],
]
);