LWP::UserAgent 下载知乎视频

There's more than one way to do it!
https://metacpan.org http://perlmonks.org
头像
523066680
Administrator
Administrator
帖子: 369
注册时间: 2016年07月19日 12:14
拥有现金: 锁定
储蓄: 锁定
Has thanked: 32 times
Been thanked: 36 times
联系:

LWP::UserAgent 下载知乎视频

帖子 #1 523066680 » 2018年06月28日 15:52

首发:https://zhuanlan.zhihu.com/p/36865994

视频示例:https://www.zhihu.com/question/271736973/answer/389377346

其中 use Modern::Perl; 不是必需的。

2018-10 知乎的视频方案已经更新为MP4单文件,更新代码移步三楼

Code: [全选] [展开/收缩] [Download] (GetVideo.pl)
  1. =info
  2.     Author: 523066680
  3.     Date: 2018-05
  4. =cut
  5.  
  6. use Modern::Perl;
  7. use LWP::UserAgent;
  8. use File::Slurp;
  9. use JSON;
  10. STDOUT->autoflush(1);
  11.  
  12. goto_dir("D:/temp");
  13. our $main = "https://lens.zhihu.com/api/videos/";
  14. our $ua = LWP::UserAgent->new(  );
  15. our $target = "https://www.zhihu.com/question/271736973/answer/389377346";
  16.  
  17. my $res = $ua->get( $target );
  18. my $html = $res->content();
  19. my @video = $html=~/>https:.*?video\/(\d+)</g;
  20. my $oauth = get_oauth( $html );
  21.  
  22. for my $idx ( 0 .. $#video )
  23. {
  24.     printf "Getting video %s - %s\n", $idx, $video[$idx];
  25.     my @vlinks = get_video_links(  $oauth, $video[$idx] );
  26.     get_video( @vlinks );
  27. }
  28.  
  29. # 获取 m3u8 列表并提取链接
  30. sub get_video_links
  31. {
  32.     our ($main, $ua);
  33.     my ( $oauth, $pgcode ) = @_;
  34.  
  35.     my $res = $ua->get(
  36.                 $main .$pgcode,
  37.                 "authorization" => $oauth,
  38.             );
  39.  
  40.     die unless $res->is_success();
  41.  
  42.     my $data = decode_json( $res->content );
  43.     my $play_url = $data->{playlist}->{sd}->{play_url};  # m3u8 url
  44.     my $pre_url;
  45.  
  46.     # 获取网址共用部分
  47.     $play_url =~/(.*?\w{32})/;  
  48.     $pre_url = $1 ."/";
  49.  
  50.     $res = $ua->get( $play_url );
  51.     my @vlinks = $res->content =~/\n(.*?\d+\.ts.*?)\n/g;
  52.     grep { $_ = $pre_url . $_ } @vlinks;
  53.  
  54.     return $pgcode, @vlinks;
  55. }
  56.  
  57. # 获取视频切片,合并
  58. sub get_video
  59. {
  60.     our $ua;
  61.     my $name = shift;
  62.     my $buff = "";
  63.     my $res;
  64.  
  65.     while ( my $link = shift )
  66.     {
  67.         print $#_ + 1 ," ";
  68.         $res = $ua->get( $link );
  69.         $buff .= $res->content();
  70.     }
  71.     print "\n";
  72.  
  73.     write_file( "${name}.ts", {binmode=>":raw"}, $buff );
  74. }
  75.  
  76. sub get_oauth
  77. {
  78.     our ( $ua );
  79.     my $html = shift;
  80.     my ($js) = $html =~/(https:[^<>]+main\.app[^<>]+js)/g;
  81.     my $res = $ua->get( $js );
  82.     # pattern: authorization:"oauth c3cef7c66a1843f8b3a9e6a1e3160e20"}
  83.     my ($oauth) = $res->content =~/authorization:"([^"]{30,})"/;
  84.     return $oauth
  85. }
  86.  
  87. sub goto_dir
  88. {
  89.     my $dir = shift;
  90.     mkdir $dir unless ( -e $dir );
  91.     chdir $dir;
  92. }
  93.  
  94. __DATA__

头像
523066680
Administrator
Administrator
帖子: 369
注册时间: 2016年07月19日 12:14
拥有现金: 锁定
储蓄: 锁定
Has thanked: 32 times
Been thanked: 36 times
联系:

2018-07 更新

帖子 #2 523066680 » 2018年07月09日 15:51

更新内容 - 知乎去掉了 oauth 授权的部分
Code: [全选] [展开/收缩] [Download] (downVideo.pl)
  1. =info
  2.     Author: 523066680
  3.     Date: 2018-07
  4.     更新:知乎去掉了 oauth 授权方式
  5. =cut
  6.  
  7. use Modern::Perl;
  8. use LWP::UserAgent;
  9. use File::Slurp;
  10. use JSON;
  11. STDOUT->autoflush(1);
  12.  
  13. goto_dir("D:/temp");
  14. our $main = "https://lens.zhihu.com/api/videos/";
  15. our $ua = LWP::UserAgent->new(  );
  16. our $target = "https://www.zhihu.com/question/271736973/answer/389377346";
  17.  
  18. my $res = $ua->get( $target );
  19. my $html = $res->content();
  20. my @video = $html=~/>https:.*?video\/(\d+)</g;
  21.  
  22. for my $idx ( 0 .. $#video )
  23. {
  24.     printf "Getting video %s - %s\n", $idx, $video[$idx];
  25.     my @vlinks = get_video_links( $video[$idx] );
  26.     get_video( @vlinks );
  27. }
  28.  
  29. # 获取 m3u8 列表并提取链接
  30. sub get_video_links
  31. {
  32.     our ($main, $ua);
  33.     my ( $pgcode ) = @_;
  34.  
  35.     my $res = $ua->get( $main .$pgcode );
  36.  
  37.     die unless $res->is_success();
  38.  
  39.     my $data = decode_json( $res->content );
  40.     my $play_url = $data->{playlist}->{sd}->{play_url};  # m3u8 url
  41.     my $pre_url;
  42.  
  43.     # 获取网址共用部分
  44.     $play_url =~/(.*?\w{32})/;  
  45.     $pre_url = $1 ."/";
  46.  
  47.     $res = $ua->get( $play_url );
  48.     my @vlinks = $res->content =~/\n(.*?\d+\.ts.*?)\n/g;
  49.     grep { $_ = $pre_url . $_ } @vlinks;
  50.  
  51.     return $pgcode, @vlinks;
  52. }
  53.  
  54. # 获取视频切片,合并
  55. sub get_video
  56. {
  57.     our $ua;
  58.     my $name = shift;
  59.     my $buff = "";
  60.     my $res;
  61.  
  62.     while ( my $link = shift )
  63.     {
  64.         print $#_ + 1 ," ";
  65.         $res = $ua->get( $link );
  66.         $buff .= $res->content();
  67.     }
  68.     print "\n";
  69.  
  70.     write_file( "${name}.ts", {binmode=>":raw"}, $buff );
  71. }
  72.  
  73. sub goto_dir
  74. {
  75.     my $dir = shift;
  76.     mkdir $dir unless ( -e $dir );
  77.     chdir $dir;
  78. }

头像
523066680
Administrator
Administrator
帖子: 369
注册时间: 2016年07月19日 12:14
拥有现金: 锁定
储蓄: 锁定
Has thanked: 32 times
Been thanked: 36 times
联系:

2018-10 更新,现在是MP4单文件下载了 Re: LWP::UserAgent 下载知乎视频

帖子 #3 523066680 » 2018年10月16日 14:51

Code: [全选] [展开/收缩] [Download] (downZhihuVideo.pl)
  1. =info
  2.     Author: 523066680
  3.     2018-07 知乎去掉了 oauth 授权方式
  4.     2018-10 从 ts 多文件,变更为 mp4 单文件下载
  5. =cut
  6.  
  7. use JSON;
  8. use Encode qw/from_to/;
  9. use LWP::UserAgent;
  10. use Mojo::DOM;
  11. use File::Slurp;
  12. STDOUT->autoflush(1);
  13.  
  14. our $wdir = "D:/temp";
  15. our $main = "https://lens.zhihu.com/api/videos/";
  16. our $ua = LWP::UserAgent->new();
  17. our $target = "https://www.zhihu.com/question/271736973/answer/389377346";
  18.  
  19. my $res = $ua->get( $target );
  20. my $html = $res->content();
  21. my @video = $html=~/>https:.*?video\/(\d+)</g;  # 获取视频页面链接
  22. my $title = get_title_name( $html );
  23. my ($answerID) = ($target=~/\/(\d+)$/);
  24.  
  25. mkdir $wdir unless -e $wdir;
  26. chdir $wdir;
  27.  
  28. for my $idx ( 0 .. $#video )
  29. {
  30.     printf "Getting video %s - %s\n", $idx, $video[$idx];
  31.     get_video( $video[$idx], "${title}Answer_${answerID}_${idx}.mp4" );
  32. }
  33.  
  34. sub get_video
  35. {
  36.     our ($main, $ua);
  37.     my ( $pgcode, $fname ) = @_;
  38.  
  39.     my $res = $ua->get( $main .$pgcode );
  40.     die unless $res->is_success();
  41.  
  42.     my $data = decode_json( $res->content );
  43.     my $play_url = $data->{playlist}->{sd}->{play_url};
  44.  
  45.     $res = $ua->get( $play_url );
  46.     write_file( $fname, {binmode=>":raw"}, $res->content );
  47. }
  48.  
  49. sub get_title_name
  50. {
  51.     my $html = shift;
  52.     my $dom = Mojo::DOM->new($html);
  53.     my $title = $dom->at("title")->text;
  54.     $title =~s/ - 知乎//;
  55.     from_to( $title, "utf8", "gbk" );
  56.     return $title;
  57. }

头像
523066680
Administrator
Administrator
帖子: 369
注册时间: 2016年07月19日 12:14
拥有现金: 锁定
储蓄: 锁定
Has thanked: 32 times
Been thanked: 36 times
联系:

知乎视频推荐 Re: LWP::UserAgent 下载知乎视频

帖子 #4 523066680 » 2018年10月26日 19:16

会在这个帖子下追加一些推荐:

物理有多有趣? - 沐心的回答(和问题关系不大但是视频很满足强迫症哈哈)
https://www.zhihu.com/question/31063539/answer/509528704

有哪些内容引起极度舒适的视频? - 开眼视频的回答
https://www.zhihu.com/question/295562963/answer/522899377

有哪些内容引起极度舒适的视频? - 别人家的诸葛孔明的回答 - 知乎
https://www.zhihu.com/question/295562963/answer/524535772


回到 “Perl”

在线用户

用户浏览此论坛: 没有注册用户 和 1 访客