LWP::UserAgent 下载知乎视频

There's more than one way to do it!
https://metacpan.org http://perlmonks.org
头像
523066680
Administrator
Administrator
帖子: 347
注册时间: 2016年07月19日 12:14
拥有现金: 锁定
储蓄: 锁定
Has thanked: 31 times
Been thanked: 31 times
联系:

LWP::UserAgent 下载知乎视频

帖子 #1 523066680 » 2018年06月28日 15:52

首发:https://zhuanlan.zhihu.com/p/36865994

视频示例:https://www.zhihu.com/question/271736973/answer/389377346

其中 use Modern::Perl; 不是必需的。

Code: [全选] [展开/收缩] [Download] (GetVideo.pl)
  1. =info
  2.     Author: 523066680
  3.     Date: 2018-05
  4. =cut
  5.  
  6. use Modern::Perl;
  7. use LWP::UserAgent;
  8. use File::Slurp;
  9. use JSON;
  10. STDOUT->autoflush(1);
  11.  
  12. goto_dir("D:/temp");
  13. our $main = "https://lens.zhihu.com/api/videos/";
  14. our $ua = LWP::UserAgent->new(  );
  15. our $target = "https://www.zhihu.com/question/271736973/answer/389377346";
  16.  
  17. my $res = $ua->get( $target );
  18. my $html = $res->content();
  19. my @video = $html=~/>https:.*?video\/(\d+)</g;
  20. my $oauth = get_oauth( $html );
  21.  
  22. for my $idx ( 0 .. $#video )
  23. {
  24.     printf "Getting video %s - %s\n", $idx, $video[$idx];
  25.     my @vlinks = get_video_links(  $oauth, $video[$idx] );
  26.     get_video( @vlinks );
  27. }
  28.  
  29. # 获取 m3u8 列表并提取链接
  30. sub get_video_links
  31. {
  32.     our ($main, $ua);
  33.     my ( $oauth, $pgcode ) = @_;
  34.  
  35.     my $res = $ua->get(
  36.                 $main .$pgcode,
  37.                 "authorization" => $oauth,
  38.             );
  39.  
  40.     die unless $res->is_success();
  41.  
  42.     my $data = decode_json( $res->content );
  43.     my $play_url = $data->{playlist}->{sd}->{play_url};  # m3u8 url
  44.     my $pre_url;
  45.  
  46.     # 获取网址共用部分
  47.     $play_url =~/(.*?\w{32})/;  
  48.     $pre_url = $1 ."/";
  49.  
  50.     $res = $ua->get( $play_url );
  51.     my @vlinks = $res->content =~/\n(.*?\d+\.ts.*?)\n/g;
  52.     grep { $_ = $pre_url . $_ } @vlinks;
  53.  
  54.     return $pgcode, @vlinks;
  55. }
  56.  
  57. # 获取视频切片,合并
  58. sub get_video
  59. {
  60.     our $ua;
  61.     my $name = shift;
  62.     my $buff = "";
  63.     my $res;
  64.  
  65.     while ( my $link = shift )
  66.     {
  67.         print $#_ + 1 ," ";
  68.         $res = $ua->get( $link );
  69.         $buff .= $res->content();
  70.     }
  71.     print "\n";
  72.  
  73.     write_file( "${name}.ts", {binmode=>":raw"}, $buff );
  74. }
  75.  
  76. sub get_oauth
  77. {
  78.     our ( $ua );
  79.     my $html = shift;
  80.     my ($js) = $html =~/(https:[^<>]+main\.app[^<>]+js)/g;
  81.     my $res = $ua->get( $js );
  82.     # pattern: authorization:"oauth c3cef7c66a1843f8b3a9e6a1e3160e20"}
  83.     my ($oauth) = $res->content =~/authorization:"([^"]{30,})"/;
  84.     return $oauth
  85. }
  86.  
  87. sub goto_dir
  88. {
  89.     my $dir = shift;
  90.     mkdir $dir unless ( -e $dir );
  91.     chdir $dir;
  92. }
  93.  
  94. __DATA__

头像
523066680
Administrator
Administrator
帖子: 347
注册时间: 2016年07月19日 12:14
拥有现金: 锁定
储蓄: 锁定
Has thanked: 31 times
Been thanked: 31 times
联系:

2018-07 更新

帖子 #2 523066680 » 2018年07月09日 15:51

更新内容 - 知乎去掉了 oauth 授权的部分
Code: [全选] [展开/收缩] [Download] (downVideo.pl)
  1. =info
  2.     Author: 523066680
  3.     Date: 2018-07
  4.     更新:知乎去掉了 oauth 授权方式
  5. =cut
  6.  
  7. use Modern::Perl;
  8. use LWP::UserAgent;
  9. use File::Slurp;
  10. use JSON;
  11. STDOUT->autoflush(1);
  12.  
  13. goto_dir("D:/temp");
  14. our $main = "https://lens.zhihu.com/api/videos/";
  15. our $ua = LWP::UserAgent->new(  );
  16. our $target = "https://www.zhihu.com/question/271736973/answer/389377346";
  17.  
  18. my $res = $ua->get( $target );
  19. my $html = $res->content();
  20. my @video = $html=~/>https:.*?video\/(\d+)</g;
  21.  
  22. for my $idx ( 0 .. $#video )
  23. {
  24.     printf "Getting video %s - %s\n", $idx, $video[$idx];
  25.     my @vlinks = get_video_links( $video[$idx] );
  26.     get_video( @vlinks );
  27. }
  28.  
  29. # 获取 m3u8 列表并提取链接
  30. sub get_video_links
  31. {
  32.     our ($main, $ua);
  33.     my ( $pgcode ) = @_;
  34.  
  35.     my $res = $ua->get( $main .$pgcode );
  36.  
  37.     die unless $res->is_success();
  38.  
  39.     my $data = decode_json( $res->content );
  40.     my $play_url = $data->{playlist}->{sd}->{play_url};  # m3u8 url
  41.     my $pre_url;
  42.  
  43.     # 获取网址共用部分
  44.     $play_url =~/(.*?\w{32})/;  
  45.     $pre_url = $1 ."/";
  46.  
  47.     $res = $ua->get( $play_url );
  48.     my @vlinks = $res->content =~/\n(.*?\d+\.ts.*?)\n/g;
  49.     grep { $_ = $pre_url . $_ } @vlinks;
  50.  
  51.     return $pgcode, @vlinks;
  52. }
  53.  
  54. # 获取视频切片,合并
  55. sub get_video
  56. {
  57.     our $ua;
  58.     my $name = shift;
  59.     my $buff = "";
  60.     my $res;
  61.  
  62.     while ( my $link = shift )
  63.     {
  64.         print $#_ + 1 ," ";
  65.         $res = $ua->get( $link );
  66.         $buff .= $res->content();
  67.     }
  68.     print "\n";
  69.  
  70.     write_file( "${name}.ts", {binmode=>":raw"}, $buff );
  71. }
  72.  
  73. sub goto_dir
  74. {
  75.     my $dir = shift;
  76.     mkdir $dir unless ( -e $dir );
  77.     chdir $dir;
  78. }


回到 “Perl”

在线用户

用户浏览此论坛: 没有注册用户 和 0 访客