#!/usr/bin/perl
# Copyright © 2011, 2012 Petr Písař
# This is free software.  You may redistribute copies of it under the terms of
# the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
# There is NO WARRANTY, to the extent permitted by law.

# Changelog:
#
# Version 6
# - Accept iframe player URL with channel name in the file name
# - Send XML-RPC header needed for SMIL generator since 2012-05-09
#
# Version 5
# - Support Apple M3U and MPEG2-TS/MPEG-4 format
# - Improve HTML parser
# - Remove autoStart=false option from iframe player URL to get page with
# JSON instead of similar page with iframe again
# (http://www.ceskatelevize.cz/porady/10316155327-horizont-ct24/)
#
# Version 4
# - Support RTMP app with slashes
# - Report URI in error messages
#
# Version 3:
# - Output in rtmpdump(1) format if `-d' option is specified
# - Do not append playpath to URL if ambigous
#
# Version 2:
# - Output playpath as librtmp option if necessary
#   (http://www.ct24.cz/vysilani/10099403120-kultura-v-regionech/)
# - Perl 5.10 support
# - Find JSON via iframe first and fall back to direct JSON
# - More general example entry page URL in usage output
# - Show content provider error message if exists
#

use strict;
use warnings;
our $VERSION = 6;

use LWP::UserAgent;
use HTTP::Request::Common;
use HTTP::Response;
use XML::XPath;
use URI;
use JSON 2.0;
use Getopt::Std;
use IO::Handle;


my $SMIL_GENERATOR = '/ajax/playlistURL.php';
my $ENTRY = 'http://www.ceskatelevize.cz/ivysilani/zive/ct24/';

sub usage {
    return<<EOM;
Usage: ctstream    [-d]       [--] ENTRY_PAGE [STREAM_BITRATE]
   or: ctstream -A [-l|-f|-s] [--] ENTRY_PAGE [STREAM_BITRATE]

Get URLs of Czech Television video streams for specific ENTRY_PAGE (e.g.
<$ENTRY>). If this is the only
argument, output list of all available streams in format `STREAM_BITRATE: URL'
separated by new line. If STREAM_BITRATE is given, output URL of the stream
with given rate only.

By default, URL of RTMP/FLV/MPEG-4 stream is printed in librtmp(3) format
(space-separated librtmp options may follow the URL and all of them must be
passed as one argument to librtmp application).

If `-d' option is specied, URL with possible arguments are printed as
rtmpdump(1) arguments. Note ampersands are kept literal (this should work in
simple subshell substition).

If `-A' option is specified, HTTP/MPEG-TS/MPEG-4 video will be retrieved.
There exist three levels selected by second option:

  -l  Apple M3U play-list pointing to unbound stream segments will be output.
      This is handy if you have a player that supports the very special
      play-list. Specification can be found on
      <http://tools.ietf.org/html/draft-pantos-http-live-streaming>.

  -f  The Apple play-list will be processed and URLs of the underlying stream
      segmented to files will be printed. Due to nature of the play-list, the
      locators are printed periodically, a new segment a few seconds, possibly
      in endless loop. Intended workflow is to pipe the URLs to an HTTP client
      which echoes downloaded segments on standard input of a multimedia
      player. This is the default level. The bit rate must be specified, if
      more bit rates are available.

  -s  The stream segments will be retrieved and dumped to standard output as
      continuous stream. You can pipe it your player. The bit rate must be
      specified, if more bit rates are available.

Version: $VERSION.
Copyright © 2011, 2012 Petr Písař
This is free software.  You may redistribute copies of it under the terms of
the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
There is NO WARRANTY, to the extent permitted by law.
EOM
}

our ($opt_A, $opt_d, $opt_l, $opt_f, $opt_s);
getopts('Adlfs') or 
    die usage;

if ($#ARGV < 0 || $#ARGV > 1) {
    die "Bad invocation\n\n" . usage;
}
$ENTRY = $ARGV[0];
my $BITRATE = $ARGV[1];

if (!($opt_l || $opt_s)) { $opt_f = 1; }


# each that operates on reference to array or hash
# Works with perl 5.10.1 too.
sub eachref {
    my $ref = shift;
            
    if (ref $ref eq 'HASH') {
        # Built-in implementation always supports HASH
        return sub {
            each %$ref;
        }
    }

    if (eval 'each @$ref' ) {
        # Built-in Perl 5.12 implementation
        eval 'return sub { 
            each @$ref;
        }'
    } else {
        # Manual implementation of each ARRAY (needed for Perl < 5.12)
        my $index = -1;
   
        return sub {
            $index++;
            if ($index <= $#$ref) {
                ($index, $$ref[$index]);
            } else {
                ();
            }
        }
    }
}


# Convert nested JSON structure expressed as native hash reference into flat
# array of key and value pairs.
# E.g. { "x" => [ "y" => "1", "z" => undef ] }
# becomes ( "x[0][y]", "1", "x[1][z] => null ).
# This is handy when sending nested JSON structure as
# application/x-www-form-urlencoded by HTTP::Request::Common.
sub flatten {
    my ($ref, $prefix) = @_;
    my @output = ();
    my $doeach = eachref($ref);
    while (my ($key, $val) = &$doeach) {
        # TODO: Escape /[[]=]/
        my $id = (defined $prefix) ? $prefix . '[' . $key . ']' : $key;
        if (ref $val eq 'HASH' || ref $val eq 'ARRAY') {
            push @output, flatten($val, $id);
        } else {
            push @output, ($id, $val // 'null');
        }
    }
    return @output;
}


# Format RTMP URL for librtmp
sub formaturl_librtmp {
    my ($rtmp, $app, $playpath) = @_;

    my $stream_url = $rtmp;
    if ($playpath =~ qr{/} or $app =~ qr{/}) {
        $stream_url .= ' app=' . $app . ' playpath=' . $playpath;
    } else {
        $stream_url .= $app . '/' . $playpath;
    }
}


# Format RTMP URL for librtmp
sub formaturl_rtmpdump {
    my ($rtmp, $app, $playpath) = @_;
    
    my $stream_url = '--rtmp ' . $rtmp;
    if ($playpath =~ qr{/} or $app =~ qr{/}) {
        $stream_url .= ' --app ' . $app . ' --playpath ' . $playpath;
    } else {
        $stream_url .= $app . '/' . $playpath;
    }
}


# Find first pattern match in HTML page, HTML-unescape it and return it.
# Otherwise return undef.
sub htmlgrep {
    my ($html_page, $pattern) = @_;
    my ($text) = ($html_page =~ $pattern);
    if (defined $text) {
        $text =~ s/&gt;/>/g;
        $text =~ s/&lt;/</g;
        $text =~ s/&amp;/&/g;
    }
    return $text;
}


# Escape as URI and ampersands in additon
sub shellescape {
    local $_ = URI->new(shift);
    s/&/%26/g;
    return $_;
}


# Try to get JSON request data from HTML page text passed as argument.
# Return the JSON data or undef.
sub findjson {
    htmlgrep(shift, qr{callSOAP\(([^)]*)\);});
}


# Try to get setRequestHeader function arguments from JS page text passed as
# argument.
# Return list (header, value) or undef.
sub findrequestheader {
    local $_ = htmlgrep(shift, qr{setRequestHeader\(([^)]*)\);});
    if (!defined) { return undef; }
    return (m/'([^']*)', '([^']*)'/);
}


# Return array of { bitrate => INTEGER, url => URL } found in RTMP SMIL play
# list. Arguments is playlist as string, URL of the playlist,
# playlist as XML::XPath object and boolean signaling URL format (true for
# rtmpdump format, false for librtmp format).
sub extract_urls_from_rtmp_smil {
    my ($smil, $smil_url, $parser, $opt_d) = @_;

    my $videos =
        $parser->find('/data/smilRoot/body/switchItem/video[@enabled=true()]');
    if ($videos->size <= 0) {
        die "No videos found in SMIL playlist <" . $smil_url . ">:\n" .
            $smil . "\n";
    }
    my @bitrate_url_pairs = ();
    foreach my $video ($videos->get_nodelist) {
        my $suffix = $video->getAttribute('src');
        if (! defined $suffix) {
            print STDERR q{Missing `video/@src' attribute} . "\n";
            next;
        }
        my $prefix = $video->getParentNode->getAttribute('base');
        if (! defined $suffix) {
            print STDERR
                q{Missing `video/../@base' attribute for video } .
                "`$suffix'\n";
            next;
        }
        my $bitrate = $video->getAttribute('system-bitrate');
        if (! defined $suffix) {
            print STDERR
                q{Missing `video/@system-bitrate' attribute for video } .
                "`$suffix'\n";
            next;
        }

        # Build stream URL. Because RTMP URL can be ambigous, 
        # applications accept aditional arguments separated by space
        # (the space must not be URI-encoded).
        my $stream_url;
        {
            my $rtmp = URI->new($prefix);
            my $app = substr($rtmp->path_query, 1);
            $rtmp->path('/');
            $rtmp->query(undef);
            my $playpath = URI->new($suffix);
            if ($opt_d) {
                $stream_url = formaturl_rtmpdump($rtmp, $app, $playpath);
            } else {
                $stream_url = formaturl_librtmp($rtmp, $app, $playpath);
            }
        }

        # Store URL
        push @bitrate_url_pairs, {'bitrate' => $bitrate, 'url' => $stream_url};
    }

    return @bitrate_url_pairs;
}


# Print segment URLs or their contnent found in bottom-level Apple M3U
# play-list. This function can never return if live stream is served by
# a server.
# Arguments are URL of the playlist, LWP::UserAgent object, and boolean
# signalling content of stream segments should be printed instead of their
# URLs.
# See <http://tools.ietf.org/html/draft-pantos-http-live-streaming>.
sub iterate_bottom_apple_m3u {
    my ($m3u_url, $ua, $stream_content) = @_;
    autoflush STDOUT 1;
    my $reload = 1;
    my $last_segment = -1;
    my $target_duration;

    while ($reload) {
        # Get bottom-level Apple MPEG play-list.
        my $response = $ua->request(GET $m3u_url);
        $response->is_success or
            die "Could not get bottom-level Apple M3U play-list from <" .
                $m3u_url . ">: " . $response->status_line . "\n";
        my $retrieved_at = time;
        my $duration;
        my $sequence = 0;
        for (split(/(\r)?\n/, $response->decoded_content)) {
            if (! defined) { next; }
            if (/\A#EXT-X-TARGETDURATION:(\d+)/) {
                $target_duration = $1;
                next;
            }
            if (/\A#EXT-X-MEDIA-SEQUENCE:(\d+)/) {
                $sequence = $1;
                next;
            }
            if (/\A#EXT-X-ENDLIST\b/) {
                $reload = 0;
                next;
            }
            if (/\A#EXTINF:(\d+)/) {
                $duration = $1;
                next;
            }
            if (/\A[^#]/) {
                if (! defined $duration) {
                    print STDERR
                        'Stray URL in bottom-level Apple play-list from <' .
                        $m3u_url . ">:\n" . $response->decoded_content . "\n";
                    next;
                }
                if ($sequence > $last_segment) {
                    my $segment_url = URI->new_abs($_, $m3u_url);
                    if ($stream_content) {
                        # Get segment content.
                        $ua->set_my_handler('response_data',
                            sub { print $_[3]; 1; }, 'm_code' => 2);
                        my $response = $ua->request(GET $segment_url);
                        $response->is_success or
                            die "Could not get stream segment content from <" .
                                $segment_url . ">: " .
                                $response->status_line . "\n";
                    } else {
                        print $segment_url, "\n";
                    }
                    $last_segment = $sequence;
                }
                $duration = undef;
                $sequence++;
                next;
            }
        }
        if (! defined $target_duration || $last_segment == -1) {
            die 'No target duration or URL found in bottom-level Apple " .
                "play-list from <' .
                $m3u_url . ">:\n" . $response->decoded_content . "\n";
        }
        if ($reload) {
            my $sleep = $target_duration - (time - $retrieved_at);
            if ($sleep > 0) {
                sleep $sleep;
            }
        }
    }
}


# Return array of { bitrate => INTEGER, url => URL } found in top-level
# Apple M3U play-list. Returned URLs are locators of bottom-level Apple M3U
# play-list for given bitrate. The play-list specification is on
# <http://tools.ietf.org/html/draft-pantos-http-live-streaming>.
# Arguments are URL of the playlist and LWP::UserAgent object.
sub extract_urls_from_top_apple_m3u {
    my ($m3u_url, $ua) = @_;
    my @bitrate_url_pairs = ();

    # Get top-level Apple MPEG playlist.
    my $response = $ua->request(GET $m3u_url);
    $response->is_success or
        die "Could not get top-level Apple M3U play-list from <" .
            $m3u_url . ">: " . $response->status_line . "\n";
    my $bitrate;
    for (split(/(\r)?\n/, $response->decoded_content)) {
        if (! defined) { next; }
        if (/\A#EXT-X-STREAM-INF:(?:.*,)?BANDWIDTH=(\d+)/) {
            $bitrate = $1;
            next;
        }
        if (/\A[^#]/) {
            if (! defined $bitrate) {
                print STDERR 'Stray URL in top-level Apple play-list from <' .
                    $m3u_url . ">:\n" . $response->decoded_content . "\n";
                next;
            }
            push @bitrate_url_pairs,
                {'bitrate' => $bitrate, 'url' => URI->new_abs($_, $m3u_url)};
            $bitrate = undef;
            next;
        }
    }
    if ($#bitrate_url_pairs < 0) {
        die 'No URL found in top-level Apple play-list from <' .
            $m3u_url . ">:\n" . $response->decoded_content . "\n";
    }

    return @bitrate_url_pairs;
}


# Return array of { bitrate => INTEGER, url => URL } found in Apple SMIL play
# list. Arguments is playlist as string, URL of the playlist,
# playlist as XML::XPath object, and LWP::UserAgent object.
sub extract_urls_from_apple_smil {
    my ($smil, $smil_url, $parser, $ua) = @_;

    my $videos =
        $parser->find('/data/smilRoot/body/video');
    if ($videos->size <= 0) {
        die "No videos found in SMIL playlist <" . $smil_url . ">:\n" .
            $smil . "\n";
    }
    my @bitrate_url_pairs = ();
    foreach my $video ($videos->get_nodelist) {
        my $m3u_url = $video->getAttribute('src');
        if (! defined $m3u_url) {
            print STDERR q{Missing `video/@src' attribute} . "\n";
            next;
        }
        push @bitrate_url_pairs,
            extract_urls_from_top_apple_m3u($m3u_url, $ua);
    }

    return @bitrate_url_pairs;
}


# Get entry HTML page
my $ua = LWP::UserAgent->new;
if ($opt_A) {
    $ua->agent('Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) '
        . 'AppleWebKit/531.21.10 (KHTML, like Gecko) ' .
        'Version/4.0.4 Mobile/7B314 Safari/531.21.10');
}
my $response = $ua->request(GET $ENTRY);
$response->is_success or
    die "Could not get entry page from <" . $ENTRY . ">: " .
        $response->status_line . "\n";
my $page = $response->decoded_content;


# Try to get iframe player URL
# The web page is not well-formed XML, we cannot use XPath 
# '//html:div[@id="iFramePositionContainer"]/html:iframe/@src' or
# '//html:p[@id="iframeHolder"]/html:iframe/@src';
# This is sometimes relative, sometimes absolute path
my $iframe_url = htmlgrep($page,
    qr{src="([^"]*/embed/iFramePlayer(?:[^"]*)\.php[^"]*)"});
if (defined $iframe_url && $iframe_url) {
    # If it ends with "&autoStart=false", it links to another page with the
    # same iframe player URL without the parameter. Thus remove the parameter.
    $iframe_url =~ s/&autoStart=false//; 
    # Get iframe player page
    $iframe_url = URI->new_abs($iframe_url, $ENTRY);
    $response = $ua->request(GET $iframe_url);
    $response->is_success or
        die "Could not get iframe player from <" . $iframe_url . ">: " .
            $response->status_line . "\n";
    $page = $response->decoded_content;
}


# Get JSON request data
my $json_data = htmlgrep($page, qr{callSOAP\(([^)]*)\);});
unless (defined $json_data && $json_data) {
    # Try to get error message from stream provider
    my $message = htmlgrep($page, qr{<p\s+class="message">([^<]*)<});
    if (defined $message && $message) {
        die "$message\n";
    }

    # else die in general way
    die "Could not find JSON data structure\n";
}


# Get XML-RPC header definition
my ($xmlrpc_header, $xmlrpc_value) = findrequestheader($page);
if (!defined $xmlrpc_value or !defined $xmlrpc_value) {
    print STDERR "XML-RPC header definition not found.\n";
}


# Decode JSON request data
my $data;
eval { $data = decode_json($json_data) } or 
    die "Could not decode JSON string: $json_data: $@\n";
my @data = flatten($data);


# Get SMIL playlist URL
my $smil_generator_url = URI->new_abs($SMIL_GENERATOR, $ENTRY);
$ua->default_header($xmlrpc_header => $xmlrpc_value) if defined $xmlrpc_header;
$response = $ua->request(POST $smil_generator_url, \@data);
$ua->default_header($xmlrpc_header => undef) if defined $xmlrpc_header;
$response->is_success or
    die "Could not get SMIL playlist URL from <" . $smil_generator_url .
        ">: " . $response->status_line . "\n";
my $smil_url = $response->decoded_content;


# Get SMIL playlist
$ua->agent('NSPlayer/0 (Fuck libwwperl discrimination)');
$response = $ua->request(GET $smil_url);
$response->is_success or
    die "Could not get SMIL playlist from <" . $smil_url . ">: "
        . $response->status_line . "\n";
my $smil = $response->decoded_content;


# Get stream URLs
my $parser = XML::XPath->new('xml' => $smil) or
    die "Could not parse SMIL playlist from <" . $smil_url . ">:\n" .
        $smil . "\n";
my @bitrate_url_pairs = ();
if ($opt_A) {
    @bitrate_url_pairs = extract_urls_from_apple_smil($smil, $smil_url,
        $parser, $ua);
} else {
    @bitrate_url_pairs = extract_urls_from_rtmp_smil($smil, $smil_url,
        $parser, $opt_d);
}


if ($opt_A && ($opt_f || $opt_s)) {
    # Select Apple M3U URL
    if (defined $BITRATE) {
        @bitrate_url_pairs = grep { ${$_}{'bitrate'} == $BITRATE }
            @bitrate_url_pairs;
    }
    if ($#bitrate_url_pairs < 0) {
        die "No usable video streams found in SMIL playlist:\n$smil\n";
    }
    if ($#bitrate_url_pairs == 0 ) {
        iterate_bottom_apple_m3u(${$bitrate_url_pairs[0]}{'url'}, $ua, $opt_s);
    } else {
        die "Multiple bit-rate play-lists not implemented yet.\n";
    }
} else {
    # Output URL
    my $video_counter = 0;
    for my $pair (@bitrate_url_pairs) {
        if (defined $BITRATE) {
            if (${$pair}{'bitrate'} == $BITRATE) {
                print "${$pair}{'url'}\n";
                $video_counter++;
            }
        } else {
            print "${$pair}{'bitrate'}: ${$pair}{'url'}\n";
            $video_counter++;
        }
    }
    if ($video_counter <= 0) {
        die "No usable video streams found in SMIL playlist:\n$smil\n";
    }
}

