#!/usr/bin/perl

#  Copyright (C) 2012 Morten Grouleff
#
#  Derived from "tv_grab_dk_dr_2009" by Thomas Horsten <thomas@horsten.com>
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

######################################################################
# Udover XMLTV kræves Parse::RecDescent og DateTime, som på
# Debian / Ubuntu kan installeres med:
#   sudo aptitude install libparse-recdescent-perl libdatetime-perl
#
# Kun testet på Linux. Beta version :)
# Kommentarer til: Thomas Horsten <thomas@horsten.com>
#
# History
#   Per Baekgaard <baekgaard@b4net.dk> 2009-08-13 20:39 CEST
#      - Minor changes supporting the released version on dr.dk
#
#   Morten Grouleff (morten@grouleff.com)
#      - Rewritten to use the new JSON api instead of screenscraping from the html.

use strict;
use warnings;

use open qw/:std :utf8/;

use JSON;
use XMLTV;
use XMLTV::ProgressBar;
use XMLTV::Options qw/ParseOptions/;
use XMLTV::Configure::Writer;

use LWP::UserAgent;
use IO::Scalar;

use DateTime;
use Data::Dumper;

# TODO: This ought be in a config file, I suppose. But for now, it lives here.
my @title_fixups = (); # regexp => [ $title, $subtitle ]. Or when prefixed with "#", regexp => [ $title, #$episode-num ].
# DR renames stuff...:
push @title_fixups, ['^Historien om\s+(.+)$', [ '\'Historien om\'', '$1']];
push @title_fixups, ['^DR2\s*Premiere\s+(.+)$', ['\'DR2 Premiere\'', '$1']];
# Extract episode number if possible:
push @title_fixups, ['^(.+)\s*\(\s*(\d{1,3})\s*\)(\s*.*)$', ['$1 . $3', '#$2']];
push @title_fixups, ['^(.+)\s*(\(\d+\:\d+\))(\s*.*)$', ['$1 . $3', '#$2']]; # episode num copy
# Move to sub-title, when " - " or " : " is in title.
push @title_fixups, ['^(.+)\s*\:\s+(.+)$', ['$1', '$2']];
push @title_fixups, ['^(.+)\s+\-\s+(.+)$', ['$1', '$2']];

my $debug = 0; # Plenty of extra output.

my $grabber_name = 'tv_grab_dk_dr';
my $id_prefix = '.dr.dk';

# FR#109  my $default_root_url = 'http://www.dr.dk/tv/oversigt/json/guide/';
my $default_root_url = 'http://www.dr.dk/tjenester/program-guide/json/guide/';

my %grabber_tags = ( 'source-info-url'	   =>
		     # FR109  'http://www.dr.dk/tv/oversigt/json/guide/',
				 'http://www.dr.dk/tjenester/program-guide/json/guide/',
		     'source-info-name'	   =>
		     'DR TV Oversigt',
		     'generator-info-name' =>
		     'XMLTV',
		     'generator-info-url'  =>
		     'http://niels.dybdahl.dk/xmltvdk/',
    );

# Time zone the server uses
my $server_tz = 'Europe/Copenhagen';
my $LocalTZ = DateTime::TimeZone->new( name => $server_tz );

sub config_stage
{
    my( $stage, $conf ) = @_;
    my $result;

    $stage eq "start" || die "Unknown stage $stage";

    my $writer = new XMLTV::Configure::Writer( OUTPUT => \$result,
					       encoding => 'utf-8' );
    if( $stage eq 'start' ) {
	$writer->start( { grabber => $grabber_name } );
	$writer->start_selectone( {
	    id => 'accept-copyright-disclaimer',
	    title => [ [ 'Acceptér ansvarsfraskrivelse', 'da'],
		       [ 'Accept disclaimer', 'en'] ],
	    description => [ [ "Data fra DR's programoversigt er " .
			       "beskyttet af loven om ophavsret, " .
			       "og må kun anvendes til personlige, " .
			       "ikke-kommercielle formål. " .
			       "Dette programs forfatter(e) kan ikke " .
			       "holdes ansvarlig for evt. misbrug.", 'da' ],
			     [ "Data from DR's program guide is " .
			       "protected by copyright law and may " .
			       "only be used for personal, non-commercial " .
			       "purposes. The author(s) " .
			       "of this program accept no responsibility " .
			       "for any mis-use.",
			       'en' ] ] } );
	$writer->write_option( {
	    value=>'reject',
	    text=> [ [ 'Jeg accepterer IKKE betingelserne', 'da'],
		     [ 'I do NOT accept these conditions', 'en'] ] } );
	$writer->write_option( {
	    value=>'accept',
	    text=> [ [ 'Jeg accepterer betingelserne', 'da'],
		     [ 'I accept these conditions', 'en'] ] } );
	$writer->end_selectone();
	$writer->start_selectone( {
	    id => 'include-radio',
	    title => [ [ 'Medtag radio-kanaler', 'da'],
		       [ 'Include radio channels', 'en'] ],
	    description => [ [ "DR's programoversigt indeholder " .
			       "radiokanaler, du kan her vælge " .
			       "om de skal medtages i listen.", 'da' ],
			     [ "DR's program guide includes radio " .
			       "channels, here you can choose whether " .
			       "to include them.", 'en' ] ] } );
	$writer->write_option( {
	    value=>'0',
	    text=> [ [ 'Udelad radio-kanaler', 'da'],
		     [ 'Exclude radio channels', 'en'] ] } );
	$writer->write_option( {
	    value=>'1',
	    text=> [ [ 'Medtag radio-kanaler', 'da'],
		     [ 'Include radio channels', 'en'] ] } );
	$writer->end_selectone();

	$writer->write_string( {
	    id => 'root-url',
	    title => [ [ 'Root URL for grabbing data', 'en' ],
		       [ 'Grund-URL for grabberen', 'da' ] ],
	    description => [ [ 'Provide the URL of DR\'s program guide ' .
			       'data data engine, ' .
			       'including the trailing slash.', 'en' ],
			     [ 'Indtast URL\'en på DR\'s tv-oversigs data ' .
			       'engine, inklusive den ' .
			       'efterfølgende skråstreg.', 'da' ] ],
	    default => $default_root_url } );

	$writer->write_string( {
	    id => 'episode-in-subtitle',
	    title => [ [ 'Should we include the episode number as default subtitle', 'en' ],
		       [ 'Indsæt afsnits-nr som undertitel?', 'da' ] ],
	    description => [ [ '  When set, insert the episode number as a subtitle with the configured string as prefix. ' .
			       '  when there is a subtitle already, prepend the episode number, ' .
			       '  Leave empty to disable this feature.', 'en' ],
			     [ '  Denne tekst vælger hvad der skal indsættes i undertitlen foran afsnitsnummeret. ' .
			       '  Når der er en undertitel i forvejen, indsættes dette blot før denne.' .
			       '  Sæt til tom for at slå indsættelsen fra. ', 'da' ] ],
	    default => '' } );

    }
    $writer->end( 'select-channels' );

    return $result;
}

sub getUrl($) {
    my ( $url ) = @_;
    my $ua = LWP::UserAgent->new;
    $ua->agent("xmltv/$XMLTV::VERSION");
    my $req = HTTP::Request->new(GET => $url);
    $req->header('Accept' => 'Accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
    my $encoding = 'utf-8';
    $req->header('Accept-Charset' => $encoding);
    my $res = $ua->request($req);
    if ($res->is_success) {
	if ($res->header('Content-Type') && $res->header('Content-Type') =~ /[^\/]+\/[^\/]+\;\s*charset=(.*)$/) {
	    $encoding = $1;
	}
	return $res->decoded_content(charset => $encoding);
    }
    else {
	print STDERR "Error: " . $res->status_line . " on url: " . $url . "\n";
	return 0;
    }
}

sub list_channels($$)
{
    my( $conf, $opt ) = @_;
    my $chanlist = &get_channel_list($conf);
    #print Dumper $chanlist;
    my $result="";
    my $fh = new IO::Scalar \$result;
    my $oldfh = select( $fh );
    my $writer = new XMLTV::Writer(OUTPUT => $fh, encoding => 'utf-8');
    $writer->start(\%grabber_tags);
    $writer->write_channels($chanlist);
    $writer->end();
    select( $oldfh );
    $fh->close();

    #print "RESULT:\n$result\n";
    return $result;
}

sub get_channel_list($)
{
    my ( $conf ) = @_;
    my $drlist = get_dr_channel_list($conf);
    #print Dumper $drlist;
    my %chanlist = ();
    foreach my $chan (@$drlist) {
	my $shortid = $chan->{'Id'};
	$shortid =~ s/^[wr]\_//;
	my $id = $shortid . $id_prefix;
	# tv_validate_file barfs if ID contains + as for d3+.dr.dk
	$id =~ s/\+/plus/g;
	$chanlist{$id}->{'id'} = $id;
	$chanlist{$id}->{'_dr_listing_id'} = $chan->{'Id'};
#	$chanlist{$id}->{'icon'} = [{ 'src'=>$conf->{'root-url'}->[0] .
#					 "Images/Logos/" . $shortid .
#					 ".gif" }];
	$chanlist{$id}->{'_source_url'} = $chan->{'sourceUrl'};
	my $chan_lang; # = $dr_language_codes{$chan->{'country_code'}};
	$chan_lang = 'da'; # unless $chan_lang;
	$chanlist{$id}->{'_lang'} = $chan_lang;
	$chanlist{$id}->{'display-name'} =
	    [ [ $chan->{'Name'}, $chan_lang ]];
	$chanlist{$id}->{'_name'} = $chan->{'name'};
    }

    return \%chanlist;
}

sub get_dr_channel_list($)
{
    my ( $conf ) = @_;
    my @types = ('tv');
    if ($conf->{'include-radio'}[0] eq '1') {
	push (@types, 'radio');
    }
    my @results = ();
    foreach my $type (@types) {
	print STDERR "TYPE: $type\n" if $debug;
	my $url = $conf->{'root-url'}->[0] . 'channels?mediaType=' . $type;
	#  http://www.dr.dk/tjenester/program-guide/json/guide/channels?mediaType=tv
	print STDERR "Get: $url\n" if $debug;
	my $content = getUrl($url) || return 0;
	print STDERR "Got: $content\n"  if $debug;
	my $channels = ();
	eval {
	    $channels = from_json( $content, { utf8  => 1 } );
			1;
	} or do {
	    warn "Failed to get channels: $_";
	};
	#print Dumper $parsed;
	my $c = $channels->{'Channels'};
	push(@results, @$c);
    }
    return \@results;
}

sub json_date_to_xmltv($)
{
    # Format: \/Date(1352178000000)\/ -> 20090613123456 +0100
    my ($d) = @_;
    if ($d =~ m/Date\((\d+)([+-]\d\d\d\d)?\)/ ) {
	my ( $epoch_milliseconds, $timezone ) = ( $1, $2 );
	my $dt = DateTime->from_epoch( epoch => $epoch_milliseconds / 1000 );
	if ($timezone) {
	    $dt->set_time_zone($timezone);
	}
	$dt->set_time_zone($LocalTZ);
	return $dt->format_cldr('yyyyMMddHHmmss ZZZ');
    } else {
	return 0;
    }
}

sub get_schedules($$$)
{
    my ($conf,  $chan, $date ) = @_;

    my @schedules = ();

    my $url = $conf->{'root-url'}->[0] .
	'schedule?startTimesectionId=1&days=' . $date . '&channelid=' . $chan->{'_dr_listing_id'} . '&oneTimesectionOnly=false&mediaType=tv';
	 # http://www.dr.dk/tjenester/program-guide/json/guide/schedule?startTimesectionId=1&days=0&channelid=w_DR2&oneTimesectionOnly=false&mediaType=tv
    print STDERR "Get: $url\n" if $debug;
    my $content = getUrl($url) || return 0;
    print STDERR "Got: $content\n" if $debug;

    my @results = ();

    my $parsed = ();
    eval {
	$parsed = from_json( $content, { utf8  => 1 } );
	1;
    } or do {
	# Ignoring failed json parses.
    };

    if (!$parsed || ("HASH" ne ref $parsed))  {
	&warning("(BUG?) Parser barfed while processing channel " .
		 $chan->{'_name'}. " date ". $date . " (empty result?) URL: $url\n");
	print STDERR "Content: $content\n";
	print STDERR Dumper $parsed;
	return \@results;
    } elsif ( ("ARRAY" ne ref ($parsed->{'TimeSection'} )) ) {
	    &warning("(BUG?) Parser barfed while processing channel " .
		    $chan->{'_name'}. " date ". $date . " (empty result?) URL: $url\n");
	    &warning("Result type: " . ref ($parsed) . "\n");
	    print STDERR "Content: $content\n";
	    print STDERR Dumper $parsed;
	return \@results;
    }

    foreach my $section (@{$parsed->{'TimeSection'}}) {
	foreach my $listing (@{$section->{'Programs'}}) {
	    my %p = ();
	    # attributes
	    $p{'channel'} = $chan->{'id'};
	    $p{'start'} = json_date_to_xmltv($listing->{'StartDateTime'});
	    $p{'stop'} = json_date_to_xmltv($listing->{'EndDateTime'});

	    my $title = $listing->{'Title'};
	    if (@title_fixups && defined $title) {
		my $subtitle = '';
		my $episodenum;
		for my $fixup (@title_fixups) {
		    my $match = $fixup->[0];
		    #print "Inspecting '$title' for match $match\n";
		    if ($title =~ m/$match/i) {
			# The fixups contains backrefs to the regexp result. Don't make a match here, it will ruin it.
			my $placements = $fixup->[1];
			#print "Matched, applying '" . $placements->[0] . "', '" . $placements->[1] . "'\n";
			$title = eval $placements->[0] if defined $placements->[0];
			if (defined $placements->[1]) {
			    my $str = $placements->[1];
			    if (substr($str, 0, 1) eq '#') {
				my $e = eval substr($str, 1);
				if ($e =~ m/(\d+)\:(\d+)/) {
				    $episodenum = $1;
				    $p{'episode-num'} = [ [ " . " . ($1 - 1) . "/" . $2 . " . ", "xmltv_ns" ] ];
				} else {
				    $episodenum = $e;
				    $p{'episode-num'} = [ [ " . " . ($e - 1) . " . ", "xmltv_ns" ] ];
                               }
			    } else {
				$subtitle .= eval $placements->[1];
			    }
			}
		    }
		}

		########################################
		# Sæt afsnitsnummer ind først i subtitle, hvis ønsket.
		my $episode_in_subtitle = $conf->{'episode-in-subtitle'};
		if (defined $episode_in_subtitle && defined $episode_in_subtitle->[0]) {
		    if (defined $episodenum) {
			if ($subtitle eq '') {
			    $subtitle = $episode_in_subtitle->[0] . ' ' . $episodenum;
			} else {
			    $subtitle = $episode_in_subtitle->[0] . ' ' . $episodenum . ': ' . $subtitle;
			}
		    }
		}

		if ($subtitle ne '') {
		    $p{'sub-title'} = [ [ $subtitle,
					  $chan->{'_lang'} ] ];
		}
	    }

	    my @title;
	    push (@title, [ $title, $chan->{'_lang'} ]);
	    if ($listing->{'OriginalTitle'}) {
		my $original_lang = 'en'; # guess_original_language($listing);
		if (!$original_lang) {
		    $original_lang = 'en';
		}
		push (@title, [ $listing->{'OriginalTitle'}, $original_lang ]);
	    }
	    $p{'title'} = \@title;

	    my $description;
	    if ($listing->{'HasDescription'} eq 'true' || $listing->{'HasDescription'} == 1) {
		my $desc_url = $conf->{'root-url'}->[0] . 'ProgramDetails/?id=' . $listing->{'Id'} . '&days=' . $date . '&channelid=' . $chan->{'_dr_listing_id'} . '&mediaType=tv';
		# http://www.dr.dk/tjenester/program-guide/json/guide/ProgramDetails/?id=dr.dk/mas/whatson/216871786813&days=0&channelid=w_DR2&mediaType=tv
		print STDERR "Get: $desc_url\n" if $debug;
		my $desc_content = getUrl($desc_url);
		if ($desc_content) {
		    my $json;
		    eval {
			$json = from_json( $desc_content, { utf8  => 1 } );
			$description = $json->{'Description'} . ' ';
			$description =~ s/\<br\>/. /g;
			$description =~ s/\s*\n\s*/. /g;
			1;
		    } or do {
			# Ignoring failed json parses, leaving desc empty.
		    };
		}
	    }

	    ########################################
	    # punchline som ekstra beskrivelse.
	    if ($listing->{'Punchline'}) {
		my $pl = $listing->{'Punchline'};
		# Der er nogle gange linjeskift i
		# punchlines, hvilket XMLTV ikke bryder
		# sig om.
		$pl =~ s/\s*\n\s*/. /g;
		$pl =~ s/\<br\>/. /g;
		$description .= $pl;
	    }

	    if ($description) {
		$p{'desc'} = [ [ $description, $chan->{'_lang'} ] ];
	    }

	    ########################################
	    # Genudsendelse, HD, etc.
	    if ($listing->{'IsRerun'} eq 'true' || $listing->{'IsRerun'} == 1) {
		$p{'previously-shown'} = {};
	    }
	    if ($listing->{'DisplayHD'}) {
		$p{'video'}{'quality'} = 'HDTV';
	    }

	    if ($listing->{'Display16_9'}) {
		$p{'video'}{'aspect'} = '16:9';
	    }

	    ########################################
	    # Genre/kategori
	    # Her bruges genre_text, vi kunne også
	    # bruge genre_code og have en tabel
	    # til at få i det mindste den generelle
	    # kategori på engelsk (farver i MythTV!)
	    # TODO: Fix engelske kategorier
	    if ($listing->{'Category'} &&
		$listing->{'Category'} ne 'Ukategoriseret' &&
		$listing->{'Category'} ne 'Ukendt' &&
		$listing->{'Category'} ne 'Andre') {
		$p{'category'} = [ [ $listing->{'Category'}, 'da']];
	    }

	    ########################################
	    # URL
	    if ($listing->{'ProgramSeriesSiteUrl'}) {
		$p{'url'} = [ $listing->{'ProgramSeriesSiteUrl'} ];
	    }

	    # Sanity checks..
	    if (!$p{'start'}) { warning("No 'START' attribute"); next; }
	    if (!$p{'stop'}) { warning("No 'START' attribute"); next; }
	    if (!$p{'title'}) { warning("No 'TITLE' attribute"); next; }

	    #print Dumper \%p;
	    push(@results, \%p);
	}
    }
    return \@results;
}

my $opt;
my $conf;
( $opt, $conf ) = ParseOptions( {
    grabber_name => $grabber_name,
    capabilities => [qw/baseline manualconfig tkconfig apiconfig/],
    stage_sub => \&config_stage,
    listchannels_sub => \&list_channels,
    #load_old_config_sub => \&load_old_config,
    version => "$XMLTV::VERSION",
    description => "TV Oversigten fra Danmarks Radios (2012) ".
	"(www.dr.dk/tjenester/programoversigt)",
				} );



my %writer_args = ( encoding => 'utf-8' );
if (defined $opt->{'output'}) {
    my $fh = IO::File->new($opt->{'output'}, ">:utf8");
    die "Cannot write to $opt->{'output'}" if not $fh;
    $writer_args{'OUTPUT'} = $fh;
}
my $writer = new XMLTV::Writer(%writer_args);

$writer->start(\%grabber_tags);

#print "Grabbing channel list\n";
my $chanlist = &get_channel_list($conf) || die "Couldn't get channel list";

# Check channels specified are valid
my @channels = ();
foreach my $cid (@{$conf->{'channel'}}) {
    my $chan = $chanlist->{$cid};
    if (!$chan) {
	warn("Unknown channel ".$cid." in config file\n");
    } else {
	$writer->write_channel($chan);
	push (@channels, $cid);
    }
}

# data uses offset from today in days.
for (my $c=0; $c<$opt->{'days'}; $c++) {
    foreach my $cid (@channels) {
	my $chan = $chanlist->{$cid};
	if (!$chan) {
	    &warning("Unknown channel $cid\n");
	} else {
	    #print "ID: $cid Name: " .
	    #$chan->{'display-name'}[0][0]."\n";
	    my $day = $c;
	    $day += $opt->{offset} if ($opt->{offset});
	    my $schedules = get_schedules($conf, $chan, $day);
	    if ("ARRAY" ne ref($schedules)) {
		warn("Schedules for $cid on day $c not valid - empty?\n");
		next;
	    }
	    foreach my $s (@$schedules) {
		#print Dumper $s;
		if ("HASH" ne ref($s)) {
		    warn("Weird listing:\n");
		    print STDERR Dumper $s;
		} else {
		    $writer->write_programme($s);
		}
	    }
	}
    }
}
$writer->end();

=pod

=head1 NAME

tv_grab_dk_dr - Grab TV listings for Denmark.

=head1 SYNOPSIS

tv_grab_dk_dr --help

tv_grab_dk_dr --configure [--config-file FILE] [--gui OPTION]

tv_grab_dk_dr [--config-file FILE] [--output FILE] [--days N]
[--offset N] [--quiet]

tv_grab_dk_dr --capabilities

tv_grab_dk_dr --version

=head1 DESCRIPTION

Output TV listings for several channels available in Denmark.  The
data comes from dr.dk. The grabber now uses a JSON API to retrieve data
and no longer relies on parsing HTML.

First run B<tv_grab_dk_dr --configure> to choose, which channels you want
to download. Then running B<tv_grab_dk_dr> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels, and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_dk_dr.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days.  The default is one week.

B<--offset N> Start N days in the future.  The default is to start
from today.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

This version of tv_grab_dk_dr was written by Morten Grouleff <morten at grouleff dot com>

=cut

