# Filename: iTunesConnect.pm
#
# iTunes Connect client interface
#
# Copyright 2008-2009 Brandon Fosdick <bfoz@bfoz.net> (BSD License)
#
# $Id: iTunesConnect.pm,v 1.12 2009/01/22 05:23:57 bfoz Exp $

package WWW::iTunesConnect;

use strict;
use warnings;
use vars qw($VERSION);

$VERSION = "1.13";

use LWP;
use HTML::Form;
use HTML::TreeBuilder;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);

use constant URL_PHOBOS => 'https://phobos.apple.com';
use constant MONTH_2_NUM => {
    'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04',
    'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08',
    'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12',
};

# --- Constructor ---

# Construct a new client object.
#
# Options:
#   user      iTunes Connect username (may also be set later with user())
#   password  iTunes Connect password (may also be set later with password())
# Every other option is handed to LWP::UserAgent->new() unchanged.
#
# Returns the new blessed object.
sub new
{
    my ($this, %options) = @_;
    my $class = ref($this) || $this;
    my $self = bless {}, $class;

    # Take the credentials out of the option list before it goes to
    # LWP::UserAgent: they are not user-agent options, and LWP::UserAgent
    # complains about options it does not recognise when warnings are on.
    my $user     = delete $options{user};
    my $password = delete $options{password};
    $self->{user}     = $user     if $user;
    $self->{password} = $password if $password;

    $self->{ua} = LWP::UserAgent->new(%options);
    $self->{ua}->cookie_jar({});

    return $self;
}

# --- Class Methods ---

# Parse a TSV data table retrieved from iTunes Connect
#
# Takes the table as a single string. The first line is the header row, every
# following line is a data row. Returns the flat list
#   ('header', \@header, 'data', \@rows)
# which is suitable for assigning to a hash. Empty or undefined input yields
# an empty header and no rows.
sub parse_table
{
    my ($content) = @_;

    # Nothing to parse: hand back an empty but well-formed table
    return ('header', [], 'data', [])
        unless defined $content and length $content;

    # Accept both LF and CRLF line endings so that the last column of a row
    # never carries a stray carriage return
    my @lines  = split /\r?\n/, $content;
    my $first  = shift @lines;
    my @header = defined $first ? split(/\t/, $first) : ();
    my @data   = map { [ split /\t/, $_ ] } @lines;

    return ('header', \@header, 'data', \@data);
}

# Parse a gzip'd summary file fetched from the Sales/Trend page
# First argument is same as input argument to gunzip constructor
# Remaining arguments are passed as options to gunzip
#
# May be called as a plain function or as a class/instance method.
# Returns the list produced by parse_table() on success and an empty list
# (undef in scalar context) if the input cannot be gunzip'd.
sub parse_sales_summary
{
    # Discard the invocant when called as a method
    shift if @_ and UNIVERSAL::isa($_[0], __PACKAGE__);

    my ($input, %options) = @_;
    return unless defined $input;

    # gunzip the data into a scalar
    my $content;
    gunzip($input => \$content, %options) or return;

    # Parse the data into a hash of array refs and return
    return parse_table($content);
}

# --- Instance Methods ---

# Authenticate against iTunes Connect and remember the paths of the
# Sales/Trend Reports and Financial Reports pages.
#
# Returns 1 on success. Returns undef if the credentials are not set, if a
# page cannot be fetched, or if neither report link is present on the page
# returned after submitting the credentials.
sub login
{
    my $s = shift;

    # Bail out if no username and password
    return undef unless $s->{user} and $s->{password};

    # Prevent repeat logins
    return 1 if $s->{sales_path} and $s->{financial_path};

    # Fetch the login page
    my $r = $s->request('/WebObjects/MZLabel.woa/wa/default');
    return undef unless $r and $r->is_success;

    # Pull out the path for submitting user credentials. The capture is taken
    # from the match's return value so a failed match can never pick up a
    # stale $1 from an earlier pattern.
    my ($action) = $r->as_string =~ /<form.*name=.*action="(.*)">/;
    return undef unless $action;

    # The credentials travel in the query string, so percent-encode them;
    # otherwise characters such as '&', '+' or '#' would corrupt the request
    my $escape = sub {
        my ($text) = @_;
        utf8::encode($text) if utf8::is_utf8($text);
        $text =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord $1)/ge;
        return $text;
    };

    # Submit the user's credentials
    $r = $s->request($action
        .'?theAccountName='.$escape->($s->{user})
        .'&theAccountPW='.$escape->($s->{password})
        .'&theAuxValue=');
    return undef unless $r and $r->is_success;

    my $page = $r->as_string;

    # Find the Sales/Trend Reports path
    my ($sales) = $page =~ /href="(.*)">\s*\n\s*<b>Sales\/Trend Reports<\/b>/;

    # Find the Financial Reports path
    my ($financial) = $page =~ /href="(.*)">\s*\n\s*<b>Financial Reports<\/b>/;

    # A page with neither link is not a logged-in page
    return undef unless $sales or $financial;

    # Save both paths for later
    $s->{sales_path}     = $sales;
    $s->{financial_path} = $financial;

    1;
}

# Fetch the list of available dates for Sales/Trend Daily Summary Reports. This
# caches the returned results so it can be safely called multiple times. Note,
# however, that if the parent script runs for longer than 24 hours the cached
# results will be invalid. The cached results may become invalid sooner.
sub daily_sales_summary_dates
{
    # Returns the available dates, most recent first, or an empty list if the
    # Daily Summary form or its date drop-down cannot be obtained.
    my $s = shift;

    # Get an HTML::Form object for the Sales/Trends Reports Daily Summary page
    my $form = $s->daily_sales_summary_form();
    return unless $form;

    # Pull the available dates out of the form's select input
    my $input = $form->find_input('#dayorweekdropdown', 'option');
    return unless $input;

    # The dates are MM/DD/YYYY strings, which do not sort chronologically as
    # plain text ('12/31/2008' would come out ahead of '01/01/2009'), so sort
    # on a YYYYMMDD key instead. Anything in another format sorts as-is.
    my $sort_key = sub {
        my ($date) = @_;
        return $date =~ m{\A(\d{2})/(\d{2})/(\d{4})\z} ? $3.$1.$2 : $date;
    };

    # Sort (descending) and return the dates
    return map  { $_->[1] }
           sort { $b->[0] cmp $a->[0] }
           map  { [ $sort_key->($_), $_ ] }
           grep { defined } $input->possible_values;
}

# Fetch a Sales/Trend Daily Summary report
#
# Takes an optional date of the form 'MM/DD/YYYY'; without it the most recent
# available date is used. Returns the list produced by parse_sales_summary()
# followed by ('file', $raw_content, 'filename', $filename). Returns an empty
# list on a malformed date, when no form or date is available, or when the
# download or the decompression fails.
sub daily_sales_summary
{
    # Unpack explicitly: "my $date = shift if ..." is documented in perlsyn
    # as undefined behaviour
    my ($s, $date) = @_;

    # Only an exact MM/DD/YYYY string is acceptable
    return if $date and $date !~ m{\A\d{2}/\d{2}/\d{4}\z};

    unless( $date ) {
        # Get the list of available dates
        my @dates = $s->daily_sales_summary_dates();

        # The list is sorted in descending order, so most recent is first
        $date = shift @dates;
        return unless $date;
    }

    # Get an HTML::Form object for the Sales/Trends Reports Daily Summary page
    my $form = $s->daily_sales_summary_form();
    return unless $form;

    # Submit the form to get the latest daily summary
    $form->value('#selReportType', 'Summary');
    $form->value('#selDateType', 'Daily');
    $form->value('#dayorweekdropdown', $date);
    $form->value('hiddenDayOrWeekSelection', $date);
    $form->value('hiddenSubmitTypeName', 'Download');
    $form->value('download', 'Download');

    # Fetch the summary
    my $r = $s->{ua}->request($form->click('download'));
    return unless $r and $r->is_success;

    my $filename = $r->header('Content-Disposition');
    $filename = (split(/=/, $filename))[1] if $filename;

    # A successful parse yields at least the header and data pairs; anything
    # shorter means the content could not be gunzip'd
    my $content = $r->content;
    my @parsed  = parse_sales_summary(\$content);
    return unless @parsed > 1;

    return (@parsed, 'file', $content, 'filename', $filename);
}

# Fetch the list of available financial reports
#
# Returns a reference to a hash keyed by 'YYYYMM', then by region name; each
# leaf is a hash with the keys 'path' and 'filename'. The result is cached on
# the object. Returns undef if the login fails or the page cannot be parsed.
sub financial_report_list
{
    my $s = shift;

    # Return cached list to avoid another trip on the net
    return $s->{financial_reports} if $s->{financial_reports};

    # Check for a valid login
    return undef unless $s->login;
    return undef unless $s->{financial_path};

    # Fetch the Financial Reports page
    my $r = $s->request($s->{financial_path});
    return undef unless $r and $r->is_success;

    # Get the Items/Page form and set to display the max number of reports.
    # Unnamed forms are skipped rather than compared as undef.
    my ($form) = grep {
        my $name = $_->attr('name');
        defined $name and $name eq 'f_0_0_5_1_5_1_1_2_9';
    } HTML::Form->parse($r);
    return undef unless $form;

    # Only touch the page size when the page actually states a maximum
    if( $r->as_string =~ /items\/page \(max (\d+)\)/ ) {
        $form->value('itemsPerPage', $1);
    }
    $r = $s->{ua}->request($form->click);
    return undef unless $r and $r->is_success;

    # Parse the page into a tree
    my $tree = HTML::TreeBuilder->new_from_content($r->as_string);

    # Get the table by address (because there's nothing unique about it)
    my $table = $tree->address('0.1.2.0.0.0.3.1.1');
    unless( $table ) {
        $tree->delete;
        return undef;
    }

    # Get all child rows. The first 3 rows are headers, etc so get rid of them
    my @rows = $table->look_down('_tag','tr');
    splice(@rows, 0, 3);

    # Parse the list of reports, skipping rows that do not look like one
    my %reports;
    for my $row ( @rows ) {
        my @cols = $row->look_down('_tag','td');
        next unless @cols >= 3;

        next unless $cols[0]->as_trimmed_text =~ /([A-Z][a-z]{2})\s+(\d{4})/;
        my ($month, $year) = (MONTH_2_NUM()->{$1}, $2);
        next unless $month;

        my $region = $cols[1]->as_trimmed_text;
        my $link   = scalar $cols[2]->look_down('_tag','a');
        next unless $link;

        @{$reports{$year.$month}{$region}}{qw(path filename)} =
            ($link->attr('href'), $link->as_trimmed_text);
    }

    # Everything needed has been copied out, so release the tree explicitly
    $tree->delete;

    # Save the list for later and return
    $s->{financial_reports} = \%reports;
}

# Fetch the financial reports of one month
#
# Takes an optional month of the form 'YYYYMM'; without it the most recent
# available month is used. Returns a hash (as a list) keyed by month and then
# by region; each region entry has the keys header, data, file, filename,
# total and currency. Returns an empty list on a malformed month or when the
# report list is unavailable.
sub financial_report
{
    # Unpack explicitly: "my $date = shift if ..." is documented in perlsyn
    # as undefined behaviour
    my ($s, $date) = @_;

    # Only an exact YYYYMM string is acceptable
    return if $date and $date !~ /\A\d{6}\z/;

    # Get the list of available reports
    my $list = $s->financial_report_list();
    return unless $list;
    my %reports = %$list;

    # Get the most recent month's reports if no month was given
    unless( $date ) {
        my @dates = sort { $b <=> $a } keys %reports;
        $date = shift @dates;
        return unless $date;
    }

    # Fetch the reports for either the given month or the most recent month available
    my $regions = $reports{$date};
    return unless $regions;

    my $eu_reg = qr/(\d\d)\.(\d\d)\.(\d{4})/;
    my $us_reg = qr/(\d\d)\/(\d\d)\/(\d{4})/;
    my $has_text = sub { scalar grep { defined $_ and /\S/ } @{$_[0]} };

    my %out;
    for my $region ( keys %{$regions} ) {
        my $r = $s->request($regions->{$region}{path});
        next unless $r and $r->is_success;

        # Parse the data
        my $content = $r->content;
        my %table = parse_table($content);
        my ($header, $data) = @table{qw(header data)};
        next unless $header and $data;

        # Strip off the Total row and parse it. @total stays empty when the
        # last row is not a Total row, in which case no rows are removed.
        my @total;
        if( @$data ) {
            my @last = grep { defined $_ and length $_ } @{$data->[-1]};
            if( @last and $last[0] eq 'Total' ) {
                shift @last;
                @total = @last;
                pop @$data;     # Remove the Total row from the data

                # Discard the blank row in front of it, but never a row
                # that carries data
                pop @$data if @$data and !$has_text->($data->[-1]);
            }
        }

        # Locate the date columns. The search is bounded by the header
        # length, so a report without these columns is simply left alone.
        my ($startIndex) = grep { defined $header->[$_] and $header->[$_] eq 'Start Date' } 0..$#$header;
        my ($endIndex)   = grep { defined $header->[$_] and $header->[$_] eq 'End Date' } 0..$#$header;

        # Convert the various region-specific date formats to YYYYMMDD
        if( defined $startIndex and defined $endIndex ) {
            for my $row ( @$data ) {
                for my $index ( $startIndex, $endIndex ) {
                    my $value = $row->[$index];
                    next unless defined $value;
                    if( $value =~ $eu_reg ) {       # EU format: DD.MM.YYYY
                        $row->[$index] = $3.$2.$1;
                    }
                    elsif( $value =~ $us_reg ) {    # US format: MM/DD/YYYY
                        $row->[$index] = $3.$1.$2;
                    }
                }
            }
        }

        @{$out{$date}{$region}}{qw(header data file filename total currency)} =
            ($header, $data, $content, $regions->{$region}{filename}, @total);
    }

    return %out;
}

# Fetch the list of available dates for Sales/Trend Monthly Summary Reports. This
# caches the returned results so it can be safely called multiple times.
#
# Returns a list of hash references with the keys 'From' and 'To', most recent
# first, or an empty list if the form or its date drop-down is unavailable.
sub monthly_free_summary_dates
{
    my $s = shift;

    # Get an HTML::Form object for the Sales/Trends Reports Monthly Summary page
    my $form = $s->monthly_free_summary_form();
    return unless $form;

    # Pull the available date ranges out of the form's select input
    my $input = $form->find_input('9.14.1', 'option');
    return unless $input;

    # Parse the strings into an array of hash references. Names that do not
    # split into a well-formed From/To pair are skipped.
    my @dates;
    for my $name ( grep { defined } $input->value_names ) {
        my @parts = split / /, $name;
        next unless @parts % 2;
        my %range = ('From', @parts);
        next unless defined $range{To};
        push @dates, \%range;
    }

    # MM/DD/YYYY does not sort chronologically as text, so compare YYYYMMDD
    my $sort_key = sub {
        my ($date) = @_;
        return $date =~ m{\A(\d{2})/(\d{2})/(\d{4})\z} ? $3.$1.$2 : $date;
    };

    # Sort (descending by end date) and return the date ranges
    return map  { $_->[1] }
           sort { $b->[0] cmp $a->[0] }
           map  { [ $sort_key->($_->{To}), $_ ] } @dates;
}

# Fetch a Sales/Trend Monthly Free Summary report
#
# Options: From and To, both of the form 'MM/DD/YYYY'. Unless both are given
# the most recent available month is used. Returns the list produced by
# parse_sales_summary() followed by ('file', $raw_content, 'filename',
# $filename). Returns an empty list on malformed dates, when nothing is
# available, when the month has no transactions, or when the download or the
# decompression fails.
sub monthly_free_summary
{
    # Unpack explicitly: "my (%options) = @_ if ..." is documented in perlsyn
    # as undefined behaviour
    my ($s, %options) = @_;

    if( $options{To} and $options{From} ) {
        # Only exact MM/DD/YYYY strings are acceptable
        my $valid = qr{\A\d{2}/\d{2}/\d{4}\z};
        return unless $options{To} =~ $valid and $options{From} =~ $valid;
    }
    else {
        # Get the list of available dates
        my @months = $s->monthly_free_summary_dates();
        return unless @months;

        # The list is sorted in descending order, so most recent is first
        %options = %{ shift(@months) };
    }

    # Munge the date range into the format used by the form. Each match is
    # checked so the captures always belong to the string being converted.
    return unless defined $options{To}
        and $options{To} =~ /(\d{2})\/(\d{2})\/(\d{4})/;
    my $to = $3.$1.$2;
    return unless defined $options{From}
        and $options{From} =~ /(\d{2})\/(\d{2})\/(\d{4})/;
    my $month = $3.$1.$2.'#'.$to;

    # Get an HTML::Form object for the Sales/Trends Reports Monthly Summary page
    my $form = $s->monthly_free_summary_form();
    return unless $form;

    # Submit the form to get the requested monthly summary
    $form->value('#selReportType', 'Summary');
    $form->value('#selDateType', 'Monthly Free');
    $form->value('#dayorweekdropdown', $month);
    $form->value('hiddenDayOrWeekSelection', $month);
    $form->value('hiddenSubmitTypeName', 'Download');
    $form->value('download', 'Download');

    # Fetch the summary
    my $r = $s->{ua}->request($form->click('download'));
    return unless $r and $r->is_success;

    # If a given month is actually empty, the download will return the same page
    # with a notice to the user. Check for the notice and bail out if found.
    return unless index($r->as_string, 'There are no free transactions to report') == -1;

    my $filename = $r->header('Content-Disposition');
    $filename = (split(/=/, $filename))[1] if $filename;

    # A successful parse yields at least the header and data pairs; anything
    # shorter means the content could not be gunzip'd
    my $content = $r->content;
    my @parsed  = parse_sales_summary(\$content);
    return unless @parsed > 1;

    return (@parsed, 'file', $content, 'filename', $filename);
}

# Fetch the list of available dates for Sales/Trend Weekly Summary Reports. This
# caches the returned results so it can be safely called multiple times.
sub weekly_sales_summary_dates
{
    # Returns a list of hash references with the keys 'From' and 'To', most
    # recent first, or an empty list if the Weekly Summary form or its date
    # drop-down cannot be obtained.
    my $s = shift;

    # Get an HTML::Form object for the Sales/Trends Reports Weekly Summary page
    my $form = $s->weekly_sales_summary_form();
    return unless $form;

    # Pull the available date ranges out of the form's select input
    my $input = $form->find_input('#dayorweekdropdown', 'option');
    return unless $input;

    # Parse the strings into an array of hash references. Names that do not
    # split into a well-formed From/To pair are skipped.
    my @dates;
    for my $name ( grep { defined } $input->value_names ) {
        my @parts = split / /, $name;
        next unless @parts % 2;
        my %range = ('From', @parts);
        next unless defined $range{To};
        push @dates, \%range;
    }

    # MM/DD/YYYY does not sort chronologically as text ('12/31/2008' would
    # come out ahead of '01/07/2009'), so compare YYYYMMDD keys instead
    my $sort_key = sub {
        my ($date) = @_;
        return $date =~ m{\A(\d{2})/(\d{2})/(\d{4})\z} ? $3.$1.$2 : $date;
    };

    # Sort (descending by end date) and return the date ranges
    return map  { $_->[1] }
           sort { $b->[0] cmp $a->[0] }
           map  { [ $sort_key->($_->{To}), $_ ] } @dates;
}

# Fetch a Sales/Trend Weekly Summary report
#
# Takes an optional week-ending date of the form 'MM/DD/YYYY'; without it the
# most recent available week is used. Returns the list produced by
# parse_sales_summary() followed by ('file', $raw_content, 'filename',
# $filename). Returns an empty list on a malformed date, when no form or week
# is available, or when the download or the decompression fails.
sub weekly_sales_summary
{
    # Unpack explicitly: "my $week = shift if ..." is documented in perlsyn
    # as undefined behaviour
    my ($s, $week) = @_;

    # Only an exact MM/DD/YYYY string is acceptable
    return if $week and $week !~ m{\A\d{2}/\d{2}/\d{4}\z};

    unless( $week ) {
        # Get the list of available dates
        my @weeks = $s->weekly_sales_summary_dates();
        return unless @weeks;

        # The list is sorted in descending order, so most recent is first
        my $latest = shift @weeks;
        return unless ref $latest eq 'HASH' and $latest->{To};
        $week = $latest->{To};
    }

    # Get an HTML::Form object for the Sales/Trends Reports Weekly Summary page
    my $form = $s->weekly_sales_summary_form();
    return unless $form;

    # Submit the form to get the requested weekly summary
    $form->value('#selReportType', 'Summary');
    $form->value('#selDateType', 'Weekly');
    $form->value('#dayorweekdropdown', $week);
    $form->value('hiddenDayOrWeekSelection', $week);
    $form->value('hiddenSubmitTypeName', 'Download');
    $form->value('download', 'Download');

    # Fetch the summary
    my $r = $s->{ua}->request($form->click('download'));
    return unless $r and $r->is_success;

    my $filename = $r->header('Content-Disposition');
    $filename = (split(/=/, $filename))[1] if $filename;

    # A successful parse yields at least the header and data pairs; anything
    # shorter means the content could not be gunzip'd
    my $content = $r->content;
    my @parsed  = parse_sales_summary(\$content);
    return unless @parsed > 1;

    return (@parsed, 'file', $content, 'filename', $filename);
}

# --- Getters and Setters ---

# Get the username, or set it when an argument is given. Returns the current
# value either way.
sub user
{
    my $s = shift;
    $s->{user} = shift if scalar @_;
    $s->{user};
}

# Get the password, or set it when an argument is given. Returns the current
# value either way.
sub password
{
    my $s = shift;
    $s->{password} = shift if scalar @_;
    $s->{password};
}

# Return the first form named 'frmVendorPage' found in the given response, or
# undef if there is no response or no such form. Forms without a name are
# skipped rather than compared as undef.
sub _vendor_form
{
    my ($response) = @_;
    return undef unless $response;

    my ($form) = grep {
        my $name = $_->attr('name');
        defined $name and $name eq 'frmVendorPage';
    } HTML::Form->parse($response);

    return $form;
}

# Shared implementation of the three *_form methods below.
#
# $cache_key is the object key under which the response is cached and
# $date_type is the value for the '#selDateType' input. The Sales/Trend
# Reports form is submitted once with 'ShowDropDown'; the response contains a
# form whose select input lists the available dates. Only successful
# responses are cached, so a failed request can be retried by a later call.
# Returns an HTML::Form object, or undef on failure.
sub _summary_form
{
    my ($s, $cache_key, $date_type) = @_;

    # Use cached response to avoid another trip on the net
    unless( $s->{$cache_key} ) {
        # Get an HTML::Form object for the Sales/Trends Reports page. Then
        # fill it out and submit it to get the list of available dates.
        my $form = $s->sales_form();
        return undef unless $form;

        $form->value('#selReportType', 'Summary');
        $form->value('#selDateType', $date_type);
        $form->value('hiddenSubmitTypeName', 'ShowDropDown');

        my $r = $s->{ua}->request($form->click('download'));
        return undef unless $r and $r->is_success;
        $s->{$cache_key} = $r;
    }

    # The response includes a form containing a select input element with the
    # list of available dates. Create and return a form object for it.
    return _vendor_form($s->{$cache_key});
}

# Use the Sales/Trend Reports form to get a form for fetching daily summaries
sub daily_sales_summary_form
{
    my ($s) = @_;
    return _summary_form($s, 'daily_summary_sales_response', 'Daily');
}

# Use the Sales/Trend Reports form to get a form for fetching monthly summaries
sub monthly_free_summary_form
{
    my ($s) = @_;
    return _summary_form($s, 'monthly_summary_free_response', 'Monthly Free');
}

# Use the Sales/Trend Reports form to get a form for fetching weekly summaries
sub weekly_sales_summary_form
{
    my ($s) = @_;
    return _summary_form($s, 'weekly_summary_sales_response', 'Weekly');
}

# Generate an HTML::Form from the cached Sales/Trend Reports page
# Returns undef if the page or its 'frmVendorPage' form is unavailable.
sub sales_form
{
    my $s = shift;

    # Fetch the Sales/Trend Report page and pick the vendor form out of it
    return _vendor_form($s->sales_response());
}

# Follow the Sales/Trend Reports redirect and store the response for later use
# Returns the response object, or undef if the login fails, the Sales/Trend
# Reports path is unknown, the page carries no refresh redirect, or the
# redirect target cannot be fetched. Only a successful response is cached.
sub sales_response
{
    my $s = shift;

    # Return the cached response to avoid another trip on the net
    return $s->{sales_response} if $s->{sales_response};

    # Check for a valid login
    return undef unless $s->login;
    return undef unless $s->{sales_path};

    # Handle the Sales/Trend Reports redirect. The target is only used when
    # the pattern really matched, never a stale capture.
    my $r = $s->request($s->{sales_path});
    return undef unless $r;
    return undef unless $r->as_string =~ /<META HTTP-EQUIV="refresh" Content="0;URL=(.*)">/;

    $r = $s->{ua}->get($1);
    return undef unless $r and $r->is_success;

    $s->{sales_response} = $r;
}

# --- Internal use only ---

# GET a path below URL_PHOBOS with the object's user agent
# Returns the response object, or undef if there is no user agent or no path.
sub request
{
    my ($s, $url) = @_;
    return undef unless $s->{ua};
    return undef unless defined $url;
    return $s->{ua}->get(URL_PHOBOS().$url);
}

1;

=head1 NAME

WWW::iTunesConnect - An iTunesConnect client interface

=head1 SYNOPSIS

    use WWW::iTunesConnect;

    my $itc = WWW::iTunesConnect->new(user=>$user, password=>$password);
    my %report = $itc->daily_sales_summary;

=head1 DESCRIPTION

C<iTunesConnect> provides an interface to Apple's iTunes Connect website.
Daily, Weekly and Monthly summaries, as well as Financial Reports, can be
retrieved. Eventually this will become a complete interface.

A script suitable for use as a nightly cronjob can be found at
L<http://bfoz.net/projects/itc/>

=head1 CONSTRUCTOR

=over

=item $itc = WWW::iTunesConnect->new(user=>$user, password=>$password);

Constructs and returns a new C<iTunesConnect> interface object. Accepts a hash
containing the iTunes Connect username and password.

=back

=head1 ATTRIBUTES

=over

=item $itc->user

Get/Set the iTunes Connect username. NOTE: User and Password must be set
before calling any other methods.

=item $itc->password

Get/Set the iTunes Connect password. NOTE: User and Password must be set
before calling any other methods.

=back

=head1 Class Methods

=over

=item %report = WWW::iTunesConnect::parse_sales_summary($input, %options)

Parse a gzip'd summary file fetched from the Sales/Trend page. Call it as a
fully qualified function; the module does not export it. Arguments are the
same as the L<IO::Uncompress::Gunzip> constructor, less the output argument.
To parse a file pass a scalar containing the file name as $input. To parse a
string of content, pass a scalar reference as $input. The %options hash is
passed directly to I<gunzip>.

The returned hash has two elements: I<header> and I<data>. The I<header>
element is a reference to an array of the column headers in the fetched TSV
file. The I<data> element is a reference to an array of array references, one
for each non-header line in the fetched TSV file.

=back

=head1 METHODS

These methods fetch various bits of information from the iTunes Connect servers.
Everything here uses LWP and is therefore essentially a screen scraper. So, be
careful and try not to load up Apple's servers too much. We don't want them to
make this any more difficult than it already is.

=over

=item $itc->login()

Uses the username and password properties to authenticate to the iTunes Connect
server. This is automatically called as needed by the other fetch methods if
user and password have already been set.

=item $itc->daily_sales_summary_dates

Fetch the list of available dates for Sales/Trend Daily Summary Reports. This
caches the returned results so it can be safely called multiple times. Note,
however, that if the parent script runs for longer than 24 hours the cached
results will be invalid. Dates are sorted in descending order.

=item $itc->daily_sales_summary()

Fetch the most recent Sales/Trends Daily Summary report and return it as a
hash of array references. The returned hash has two elements in addition to
the elements returned by I<parse_sales_summary>: I<file> and I<filename>. The
I<file> element is the raw content of the file retrieved from iTunes Connect
and the I<filename> element is the filename provided by the Content-Disposition
header line.

If a single string argument is given in the form 'MM/DD/YYYY' that date will be
fetched instead (if it's available).

=item $itc->financial_report_list()

Fetch the list of available Financial Reports. This caches the returned results
and can be safely called multiple times.

=item $itc->financial_report()

Fetch the most recent Financial Report and return it as a hash. The keys of the
returned hash are of the form 'YYYYMM', each of which is a hash containing one
entry for each region included in that month's report.
Each of the region entries is yet another hash with six elements:

    Key         Description
    ---------------------------------------------
    currency    Currency code
    data        Reference to array of report rows
    file        Raw content of the retrieved file
    filename    Retrieved file name
    header      Header row
    total       Sum of all rows in data

If a single string argument is given in the form 'YYYYMM', that month's report
will be fetched instead (if it's available).

=item $itc->monthly_free_summary_dates

Fetch the list of available months for Sales/Trend Monthly Summary Reports. This
caches the returned results so it can be safely called multiple times.

Months are returned as an array of hash references in descending order. Each
hash contains the keys I<From> and I<To>, indicating the start and end dates of
each report.

=item $itc->monthly_free_summary( %options )

Fetch the most recent Sales/Trends Monthly Summary report and return it as a
hash of array references. The returned hash has two elements in addition to
the elements returned by I<parse_sales_summary>: I<file> and I<filename>. The
I<file> element is the raw content of the file retrieved from iTunes Connect
and the I<filename> element is the filename provided by the Content-Disposition
header line.

If both I<From> and I<To> options are passed, and both are of the form
'MM/DD/YYYY', the monthly summary matching the two dates will be fetched
instead (if it's available). The hashes returned by monthly_free_summary_dates()
are suitable for passing to this method.

=item $itc->weekly_sales_summary_dates

Fetch the list of available dates for Sales/Trend Weekly Summary Reports. This
caches the returned results so it can be safely called multiple times. Dates
are sorted in descending order.

=item $itc->weekly_sales_summary()

Fetch the most recent Sales/Trends Weekly Summary report and return it as a
hash of array references. The returned hash has two elements in addition to
the elements returned by I<parse_sales_summary>: I<file> and I<filename>.
The I<file> element is the raw content of the file retrieved from iTunes
Connect and the I<filename> element is the filename provided by the
Content-Disposition header line.

If a single string argument is given in the form 'MM/DD/YYYY' the week ending
on the given date will be fetched instead (if it's available).

=back

=head1 SEE ALSO

L<LWP>
L<HTML::Form>
L<HTML::Tree>
L<IO::Uncompress::Gunzip>
L<Net::SSLeay>

=head1 AUTHOR

Brandon Fosdick, E<lt>bfoz@bfoz.netE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2008-2009 Brandon Fosdick <bfoz@bfoz.net>

This software is provided under the terms of the BSD License.

=cut