#!/usr/bin/perl
=head1 NAME

tap-to-junit-xml - convert perl-style TAP test output to JUnit-style XML

=head1 SYNOPSIS

tap-to-junit-xml "test suite name" [ outputprefix ] < tap_output.log

=head1 DESCRIPTION

Parse test suite output in TAP (Test Anything Protocol,
C<http://testanything.org/>) format, and produce XML output in a similar format
to that produced by the <junit> ant task.  This is useful for consumption by
continuous-integration systems like Hudson (C<https://hudson.dev.java.net/>).

C<"test suite name"> is a descriptive string used as the B<name> attribute on the
top-level <testsuites> node of the output XML.

If C<outputprefix> is specified, multi-file output will be generated, with
multiple XML files created using C<outputprefix> as their start of their
filenames. This prefix may contain slashes, in which case the files will be
placed into a directory hierarchy accordingly (although care should be taken to
ensure these directories exist in advance).

If C<outputprefix> is not specified, a single XML file will be generated on
STDOUT.

=head1 DEPENDENCIES

 TAP::Parser
 Time::HiRes
 XML::Generator 

=head1 BUGS

  - Output is optimized for Hudson, and may not look quite as good in other UIs.
  - Doesn't do anything with the STDERR from tests.
  - Doesn't fill in the 'errors' attribute in the  <testsuite> element.
  - Doesn't handle "todo" or "skip"
  - Doesn't get the elapsed time for each 'test' (i.e. assertion.)

=head1 AUTHOR

original, junit_xml.pl, by Matisse Enzer <matisse at matisse.net>; see
C<http://twoalpha.blogspot.com/2007/01/junit-style-xml-from-perl-test-files.html>.

pretty much entirely rewritten by Justin Mason <junit at jmason.org>, Feb 2008.

=head1 VERSION

Mar 27 2008 jm 

=head1 COPYRIGHT & LICENSE

Copyright (c) 2007 Matisse Enzer. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut

use strict;
use warnings;

my $opt_suitename = shift @ARGV;
my $opt_multifile = 0;
my $opt_mfprefix;

if (defined $ARGV[0]) {
  $opt_multifile = 1;
  $opt_mfprefix = $ARGV[0];
}

use TAP::Parser;
use Time::HiRes qw(gettimeofday tv_interval);
use XML::Generator qw(:noimport);

# should the 'Test Summary Report' at the end of a test suite be displayed
# as if it was a testcase?  in my opinion, no
my $HIDE_TEST_SUMMARY_REPORT = 1;

my $suite_name = $opt_suitename || 'make test';
my $safe_suite_name = $suite_name; $safe_suite_name =~ s/[^-:_A-Za-z0-9]+/_/gs;

# TODO: it'd be nice to respect 'Universal desirable behavior #1' from
# http://testanything.org/wiki/index.php/TAP_Consumers -- 'Should work on the
# TAP as a stream (ie. as each line is received) rather than wait until all the
# TAP is received'.   But it seems TAP::Parser itself doesn't support it!
# maybe when TAP::Parser does that, we'll do it too.
my $tout = join("", <STDIN>);

my $tap     = TAP::Parser->new( { tap => $tout } );
my $xmlgen = XML::Generator->new( ':pretty', ':std');
my $xmlgenunescaped = XML::Generator->new( ':pretty', ':std', 'escape' => 'unescaped');
my @properties = _get_properties($xmlgen);
my $test_results = _parse_tests( $tap, $xmlgen );

if ($opt_multifile) {
  _gen_junit_multifile_xml( $xmlgen, \@properties, $test_results );
} else {
  print STDOUT _get_junit_xml( $xmlgen, \@properties, $test_results );
}
exit;

#-------------------------------------------------------------------------------

sub _get_junit_xml {
  my ( $xmlgen, $properties, $test_results ) = @_;
  my $xml = "<?xml version='1.0' encoding='UTF-8' ?>\n" . 
          $xmlgen->testsuites({
              name => $suite_name,
            }, @$test_results);
  return $xml;
}

sub _gen_junit_multifile_xml {
  my ( $xmlgen, $properties, $test_results ) = @_;
  my $count = 1;
  foreach my $testsuite (@$test_results) {
    open OUT, ">${opt_mfprefix}.${count}.xml"
          or die "cannot write ${opt_mfprefix}-${count}.xml";
    print OUT "<?xml version='1.0' encoding='UTF-8' ?>\n";
    print OUT $testsuite;
    close OUT;
    $count++;
  }
}

sub _parse_tests {
  my ( $parser, $xmlgen ) = @_;

  my $ctx = {
    testsuites => [ ],
    test_name => 'notest',
    plan_ntests => 0,
    case_id => 0,
  };

  _new_ctx($ctx);

  my $lastunk = '';

  # unknown t/basic_lint.........
  # plan 1..1
  # comment # Running under perl version 5.008008 for linux
  # comment # Current time local: Thu Jan 24 17:44:30 2008
  # comment # Current time GMT:   Thu Jan 24 17:44:30 2008
  # comment # Using Test.pm version 1.25
  # unknown     /usr/bin/perl -T -w ../spamassassin.raw -C log/test_rules_copy  --siteconfigpath log/localrules.tmp -p log/test_default.cf  -L --lint
  # unknown     Checking anything
  # test ok 1
  # test ok 2
  # unknown t/basic_meta.........
  # plan 1..2
  # comment # Running under perl version 5.008008 for linux
  # comment # Current time local: Thu Jan 24 17:44:31 2008
  # comment # Current time GMT:   Thu Jan 24 17:44:31 2008
  # comment # Using Test.pm version 1.25
  # test not ok 1
  # comment # Failed test 1 in t/basic_meta.t at line 91
  # test ok 2
  # unknown  Failed 1/2 subtests
  # unknown t/basic_obj_api......
  # plan 1..4
  # comment # Running under perl version 5.008008 for linux
  # comment # Current time local: Thu Jan 24 17:44:33 2008
  # comment # Current time GMT:   Thu Jan 24 17:44:33 2008
  # comment # Using Test.pm version 1.25
  # test ok 1
  # test ok 2
  # test ok 3
  # test ok 4
  # test ok 9
  # unknown
  # unknown Test Summary Report
  # unknown -------------------
  # unknown t/basic_meta.t   (Wstat: 0 Tests: 2 Failed: 1)
  # unknown   Failed test:  1
  # unknown Files=3, Tests=7,  6 wallclock secs ( 0.01 usr  0.00 sys +  4.39 cusr  0.23 csys =  4.63 CPU)
  # unknown Result: FAIL
  # unknown Failed 1/3 test programs. 1/7 subtests failed.
  # unknown make: *** [test_dynamic] Error 255

  while ( my $r = $parser->next ) {
    my $t = $r->type;
    my $s = $r->as_string; $s =~ s/\s+$//;

    # warn "JMD $t $s";

    if ($t eq 'unknown') {
      $lastunk = $s;

      # PERL_DL_NONLAZY=1 /usr/bin/perl "-MExtUtils::Command::MM" "-e" "test_harness(1, 'blib/lib', 'blib/arch')" t/basic_*
      # if ($s =~ /test_harness\(.*?\)" (.+)$/) {
      # $suite_name = $1;
      # }
      if ($s =~ /^Test Summary Report$/) {
        # create a <testsuite> block for the summary
        $ctx->{plan_ntests} = 0;
        $ctx->{test_name} = "Test Summary Report";
        $ctx->{case_tests} = 1;
        _finish_test_block($ctx);
      }
      elsif ($s =~ /^Result: FAIL$/) {
        $ctx->{case_tests}++;
        $ctx->{case_failures}++;
        my $test_case = {
            classname => test_name_to_classname($ctx->{test_name}),
            name      => 'result',
            'time'    => 0,
        };
        my $failure = $xmlgen->failure({
          type => "OverallTestsFailed",
          message => $s
        }, "__FAILUREMESSAGETODO__");

        if (!$HIDE_TEST_SUMMARY_REPORT) {
          push @{$ctx->{test_cases}}, $xmlgen->testcase($test_case, $failure);
        }
      }
      elsif ($s =~ /^(\S+?)\.\.\.+1\.\.(\d+?)\s*$/) {
        # perl 5.6.x "Test" format plan line
        # unknown t/basic_lint....................1..1

        my ($name, $nt) = ($1,$2);
        if ($ctx->{plan_ntests}) {       # only if there have been tests planned
          _finish_test_block($ctx);
        }

        $ctx->{plan_ntests} = $nt+0;
        $ctx->{test_name} = "$name.t";
      }
    }
    elsif ($t eq 'plan') {
      if ($ctx->{plan_ntests}) {       # only if there have been tests planned
        _finish_test_block($ctx);
      }

      $ctx->{plan_ntests} = 0;
      $s =~ /(\d+)$/ and $ctx->{plan_ntests} = $1+0;

      $ctx->{test_name} = $lastunk;
      $ctx->{test_name} =~ s/\.*\s*$//gs;
      $ctx->{test_name} .= ".t";
    }
    elsif ($t eq 'test') {
      my $ntest = 0;
      if ($s =~ /(?:not |)\S+ (\d+)/) { $ntest = $1+0; }

      if ($ntest > $ctx->{plan_ntests}) {
        # jump in test numbers, more than planned; this is probably TAP::Parser's wierdness.
        # (when it sees the "ok" line at the end of a test case with no number,
        # it outputs the current total number of tests so far.)
        next;
      }

      # clean this up in a Hudson-compatible way; ":" and "/" are out, "." also causes
      # trouble by creating an extra "directory" in the results

      my $test_case = {
          classname => test_name_to_classname($ctx->{test_name}),
          name      => sprintf("test %6d", $ntest), # space-padding ensures ordering
          'time'    => 0,
      };

      $ctx->{case_tests}++;
      my $failure = undef;
      if ($s =~ /^not /i) {
        $ctx->{case_failures}++;
        $failure = $xmlgen->failure({
          type => "TAPTestFailed",
          message => $s
        }, "__FAILUREMESSAGETODO__");
        push @{$ctx->{test_cases}}, $xmlgen->testcase($test_case, $failure);
      }
      else {
        push @{$ctx->{test_cases}}, $xmlgen->testcase($test_case);
      }
    }
      
    $ctx->{sysout} .= $s."\n";
  }

  if (scalar(@{$ctx->{test_cases}}) == 0 &&
      scalar(@{$ctx->{testsuites}}) == 0)
  { 
    # no tests found! create a <testsuite> block containing *something* at least
    $ctx->{case_tests}++;
    my $test_case = {
        classname => test_name_to_classname($ctx->{test_name}),
        name      => 'result',
        'time'    => 0,
    };
    push @{$ctx->{test_cases}}, $xmlgen->testcase($test_case);
  }

  _finish_test_block($ctx);
  return $ctx->{testsuites};
}

sub _new_ctx {
  my $ctx = shift;
  $ctx->{start_time} = [gettimeofday];
  $ctx->{test_cases} = [];
  $ctx->{case_tests} = 0;
  $ctx->{case_failures} = 0;
  $ctx->{case_time} = 0;
  $ctx->{case_id}++;
  $ctx->{sysout} = '';
  return $ctx;
}

sub _finish_test_block {
  my $ctx = shift;
  $ctx->{sysout} =~ s/\n\S+\.*\s*\n$/\n/s;       # remove next test's "t/foo....." line

  my $elapsed_time = 0;     # TODO
  #my $elapsed_time = tv_interval( $ctx->{start_time}, [gettimeofday] );

  # clean it up to valid Java packagename format (or at least something Hudson will
  # consume)
  my $name = $ctx->{test_name};
  $name =~ s/[^-:_A-Za-z0-9]+/_/gs;
  $name = "$safe_suite_name.$name";      # a "directory" for the suite name

  my $testsuite = {
      'time'         => $elapsed_time,
      'name'         => $name,
      tests          => $ctx->{case_tests},
      failures       => $ctx->{case_failures},
      'id'           => $ctx->{case_id},
      errors         => 0,
  };

  my @fixedcases = ();
  foreach my $tc (@{$ctx->{test_cases}}) {
    if ($tc =~ s/__FAILUREMESSAGETODO__/ cdata($ctx->{sysout}) /ges) {
      push @fixedcases, \$tc;       # inhibits escaping!
    } else {
      push @fixedcases, $tc;
    }
  }

  # use "unescaped"; we have already fixed escaping on these strings.
  # note that a reference means 'this is unescaped', bizarrely.
  push @{$ctx->{testsuites}}, $xmlgenunescaped->testsuite($testsuite,
          @fixedcases,
          \("<system-out>\n".cdata($ctx->{sysout})."\n</system-out>"),
          \("<system-err />"));

  _new_ctx($ctx);
};

sub cdata {
  my $s = shift;
  $s =~ s/\]\]>/\](warning: defanged by tap-to-junit-xml)\]>/gs;
  return '<![CDATA['.$s.']]>';
}

sub _get_properties {
    my $xmlgen = shift;
    my @props;
    foreach my $key ( sort keys %ENV ) {
        push @props, $xmlgen->property( { name => "$key", value => $ENV{$key} } );
    }
    return @props;
}

sub test_name_to_classname {
  my $safe = shift;
  $safe =~ s/[^-:_A-Za-z0-9]+/_/gs;
  $safe = "$safe_suite_name.$safe";      # a "directory" for the suite name
  $safe;
}

__END__

# JUnit references:
# http://www.nabble.com/JUnit-4-XML-schematized--td13946472.html
# http://jra1mw.cvs.cern.ch:8180/cgi-bin/jra1mw.cgi/org.glite.testing.unit/config/JUnitXSchema.xsd?view=markup
# skipped tests:
# https://hudson.dev.java.net/issues/show_bug.cgi?id=1251
# Hudson source:
# http://fisheye5.cenqua.com/browse/hudson/hudson/main/core/src/main/java/hudson/tasks/junit/CaseResult.java

