#!/usr/bin/env perl
#
#   Copyright (c) MediaTek USA Inc., 2024
#
#   This program is free software;  you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or (at
#   your option) any later version.
#
#   This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY;  without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program;  if not, see
#   <http://www.gnu.org/licenses/>.
#
#
# This script reads C/C++ coverage data in JSON format, as generated by
#   'llvm-cov export -format=text ....'
# from one or more JSON data files, and translates that data into LCOV
# .info format.
#
#   $ clang[++] -o myExe -fprofile-instr-generate -fcoverage-mapping \
#       [-fcoverage-mcdc] ....
#   $ ./myExe ...
#   $ llvm-profdata merge -o myExe.profdata --sparse *.profraw
#   $ llvm-cov export -format=text -instr-profile=myExe.profdata \
#       ./myExe > myExe.json
#   $ llvm2lcov [--output myExe.info] [--test-name name] [options] myExe.json
#
# In order to generate MC/DC data, note that you must:
#   - use LLVM/18 or newer
#   - enable MC/DC instrumentation in your compile/link steps, and
#   - pass the '--mcdc-coverage' flag to llvm2lcov
#
# See 'llvm2lcov --help' for more usage information
#
# See the LLVM documentation for more information on flags and compilation options.

use strict;
use warnings;
require Exporter;

use File::Basename qw(basename dirname fileparse);
use File::Spec::Functions qw /abs2rel catdir file_name_is_absolute splitdir
                              splitpath catpath catfile/;
use File::Temp;
use File::Copy qw(copy move);
use File::Path;
use Cwd qw/abs_path getcwd realpath/;
use Time::HiRes;    # for profiling
use Capture::Tiny;
use FindBin;
use Storable;
use POSIX;

use lib "$FindBin::RealBin/../lib";
use lcovutil;

# Print the tool's usage/help text.
#
# Argument:
#   a filehandle (bareword, typeglob or glob reference) to which the text
#   is written.
# The text interpolates $lcovutil::tool_name, so that variable should be set
# before this is called.
# NOTE(review): nothing in this file calls print_usage directly - presumably
#   it is invoked by lcovutil's option handling for '--help'; confirm.
sub print_usage(*)
{
    # alias the caller's handle so it can be used as a plain print target
    local *HANDLE = shift;
    print(HANDLE <<EOF);
Usage: $lcovutil::tool_name [OPTIONS] json_file [json_file ...]
Translate LLVM\'s 'llvm-cov' JSON coverage data file to LCOV .info
file format.

In addition to common options supported by other tools in the LCOV
suite (e.g., --comment, --version-script, --ignore-error, --substitute,
--exclude, etc.), the tool options are:

  --output filename:
      The lcov data will be written to the specified file - or to
      the file called 'llvm2lcov.info' in the current run directory
      if this option is not used.

  --test-name name:
      Coverage info will be associated with the testcase name provided.
      It is not necessary to provide a name.

  --branch-coverage:
      Include branch coverage data in the output.

  --mcdc-coverage:
      Include MC/DC data in the output.
      Note that you must be using LLVM/18 or higher, and must have instrumented
      your code for MC/DC collection.  See the LLVM documentation for details.

See LLVM documentation for directions on how to generate coverage data
in JSON format.

For example:

    # compile your example, instrumenting for MC/DC coverage
  \$ clang++ -o myExe -fprofile-instr-generate -fcoverage-mapping \\
       -fcoverage-mcdc myCode.cpp
    # run your testcases
  \$ ./myExe ...
    # convert profile data
  \$ llvm-profdata merge -o myExe.profdata --sparse *.profraw
    # export coverage data in JSON format
  \$ llvm-cov export -format=text -instr-profile=myExe.profdata \\
       ./myExe > myExe.json
    # use this script to convert to LCOV format
  \$ $lcovutil::tool_name --output myExe.info --test-name myTestcase \\
       --mcdc-coverage --branch-coverage myExe.json
    # and generate a genhtml-format coverage report:
  \$ genhtml -o html_report myExe.info ...

EOF
}

# Read one or more llvm-cov JSON export files and build LCOV coverage data.
#
# Arguments:
#   $testname - testcase name to associate with the data; undef is treated
#               as the empty string
#   remaining - paths of the JSON files to read (at least one is required)
#
# Returns:
#   the TraceFile object holding the per-file line data plus, depending on
#   the lcovutil coverage flags, branch, MC/DC and function data.
#
# Dies if no JSON file is given, if a file does not exist or does not have a
# top-level 'data' array, if a segment/branch/MC/DC record does not have the
# expected number of fields, or if a function refers to a file that was not
# seen in the 'files' section.
sub parse
{
    my $testname = shift;

    die('JSON file argument required') unless @_;

    my $top = TraceFile->new();

    $testname = '' unless defined($testname);

    my $srcReader = ReadCurrentSource->new();

    foreach my $jsonFile (@_) {
        die("no such JSON file '$jsonFile'") unless -e $jsonFile;
        my $json = JsonSupport::load($jsonFile);
        die("unrecognized JSON file format in $jsonFile")
            unless (defined($json) &&
                    exists($json->{data}) &&
                    'ARRAY' eq ref($json->{data}));

        lcovutil::info("read $jsonFile\n");

        foreach my $k (@{$json->{data}}) {
            #lcovutil::info("starting data entry..\n");
            foreach my $f (@{$k->{files}}) {
                lcovutil::info('parsing ' . $f->{filename} . " ..\n");
                my $filename =
                    ReadCurrentSource::resolve_path($f->{filename}, 1);
                if (TraceFile::skipCurrentFile($filename)) {
                    # report each excluded file only once
                    if (!exists($lcovutil::excluded_files{$filename})) {
                        $lcovutil::excluded_files{$filename} = 1;
                        lcovutil::info("Excluding $filename\n");
                    }
                    next;
                }
                $srcReader->open($filename);

                my $fileInfo = $top->data($filename);

                my $version = lcovutil::extractFileVersion($filename);
                $fileInfo->version($version)
                    if (defined($version) && $version ne "");

                # NOTE: lexicals are declared first and assigned conditionally
                # below.  'my $x = ... if COND' has undefined behaviour in
                # Perl: when COND is false the variable may keep a value from
                # an earlier pass through the same scope.
                my $lineData = $fileInfo->test($testname);
                # use branch data to derive MC/DC expression - so need
                # it, even if user didn't ask
                my $branchData;
                $branchData = $fileInfo->testbr($testname)
                    if $lcovutil::br_coverage || $lcovutil::mcdc_coverage;
                my $mcdcData;
                $mcdcData = $fileInfo->testcase_mcdc($testname)
                    if $lcovutil::mcdc_coverage;

                my $branches = $f->{branches};
                my $segments = $f->{segments};
                my $mcdc;
                $mcdc = $f->{mcdc_records}
                    if $lcovutil::mcdc_coverage && exists($f->{mcdc_records});

                # line coverage: every segment which carries a count
                # contributes that count to the line it starts on
                foreach my $s (@$segments) {
                    die("unexpected segment data") unless scalar(@$s) == 6;
                    my ($line, $col, $count, $hasCount, $isRegion, $isGap) =
                        @$s;
                    next unless $hasCount;
                    $lineData->append($line, $count);
                }

                if ($branchData) {
                    my $currentLine = -1;
                    my $branchIdx;
                    foreach my $branch (@$branches) {
                        die("unexpected branch data")
                            unless scalar(@$branch) == 9;
                        my ($line, $startCol, $endline,
                            $endcol, $trueCount, $falseCount,
                            $fileId, $expandedId, $kind) = @$branch;
                        if ($line != $currentLine &&
                            defined($lineData->value($line))) {
                            $branchIdx   = 0;       # restart counter
                            $currentLine = $line;
                        } else {
                            # this branch is part of the current group
                            ++$branchIdx;
                        }
                        # source text of the branch expression, when the
                        # source file content is available
                        my $expr;
                        $expr =
                            $srcReader->getExpr($line, $startCol, $endline,
                                                $endcol)
                            if $srcReader->notEmpty();

                        my $br =
                            BranchBlock->new($branchIdx, $trueCount, $expr);
                        $branchData->append($line, 0, $br, $filename);
                    }
                }
                if ($mcdc) {
                    foreach my $m (@$mcdc) {
                        # what are fileID and kind?
                        die("unexpected MC/DC data") unless scalar(@$m) == 7;
                        my ($line, $startCol, $endLine, $endcol, $fileId,
                            $kind, $cov)
                            = @$m;
                        die("unexpected MC/DC cov")
                            unless 'ARRAY' eq ref($cov);
                        my $groupSize = scalar(@$cov);

                        # read the source line and extract the expression...
                        my $expr;
                        $expr =
                            $srcReader->getExpr($line, $startCol, $endLine,
                                                $endcol)
                            if ($srcReader->notEmpty());

                        my $current_mcdc =
                            $mcdcData->new_mcdc($mcdcData, $line);
                        my $branch = $branchData->value($line);
                        my $idx    = 0;
                        foreach my $c (@$cov) {
                            # Must start out undefined for every condition:
                            # it is overwritten with the label just below, and
                            # a leftover label from the previous condition
                            # would otherwise be mistaken for a branch
                            # expression.
                            my $branchExpr;
                            $branchExpr =
                                $branch->getBlock(0)->[$idx]->expr()
                                if $branch &&
                                (scalar(@{$branch->getBlock(0)}) > $idx);
                            # label the condition with its source text when
                            # known - otherwise, with its index in the group
                            $branchExpr =
                                defined($branchExpr) ?
                                "'$branchExpr' in '$expr'" :
                                $idx;

                            # one entry for each sense (0 and 1) of the
                            # condition
                            $current_mcdc->insertExpr($filename, $groupSize, 0,
                                                      $c, $idx, $branchExpr);
                            $current_mcdc->insertExpr($filename, $groupSize, 1,
                                                      $c, $idx, $branchExpr);
                            ++$idx;
                        }
                        $mcdcData->close_mcdcBlock($current_mcdc);
                    }
                }    # MCDC
                # branch data was collected only to label MC/DC expressions:
                # drop it if the user did not ask for branch coverage
                $fileInfo->testbr()->remove($testname)
                    if $lcovutil::mcdc_coverage && !$lcovutil::br_coverage;
                lcovutil::info(2, "finished parsing $filename\n");
            }

            next unless $lcovutil::func_coverage;
            foreach my $f (@{$k->{functions}}) {
                my $name      = $f->{name};
                my $filenames = $f->{filenames};    # array
                if ($#$filenames != 0) {
                    lcovutil::ignorable_error($lcovutil::ERROR_USAGE,
                        "unsupported: function $name associated with multiple files"
                    );
                    next;
                }
                my $filename =
                    ReadCurrentSource::resolve_path($filenames->[0], 1);
                if (TraceFile::skipCurrentFile($filename)) {
                    if (!exists($lcovutil::excluded_files{$filename})) {
                        $lcovutil::excluded_files{$filename} = 1;
                        lcovutil::info("Excluding $filename\n");
                    }
                    next;
                }
                die('unexpected unknown file \'' . $filenames->[0] . '\'')
                    unless $top->file_exists($filename);
                my $info    = $top->data($filename);
                my $count   = $f->{count};
                my $regions = $f->{regions};    # startline/col, endline/col/

                my $functionMap = $info->testfnc($testname);
                my $startLine = $regions->[0]->[0];  # startline of first region
                # NOTE:  might be a mistake to grab the end line of the last region -
                #  LCOV follows GCC behaviour and associates lines with where they
                #  start - not where they end...
                my $endline = $regions->[-1]->[2];    # endline of last region
                # define the function the first time we see it; after that,
                # only accumulate its hit count
                $functionMap->define_function($name, $startLine, $endline)
                    unless defined($functionMap->findName($name));
                $functionMap->add_count($name, $count);

                # for the moment - don't worry about the coverpoints in the function
                #my $branches = $f->{branches};
                #my $mcdc = $f->{mcdc_records} if exists($f->{mcdc_records});
                #foreach my $r (@$regions) {
                #  my ($startLine, $startCol, $endLine, $endCol, $count, $fileId,
                #      $expandedId, $kind) = @$r;
                #}
            }
        }
        lcovutil::info(2, "finished $jsonFile\n");
    }
    # now create the merge summary...
    foreach my $filename ($top->files()) {
        my $info = $top->data($filename);

        # pairs of [summary object, per-testcase container] to merge
        my @work;
        push(@work, [$info->sum(), $info->test]);
        push(@work, [$info->sumbr(), $info->testbr])
            if $lcovutil::br_coverage;
        push(@work, [$info->func(), $info->testfnc])
            if $lcovutil::func_coverage;
        push(@work, [$info->mcdc(), $info->testcase_mcdc()])
            if $lcovutil::mcdc_coverage;

        foreach my $d (@work) {
            my ($sum, $pertest) = @$d;
            $sum->union($pertest->value($testname));
        }
    }
    return $top;
}

# ---- main program ----

# default output file, used unless overridden on the command line
my $output_filename = 'llvm2lcov.info';
my $testname;
# tool-specific options, handed to lcovutil::parseOptions for parsing
my %opts = ('test-name|t=s'       => \$testname,
            'output-filename|o=s' => \$output_filename,);
my %rc_opts;
if (!lcovutil::parseOptions(\%rc_opts, \%opts, \$output_filename)) {
    # do not go on to translate and write data when the command line could
    # not be understood
    print(STDERR "argparse failed\n");
    exit(1);
}

# translate the JSON file(s) named on the command line, then filter and
# write the result
my $info = parse($testname, @ARGV);
$info->applyFilters(ReadCurrentSource->new());
$info->write_info_file($output_filename);

$info->print_summary() if $lcovutil::verbose >= 0;
my $exit_code = 0;
$info->checkCoverageCriteria();
CoverageCriteria::summarize();
# exit status is non-zero when the coverage criteria status is set
$exit_code = 1 if $CoverageCriteria::coverageCriteriaStatus;

lcovutil::summarize_messages();

lcovutil::cleanup_callbacks();

exit $exit_code;
