#!/usr/bin/perl -w

# The purpose of this script is to compute a binary compatibility
# signature for a project based on Git.  The assumption is that the
# project has an associated submodule with content that can only be
# distributed in binary form.  The submodule has dependencies on the
# supermodule which affect binary compatibility, and all files which
# are depended on are referred to as dependent files.  This signature
# is computed by the MD5 hash of the concatenation of the paths and
# SHA-1 hashes of all dependent files.
#
# The story is complicated by repository forking - the user may make
# changes to dependent files.  Since an external user cannot rebuild
# the associated submodule, for external users we need to compute the
# signature using the closest commit to a canonical repository and hope
# for the best.  A canonical repository is used to identify a commit
# that is a best common ancestor of HEAD and the canonical repository's
# canonical branch (cf. git-merge-base).
#
# The concepts "dependent file", "canonical repository",
# "canonical branch" and "is an external user" are project-specific and
# are defined by the below three functions.

use strict;
# Locate directory of original perl script 
use FindBin;
use lib "$FindBin::Bin"; # add the current perl script's path into module search path
use Git;  # Git.pm is under rose/scripts/.
use Digest::MD5;

# The below is user-configurable

# Given a file path, returns true iff the path names a dependent file.
# Liao, 12/11/2009, this Makefile.am is being changed frequently now
# It is troublesome to use it for signature generation.
#	     || $path eq "src/frontend/CxxFrontend/Makefile.am" # Rules for generating the tarball
sub is_dependent_file {
	my ($path) = @_;
	return ($path =~ m@^src/ROSETTA/@ # ROSETTA generated header files are used by the EDG/SAGE connection
	     || $path eq "src/frontend/CxxFrontend/EDG" # The reference to the EDG submodule
	     );
}

# Liao, 12/11/2009, this Makefile.am is being changed frequently now
# It is troublesome to use it for signature generation.
#	     || $path eq "src/frontend/CxxFrontend/Makefile.am" # Rules for generating the tarball

# A list of canonical repositories.  This need not include every repository;
# if regex or other pattern matching is required this can be done in
# canonical_repo_branch below.
my @canonical_repos = qw(
	/nfs/casc/overture/ROSE/git/ROSE.git
	file:///nfs/casc/overture/ROSE/git/ROSE.git
	/usr/casc/overture/ROSE/git/ROSE.git
	file:///usr/casc/overture/ROSE/git/ROSE.git
	http://rosecompiler.org/rose.git
	http://www.rosecompiler.org/rose.git
	ssh://liaoch@portal-auth.nersc.gov/project/projectdirs/rosecompiler/www.rosecompiler.org/rose.git
);

# Given a remote repository URL, returns the name of the canonical branch iff
# the URL identifies a canonical repository, undef otherwise.
sub canonical_repo_branch {
	my ($repo) = @_;
	# The internal central repository and (TODO) the external central repository.
	return "master" if grep { $repo eq $_ } @canonical_repos;
	return "master" if $repo =~ m@^ssh://.*/(nfs|usr)/casc/overture/ROSE/git/ROSE\.git$@;
	return undef;
}

# Given a path to the working copy, returns true iff this is an external user.
sub needs_canonical_repo {
	my ($wcpath) = @_;
	# Whether this is an external user is defined by whether the user does not have
	# the EDG submodule checked out.
	return (! -e "$wcpath/src/frontend/CxxFrontend/EDG/Makefile.am");
}

# The below should not need to be modified

# beginning of the execution
# ----------------------------------------------------
# make dist will save the signature to the top directory/bincompat-sig
# which has the same name as the perl script under scripts/bincompat-sig
# very confusing!
# Liao, 12/11/2009
#
# Case 1: using a distribution package
# Check the precomputed bincompat-sig
# If it exist, it means this source tree is a distribution package
# We directly get the precomputed signature to use and exit this script
# This file only exists in tarballs, in which we will not have access to the git history
if (-e "$FindBin::Bin/../bincompat-sig") {
	open(SIG, "$FindBin::Bin/../bincompat-sig") or die "couldn't open signature file: $!";
	print <SIG>;
	close(SIG);
	exit 0;
}

# Case 2: using a ROSE git repository
# for non distribution package: it must be a git repository
# Using FindBin ensures that the script works no matter where it is invoked from
my $repo = Git->repository(Directory => "$FindBin::Bin/..");

# Assigns indices into the result of parse_ls_tree_line, i.e.
# $ git ls-tree HEAD
# 100644 blob e2493c0aeaf5d11b750011a444bb684b62d6c608    .gitignore
# ^mode  ^type ^object                                    ^path
my $MODE = 0; my $TYPE = 1; my $OBJECT = 2; my $PATH = 3;

# Parses line of ls-tree output
sub parse_ls_tree_line {
	my ($line) = @_;
	my $hline = [split /\s/, $line, 4];
	return $hline;
}

# Returns data for an object which is used to compute a signature
sub get_significance_data_for {
	my ($file) = @_;
	return $file->[$OBJECT];
}

# adds data used to compute a signature from the given commit to the given hash
sub add_significance_data {
	my ($commit, $hash) = @_;
	my @ls_tree_lines = $repo->command("ls-tree", "-r", $commit);
	my @files = map { parse_ls_tree_line $_ } @ls_tree_lines;
	my @dependent_files = sort { $a->[$PATH] cmp $b->[$PATH] } grep { is_dependent_file $_->[$PATH] } @files;
	for my $dependent_file (@dependent_files) {
		$hash->add($dependent_file->[$PATH]);
		$hash->add("\0");
		$hash->add(get_significance_data_for($dependent_file));
		$hash->add("\0");
	}
}

# Convenience function for git-merge-base(1)
sub merge_base {
	my @commits = @_;
	return $repo->command_oneline("merge-base", @commits);
}

# Finds a remote in the current repository which refers to a canonical
# repository.  Returns a ref which can be used to refer to the
# canonical branch of that repository.
sub find_canonical_ref {
	my @remotes = $repo->command("remote");
	my %remotes;
	my @canon_ref_candidates;
	for my $remote (@remotes) {
		my $url = $repo->config("remote.$remote.url");
		$remotes{$remote} = $url;
		my $branch = canonical_repo_branch($url);
		if (defined $branch) {
			my $canon_ref = "refs/remotes/$remote/$branch";
			my $canon_ref_commit = eval { $repo->command_oneline([ "rev-parse", $canon_ref ], STDERR => 0) };
			unless (defined $canon_ref_commit) {
				print STDERR <<ERROR;
The remote tracking ref $canon_ref does not exist in this repository.
Please ensure that your remotes are up to date (use "git fetch $remote").
ERROR
				exit 1;
			}
			push @canon_ref_candidates, $canon_ref;
		}
	}
	if (@canon_ref_candidates) {
		return $repo->command_oneline("rev-list", @canon_ref_candidates);
	}
	print STDERR <<ERROR;
Unable to find a remote tracking a canonical repository.  Please add a
canonical repository as a remote and ensure it is up to date.  Currently
configured remotes are:

ERROR
	for my $remote (@remotes) {
		print STDERR "   $remote => $remotes{$remote}\n";
	}
	print STDERR <<ERROR;

Potential canonical repositories include:

ERROR
	for my $canonical_repo (@canonical_repos) {
		print STDERR "   $canonical_repo\n";
	}

	exit 1;
}

my $relevant_commit;

if (@ARGV > 1 && $ARGV[0] eq "for") {
	$relevant_commit = $ARGV[1];
}
# If the user cannot generate the binary content themselves, revert to the most specific commit
# that is common to both the user's branch and the canonical repository
elsif (needs_canonical_repo($repo->wc_path)) {
	my $canon_ref = find_canonical_ref;
	$relevant_commit = merge_base("HEAD", $canon_ref);
} else {
	$relevant_commit = "HEAD";
}

if (@ARGV > 0 && $ARGV[0] eq "diff") {
	$repo->command_noisy("diff", @ARGV[1..$#ARGV], $relevant_commit);
	exit 0;
}

if (@ARGV > 0 && $ARGV[0] eq "show-rev") {
	print "$relevant_commit\n";
	exit 0;
}

my $hash = Digest::MD5->new;
add_significance_data($relevant_commit, $hash);
print $hash->hexdigest . "\n";
