#!/bin/bash
# Compare results from an old svn repo named <basename>_pre_Allura
# in the current directory with a new svn repo named <basename> in
# the current directory.

# This script should be run without arguments.  It first checks that
# the two repos have the same youngest revision number and then
# compares the complete verbose log for the two repos.  If those are
# identical, it then checks out a sample of approximately $nsample
# (defined below) revisions starting at revision 1 and ending at the
# latest revision for each repo and checks that the corresponding
# checked-out trees are identical for each of those revisions.

## Find oldrepo.
## The old repo is the single directory in the current directory whose
## name ends in _pre_Allura.  Match it with a shell glob instead of
## parsing ls output: ls also accepts plain files, and if several
## entries match it returns all of them separated by newlines, which
## would corrupt every later use of $oldrepo.
old_candidates=()
for candidate in *_pre_Allura ; do
    ## An unmatched glob is left as the literal pattern, so the -d test
    ## also covers the "nothing matched" case.
    if [ -d "$candidate" ] ; then
        old_candidates+=("$candidate")
    fi
done

if [ "${#old_candidates[@]}" -eq 0 ] ; then
    echo "Error: could not find old svn repo with name of *_pre_Allura" >&2
    exit 2
elif [ "${#old_candidates[@]}" -gt 1 ] ; then
    echo "Error: found more than one candidate old svn repo: ${old_candidates[*]}" >&2
    exit 2
fi

oldrepo=${old_candidates[0]}
echo "Found old svn repo called $oldrepo"

## Find newrepo.
## The new repo carries the old repo's name without the trailing
## _pre_Allura.  Strip exactly that suffix (not the first occurrence
## anywhere in the name) and do it in the shell so that the name is
## never subjected to word splitting or globbing.
newrepo_name=${oldrepo%_pre_Allura}

## An empty name must be rejected explicitly; otherwise the check would
## be made against the current directory and succeed by accident.
if [ -z "$newrepo_name" ] || [ ! -d "$newrepo_name" ] ; then
    echo "Error: could not find new svn repo with name of $newrepo_name" >&2
    exit 2
fi

newrepo=$newrepo_name
echo "Found new svn repo called $newrepo"

## Check that old and new repo have the same number of revisions.
## svnlook youngest prints the youngest revision number of a repo.  Its
## exit status and output are validated here because an empty value
## would make the numeric test below fail with an error status that
## "if" treats as false, silently continuing with no revision number.
if ! old_number_of_revisions=$(svnlook youngest "$oldrepo") ; then
    echo "Error: svnlook could not determine the youngest revision of $oldrepo" >&2
    exit 1
fi
if ! new_number_of_revisions=$(svnlook youngest "$newrepo") ; then
    echo "Error: svnlook could not determine the youngest revision of $newrepo" >&2
    exit 1
fi
for youngest in "$old_number_of_revisions" "$new_number_of_revisions" ; do
    case "$youngest" in
        '' | *[!0-9]*)
            echo "Error: svnlook returned a non-numeric youngest revision: '$youngest'" >&2
            exit 1
            ;;
    esac
done

if [ "$old_number_of_revisions" -ne "$new_number_of_revisions" ] ; then
    echo "Error: Old and new svn repo have different number of revisions" >&2
    exit 1
fi

number_of_revisions=$new_number_of_revisions
echo "Number of revisions = $number_of_revisions"

## Rough number of different revisions that are compared in detail.
## Actual number could be from nsample to 2*nsample
## depending on how nsample divides into number_of_revisions
## (or fewer than nsample when the repo has fewer revisions than that).
nsample=100
revision_increment=$(( number_of_revisions / nsample ))
## Integer division gives 0 for small repos; step by at least 1.
if [ "$revision_increment" -le 0 ] ; then
    revision_increment=1
fi
echo "Revision increment used for detailed comparisons = $revision_increment"

## Checkout latest revision of old repo and new repo into /tmp
## Any previous working copies are removed first so each run starts
## from a fresh checkout.
rm -rf /tmp/old_source_tree /tmp/new_source_tree

## The repos are addressed by file:// URL built from the current
## directory.  Abort if either checkout fails: continuing would make
## the later log and tree comparisons operate on missing directories,
## and two equally empty results would compare as "identical".
repo_parent=$(pwd)
if ! svn checkout --quiet "file://localhost/$repo_parent/$oldrepo" /tmp/old_source_tree ; then
    echo "Error: could not check out old svn repo $oldrepo into /tmp/old_source_tree" >&2
    exit 1
fi
if ! svn checkout --quiet "file://localhost/$repo_parent/$newrepo" /tmp/new_source_tree ; then
    echo "Error: could not check out new svn repo $newrepo into /tmp/new_source_tree" >&2
    exit 1
fi

## Compare detailed log file for each local directory tree created above.
## Remember the starting directory, then work inside /tmp so the
## working copies and scratch files can be named relatively.
old_dir=$(pwd)
if ! cd /tmp ; then
    echo "Error: could not change directory to /tmp" >&2
    exit 1
fi

## ">|" overwrites the log files even if noclobber is set.  Each svn log
## is checked because two failed invocations would leave two empty
## files that cmp reports as identical.
if ! svn log --verbose old_source_tree >| old_detailed_log ; then
    echo "Error: could not obtain detailed log for old_source_tree" >&2
    exit 1
fi
if ! svn log --verbose new_source_tree >| new_detailed_log ; then
    echo "Error: could not obtain detailed log for new_source_tree" >&2
    exit 1
fi

cmp old_detailed_log new_detailed_log
cmp_rc=$?
if [ "$cmp_rc" -ne 0 ] ; then
    echo "Error: old and new repos have detailed logs that are different" >&2
    exit $cmp_rc
fi
echo "Old and new repos have identical detailed logs"

## Compare local directory trees for substantial sample of revisions
## including both first and last revision.

## Print, sorted, the broken symlinks found below directory $1, as
## paths relative to that directory.
##
## The find -L option makes find follow symlinks if possible, and
## the subsequent -type l ordinarily should find no symlinks at
## all, except for the case of broken links which point to
## non-existent files.  Those remain as symlinks (despite the -L)
## because it is impossible to follow them.  Thus, this combination
## of options and flags always finds just the broken symlinks.
##
## The work is done in a subshell so that a failed cd can never leave
## the script in an unexpected directory, and the output is sorted
## because find does not guarantee any particular order.  The function
## returns non-zero only if the directory cannot be entered; find's own
## exit status is not checked.
list_broken_symlinks () {
    (
        cd "$1" || exit 1
        find -L . -type l | LC_ALL=C sort
    )
}

## Build the list of revisions to compare: every revision_increment-th
## revision starting at 1, plus the last revision if the stepping did
## not already land on it (so it is not compared twice).
revisions=()
last_sampled=
for (( sampled = 1 ; sampled <= number_of_revisions ; sampled += revision_increment )) ; do
    revisions+=("$sampled")
    last_sampled=$sampled
done
if [ "$last_sampled" != "$number_of_revisions" ] ; then
    revisions+=("$number_of_revisions")
fi

for revision in "${revisions[@]}" ; do
    ## Bring both working copies to the revision under test.  A failed
    ## update must stop the run: otherwise both trees would stay at the
    ## previous revision and be reported as identical.
    if ! svn update --quiet --revision="$revision" old_source_tree ; then
        echo "Error: could not update old_source_tree to revision = $revision" >&2
        exit 1
    fi
    if ! svn update --quiet --revision="$revision" new_source_tree ; then
        echo "Error: could not update new_source_tree to revision = $revision" >&2
        exit 1
    fi

    ## Find and eliminate identical broken links from the diff
    ## because otherwise such broken links cause a non-zero
    ## diff return code.  The code below does not deal with
    ## cyclical links, but I don't think any of the repos to be
    ## checked with this script have those.
    if ! list_broken_symlinks old_source_tree >| old_broken_symlinks ; then
        echo "Error: could not list broken symlinks of old_source_tree for revision = $revision" >&2
        exit 1
    fi
    if ! list_broken_symlinks new_source_tree >| new_broken_symlinks ; then
        echo "Error: could not list broken symlinks of new_source_tree for revision = $revision" >&2
        exit 1
    fi

    ## Check that both directories have identical sets of broken symlinks.
    diff --brief old_broken_symlinks new_broken_symlinks
    diff_rc=$?
    if [ "$diff_rc" -ne 0 ] ; then
        echo "Error: old and new repos have different sets of broken symlinks for revision = $revision" >&2
        exit $diff_rc
    fi

    ## Convert list of broken symlinks to the basename form.  This
    ## form is an unfortunate choice because basename collisions can
    ## occur causing additional good files that are not broken
    ## symlinks to be ignored by the diff check.  However, the
    ## basename form is the only form that is acceptable to exclude
    ## files from the diff so we are stuck with this problematic logic.
    ## The list is read line by line so that names containing blanks
    ## are kept intact (names containing newlines are not supported).
    rm -f basename_broken_symlinks
    while IFS= read -r broken_link ; do
        basename "$broken_link"
    done < new_broken_symlinks >| basename_broken_symlinks

    diff -Naur --exclude=".svn" --exclude-from=basename_broken_symlinks --brief old_source_tree new_source_tree
    diff_rc=$?
    if [ "$diff_rc" -ne 0 ] ; then
        echo "Error: old and new repos have different (aside from .svn) checked-out directory trees for revision = $revision" >&2
        exit $diff_rc
    fi
    echo "Old and new repos have identical (aside from .svn) checked-out directory trees for revision = $revision"
done

# Return to starting directory (recorded in old_dir before the script
# changed into /tmp).  Quoted so that a path containing blanks or glob
# characters is passed to cd as a single, literal argument.
cd "$old_dir"