#!/bin/bash
# Compare results from an old svn repo named <name>_pre_Allura
# in the current directory with a new svn repo named <name> in
# the current directory.
#
# This script should be run without arguments.  It first checks that
# both repos have the same number of revisions and compares the
# complete verbose log for the two repos.  If those are identical, it
# then checks out a sample of approximately $nsample (defined below)
# revisions starting at revision 1 and ending at the latest revision
# for each repo and checks that the corresponding checked-out trees
# are identical for each of those revisions.
#
# Requirements: svn, svnlook, GNU find, GNU diff, cmp, mktemp.
#
# Exit status: 0 when every comparison succeeds, non-zero otherwise.
# Error messages go to stderr.  All scratch data lives in a private
# temporary directory that is removed on success and kept (its path is
# reported) on failure so that the differences can be inspected.

set -u -o pipefail

# Path of the private scratch directory; empty until it has been created.
work_dir=""

# Print an error message to stderr and exit.
# Arguments: $1 - exit status, $2... - message
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

# EXIT trap: remove the scratch directory on success, keep it on failure.
cleanup() {
  local status=$?
  [[ -n "$work_dir" ]] || return 0
  if (( status == 0 )); then
    rm -rf -- "${work_dir:?}"
  else
    printf 'Work directory kept for inspection: %s\n' "$work_dir" >&2
  fi
}

# Write the sorted list of broken symlinks below a directory to stdout.
# Arguments: $1 - directory tree to scan
# Runs in a subshell so the caller's working directory is untouched.
#
# The find -L option makes find follow symlinks if possible, so the
# subsequent -type l ordinarily finds no symlinks at all, except for
# broken links which point to non-existent files.  Those remain as
# symlinks (despite the -L) because it is impossible to follow them.
# Cyclical links are not dealt with specially; find's own exit status
# is deliberately ignored so that its complaints about such links do
# not abort the comparison.  The list is sorted because directory
# traversal order is not guaranteed to be the same for two trees.
list_broken_symlinks() (
  cd -- "$1" || exit
  find -L . -type l | LC_ALL=C sort
  exit 0
)

main() {
  local start_dir=$PWD

  ## Find oldrepo.  Use a glob rather than parsing ls output, and insist
  ## on exactly one match so that the repo name is unambiguous.
  local -a candidates
  shopt -s nullglob
  candidates=(*_pre_Allura)
  shopt -u nullglob
  if (( ${#candidates[@]} == 0 )); then
    die 2 "Error: could not find old svn repo with name of *_pre_Allura"
  elif (( ${#candidates[@]} > 1 )); then
    die 1 "Error: found more than one candidate old svn repo with name of *_pre_Allura"
  fi
  local oldrepo=${candidates[0]}
  printf 'Found old svn repo called %s\n' "$oldrepo"

  ## Find newrepo (the old name with the _pre_Allura suffix removed).
  local newrepo_name=${oldrepo%_pre_Allura}
  if [[ -z "$newrepo_name" || ! -e "$newrepo_name" ]]; then
    die 2 "Error: could not find new svn repo with name of $newrepo_name"
  fi
  local newrepo=$newrepo_name
  printf 'Found new svn repo called %s\n' "$newrepo"

  ## Check that old and new repo have the same number of revisions.
  local old_number_of_revisions new_number_of_revisions
  old_number_of_revisions=$(svnlook youngest "$oldrepo") \
    || die 1 "Error: svnlook youngest failed for $oldrepo"
  new_number_of_revisions=$(svnlook youngest "$newrepo") \
    || die 1 "Error: svnlook youngest failed for $newrepo"
  # Guard the arithmetic below against empty or non-numeric output.
  if [[ ! "$old_number_of_revisions" =~ ^[0-9]+$ \
    || ! "$new_number_of_revisions" =~ ^[0-9]+$ ]]; then
    die 1 "Error: svnlook youngest did not report a revision number"
  fi
  if (( old_number_of_revisions != new_number_of_revisions )); then
    die 1 "Error: Old and new svn repo have different number of revisions"
  fi
  local number_of_revisions=$new_number_of_revisions
  printf 'Number of revisions = %s\n' "$number_of_revisions"

  ## Rough number of different revisions that are compared in detail.
  ## Actual number could be from nsample to 2*nsample
  ## depending on how nsample divides into number_of_revisions.
  local -r nsample=100
  local revision_increment=$(( number_of_revisions / nsample ))
  if (( revision_increment <= 0 )); then
    revision_increment=1
  fi
  printf 'Revision increment used for detailed comparisons = %s\n' \
    "$revision_increment"

  ## Create a private scratch directory instead of using fixed names
  ## directly under /tmp.
  work_dir=$(mktemp -d "${TMPDIR:-/tmp}/compare_svn_repos.XXXXXX") \
    || die 1 "Error: could not create temporary work directory"
  trap cleanup EXIT

  local old_tree=$work_dir/old_source_tree
  local new_tree=$work_dir/new_source_tree
  local old_log=$work_dir/old_detailed_log
  local new_log=$work_dir/new_detailed_log
  local old_links=$work_dir/old_broken_symlinks
  local new_links=$work_dir/new_broken_symlinks
  local exclude_file=$work_dir/basename_broken_symlinks

  ## Checkout latest revision of old repo and new repo.
  local repo_url_prefix="file://localhost${start_dir%/}"
  svn checkout --quiet "${repo_url_prefix}/${oldrepo}" "$old_tree" \
    || die 1 "Error: svn checkout failed for $oldrepo"
  svn checkout --quiet "${repo_url_prefix}/${newrepo}" "$new_tree" \
    || die 1 "Error: svn checkout failed for $newrepo"

  ## Compare detailed log file for each local directory tree created above.
  ## A failed svn log must be fatal: two empty logs would compare equal.
  svn log --verbose "$old_tree" > "$old_log" \
    || die 1 "Error: svn log failed for $oldrepo"
  svn log --verbose "$new_tree" > "$new_log" \
    || die 1 "Error: svn log failed for $newrepo"
  local cmp_rc=0
  cmp "$old_log" "$new_log" || cmp_rc=$?
  if (( cmp_rc != 0 )); then
    die "$cmp_rc" "Error: old and new repos have detailed logs that are different"
  fi
  printf '%s\n' "Old and new repos have identical detailed logs"

  ## Build the sample of revisions, including both first and last
  ## revision.  The last revision is appended only when the stride did
  ## not already land on it, so it is never compared twice.
  local -a revisions=()
  local sampled last_sampled=-1
  for (( sampled = 1; sampled <= number_of_revisions; sampled += revision_increment )); do
    revisions+=("$sampled")
    last_sampled=$sampled
  done
  if (( last_sampled != number_of_revisions )); then
    revisions+=("$number_of_revisions")
  fi

  ## Compare local directory trees for each sampled revision.
  local revision diff_rc broken_link
  for revision in "${revisions[@]}"; do
    # A failed update would leave a tree at the previous revision and
    # make the comparison below meaningless, so treat it as fatal.
    svn update --quiet --revision="$revision" "$old_tree" \
      || die 1 "Error: svn update of old repo failed for revision = $revision"
    svn update --quiet --revision="$revision" "$new_tree" \
      || die 1 "Error: svn update of new repo failed for revision = $revision"

    ## Find and eliminate identical broken links from the diff
    ## because otherwise such broken links cause a non-zero
    ## diff return code.
    list_broken_symlinks "$old_tree" > "$old_links" \
      || die 1 "Error: could not scan old tree for broken symlinks"
    list_broken_symlinks "$new_tree" > "$new_links" \
      || die 1 "Error: could not scan new tree for broken symlinks"

    ## Check that both directories have identical sets of broken symlinks.
    diff_rc=0
    diff --brief "$old_links" "$new_links" || diff_rc=$?
    if (( diff_rc != 0 )); then
      die "$diff_rc" "Error: old and new repos have different sets of broken symlinks for revision = $revision"
    fi

    ## Convert list of broken symlinks to the basename form.  This
    ## form is an unfortunate choice because basename collisions can
    ## occur causing additional good files that are not broken
    ## symlinks to be ignored by the diff check.  However, the
    ## basename form is the only form that is acceptable to exclude
    ## files from the diff so we are stuck with this problematic logic.
    ## The list is read line by line so that names containing
    ## whitespace or glob characters are not split or expanded.
    while IFS= read -r broken_link; do
      printf '%s\n' "${broken_link##*/}"
    done < "$new_links" > "$exclude_file"

    diff_rc=0
    diff -Naur --exclude=".svn" --exclude-from="$exclude_file" --brief \
      "$old_tree" "$new_tree" || diff_rc=$?
    if (( diff_rc != 0 )); then
      die "$diff_rc" "Error: old and new repos have different (aside from .svn) checked-out directory trees for revision = $revision"
    fi
    printf '%s\n' "Old and new repos have identical (aside from .svn) checked-out directory trees for revision = $revision"
  done

  # The working directory of this shell is never changed (all directory
  # changes happen in subshells), so there is nothing to return to.
  return 0
}

main "$@"