Revision: 78354 http://sourceforge.net/p/brlcad/code/78354 Author: starseeker Date: 2021-03-01 13:19:19 +0000 (Mon, 01 Mar 2021) Log Message: ----------- Add some more utility scripts for repo comparison
Having assigned SVN revisions to most of the git commits, that opens up another option for comparison - generating diff sets from multiple git SHA1 commits that should correspond to SVN commits, and looking for differences. In an initial trial, a little less than 10% don't generate matching diffs. We've observed what appear to be occasional differences in how SVN and git calculate their diffs. Since MD5 sum comparison only works when the diffs are exactly the same, the approximate matching script uses ssdeep and comm instead to get a sense of how different the two patches are. Added Paths: ----------- brlcad/trunk/misc/repoconv/verify2/check.cxx brlcad/trunk/misc/repoconv/verify2/git_revsets.sh brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh Added: brlcad/trunk/misc/repoconv/verify2/check.cxx =================================================================== --- brlcad/trunk/misc/repoconv/verify2/check.cxx (rev 0) +++ brlcad/trunk/misc/repoconv/verify2/check.cxx 2021-03-01 13:19:19 UTC (rev 78354) @@ -0,0 +1,110 @@ +#include <iostream> +#include <fstream> +#include <map> +#include <set> +#include <string> + +int +read_map_file(std::map<std::string,std::string> &map, const char *file, int rev, std::set<std::string> *non_cad_revs) +{ + std::ifstream in_stream(file, std::ifstream::binary); + if (in_stream.good()) { + std::string line; + while (std::getline(in_stream, line)) { + // Skip empty lines + if (!line.length()) { + continue; + } + + size_t spos = line.find_first_of(";"); + if (spos == std::string::npos) { + std::cerr << "Invalid map line!: " << line << "\n"; + exit(-1); + } + + std::string id1 = line.substr(0, spos); + std::string id2 = line.substr(spos+1, std::string::npos); + + + if (non_cad_revs) { + // If we have a set to check, skip anything having to do with + // a non-cad revision + if (non_cad_revs->find(id1) != non_cad_revs->end()) + continue; + if (non_cad_revs->find(id2) != non_cad_revs->end()) + continue; + } + + + if (!rev) { + if 
(id2.length()) + map[id1] = id2; + } else { + if (id1.length()) + map[id2] = id1; + } + } + + in_stream.close(); + } + + return 0; +} + +int +read_set_file(std::set<std::string> &set, const char *file) +{ + std::ifstream in_stream(file, std::ifstream::binary); + if (in_stream.good()) { + std::string line; + while (std::getline(in_stream, line)) { + // Skip empty lines + if (!line.length()) { + continue; + } + if (line.length()) + set.insert(line); + } + in_stream.close(); + } + + return 0; +} + +int +main(int argc, const char **argv) +{ + std::map<std::string,std::string>::iterator b_it; + + if (argc != 4) { + std::cerr << "check svn_diffs.txt git_diffs.txt non_cad.txt\n"; + return -1; + } + + std::set<std::string> non_cad_revs; + read_set_file(non_cad_revs, argv[3]); + + std::map<std::string, std::string> svn_rev_to_contents; + read_map_file(svn_rev_to_contents, argv[1], 1, &non_cad_revs); + + std::map<std::string, std::string> git_rev_to_contents; + read_map_file(git_rev_to_contents, argv[2], 1, NULL); + + // Report every git rev whose diff MD5 differs from the SVN diff MD5 for the same rev + for (b_it = git_rev_to_contents.begin(); b_it != git_rev_to_contents.end(); b_it++) { + std::string svn_md5 = svn_rev_to_contents[b_it->first]; + if (svn_md5 != b_it->second) { + std::cout << b_it->first << "\n"; + } + } + return 0; +} + +// Local Variables: +// tab-width: 8 +// mode: C++ +// c-basic-offset: 4 +// indent-tabs-mode: t +// c-file-style: "stroustrup" +// End: +// ex: shiftwidth=4 tabstop=8 Property changes on: brlcad/trunk/misc/repoconv/verify2/check.cxx ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Added: brlcad/trunk/misc/repoconv/verify2/git_revsets.sh =================================================================== --- brlcad/trunk/misc/repoconv/verify2/git_revsets.sh (rev 0) +++ 
brlcad/trunk/misc/repoconv/verify2/git_revsets.sh 2021-03-01 13:19:19 UTC (rev 78354) @@ -0,0 +1,23 @@ +#!/bin/bash + +rm -f revs.txt git_diffs.txt diffcontents.txt diffsorted.txt rev_shas.txt revs_raw.txt +git log --all |grep svn:revision|awk -F':' '{print $3}'|sort -n|uniq > revs_raw.txt +tail -n +2 revs_raw.txt > revs.txt +rm -f revs_raw.txt + +while read i; do + REV=$i + echo "$REV:" + git log --all --grep="^svn:revision:$REV$" --pretty=format:"%H" > rev_shas.txt + echo "" >> rev_shas.txt + rm -f diffcontents.txt + while read i; do + SHA1=$i + echo "$SHA1" + git diff $SHA1^! ':(exclude).gitignore' |grep "^[-+].*"|grep -v "[-][-][-]" | grep -v "+++" | grep -v "^[-]$"|grep -v "^+$" >> diffcontents.txt + done < rev_shas.txt + sort diffcontents.txt > diffsorted.txt + DIFFMD5=$(cat diffsorted.txt |md5sum | awk '{print $1}') + echo "$DIFFMD5;$REV" >> git_diffs.txt +done < revs.txt + Property changes on: brlcad/trunk/misc/repoconv/verify2/git_revsets.sh ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/x-sh \ No newline at end of property Added: brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh =================================================================== --- brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh (rev 0) +++ brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh 2021-03-01 13:19:19 UTC (rev 78354) @@ -0,0 +1,89 @@ +#!/bin/bash + +# Note - ssdeep needs files >=4k. For smaller files, we just use https://superuser.com/a/459121 +# to get a sense of shared lines, unless/until something better turns up... + +CWD=$(pwd) + +if [ "$#" -ne "3" ] +then + echo "rev_deep_check SVN_REPO GIT_REPO input_file" + exit 1 +fi +SVNREPO=$1 +GITREPO=$2 +input_file=$CWD/$3 + +if [ ! -d $SVNREPO ] +then + echo "$SVNREPO does not exist" + exit 1 +fi +if [ ! 
-d $GITREPO ] +then + echo "$GITREPO does not exist" + exit 1 +fi +if [ ! -f $input_file ] +then + input_file=$3 + if [ ! -f $input_file ] + then + echo "$input_file does not exist" + exit 1 + fi +fi + +cat $input_file |sort -n > intmp +mv intmp $input_file +while read i; do + REV=$i + echo "$REV:" + cd $GITREPO + git log --all --grep="^svn:revision:$REV$" --pretty=format:"%H" > rev_shas.txt + echo "" >> rev_shas.txt + rm -f diffcontents.txt + while read i; do + SHA1=$i + if [ "$SHA1" != "" ] + then + echo "$SHA1" + git diff $SHA1^! ':(exclude).gitignore' |grep "^[-+].*"|grep -v "[-][-][-]" | grep -v "+++" | grep -v "^[-]$"|grep -v "^+$" >> diffcontents.txt + fi + done < rev_shas.txt + sort diffcontents.txt > $CWD/gitdiff + rm diffcontents.txt rev_shas.txt + cd $CWD + svn diff --ignore-properties -c$REV file://$SVNREPO |grep "^[-+].*"|grep -v "[-][-][-]" | grep -v "+++" |grep -v "^[-]$"|grep -v "^+$" |sort > svndiff + if [ -s svndiff ] + then + if [ -s gitdiff ] + then + ssdeep -b svndiff 2>/dev/null > svndiff.hash + SCORE=$(ssdeep -bm svndiff.hash gitdiff 2>/dev/null|cut -c 37-) + if [ "$SCORE" != "" ] + then + echo "Similarity score (100->0):$SCORE" + fi + cat svndiff | sort > svndiffsort + cat gitdiff | sort > gitdiffsort + mv svndiffsort svndiff + mv gitdiffsort gitdiff + comm -12 svndiff gitdiff > commonlines + SVNWC=$(wc -l svndiff|awk '{print $1}') + GITWC=$(wc -l gitdiff|awk '{print $1}') + COMMWC=$(wc -l commonlines|awk '{print $1}') + echo "SVN:Git:common line counts: $SVNWC:$GITWC:$COMMWC" + else + echo "DIFFERENCE: Non-empty SVN diff but empty git diff" + fi + else + if [ -s gitdiff ] + then + echo "DIFFERENCE: Empty svn diff but non empty git diff" + else + echo "both diffs empty" + fi + fi +done < $input_file + Property changes on: brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## 
-0,0 +1 ## +* \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/x-sh \ No newline at end of property This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. _______________________________________________ BRL-CAD Source Commits mailing list brlcad-commits@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/brlcad-commits