Revision: 78354
          http://sourceforge.net/p/brlcad/code/78354
Author:   starseeker
Date:     2021-03-01 13:19:19 +0000 (Mon, 01 Mar 2021)
Log Message:
-----------
Add some more utility scripts for repo comparison

Having assigned SVN revisions to most of the git commits,
that opens up another option for comparison - generating
diff sets from multiple git SHA1 commits that should
correspond to SVN commits, and looking for differences.

In an initial trial, a little less than 10% don't generate
matching diffs.  We've observed what appear to be occasional
differences in how SVN and git calculate their diffs.  Since
MD5 sum comparison only works when the diffs are exactly the
same, the approximate matching script uses ssdeep and comm
instead to get a sense of how different the two patches are.

Added Paths:
-----------
    brlcad/trunk/misc/repoconv/verify2/check.cxx
    brlcad/trunk/misc/repoconv/verify2/git_revsets.sh
    brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh

Added: brlcad/trunk/misc/repoconv/verify2/check.cxx
===================================================================
--- brlcad/trunk/misc/repoconv/verify2/check.cxx                                
(rev 0)
+++ brlcad/trunk/misc/repoconv/verify2/check.cxx        2021-03-01 13:19:19 UTC 
(rev 78354)
@@ -0,0 +1,110 @@
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+
+// Read a semicolon-delimited two-column file into a string map.
+//
+// Every non-empty line is split at its first ';' into id1 (text before) and
+// id2 (text after).  A non-empty line with no ';' is fatal: the line is
+// printed to stderr and the program exits with status -1.
+//
+// map           destination; entries already present are kept, and a key
+//               that is read again is overwritten by the later line
+// file          path of the file to read
+// rev           zero stores map[id1] = id2 (only when id2 is non-empty);
+//               non-zero stores map[id2] = id1 (only when id1 is non-empty)
+// non_cad_revs  optional (may be NULL) set of ids; a line is skipped when
+//               either id1 or id2 is a member of the set
+//
+// Returns 0 on success, -1 when the file cannot be opened.
+int
+read_map_file(std::map<std::string,std::string> &map, const char *file, int rev, std::set<std::string> *non_cad_revs)
+{
+    if (!file) {
+       std::cerr << "No map file specified\n";
+       return -1;
+    }
+
+    std::ifstream in_stream(file, std::ifstream::binary);
+    if (!in_stream.good()) {
+       // An unreadable input used to be indistinguishable from an empty one,
+       // which made every later comparison meaningless - report it instead.
+       std::cerr << "Unable to open map file: " << file << "\n";
+       return -1;
+    }
+
+    std::string line;
+    while (std::getline(in_stream, line)) {
+       // Skip empty lines
+       if (line.empty()) {
+           continue;
+       }
+
+       size_t spos = line.find(';');
+       if (spos == std::string::npos) {
+           std::cerr << "Invalid map line!: " << line << "\n";
+           std::exit(-1);
+       }
+
+       std::string id1 = line.substr(0, spos);
+       std::string id2 = line.substr(spos+1, std::string::npos);
+
+       // If we have a set to check, skip anything having to do with
+       // a non-cad revision
+       if (non_cad_revs) {
+           if (non_cad_revs->find(id1) != non_cad_revs->end())
+               continue;
+           if (non_cad_revs->find(id2) != non_cad_revs->end())
+               continue;
+       }
+
+       // The side used as the value must be non-empty; the key side is
+       // not checked.
+       if (!rev) {
+           if (id2.length())
+               map[id1] = id2;
+       } else {
+           if (id1.length())
+               map[id2] = id1;
+       }
+    }
+
+    in_stream.close();
+    return 0;
+}
+
+// Collect every non-empty line of a file into a set of strings.
+//
+// set   destination; lines are inserted alongside whatever it already holds
+// file  path of the file to read
+//
+// A file that cannot be opened is not an error here: the set is simply left
+// as it was.  Always returns 0.
+int
+read_set_file(std::set<std::string> &set, const char *file)
+{
+    std::ifstream input(file, std::ifstream::binary);
+    if (!input.good())
+       return 0;
+
+    for (std::string entry; std::getline(input, entry); ) {
+       // Blank lines carry no id
+       if (!entry.empty())
+           set.insert(entry);
+    }
+    input.close();
+
+    return 0;
+}
+
+// Compare per-revision diff checksums from two "value;key" files.
+//
+// Usage: check svn_diffs.txt git_diffs.txt non_cad.txt
+//
+// Both diff files are loaded with the columns swapped, so the text after
+// the ';' is the lookup key and the text before it is the stored value
+// (git_revsets.sh writes such lines as "MD5;REV").  Every key from the git
+// file whose value differs from the svn file's value for the same key, or
+// that has no svn entry at all, is printed on stdout, one per line.
+//
+// Returns 0 after the comparison, -1 on a usage error or unreadable diff
+// file.
+int
+main(int argc, const char **argv)
+{
+    if (argc != 4) {
+       std::cerr << "check svn_diffs.txt git_diffs.txt non_cad.txt\n";
+       return -1;
+    }
+
+    // With argc == 4 the last valid argument is argv[3]: the non-cad list.
+    std::set<std::string> non_cad_revs;
+    read_set_file(non_cad_revs, argv[3]);
+
+    std::map<std::string, std::string> svn_rev_to_contents;
+    if (read_map_file(svn_rev_to_contents, argv[1], 1, &non_cad_revs) != 0)
+       return -1;
+
+    // NOTE(review): the non-cad filter is applied to the svn side only;
+    // confirm the git input never contains non-cad revisions.
+    std::map<std::string, std::string> git_rev_to_contents;
+    if (read_map_file(git_rev_to_contents, argv[2], 1, NULL) != 0)
+       return -1;
+
+    // Report every git key whose checksum is missing from, or different in,
+    // the svn data.  find() is used so the scan does not add empty entries
+    // to the svn map.
+    std::map<std::string,std::string>::const_iterator g_it;
+    for (g_it = git_rev_to_contents.begin(); g_it != git_rev_to_contents.end(); ++g_it) {
+       std::map<std::string,std::string>::const_iterator s_it = svn_rev_to_contents.find(g_it->first);
+       if (s_it == svn_rev_to_contents.end() || s_it->second != g_it->second) {
+           std::cout << g_it->first << "\n";
+       }
+    }
+    return 0;
+}
+
+// Local Variables:
+// tab-width: 8
+// mode: C++
+// c-basic-offset: 4
+// indent-tabs-mode: t
+// c-file-style: "stroustrup"
+// End:
+// ex: shiftwidth=4 tabstop=8


Property changes on: brlcad/trunk/misc/repoconv/verify2/check.cxx
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: brlcad/trunk/misc/repoconv/verify2/git_revsets.sh
===================================================================
--- brlcad/trunk/misc/repoconv/verify2/git_revsets.sh                           
(rev 0)
+++ brlcad/trunk/misc/repoconv/verify2/git_revsets.sh   2021-03-01 13:19:19 UTC 
(rev 78354)
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# For every SVN revision number recorded in the git history (commit message
+# lines of the form svn:revision:N), gather the added/removed lines of all
+# git commits carrying that revision, sort them, and append "MD5;REV" to
+# git_diffs.txt.  Run from inside the git repository.
+
+rm -f revs.txt git_diffs.txt diffcontents.txt diffsorted.txt rev_shas.txt revs_raw.txt
+git log --all |grep svn:revision|awk -F':' '{print $3}'|sort -n|uniq > revs_raw.txt
+# The first line of the sorted list is dropped.
+# NOTE(review): presumably an empty or unusable first entry - confirm.
+tail -n +2 revs_raw.txt > revs.txt
+rm -f revs_raw.txt
+
+while read -r REV; do
+       echo "$REV:"
+       # --pretty=format: leaves no newline after the last SHA1, so add one to
+       # make sure the final entry is seen by read.
+       git log --all --grep="^svn:revision:$REV$" --pretty=format:"%H" > rev_shas.txt
+       echo "" >> rev_shas.txt
+       # Start from an empty file so sort always has an input, even when no
+       # commit matched.
+       : > diffcontents.txt
+       while read -r SHA1; do
+               # When nothing matched, rev_shas.txt holds only the blank line
+               # added above; skip it rather than running git diff on "^!".
+               if [ "$SHA1" != "" ]
+               then
+                       echo "$SHA1"
+                       # Keep only +/- content lines: drop the ---/+++ file
+                       # headers and bare "-" / "+" lines.
+                       git diff "$SHA1^!" ':(exclude).gitignore' |grep "^[-+].*"|grep -v "[-][-][-]" | grep -v "+++" | grep -v "^[-]$"|grep -v "^+$" >> diffcontents.txt
+               fi
+       done < rev_shas.txt
+       sort diffcontents.txt > diffsorted.txt
+       DIFFMD5=$(cat diffsorted.txt |md5sum | awk '{print $1}')
+       echo "$DIFFMD5;$REV" >> git_diffs.txt
+done < revs.txt
+


Property changes on: brlcad/trunk/misc/repoconv/verify2/git_revsets.sh
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/x-sh
\ No newline at end of property
Added: brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh
===================================================================
--- brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh                      
        (rev 0)
+++ brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh      2021-03-01 
13:19:19 UTC (rev 78354)
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+# Approximate comparison of SVN and git diffs for a list of revisions.
+#
+# Usage: rev_approx_check SVN_REPO GIT_REPO input_file
+#
+# input_file holds one SVN revision number per line; it is sorted
+# numerically IN PLACE before use.  For each revision the +/- lines of the
+# SVN diff and of all git commits tagged svn:revision:REV are extracted,
+# sorted, and compared with ssdeep and comm.
+#
+# Note - ssdeep needs files >=4k.  For smaller files, we just use https://superuser.com/a/459121
+# to get a sense of shared lines, unless/until something better turns up...
+
+CWD=$(pwd)
+
+if [ "$#" -ne "3" ]
+then
+        echo "rev_approx_check SVN_REPO GIT_REPO input_file"
+        exit 1
+fi
+SVNREPO=$1
+GITREPO=$2
+input_file=$CWD/$3
+
+if [ ! -d "$SVNREPO" ]
+then
+        echo "$SVNREPO does not exist"
+        exit 1
+fi
+if [ ! -d "$GITREPO" ]
+then
+        echo "$GITREPO does not exist"
+        exit 1
+fi
+if [ ! -f "$input_file" ]
+then
+        # Not found relative to the starting directory - try the argument as
+        # given (e.g. an absolute path).
+        input_file=$3
+        if [ ! -f "$input_file" ]
+        then
+                echo "$input_file does not exist"
+                exit 1
+        fi
+fi
+
+sort -n "$input_file" > intmp
+mv intmp "$input_file"
+while read -r REV; do
+       echo "$REV:"
+       cd "$GITREPO" || exit 1
+       # --pretty=format: leaves no newline after the last SHA1, so add one to
+       # make sure the final entry is seen by read.
+       git log --all --grep="^svn:revision:$REV$" --pretty=format:"%H" > rev_shas.txt
+       echo "" >> rev_shas.txt
+       # Start from an empty file so sort and rm below always have a target,
+       # even when no commit matched.
+       : > diffcontents.txt
+       while read -r SHA1; do
+               if [ "$SHA1" != "" ]
+               then
+                       echo "$SHA1"
+                       # Keep only +/- content lines: drop the ---/+++ file
+                       # headers and bare "-" / "+" lines.
+                       git diff "$SHA1^!" ':(exclude).gitignore' |grep "^[-+].*"|grep -v "[-][-][-]" | grep -v "+++" | grep -v "^[-]$"|grep -v "^+$" >> diffcontents.txt
+               fi
+       done < rev_shas.txt
+       sort diffcontents.txt > "$CWD/gitdiff"
+       rm -f diffcontents.txt rev_shas.txt
+       cd "$CWD" || exit 1
+       svn diff --ignore-properties -c"$REV" "file://$SVNREPO" |grep "^[-+].*"|grep -v "[-][-][-]" | grep -v "+++" |grep -v "^[-]$"|grep -v "^+$" |sort > svndiff
+       if [ -s svndiff ]
+       then
+               if [ -s gitdiff ]
+               then
+                       ssdeep -b svndiff 2>/dev/null > svndiff.hash
+                       SCORE=$(ssdeep -bm svndiff.hash gitdiff 2>/dev/null|cut -c 37-)
+                       if [ "$SCORE" != "" ]
+                       then
+                               echo "Similarity score (100->0):$SCORE"
+                       fi
+                       # svndiff and gitdiff were written through sort above,
+                       # which is the ordering comm requires.
+                       comm -12 svndiff gitdiff > commonlines
+                       # wc -l, not -c: the report below is in lines.
+                       SVNWC=$(wc -l svndiff|awk '{print $1}')
+                       GITWC=$(wc -l gitdiff|awk '{print $1}')
+                       COMMWC=$(wc -l commonlines|awk '{print $1}')
+                       echo "SVN:Git:common line counts: $SVNWC:$GITWC:$COMMWC"
+               else
+                       echo "DIFFERENCE: Non-empty SVN diff but empty git diff"
+               fi
+       else
+               if [ -s gitdiff ]
+               then
+                       echo "DIFFERENCE: Empty svn diff but non empty git diff"
+               else
+                       echo "both diffs empty"
+               fi
+       fi
+# Read the revisions the caller supplied (validated and sorted above).
+done < "$input_file"
+


Property changes on: brlcad/trunk/misc/repoconv/verify2/rev_approx_check.sh
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/x-sh
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.



_______________________________________________
BRL-CAD Source Commits mailing list
brlcad-commits@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/brlcad-commits

Reply via email to