Here's an improved version of the script that finds which refs and which
commits contain a particular file blob.  You give it the blob's hash (the
full hash) as an argument and it traces which commits contain it, and
which refs and which reflog lines point (directly or indirectly) to
those commits.

As far as I can tell, it's correct, but I haven't paid any attention
to efficiency.  On my computer, it takes about 15 minutes to search a
clone of the Git repository.

Following is the script and an example of its use.  No doubt it can be
improved significantly.

Dale
----------------------------------------------------------------------
#! /bin/bash

# git-find-blob - Find a given blob, given its hash.

BLOB="${1:?First argument is the blob hash.}"

# Work in a private scratch directory created by mktemp, so that the
# temporary file names are not predictable, and remove it when the script
# exits.
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/${0##*/}.XXXXXX") || exit 1
trap 'rm -rf -- "$TMP_DIR"' EXIT

# Temporary file name base.
T=$TMP_DIR/${0##*/}

# Make a copy of stdout, since we will be redirecting stdout within the script.
exec 3>&1

# Find the commits in which the blob exists.
# Every match is reported on the original stdout (fd 3) as
#     Commit <commit-hash> Name <path>
# and each commit that contains the blob is written once to $T.commits.

# List all commits.
git rev-list --all |
while read -r COMMIT
do
    # COMMIT_ADDED records whether this commit hash has been stored or not.
    COMMIT_ADDED=no
    # List all files within the commit.  -r makes ls-tree descend into
    # subdirectories; without it only the entries of the top-level tree are
    # listed and a blob stored in a subdirectory is never found.
    # grep cheaply discards lines that cannot match, so the shell loop only
    # sees candidates; the exact comparison on the hash field decides.
    git ls-tree -r --full-tree "$COMMIT" |
    grep -F -e "$BLOB" |
    while read -r MODE TYPE HASH NAME
    do
        # If this file matches the blob, report it.
        if [ "$HASH" = "$BLOB" ]
        then
            printf >&3 'Commit %s Name %s\n' "$COMMIT" "$NAME"
            if [ "$COMMIT_ADDED" = no ]
            then
                # Send the commit onward.
                printf '%s\n' "$COMMIT"
                COMMIT_ADDED=yes
            fi
            # Beware that the same blob may appear multiple times, so we
            # have to continue searching through this tree.
        fi
    done
done >"$T.commits"

# Find the refs that lead to the commits.

# Both of the following functions produce lines of the form:  hash ref-name

# all_refs: print every ref reported by "git show-ref" on stdout.
all_refs() {
    git show-ref
}

# A function to list all of the entries in all of the reflogs.
# For every line of every reflog file it writes two lines to stdout, one
# for the first field of the line and one for the second:
#     hash logs/<path-of-reflog-file>:<line-number>
# Note that it changes the working directory to the git directory, so it
# should be called in a subshell.
# Returns non-zero if the git directory cannot be determined or entered.
function all_reflogs () {
    local GITDIR_PATH REFLOG LINE BEFORE AFTER DATA

    # Go to the .git directory.  The path is quoted and both steps are
    # checked, so that "find logs" never runs in some unrelated directory.
    GITDIR_PATH=$( git rev-parse --git-dir ) || return
    cd -- "$GITDIR_PATH" || return

    # A repository without any reflogs has nothing to list.
    [ -d logs ] || return 0

    # Find all the reflog files.
    find logs -type f |
    while IFS= read -r REFLOG
    do
        # Read each reflog file.
        LINE=1
        while read -r BEFORE AFTER DATA
        do
            # Turn every line into two lines:  hash reflog-name
            printf '%s %s\n' "$BEFORE" "$REFLOG:$LINE"
            printf '%s %s\n' "$AFTER" "$REFLOG:$LINE"
            (( LINE++ ))
        done <"$REFLOG"
    done
}

# If no commit contains the blob there is nothing to trace, so do not walk
# the history of every ref and reflog line for nothing.
[ -s "$T.commits" ] || exit 0

# List all the refs.
# Each input line is:  hash ref-name   (or  hash reflog-file:line).
# Every commit containing the blob that is reachable from the hash is
# reported on the original stdout (fd 3) as
#     Commit <commit-hash> Ref <ref-name>
# and the name of each ref that reaches at least one such commit is
# collected in $T.refs.
( all_refs ; all_reflogs ) |
while read -r COMMIT REF
do
    # Skip the null commit if it shows up (all zeros, whatever the hash
    # length), and skip lines without a hash.
    [[ "$COMMIT" =~ ^0*$ ]] && continue

    # REF_ADDED records whether this ref has been stored or not.
    REF_ADDED=no
    # List all the commits in the ref that contain the blob.
    # -F -x compares whole lines as fixed strings, so a hash is never
    # interpreted as a regular expression or matched as a substring.
    git rev-list "$COMMIT" |
    grep -F -x -f "$T.commits" |
    while read -r COMMIT
    do
        printf >&3 'Commit %s Ref %s\n' "$COMMIT" "$REF"
        if [ "$REF_ADDED" = no ]
        then
            # Send the ref onward.
            printf '%s\n' "$REF"
            REF_ADDED=yes
        fi
    done
done >"$T.refs"

# Write out the refs.
sort -u <"$T.refs" |
sed -e 's/^/Ref /'
----------------------------------------------------------------------
$ time git-find-blob 07c2aba4f2c2434551a436d626447902f0c1872d
Commit fee106fafa1a9ee6029ad6390a7404f66b10d656 Name Makefile
Commit bd6350770e454c0bdef312419289ef6e0ba6bc81 Name Makefile
Commit f7c69c45befd9772282f1d45e988d5f62f54696b Name Makefile
Commit f918f7b13d09fbb93c2892899f680da80ed51ac1 Name Makefile
Commit 771a1a04e6c038cf974123b2303baccf5dfc4e98 Name Makefile
Commit cf5a8e3c1f4571f3a9daf59537026a2700e1f5ce Name Makefile
Commit ad158af1429bb1283f1605245b941699d3d1bc7b Name Makefile
Commit c9a507c3b8072891c5ce164ac82e8009d20c9b1f Name Makefile
Commit bf7b47d405b5f7e72058157df3b18e840ce4d0ac Name Makefile
Commit b3f76e372d66e61c06bc28def35b2e991a0692e1 Name Makefile
Commit 05ef0f3cf0cb7a398b552d61325898bc72da818c Name Makefile
Commit 9e6c9b4ee355cec3f7f48758107300d6daca34ae Name Makefile
Commit b0b86cfed2094c983b41a926d74e313f220d7e58 Name Makefile
Commit 6f128b42fb40916e4b9c233557bdde5a7b90c86a Name Makefile
Commit bcf3cbc25b543e485bfab373ef9852f5a67bee43 Name Makefile
Commit a77e57b2054683340bcf0dc0dd717e2b559d97d0 Name Makefile
^----- meaning that in these commits, at the indicated file name, is a
       file with hash 07c2aba4f2c2434551a436d626447902f0c1872d
Commit fee106fafa1a9ee6029ad6390a7404f66b10d656 Ref refs/remotes/origin/pu
Commit bd6350770e454c0bdef312419289ef6e0ba6bc81 Ref refs/remotes/origin/pu
Commit f7c69c45befd9772282f1d45e988d5f62f54696b Ref refs/remotes/origin/pu
Commit f918f7b13d09fbb93c2892899f680da80ed51ac1 Ref refs/remotes/origin/pu
Commit 771a1a04e6c038cf974123b2303baccf5dfc4e98 Ref refs/remotes/origin/pu
Commit cf5a8e3c1f4571f3a9daf59537026a2700e1f5ce Ref refs/remotes/origin/pu
Commit ad158af1429bb1283f1605245b941699d3d1bc7b Ref refs/remotes/origin/pu
Commit c9a507c3b8072891c5ce164ac82e8009d20c9b1f Ref refs/remotes/origin/pu
Commit bf7b47d405b5f7e72058157df3b18e840ce4d0ac Ref refs/remotes/origin/pu
Commit b3f76e372d66e61c06bc28def35b2e991a0692e1 Ref refs/remotes/origin/pu
Commit 05ef0f3cf0cb7a398b552d61325898bc72da818c Ref refs/remotes/origin/pu
Commit 9e6c9b4ee355cec3f7f48758107300d6daca34ae Ref refs/remotes/origin/pu
Commit b0b86cfed2094c983b41a926d74e313f220d7e58 Ref refs/remotes/origin/pu
Commit 6f128b42fb40916e4b9c233557bdde5a7b90c86a Ref refs/remotes/origin/pu
Commit bcf3cbc25b543e485bfab373ef9852f5a67bee43 Ref refs/remotes/origin/pu
Commit a77e57b2054683340bcf0dc0dd717e2b559d97d0 Ref refs/remotes/origin/pu
Commit fee106fafa1a9ee6029ad6390a7404f66b10d656 Ref logs/refs/remotes/origin/pu:1
                         meaning line 1 of .git/logs/refs/remotes/origin/pu ----^
Commit bd6350770e454c0bdef312419289ef6e0ba6bc81 Ref logs/refs/remotes/origin/pu:1
Commit f7c69c45befd9772282f1d45e988d5f62f54696b Ref logs/refs/remotes/origin/pu:1
Commit f918f7b13d09fbb93c2892899f680da80ed51ac1 Ref logs/refs/remotes/origin/pu:1
Commit 771a1a04e6c038cf974123b2303baccf5dfc4e98 Ref logs/refs/remotes/origin/pu:1
Commit cf5a8e3c1f4571f3a9daf59537026a2700e1f5ce Ref logs/refs/remotes/origin/pu:1
Commit ad158af1429bb1283f1605245b941699d3d1bc7b Ref logs/refs/remotes/origin/pu:1
Commit c9a507c3b8072891c5ce164ac82e8009d20c9b1f Ref logs/refs/remotes/origin/pu:1
Commit bf7b47d405b5f7e72058157df3b18e840ce4d0ac Ref logs/refs/remotes/origin/pu:1
Commit b3f76e372d66e61c06bc28def35b2e991a0692e1 Ref logs/refs/remotes/origin/pu:1
Commit 05ef0f3cf0cb7a398b552d61325898bc72da818c Ref logs/refs/remotes/origin/pu:1
Commit 9e6c9b4ee355cec3f7f48758107300d6daca34ae Ref logs/refs/remotes/origin/pu:1
Commit b0b86cfed2094c983b41a926d74e313f220d7e58 Ref logs/refs/remotes/origin/pu:1
Commit 6f128b42fb40916e4b9c233557bdde5a7b90c86a Ref logs/refs/remotes/origin/pu:1
Commit bcf3cbc25b543e485bfab373ef9852f5a67bee43 Ref logs/refs/remotes/origin/pu:1
Commit a77e57b2054683340bcf0dc0dd717e2b559d97d0 Ref logs/refs/remotes/origin/pu:1
^------ meaning these commits are reachable from the indicated ref or reflog line
Ref logs/refs/remotes/origin/pu:1
Ref refs/remotes/origin/pu
^------ the list of refs and reflog lines from which the file blob can be reached

real    13m24.652s
        ^----- it's not fast
user    8m49.838s
sys     5m31.258s
$ 
----------------------------------------------------------------------

-- 
You received this message because you are subscribed to the Google Groups "Git 
for human beings" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to git-users+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to