// This is bare-bones C++ code to parse the output of
// `git cat-file --batch --batch-all-objects`.
// It doesn't _do_ anything with its parsed data; it just parrots tree and
// commit object fields to stdout.
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
// One record of batch output: a "<hash> <type> <size>" header line followed
// by exactly <size> bytes of payload.
struct RawObject
{
    // Reads the next record from `is` into hash, type and data.
    //
    // Failure is reported through the stream state, so callers should test
    // `is` after every call:
    //  - If the header line is missing or malformed (including plain end of
    //    input), hash, type and data are left empty and `is` is failed.
    //  - If the input ends before <size> payload bytes are available, data
    //    holds only the bytes that were actually read and `is` is failed.
    void read(istream & is)
    {
        uint64_t size = 0;
        if (!(is >> hash >> type >> size)) {
            // Never expose a half-updated or stale record to the caller.
            hash.clear();
            type.clear();
            data.clear();
            return;
        }
        is.ignore(1); // newline after size

        data.resize(size);
        // istream::read only returns short when the input ends, so a single
        // call is enough; gcount() then tells how much really arrived.
        is.read(data.data(), static_cast<streamsize>(size));
        data.resize(static_cast<size_t>(is.gcount()));
    }

    string hash, type;
    vector<char> data;
};
// Parsed form of a commit object: its header lines and its message.
//
// `fields` stores raw pointers to the value strings held in `header`. They
// are rebuilt by every parse() call, but a copied Commit keeps pointing at
// the header of the object it was copied from, so re-parse instead of
// copying and do not modify `header` directly.
struct Commit
{
    Commit() {}

    // Replaces the current contents with the commit stored in `obj`.
    //
    // The header is read line by line up to the first blank line:
    //  - "key value" starts a new header entry (split at the first space);
    //  - a line starting with a space continues the previous entry and is
    //    appended to its value after a '\n', without the leading space;
    //  - everything after the blank line becomes `message`.
    // A synthetic ("commit", obj.hash) entry is always stored first.
    // If the data ends before a blank line is seen, parsing stops there and
    // `message` is empty.
    void parse(RawObject const & obj)
    {
        istringstream ss(string(obj.data.begin(), obj.data.end()));
        header.clear();
        fields.clear();
        header.emplace_back("commit", obj.hash);

        string line;
        while (ss.peek() != istringstream::traits_type::eof()) {
            getline(ss, line);
            if (line.empty()) {
                break; // blank line: end of header
            }
            if (line[0] == ' ') {
                // Continuation of the previous entry's value.
                header.back().second += '\n';
                header.back().second.append(line, 1, string::npos);
                continue;
            }
            auto const space = line.find(' ');
            if (space == string::npos) {
                // Key without a value; must not swallow the next line.
                header.emplace_back(line, string());
            } else {
                header.emplace_back(line.substr(0, space), line.substr(space + 1));
            }
        }

        // Index only once `header` is complete: growing the vector while
        // parsing would invalidate pointers taken earlier.
        for (auto & item : header) {
            fields.emplace(item.first, &item.second);
        }
        message = string(istreambuf_iterator<char>(ss), istreambuf_iterator<char>());
    }

    // Returns the value of one entry named `field`, or an empty string when
    // the commit has no such field. When the field occurs several times,
    // which occurrence is returned is unspecified; use all() instead.
    string const & operator[](string const & field) const
    {
        static string const missing;
        auto const range = fields.equal_range(field);
        if (range.first == range.second) {
            return missing;
        }
        return *range.first->second;
    }

    // Returns copies of the values of every entry named `field`
    // (empty when there is none), in the map's iteration order.
    vector<string> all(string const & field) const
    {
        vector<string> result;
        auto const range = fields.equal_range(field);
        for (auto it = range.first; it != range.second; ++it) {
            result.emplace_back(*it->second);
        }
        return result;
    }

    vector<pair<string, string>> header;         // (key, value) in file order
    unordered_multimap<string, string *> fields; // key -> value inside `header`
    string message;
};
// Parsed form of a tree object: a flat list of directory entries.
struct Tree
{
    Tree() {}

    // Replaces `entries` with the entries stored in `obj`.
    // Parsing stops at the end of the data. An entry that is cut short by
    // the end of the data is discarded rather than stored half-filled.
    void parse(RawObject const & obj)
    {
        entries.clear();
        istringstream ss(string(obj.data.begin(), obj.data.end()));
        while (ss.peek() != istringstream::traits_type::eof()) {
            entries.emplace_back();
            entries.back().parse(ss);
            if (!ss) {
                entries.pop_back(); // incomplete trailing entry
                break;
            }
        }
    }

    // One tree entry, stored as "<mode> <name>\0" followed by 20 raw hash bytes.
    struct Entry
    {
        Entry() {}

        // Reads one entry from `data`. The stream is left failed when the
        // entry is incomplete; the members are then not meaningful.
        void parse(istream & data)
        {
            getline(data, mode, ' ');
            getline(data, name, '\0');
            data.read(_hash, sizeof _hash);
        }

        string mode;
        string name;

        // Returns the hash as 40 lowercase hex digits.
        // The result lives in one buffer per thread that every call
        // overwrites, so copy it if it must outlive the next hash() call.
        string const & hash() const
        {
            static char const digits[] = "0123456789abcdef";
            static thread_local string hex;
            hex.clear();
            for (char const raw : _hash) {
                // Go through unsigned char so bytes >= 0x80 do not sign-extend.
                auto const byte = static_cast<unsigned char>(raw);
                hex += digits[byte >> 4];
                hex += digits[byte & 0x0f];
            }
            return hex;
        }

        char _hash[20] = {}; // raw binary hash; zeroed until parse() fills it
    };

    vector<Entry> entries;
};
/* =========================================================== */
// Reads objects one at a time from a stream and echoes them to stdout.
class Reader
{
public:
    // `is` must outlive the Reader; only a reference is kept.
    Reader(istream & is)
    : is(is)
    { }

    // Reads the next object from the stream and prints "<type> <size>".
    // Commits are followed by their header fields and message, trees by one
    // "<name>: <hash>" line per entry. Nothing is printed when the read left
    // the stream in a failed state (end of input or a truncated record).
    void read()
    {
        // Reused across calls so their buffers are not reallocated per object.
        static thread_local RawObject obj;
        static thread_local Commit commit;
        static thread_local Tree tree;

        obj.read(is);
        if (!is) {
            return; // no complete object was read; do not report stale data
        }

        cout << obj.type << " " << obj.data.size() << '\n';
        if (obj.type == "commit")
        {
            commit.parse(obj);
            for ( auto const & item : commit.fields ) {
                cout << item.first << ": " << *item.second << '\n';
            }
            cout << commit.message << '\n';
        }
        else if (obj.type == "tree")
        {
            tree.parse(obj);
            for ( auto & entry : tree.entries ) {
                cout << entry.name << ": " << entry.hash() << '\n';
            }
        }
        // Flush once per object instead of once per line: output still
        // appears promptly, without a flush for every printed line.
        cout.flush();
    }

private:
    istream & is;
};
// Echoes every object found on stdin until the input is exhausted.
int main()
{
    // Only C++ streams are used here, so the stdio coupling is not needed.
    ios_base::sync_with_stdio(false);

    Reader reader(cin);
    // Skip the newline that follows each object's payload, then stop at end
    // of input. The condition also fails once a read has left cin in a
    // failed state, e.g. after a truncated record.
    while (cin >> ws && cin.peek() != char_traits<char>::eof()) {
        reader.read();
    }
    return 0;
}