================ @@ -137,13 +138,48 @@ using InstEmbeddingsMap = DenseMap<const Instruction *, Embedding>; using BBEmbeddingsMap = DenseMap<const BasicBlock *, Embedding>; /// Class for storing and accessing the IR2Vec vocabulary. -/// Encapsulates all vocabulary-related constants, logic, and access methods. +/// +/// The Vocabulary class manages seed embeddings for LLVM IR entities. It +/// contains the seed embeddings for three types of entities: instruction +/// opcodes, types, and operands. Types are grouped/canonicalized for better +/// learning (e.g., all float variants map to FloatTy). The vocabulary abstracts +/// away the canonicalization effectively; the exposed APIs handle all the known +/// LLVM IR opcodes, types, and operands. +/// +/// This class helps populate the seed embeddings in an internal vector-based +/// ADT. It provides logic to map every IR entity to a specific slot index or +/// position in this vector, enabling O(1) embedding lookup while avoiding +/// unnecessary computations involving string-based lookups while generating the +/// embeddings. class Vocabulary { friend class llvm::IR2VecVocabAnalysis; using VocabVector = std::vector<ir2vec::Embedding>; VocabVector Vocab; bool Valid = false; +public: + // Slot layout: ---------------- mtrofin wrote:
Can you move it to private? https://github.com/llvm/llvm-project/pull/155323 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits