================
@@ -137,13 +138,48 @@ using InstEmbeddingsMap = DenseMap<const Instruction *, Embedding>;
 using BBEmbeddingsMap = DenseMap<const BasicBlock *, Embedding>;
 
 /// Class for storing and accessing the IR2Vec vocabulary.
-/// Encapsulates all vocabulary-related constants, logic, and access methods.
+///
+/// The Vocabulary class manages seed embeddings for LLVM IR entities. It
+/// contains the seed embeddings for three types of entities: instruction
+/// opcodes, types, and operands. Types are grouped/canonicalized for better
+/// learning (e.g., all float variants map to FloatTy). The vocabulary abstracts
+/// away the canonicalization effectively; the exposed APIs handle all the known
+/// LLVM IR opcodes, types and operands.
+///
+/// This class helps populate the seed embeddings in an internal vector-based
+/// ADT. It provides logic to map every IR entity to a specific slot index or
+/// position in this vector, enabling O(1) embedding lookup while avoiding
+/// unnecessary computations involving string-based lookups while generating the
+/// embeddings.
 class Vocabulary {
   friend class llvm::IR2VecVocabAnalysis;
   using VocabVector = std::vector<ir2vec::Embedding>;
   VocabVector Vocab;
   bool Valid = false;
 
+public:
+  // Slot layout:
----------------
mtrofin wrote:

Can you move it to the private section?

https://github.com/llvm/llvm-project/pull/155323
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to