Github user myui commented on a diff in the pull request:
https://github.com/apache/incubator-hivemall/pull/116#discussion_r141546656
--- Diff: core/src/main/java/hivemall/embedding/AbstractWord2VecModel.java
---
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.embedding;
+
+import hivemall.math.random.PRNG;
+import hivemall.math.random.RandomNumberGeneratorFactory;
+import hivemall.utils.collections.maps.Int2FloatOpenHashTable;
+
+import javax.annotation.Nonnegative;
+import javax.annotation.Nonnull;
+import java.util.List;
+
+public abstract class AbstractWord2VecModel {
+ // cached sigmoid function parameters
+ protected static final int MAX_SIGMOID = 6;
+ protected static final int SIGMOID_TABLE_SIZE = 1000;
+ protected float[] sigmoidTable;
+
+
+ @Nonnegative
+ protected int dim;
+ protected int win;
+ protected int neg;
+ protected int iter;
+
+ // learning rate parameters
+ @Nonnegative
+ protected float lr;
+ @Nonnegative
+ private float startingLR;
+ @Nonnegative
+ private long numTrainWords;
+ @Nonnegative
+ protected long wordCount;
+ @Nonnegative
+ private long lastWordCount;
+
+ protected PRNG rnd;
+
+ protected Int2FloatOpenHashTable contextWeights;
+ protected Int2FloatOpenHashTable inputWeights;
+ protected Int2FloatOpenHashTable S;
+ protected int[] aliasWordId;
+
+ protected AbstractWord2VecModel(final int dim, final int win, final
int neg, final int iter,
+ final float startingLR, final long numTrainWords, final
Int2FloatOpenHashTable S,
+ final int[] aliasWordId) { // copies the given settings into fields, then allocates the sigmoid table and both weight tables
+ this.win = win;
+ this.neg = neg;
+ this.iter = iter;
+ this.dim = dim;
+ this.startingLR = this.lr = startingLR; // the current rate lr starts out equal to startingLR
+ this.numTrainWords = numTrainWords;
+
+ // alias sampler for negative sampling
+ this.S = S; // stored by reference, not copied
+ this.aliasWordId = aliasWordId; // stored by reference, not copied
+
+ this.wordCount = 0L;
+ this.lastWordCount = 0L;
+ this.rnd = RandomNumberGeneratorFactory.createPRNG(1001); // fixed argument 1001 (presumably the seed; confirm against the factory)
+
+ this.sigmoidTable = initSigmoidTable(); // built once per instance, here in the constructor
+
+ // TODO how to estimate size (10578 * dim is hard-coded for both tables below)
+ this.inputWeights = new Int2FloatOpenHashTable(10578 * dim);
+ this.inputWeights.defaultReturnValue(0.f); // presumably the value returned for absent keys; confirm
+ this.contextWeights = new Int2FloatOpenHashTable(10578 * dim);
+ this.contextWeights.defaultReturnValue(0.f); // presumably the value returned for absent keys; confirm
+ }
+
+ private static float[] initSigmoidTable() {
--- End diff --
Please annotate the return value of `initSigmoidTable()` with `@Nonnull`.
---