Github user myui commented on a diff in the pull request:
https://github.com/apache/incubator-hivemall/pull/111#discussion_r136037465
--- Diff: core/src/main/java/hivemall/recommend/SlimUDTF.java ---
@@ -144,11 +208,76 @@ public void process(Object[] args) throws
HiveException {
Map topKRatesOfI = this.topKRatesOfIOI.getMap(args[2]);
int j = PrimitiveObjectInspectorUtils.getInt(args[3], itemJOI);
Map Rj = this.itemJRatesOI.getMap(args[4]);
- train(i, Ri, topKRatesOfI, j, Rj);
+ trainAndStore(i, Ri, topKRatesOfI, j, Rj);
+
+ if (this.numIterations == 1) {
+ return;
+ }
+
+ if (this.previousItemId != i){
+ this.previousItemId = i;
+
+ for (Map.Entry<?, ?> userRate : ((Map<?, ?>) Ri).entrySet()) {
+ Object u = userRate.getKey();
+ double rui =
PrimitiveObjectInspectorUtils.getDouble(userRate.getValue(),
this.itemIRateValueOI);
+ this.A.unsafeSet((int) u, i, rui); // need optimize
+ }
+
+ // save KNNi
+ // count element size size: i, numKNN, [[u, numKNNu, [[item,
rate], ...], ...]
+ ByteBuffer buf = inputBuf;
+ NioStatefullSegment dst = fileIO;
+
+ int numElementOfKNNi = 0;
+ Map<?, ?> knn = this.topKRatesOfIOI.getMap(topKRatesOfI);
+ for (Map.Entry<?, ?> ri : knn.entrySet()) {
+ numElementOfKNNi +=
this.topKRatesOfIValueOI.getMap(ri.getValue()).size();
+ }
+
+ int recordBytes = SizeOf.INT + SizeOf.INT + SizeOf.INT * 2 *
knn.size() + (SizeOf.DOUBLE+SizeOf.INT) * numElementOfKNNi;
+ int requiredBytes = SizeOf.INT + recordBytes; // need to
allocate space for "recordBytes" itself
+
+ int remain = buf.remaining();
+ if (remain < requiredBytes) {
+ writeBuffer(buf, dst);
+ }
+
+ buf.putInt(i);
+ buf.putInt(knn.size());
+ for (Map.Entry<?, ?> ri :
this.topKRatesOfIOI.getMap(topKRatesOfI).entrySet()){
+ int user =
PrimitiveObjectInspectorUtils.getInt(ri.getKey(), this.topKRatesOfIKeyOI);
+ Map<?, ?> userKNN =
this.topKRatesOfIValueOI.getMap(ri.getValue());
+
+ buf.putInt(user);
+ buf.putInt(userKNN.size());
+
+ for (Map.Entry<?, ?> ratings : userKNN.entrySet()) {
+ int item =
PrimitiveObjectInspectorUtils.getInt(ratings.getKey(),
this.topKRatesOfIValueKeyOI);
+ double rating =
PrimitiveObjectInspectorUtils.getDouble(ratings.getValue(),
this.topKRatesOfIValueValueOI);
+
+ buf.putInt(item);
+ buf.putDouble(rating);
+ }
+ }
+ }
+ }
+
+ private static void writeBuffer(@Nonnull ByteBuffer srcBuf, @Nonnull
NioStatefullSegment dst)
+ throws HiveException {
+ srcBuf.flip();
+ try {
+ dst.write(srcBuf);
+ } catch (IOException e) {
+ throw new HiveException("Exception causes while writing a
buffer to file", e);
+ }
+ srcBuf.clear();
}
@Override
public void close() throws HiveException {
+
+ runIterativeTraining();
+
int numItem = Math.max(this.W.numRows(), this.W.numColumns());
--- End diff --
Please add the following method to `DoKMatrix` and use it.
```java
public void eachNonZeroCell(@Nonnull final VectorProcedure procedure) {
if (nnz == 0) {
return;
}
final IMapIterator itor = elements.entries();
while (itor.next() != -1) {
long k = itor.getKey();
int row = Primitives.getHigh(k);
int col = Primitives.getLow(k);
double value = itor.getValue();
procedure.apply(row, col, value);
}
}
@Override
public RowMajorMatrix toRowMajorMatrix() {
```
```java
public abstract class VectorProcedure {
...
public void apply(@Nonnegative int row, @Nonnegative int col, double
value) {}
}
```
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---