Chouffe commented on a change in pull request #14769: [WIP] [Clojure] Add Fine
Tuning Sentence Pair Classification BERT Example
URL: https://github.com/apache/incubator-mxnet/pull/14769#discussion_r278194188
 
 

 ##########
 File path: 
contrib/clojure-package/examples/bert/src/bert/bert_sentence_classification.clj
 ##########
 @@ -0,0 +1,136 @@
+(ns bert.bert-sentence-classification
+  (:require [bert.util :as bert-util]
+            [clojure-csv.core :as csv]
+            [clojure.string :as string]
+            [org.apache.clojure-mxnet.callback :as callback]
+            [org.apache.clojure-mxnet.context :as context]
+            [org.apache.clojure-mxnet.dtype :as dtype]
+            [org.apache.clojure-mxnet.io :as mx-io]
+            [org.apache.clojure-mxnet.layout :as layout]
+            [org.apache.clojure-mxnet.module :as m]
+            [org.apache.clojure-mxnet.ndarray :as ndarray]
+            [org.apache.clojure-mxnet.optimizer :as optimizer]
+            [org.apache.clojure-mxnet.symbol :as sym]))
+
+;; path prefix of the exported static BERT base model files
+(def model-path-prefix "data/static_bert_base_net")
+;; epoch number of the model
+(def epoch 0)
+;; the vocabulary (JSON) used in the model
+(def model-vocab "data/vocab.json")
+;; the maximum length of the input token sequence
+(def seq-length 128)
+
+
+(defn pre-processing
+  "Pre-processes a single training item for BERT sentence-pair classification.
+   `train-item` is a `[sentence-a sentence-b label]` triple. Returns a map with
+   :input-batch (token indexes, token types, and [valid-length]), a binary
+   :label vector, the padded :tokens, and the original :train-item.
+   NOTE(review): `ctx` and `idx->token` appear unused in this body — confirm
+   whether they can be dropped or are reserved for later use."
+  [ctx idx->token token->idx train-item]
+    (let [[sentence-a sentence-b label] train-item
+       ;;; tokenize both sentences, lower-cased
+          token-1 (bert-util/tokenize (string/lower-case sentence-a))
+          token-2 (bert-util/tokenize (string/lower-case sentence-b))
+          ;; count of real (non-padding) tokens across both sentences
+          ;; NOTE(review): computed before the [CLS]/[SEP] markers are added
+          ;; below, so those markers are not counted — confirm intended
+          valid-length (+ (count token-1) (count token-2))
+        ;;; generate token types (segment ids): 0s for sentence-a, 1s for
+        ;;; sentence-b, then padded with 0s out to seq-length
+          qa-embedded (into (bert-util/pad [] 0 (count token-1))
+                            (bert-util/pad [] 1 (count token-2)))
+          token-types (bert-util/pad qa-embedded 0 seq-length)
+        ;;; assemble the standard BERT input sequence:
+        ;;; [CLS] sentence-a [SEP] sentence-b [SEP], padded with [PAD]
+          token-2 (conj token-2 "[SEP]")
+          token-1 (into [] (concat ["[CLS]"] token-1 ["[SEP]"] token-2))
+          tokens (bert-util/pad token-1 "[PAD]" seq-length)
+        ;;; translate tokens to vocabulary indexes
+          indexes (bert-util/tokens->idxs token->idx tokens)]
+    {:input-batch [indexes
+                   token-types
+                   [valid-length]]
+     ;; binary label: string "0" -> [0], anything else -> [1]
+     :label (if (= "0" label)
+              [0]
+              [1])
+     :tokens tokens
+     :train-item train-item}))
+
+(defn fine-tune-model
+  "msymbol: the pretrained network symbol
+    arg-params: the argument parameters of the pretrained model
 
 Review comment:
   `arg-params` does not seem to be a parameter of the function here — should it be removed from the docstring?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to