[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-03 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r702038382



##
File path: hardware/chisel/src/test/scala/unittest/SyncQueue2PortMemTest.scala
##
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package unittest
+
+import chisel3._
+import chisel3.util._
+import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}
+import scala.util.Random
+import unittest.util._
+import vta.util._
+import vta.util.config._
+
+class Checker2P(c: SyncQueue2PTestWrapper[UInt], t: 
PeekPokeTester[SyncQueue2PTestWrapper[UInt]]) {
+
+  def bits (bits: Int) = {
+t.expect(c.io.tq.deq.bits, bits)
+t.expect(c.io.rq.deq.bits, bits)
+
+  }
+  def ready (bits: Int) = {
+t.expect(c.io.tq.enq.ready, bits)
+t.expect(c.io.rq.enq.ready, bits)
+
+  }
+  def valid (bits: Int) = {
+t.expect(c.io.tq.deq.valid, bits)
+t.expect(c.io.rq.deq.valid, bits)
+
+  }
+  def status () = {
+val rv = t.peek(c.io.rq.enq.ready)
+t.expect(c.io.tq.enq.ready, rv)
+val rc = t.peek(c.io.rq.count)
+t.expect(c.io.tq.count, rc)
+val vv = t.peek(c.io.rq.deq.valid)
+t.expect(c.io.tq.deq.valid, vv)
+if (vv != 0) {
+  val bv = t.peek(c.io.rq.deq.bits)
+  t.expect(c.io.tq.deq.bits, bv)
+}
+t.peek(c.io.rq.count)
+t.peek(c.io.tq.count)
+  }
+}
+class TestSyncQueue2PLongRead(c: SyncQueue2PTestWrapper[UInt]) extends 
PeekPokeTester(c) {
+
+  val chr = new Checker2P (c, this)
+
+  def testFillRW(depth: Int) = {
+//println(s"-D- run test depth ${depth}")

Review comment:
   fixed in PR

##
File path: hardware/chisel/src/main/scala/core/TensorLoadNarrowVME.scala
##
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package vta.core
+
+import scala.math.pow
+import scala.math.sqrt
+
+import chisel3._
+import chisel3.util._
+import vta.util.config._
+import vta.shell._
+
+
+/** TensorLoad.
+ *
+ * Load 1D and 2D tensors from main memory (DRAM) to input/weight
+ * scratchpads (SRAM). Also, there is support for zero padding, while
+ * doing the load.
+ */
+class TensorLoadNarrowVME(tensorType: String = "none", debug: Boolean = false)(
+implicit p: Parameters)
+extends Module {
+  val tp = new TensorParams(tensorType)
+  val mp = p(ShellKey).memParams
+  val io = IO(new Bundle {
+val start = Input(Bool())
+val done = Output(Bool())
+val inst = Input(UInt(INST_BITS.W))
+val baddr = Input(UInt(mp.addrBits.W))
+val vme_rd = new VMEReadMaster
+val tensor = new TensorClient(tensorType)
+  })
+  val writePipeLatency = tp.writePipeLatency
+
+  val sIdle :: sBusy :: Nil =
+Enum(2)
+  val state = RegInit(sIdle)
+
+  val isBusy = state === sBusy
+
+  val localDone = Wire(Bool())
+  when(io.start) {
+state := sBusy
+  }.elsewhen(localDone) {
+state := sIdle
+  }
+
+  val dec = io.inst.asTypeOf(new MemDecode)
+
+  val vmeDataBitsPipe = RegNext(io.vme_rd.data.bits)
+  val vmeDataValidPipe = RegNext(io.vme_rd.data.valid, init = false.B)
+  val vmeDataReadyPipe = RegNext(io.vme_rd.data.ready, init = false.B)
+  val vmeDataFirePipe = vmeDataValidPipe & vmeDataReadyPipe
+
+  //--
+  //--- Generate data load VME command ---
+  //--
+  val vmeCmd = Module (new GenVMECmd(tensorType, debug))
+  vmeCmd.io.start := io.start
+  vmeCmd.io.isBusy := 

[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-03 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r702038234



##
File path: hardware/chisel/src/main/scala/core/TensorLoadSimple.scala
##
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package vta.core
+
+import chisel3._
+import chisel3.util._
+import chisel3.util.experimental._
+import vta.util.config._
+import vta.shell._
+
+/** TensorLoad.
+ *
+ * Load 1D and 2D tensors from main memory (DRAM) to input/weight
+ * scratchpads (SRAM). Also, there is support for zero padding, while
+ * doing the load. Zero-padding works on the y and x axis, and it is
+ * managed by TensorPadCtrl. The TensorDataCtrl is in charge of
+ * handling the way tensors are stored on the scratchpads.
+ */
+class TensorLoadSimple(tensorType: String = "none", debug: Boolean = false)(
+implicit p: Parameters)
+extends Module {
+  val tp = new TensorParams(tensorType)
+  val mp = p(ShellKey).memParams
+  val io = IO(new Bundle {
+val start = Input(Bool())
+val done = Output(Bool())
+val inst = Input(UInt(INST_BITS.W))
+val baddr = Input(UInt(mp.addrBits.W))
+val vme_rd = new VMEReadMaster
+val tensor = new TensorClient(tensorType)
+  })
+
+  require(tp.numMemBlock > 0, s"-F- Unexpected data to tensor bit size ratio. 
${tensorType} ${tp.numMemBlock}")
+  require(tp.splitWidth == 1 && tp.splitLength == 1, s"-F- Cannot do split 
direct access")
+
+  val sizeFactor = tp.tensorLength * tp.numMemBlock
+  val strideFactor = tp.tensorLength * tp.tensorWidth
+
+  val dec = io.inst.asTypeOf(new MemDecode)
+  val dataCtrl = Module(
+new TensorDataCtrl(tensorType, sizeFactor, strideFactor))
+  val dataCtrlDone = RegInit(false.B)
+  val yPadCtrl0 = Module(new TensorPadCtrl(padType = "YPad0", sizeFactor))
+  val yPadCtrl1 = Module(new TensorPadCtrl(padType = "YPad1", sizeFactor))
+  val xPadCtrl0 = Module(new TensorPadCtrl(padType = "XPad0", sizeFactor))
+  val xPadCtrl1 = Module(new TensorPadCtrl(padType = "XPad1", sizeFactor))
+
+  val tag = Reg(UInt(log2Ceil(tp.numMemBlock).W))
+  val set = Reg(UInt(log2Ceil(tp.tensorLength).W))
+
+  val sIdle :: sYPad0 :: sXPad0 :: sReadCmd :: sReadData :: sXPad1 :: sYPad1 
:: Nil =
+Enum(7)
+  val state = RegInit(sIdle)
+
+  // control
+  switch(state) {
+is(sIdle) {
+  when(io.start) {
+when(dec.ypad_0 =/= 0.U) {
+  state := sYPad0
+}.elsewhen(dec.xpad_0 =/= 0.U) {
+  state := sXPad0
+}.otherwise {
+  state := sReadCmd
+}
+  }
+}
+is(sYPad0) {
+  when(yPadCtrl0.io.done) {
+when(dec.xpad_0 =/= 0.U) {
+  state := sXPad0
+}.otherwise {
+  assert(tag === (tp.numMemBlock - 1).U, "-F- Should not happen mid 
tensor row read")
+  state := sReadCmd
+}
+  }
+}
+is(sXPad0) {
+  when(xPadCtrl0.io.done) {
+assert(tag === (tp.numMemBlock - 1).U, "-F- Should not happen mid 
tensor row read")
+state := sReadCmd
+  }
+}
+is(sReadCmd) {
+  when(io.vme_rd.cmd.ready) {
+state := sReadData
+  }
+}
+is(sReadData) {
+  when(io.vme_rd.data.valid) {
+when(dataCtrl.io.done) {
+  when(dec.xpad_1 =/= 0.U) {
+state := sXPad1
+  }.elsewhen(dec.ypad_1 =/= 0.U) {
+state := sYPad1
+  }.otherwise {
+state := sIdle
+  }
+}.elsewhen(dataCtrl.io.stride) {
+  when(dec.xpad_1 =/= 0.U) {
+state := sXPad1
+  }.elsewhen(dec.xpad_0 =/= 0.U) {
+state := sXPad0
+  }.otherwise {
+assert(tag === (tp.numMemBlock - 1).U, "-F- Should not happen mid 
tensor row read")
+state := sReadCmd
+  }
+}.elsewhen(dataCtrl.io.split) {
+  state := sReadCmd
+}
+  }
+}
+is(sXPad1) {
+  when(xPadCtrl1.io.done) {
+when(dataCtrlDone) {
+  when(dec.ypad_1 =/= 0.U) {
+state := sYPad1
+  }.otherwise {
+state := sIdle
+  }
+}.otherwise {
+  when(dec.xpad_0 =/= 0.U) {
+state := sXPad0
+  

[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-03 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r702038035



##
File path: hardware/dpi/tsim_device.cc
##
@@ -58,19 +60,25 @@ void VTAHostDPI(dpi8_t* req_valid,
resp_valid, resp_value);
 }
 
-void VTAMemDPI(dpi8_t req_valid,
-   dpi8_t req_opcode,
-   dpi8_t req_len,
-   dpi64_t req_addr,
+void VTAMemDPI(dpi8_t rd_req_valid,
+   dpi8_t rd_req_len,
+  dpi8_t rd_req_id,
+   dpi64_t rd_req_addr,
+   dpi8_t wr_req_valid,
+   dpi8_t wr_req_len,
+   dpi64_t wr_req_addr,
dpi8_t wr_valid,
-   dpi64_t wr_value,
+   const svOpenArrayHandle wr_value,
+   dpi64_t wr_strb,
dpi8_t* rd_valid,
-   dpi64_t* rd_value,
+  dpi8_t* rd_id,

Review comment:
   fixed in PR

##
File path: hardware/chisel/src/main/scala/core/LoadUop.scala
##
@@ -67,160 +64,43 @@ class LoadUop(debug: Boolean = false)(implicit p: 
Parameters) extends Module {
 val vme_rd = new VMEReadMaster
 val uop = new UopClient
   })
-  val numUop = 2 // store two uops per sram word
-  val uopBits = p(CoreKey).uopBits
-  val uopBytes = uopBits / 8
-  val uopDepth = p(CoreKey).uopMemDepth / numUop
 
-  val dec = io.inst.asTypeOf(new MemDecode)
-  val raddr = Reg(chiselTypeOf(io.vme_rd.cmd.bits.addr))
-  val xcnt = Reg(chiselTypeOf(io.vme_rd.cmd.bits.len))
-  val xlen = Reg(chiselTypeOf(io.vme_rd.cmd.bits.len))
-  val xrem = Reg(chiselTypeOf(dec.xsize))
-  val xsize = (dec.xsize >> log2Ceil(numUop)) + dec.xsize(0) + 
(dec.sram_offset % 2.U) - 1.U
-  val xmax = (1 << mp.lenBits).U
-  val xmax_bytes = ((1 << mp.lenBits) * mp.dataBits / 8).U
+  // force simple load uop implementation be careful if
+  // define simple tensor load
+  val forceSimpleLoadUop = false;
 
-  val dram_even = (dec.dram_offset % 2.U) === 0.U
-  val sram_even = (dec.sram_offset % 2.U) === 0.U
-  val sizeIsEven = (dec.xsize % 2.U) === 0.U
+  if (forceSimpleLoadUop) {
+require(mp.dataBits == 64, "-F- Original LoadUop supports only 64 bit 
memory data transfer")
 
-  val sIdle :: sReadCmd :: sReadData :: Nil = Enum(3)
-  val state = RegInit(sIdle)
+val loadUop = Module(new LoadUopSimple(debug))
 
-  // control
-  switch(state) {
-is(sIdle) {
-  when(io.start) {
-state := sReadCmd
-when(xsize < xmax) {
-  xlen := xsize
-  xrem := 0.U
-}.otherwise {
-  xlen := xmax - 1.U
-  xrem := xsize - xmax
-}
-  }
-}
-is(sReadCmd) {
-  when(io.vme_rd.cmd.ready) {
-state := sReadData
-  }
-}
-is(sReadData) {
-  when(io.vme_rd.data.valid) {
-when(xcnt === xlen) {
-  when(xrem === 0.U) {
-state := sIdle
-  }.otherwise {
-raddr := raddr + xmax_bytes
-when(xrem < xmax) {
-  state := sReadCmd
-  xlen := xrem
-  xrem := 0.U
-}
-.otherwise {
-  state := sReadCmd
-  xlen := xmax - 1.U
-  xrem := xrem - xmax
-}
-  }
-}
-  }
-}
-  }
+loadUop.io.start := io.start
+io.done := loadUop.io.done
+loadUop.io.baddr := io.baddr
+loadUop.io.vme_rd <> io.vme_rd
 
-  // read-from-dram
-  val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt
-  when(state === sIdle) {
-when(dram_even) {
-  raddr := io.baddr | (maskOffset & (dec.dram_offset << 
log2Ceil(uopBytes)))
-}.otherwise {
-  raddr := (io.baddr | (maskOffset & (dec.dram_offset << 
log2Ceil(uopBytes - uopBytes.U
-}
-  }
+loadUop.io.dec := io.inst.asTypeOf(new MemDecode)
+loadUop.io.uop.idx <> io.uop.idx
+io.uop <> loadUop.io.uop
 
-  io.vme_rd.cmd.valid := state === sReadCmd
-  io.vme_rd.cmd.bits.addr := raddr
-  io.vme_rd.cmd.bits.len := xlen
+  } else {
+//println(

Review comment:
   fixed in PR




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-03 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r702037842



##
File path: hardware/chisel/src/test/scala/unittest/SyncQueueTest.scala
##
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package unittest
+
+import chisel3._
+import chisel3.util._
+import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}
+import scala.util.Random
+import unittest.util._
+import vta.util._
+import vta.util.config._
+
+class TestOnePortMem(c: OnePortMem[UInt]) extends PeekPokeTester(c) {
+
+  // write a:0 d:24
+  println("-")
+  println("Cycle 0 write 24 to address 0")
+  poke (c.io.wr_en, 1)
+  poke (c.io.wr_data, 24)
+  poke (c.io.ch_en, 1)
+  poke (c.io.addr, 0)
+  step(1)
+  println("-")
+  // read a:0
+  println("Cycle 1 read address 0")
+  poke (c.io.wr_en, 0)
+  poke (c.io.addr, 0)
+  poke (c.io.ch_en, 1)
+  step(1)
+  println("-")
+  // write a:1 d:99
+  println("Cycle 2 write 99 to address 1")
+  poke (c.io.wr_en, 1)
+  poke (c.io.wr_data, 99)
+  poke (c.io.ch_en, 1)
+  poke (c.io.addr, 1)
+  // read d:24
+  println("Cycle 2 read expect data 24")
+  expect (c.io.rd_data, 24)
+  step(1)
+  println("-")
+  println("Cycle 3 should still read data 24")
+  poke (c.io.ch_en, 0)
+  // read d:24
+  expect (c.io.rd_data, 24)
+  step(1)
+  println("-")
+  println("Cycle 4 read address 0")
+  poke (c.io.wr_en, 0)
+  poke (c.io.addr, 0)
+  poke (c.io.ch_en, 1)
+  step(1)
+  println("-")
+  // write a:1 d:99
+  poke (c.io.wr_en, 0)
+  poke (c.io.wr_data, 99)
+  poke (c.io.ch_en, 0)
+  poke (c.io.addr, 1)
+  // read d:24
+  println("Cycle 5 read expect data 24")
+  expect (c.io.rd_data, 24)
+  step(1)
+  println("-")
+}
+class Checker(c: SyncQueueTestWrapper[UInt], t: 
PeekPokeTester[SyncQueueTestWrapper[UInt]]) {
+
+  def bits (bits: Int) = {
+t.expect(c.io.tq.deq.bits, bits)
+t.expect(c.io.rq.deq.bits, bits)
+
+  }
+  def ready (bits: Int) = {
+t.expect(c.io.tq.enq.ready, bits)
+t.expect(c.io.rq.enq.ready, bits)
+
+  }
+  def valid (bits: Int) = {
+t.expect(c.io.tq.deq.valid, bits)
+t.expect(c.io.rq.deq.valid, bits)
+
+  }
+  def status () = {
+val rv = t.peek(c.io.rq.enq.ready)
+t.expect(c.io.tq.enq.ready, rv)
+val rc = t.peek(c.io.rq.count)
+t.expect(c.io.tq.count, rc)
+val vv = t.peek(c.io.rq.deq.valid)
+t.expect(c.io.tq.deq.valid, vv)
+if (vv != 0) {
+  val bv = t.peek(c.io.rq.deq.bits)
+  t.expect(c.io.tq.deq.bits, bv)
+}
+t.peek(c.io.rq.count)
+t.peek(c.io.tq.count)
+  }
+}
+class TestSyncQueueLongRead(c: SyncQueueTestWrapper[UInt]) extends 
PeekPokeTester(c) {
+
+  val chr = new Checker (c, this)
+
+  def testFillRW(depth: Int) = {
+//println(s"-D- run test depth ${depth}")

Review comment:
   fixed in PR

##
File path: hardware/chisel/src/test/scala/unittest/SyncQueueTest.scala
##
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package unittest
+
+import chisel3._
+import chisel3.util._
+import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}
+import scala.util.Random
+import unittest.util._
+import vta.util._
+import vta.util.config._
+
+class TestOnePortMem(c: 

[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-01 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r700459058



##
File path: hardware/chisel/src/main/scala/interface/axi/AXI.scala
##
@@ -211,7 +211,7 @@ class AXIMaster(params: AXIParams) extends AXIBase(params) {
 ar.bits.qos := params.qosConst.U
 ar.bits.region := params.regionConst.U
 ar.bits.size := params.sizeConst.U
-ar.bits.id := params.idConst.U
+//do not override

Review comment:
   I moved write id to VME. Added comments. Check PR




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-01 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r700414594



##
File path: hardware/chisel/src/main/scala/interface/axi/AXI.scala
##
@@ -211,7 +211,7 @@ class AXIMaster(params: AXIParams) extends AXIBase(params) {
 ar.bits.qos := params.qosConst.U
 ar.bits.region := params.regionConst.U
 ar.bits.size := params.sizeConst.U
-ar.bits.id := params.idConst.U
+//do not override

Review comment:
   IMHO, those are not defaults. They were set in setConst. That expected 
no change of value.
   VME write path did not get changed other than supporting wider data.  It was 
not identified critical for hiding memory access latency. DE10 config AXI burst 
of 16 pulses was short enough to make hiding burst-to-burst read latency 
important. Now even 1 pulse bursts can potentially transfer read data without 
gaps.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-01 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r700416051



##
File path: hardware/chisel/src/main/scala/shell/VME.scala
##
@@ -101,12 +152,13 @@ class VMEWriteMaster(implicit p: Parameters) extends 
Bundle {
 class VMEWriteClient(implicit p: Parameters) extends Bundle {
   val dataBits = p(ShellKey).memParams.dataBits
   val cmd = Flipped(Decoupled(new VMECmd))
-  val data = Flipped(Decoupled(UInt(dataBits.W)))
+  val data = Flipped(Decoupled(new VMEWriteData))
   val ack = Output(Bool())
   override def cloneType =
-new VMEWriteClient().asInstanceOf[this.type]
+  new VMEWriteClient().asInstanceOf[this.type]
 }
 
+

Review comment:
   fixed in PR

##
File path: hardware/chisel/src/main/scala/shell/VME.scala
##
@@ -136,68 +188,178 @@ class VMEClient(implicit p: Parameters) extends Bundle {
  * This unit multiplexes the memory controller interface for the Core. 
Currently,
  * it supports single-writer and multiple-reader mode and it is also based on 
AXI.
  */
+

Review comment:
   fixed in PR




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-01 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r700414594



##
File path: hardware/chisel/src/main/scala/interface/axi/AXI.scala
##
@@ -211,7 +211,7 @@ class AXIMaster(params: AXIParams) extends AXIBase(params) {
 ar.bits.qos := params.qosConst.U
 ar.bits.region := params.regionConst.U
 ar.bits.size := params.sizeConst.U
-ar.bits.id := params.idConst.U
+//do not override

Review comment:
   IMHO, those are not defaults. They were set in setConst. That expected 
no change of value.
   VME write path did not get changed other than supporting wider data.  It was 
not identified critical for hiding memory access latency. DE10 config AXI burst 
of 16 pulses was short enough to make hiding burst-to-burst latency important. 
Now even 1 pulse bursts can potentially transfer data without gaps.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-09-01 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r700391689



##
File path: hardware/chisel/src/main/scala/interface/axi/AXI.scala
##
@@ -211,7 +211,7 @@ class AXIMaster(params: AXIParams) extends AXIBase(params) {
 ar.bits.qos := params.qosConst.U
 ar.bits.region := params.regionConst.U
 ar.bits.size := params.sizeConst.U
-ar.bits.id := params.idConst.U
+//do not override

Review comment:
   id/strb should be handled by VME now. VME can send multiple read 
requests with different IDs. Strb is used by TensorStore write to mark data of 
valid tensors in case when AXI data is wider than tensor data.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-08-19 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r692642018



##
File path: hardware/chisel/src/main/resources/verilog/VTAMemDPI.v
##
@@ -18,89 +18,153 @@
  */
 
 module VTAMemDPI #
-( parameter LEN_BITS = 8,
-  parameter ADDR_BITS = 64,
-  parameter DATA_BITS = 64
-)
-(
-  inputclock,
-  inputreset,
-  inputdpi_req_valid,
-  inputdpi_req_opcode,
-  input [LEN_BITS-1:0] dpi_req_len,
-  input[ADDR_BITS-1:0] dpi_req_addr,
-  inputdpi_wr_valid,
-  input[DATA_BITS-1:0] dpi_wr_bits,
-  output logic dpi_rd_valid,
-  output logic [DATA_BITS-1:0] dpi_rd_bits,
-  inputdpi_rd_ready
-);
+  ( parameter LEN_BITS = 8,
+parameter ADDR_BITS = 64,
+parameter DATA_BITS = 64,
+parameter STRB_BITS = DATA_BITS/8
+)
+   (
+input   clock,
+input   reset,
+input   dpi_req_ar_valid,
+input [LEN_BITS-1:0]dpi_req_ar_len,
+input [7:0] dpi_req_ar_id, 
+input [ADDR_BITS-1:0]   dpi_req_ar_addr,
+input   dpi_req_aw_valid,
+input [LEN_BITS-1:0]dpi_req_aw_len,
+input [ADDR_BITS-1:0]   dpi_req_aw_addr,
+input   dpi_wr_valid,
+input [DATA_BITS-1:0]   dpi_wr_bits_data,
+input [STRB_BITS-1:0]   dpi_wr_bits_strb,
+output logicdpi_rd_valid,
+output logic [7:0]  dpi_rd_bits_id, 
+output logic [DATA_BITS-1:0] dpi_rd_bits_data,

Review comment:
   Updated PR.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-08-19 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r692641717



##
File path: src/dpi/module.cc
##
@@ -180,36 +188,84 @@ void HostDevice::WaitPopResponse(HostResponse* r) {
   resp_.WaitPop(r);
 }
 
-void MemDevice::SetRequest(uint8_t opcode, uint64_t addr, uint32_t len) {
+  void MemDevice::  SetRequest(uint8_t rd_req_valid,uint64_t rd_req_addr, 
uint32_t rd_req_len, uint32_t rd_req_id, uint64_t wr_req_addr, uint32_t 
wr_req_len, uint8_t wr_req_valid){
+
   std::lock_guard lock(mutex_);
-  void * vaddr = vta::vmem::VirtualMemoryManager::Global()->GetAddr(addr);
-
-  if (opcode == 1) {
-wlen_ = len + 1;
-waddr_ = reinterpret_cast(vaddr);
-  } else {
-rlen_ = len + 1;
-raddr_ = reinterpret_cast(vaddr);
-  }
+if(rd_req_addr !=0 ){
+ void * rd_vaddr = 
vta::vmem::VirtualMemoryManager::Global()->GetAddr(rd_req_addr);
+ if(rd_req_valid == 1) {
+ rlen_ = rd_req_len + 1;
+ rid_  = rd_req_id;
+ raddr_ = reinterpret_cast(rd_vaddr);
+  }
+}
+
+if(wr_req_addr != 0){
+   void * wr_vaddr = 
vta::vmem::VirtualMemoryManager::Global()->GetAddr(wr_req_addr);
+   if (wr_req_valid == 1) {
+   wlen_ = wr_req_len + 1;
+waddr_ = reinterpret_cast(wr_vaddr);
+ } 
+}
+
+ //if(wr_req_addr != 0 && rd_req_addr!=0){

Review comment:
   Updated PR.

##
File path: hardware/chisel/src/main/scala/core/TensorUtil.scala
##
@@ -79,9 +80,68 @@ class TensorParams(tensorType: String = "none")(implicit p: 
Parameters) extends
 else
   p(CoreKey).outMemDepth
 
+  // the number of cycles Instruction write is delayed
+  // Idle state writes are not delayed
+  // inserted regs are used to physically deliver signal to memories
+  val writePipeLatency =
+if (tensorType == "inp") {
+  0 // VME data load cmd write (per group)
+} else if (tensorType == "wgt") {
+  0 // VME data load cmd write (per group)
+} else if (tensorType == "acc") {
+  0 // VME data load cmd write (per group)
+} else if (tensorType == "fetch") {
+  0
+} else if (tensorType == "uop") {
+  0
+} else if (tensorType == "out") {
+  0 // direct write from core
+} else {
+  0
+}
+
+  // the number of cycles Idle state data read is delayed
+  // inserted regs are used to physically deliver signal to memories
+  val readTensorLatency =
+if (tensorType == "inp") {
+  0 // GEMM inp data read (per memsplit)
+} else if (tensorType == "wgt") {
+  0
+} else if (tensorType == "acc") {
+  0
+} else if (tensorType == "fetch") {
+  0
+} else if (tensorType == "uop") {
+  0
+} else if (tensorType == "out") {
+  0
+} else {
+  0
+}
+  // the number of cycles vme data signals are delayed
+  // This is a global delay of VME data signals. One for all groups
+  //
+  val readVMEDataLatency =
+if (tensorType == "inp") {
+  0 // VME data signals delay
+} else if (tensorType == "wgt") {
+  0 // VME data signals delay
+} else if (tensorType == "acc") {
+  0  // VME data signals delay
+} else if (tensorType == "fetch") {
+  0
+} else if (tensorType == "uop") {
+  0 // VME data signals delay
+} else if (tensorType == "out") {
+  0
+} else {
+  0
+}
+
+
   // acc/wgt parts are grouped to form
   // a physically compact compute entity
-
+  //

Review comment:
   Updated PR.

##
File path: hardware/chisel/src/main/scala/core/TensorUtil.scala
##
@@ -79,9 +80,68 @@ class TensorParams(tensorType: String = "none")(implicit p: 
Parameters) extends
 else
   p(CoreKey).outMemDepth
 
+  // the number of cycles Instruction write is delayed
+  // Idle state writes are not delayed
+  // inserted regs are used to physically deliver signal to memories
+  val writePipeLatency =
+if (tensorType == "inp") {
+  0 // VME data load cmd write (per group)
+} else if (tensorType == "wgt") {
+  0 // VME data load cmd write (per group)
+} else if (tensorType == "acc") {
+  0 // VME data load cmd write (per group)
+} else if (tensorType == "fetch") {
+  0
+} else if (tensorType == "uop") {
+  0
+} else if (tensorType == "out") {
+  0 // direct write from core
+} else {
+  0
+}
+
+  // the number of cycles Idle state data read is delayed
+  // inserted regs are used to physically deliver signal to memories
+  val readTensorLatency =
+if (tensorType == "inp") {
+  0 // GEMM inp data read (per memsplit)
+} else if (tensorType == "wgt") {
+  0
+} else if (tensorType == "acc") {
+  0
+} else if (tensorType == "fetch") {
+  0
+} else if (tensorType == "uop") {
+  0
+} else if (tensorType == "out") {
+  0
+} else {
+  0
+}
+  // the number of cycles vme data signals are delayed
+  // This is a global delay of 

[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-08-19 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r692641676



##
File path: src/dpi/module.cc
##
@@ -118,16 +120,22 @@ class HostDevice {
 
 class MemDevice {
  public:
-  void SetRequest(uint8_t opcode, uint64_t addr, uint32_t len);
-  MemResponse ReadData(uint8_t ready);
-  void WriteData(uint64_t value);
+  void  SetRequest(uint8_t rd_req_valid,uint64_t rd_req_addr, uint32_t 
rd_req_len, uint32_t rd_req_id, uint64_t wr_req_addr, uint32_t wr_req_len, 
uint8_t wr_req_valid);

Review comment:
   If it is not run by TVM make, than it is not. Fixed long lines in a new 
PR




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm-vta] aasorokiin commented on a change in pull request #32: VTA Chisel Wide memory interface.

2021-08-19 Thread GitBox


aasorokiin commented on a change in pull request #32:
URL: https://github.com/apache/tvm-vta/pull/32#discussion_r692641297



##
File path: hardware/chisel/src/main/scala/util/SyncQueue.scala
##
@@ -0,0 +1,517 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package vta.util
+
+import chisel3._
+import chisel3.util._
+
+import vta.util.config._
+
+//! Queue with SRAM one port or 1r1W
+class SyncQueueVTA[T <: Data](

Review comment:
   Yes, it doesn't matter much. Updated PR.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org