[Weekly Review] 2020/01/13-19

Published: by Creative Commons Licence (Last updated: )

2020/01/13-19

This week I read another paper on Eyeriss v2 and found that it uses a new PE architecture, so I changed my design to fit it. Because of the complexity of the CSC coding format it uses, I spent almost two days working out the CSC read control logic, which left no time for the software analysis system. Today I thought of a more hardware-friendly CSC format, and I will work on it next. Currently, the PE project has passed two basic read and write tests.

Next week, I have to finish the PE with a test file, and then try to establish the software analysis system.


Chisel Syntax

MAC module Delay

/**
  * Latency experiment for a 4-bit multiply-accumulate (a * b + c).
  *
  * The same computation is exposed three ways so their delays can be compared:
  *  - out_reg0: purely combinational, no register in the path
  *  - out_reg1: the product is registered first, then c is added
  *  - out_reg2: both the product and the sum are registered
  *
  * Both registers are 4 bits wide and reset to 0.
  */
class TheMACModule extends Module {
  val io = IO(new Bundle {
    val a = Input(UInt(4.W))
    val b = Input(UInt(4.W))
    val c = Input(UInt(4.W))
    val out_reg0 = Output(UInt(4.W))
    val out_reg1 = Output(UInt(4.W))
    val out_reg2 = Output(UInt(4.W))
  })

  // Combinational product, used by the unregistered path and as the first register's input.
  val product = io.a * io.b

  // First stage: hold the product.
  val a_reg = RegInit(0.U(4.W))
  a_reg := product

  // Registered product plus the live c input; feeds out_reg1 and the second register.
  val stagedSum = a_reg + io.c

  // Second stage: hold the sum.
  val b_reg = RegInit(0.U(4.W))
  b_reg := stagedSum

  io.out_reg0 := product + io.c
  io.out_reg1 := stagedSum
  io.out_reg2 := b_reg
}
/**
  * Applies a = 2, b = 3, c = 4 to TheMACModule and prints the three outputs
  * at clock 0, 1 and 2, advancing the simulation by one step between prints.
  */
class MyMACTester(c: TheMACModule) extends PeekPokeTester(c) {
  val in_a = 2
  val in_b = 3
  val in_c = 4

  // Drive the inputs once; they stay constant for the whole run.
  Seq(c.io.a -> in_a, c.io.b -> in_b, c.io.c -> in_c).foreach {
    case (port, value) => poke(port, value)
  }

  for (clock <- 0 to 2) {
    // Clock 0 is sampled before any step; each later clock follows exactly one step.
    if (clock > 0) step(1)
    println(s"clock ${clock}, out_reg0 = ${peek(c.io.out_reg0)}")
    println(s"         out_reg1 = ${peek(c.io.out_reg1)}")
    println(s"         out_reg2 = ${peek(c.io.out_reg2)}")
  }
}
// Build TheMACModule, run MyMACTester against it, and assert on the Boolean that Driver returns.
assert(Driver(() => new TheMACModule) {c => new MyMACTester(c)})
[info] [0.007] clock 0, out_reg0 = 10 // without pipe
[info] [0.007]          out_reg1 = 4
[info] [0.008]          out_reg2 = 0
[info] [0.009] clock 1, out_reg0 = 10
[info] [0.010]          out_reg1 = 10 // two stages pipe
[info] [0.010]          out_reg2 = 4
[info] [0.012] clock 2, out_reg0 = 10
[info] [0.012]          out_reg1 = 10
[info] [0.013]          out_reg2 = 10 // three stages pipe

MEM Read Test

/**
  * Read-latency comparison of three storage primitives holding the same three 4-bit values:
  *  - out_vec(0..2): SyncReadMem (`sram_te`)
  *  - out_vec(3..5): Mem (`mem_te`)
  *  - out_vec(6..8): a register vector reset to 0 (`reg_te`)
  *
  * Control:
  *  - rst high    : entries 0..2 of every store are written with 0
  *  - r_or_w low  : entries 0..2 of every store are written with in_vec(0..2)
  *  - r_or_w high : nothing is written (unless rst is high)
  *
  * rst takes priority over a data write. The two cases share a single write per
  * location: issuing them as separate memory writes would put two write ports on the
  * same address in the same cycle, and the stored value is then undefined.
  */
class TheMACModule extends Module {
  val io = IO(new Bundle {
    val r_or_w  = Input(Bool())
    val rst  = Input(Bool())
    val in_vec = Input(Vec(3, UInt(4.W)))
    val out_vec = Output(Vec(9, UInt(4.W)))
  })

  val sram_te = SyncReadMem(3, UInt(4.W))
  // Declared with 10 entries, but only addresses 0..2 are ever accessed.
  val mem_te = Mem(10, UInt(4.W))
  val reg_te = RegInit(VecInit(Seq.fill(3)(0.U(4.W))))
  val wire_vec = Wire(Vec(9, UInt(4.W)))

  // A location is written on reset or whenever the module is in write mode.
  val writeEnable = io.rst || !io.r_or_w

  for (i <- 0 until 3) {
    // Reads are unconditional so that the outputs show the raw behaviour of each store.
    wire_vec(i) := sram_te(i.U)
    wire_vec(i+3) := mem_te(i.U)
    wire_vec(i+6) := reg_te(i.U)

    // Reset wins over incoming data, matching the last-connect behaviour the
    // register vector already had when both conditions were true.
    val writeData = Mux(io.rst, 0.U, io.in_vec(i))
    when (writeEnable) {
      sram_te.write(i.U, writeData)
      mem_te.write(i.U, writeData)
      reg_te(i.U) := writeData
    }
  }

  // Plain one-directional connection from the internal wires to the output port.
  io.out_vec := wire_vec
}
/**
  * Exercises the memory read test: one reset cycle, a write phase that stores 2, 3, 4,
  * then a read phase, dumping all nine outputs at each observed cycle.
  */
class MyMACTester(c: TheMACModule) extends PeekPokeTester(c) {
  val in_a = 2
  val in_b = 3
  val in_c = 4

  /** Prints a banner for `cyclen` followed by every entry of out_vec; always returns true.B. */
  def print_out(cyclen: Int): Bool = {
    println(s"------- cycle ${cyclen} ---------")
    (0 until 9).foreach { idx =>
      println(s"    out_vec(${idx}) = ${peek(c.io.out_vec(idx))}")
    }
    true.B
  }

  // Reset phase: hold rst high for one cycle.
  poke(c.io.rst, true.B)
  step(1)

  // Write phase: release rst, select write mode and present the three values.
  println("------ write begin ------")
  poke(c.io.r_or_w, false.B)
  poke(c.io.rst, false.B)
  Seq(in_a, in_b, in_c).zipWithIndex.foreach {
    case (value, idx) => poke(c.io.in_vec(idx), value)
  }

  print_out(1)
  step(1)
  print_out(2)
  step(2)

  // Read phase: switch to read mode and watch cycles 3 through 7.
  println("------ read begin -------")
  poke(c.io.r_or_w, true.B)
  (3 to 7).foreach { cycle =>
    print_out(cycle)
    step(1)
  }
}
// Build TheMACModule, run MyMACTester against it, and assert on the Boolean that Driver returns.
assert(Driver(() => new TheMACModule) {c => new MyMACTester(c)})
[info] [0.003] ------ write begin ------
[info] [0.003] ------- cycle 1 ---------
[info] [0.007]     out_vec(0) = 0
[info] [0.008]     out_vec(1) = 0
[info] [0.008]     out_vec(2) = 0
[info] [0.008]     out_vec(3) = 0
[info] [0.009]     out_vec(4) = 0
[info] [0.009]     out_vec(5) = 0
[info] [0.009]     out_vec(6) = 0
[info] [0.009]     out_vec(7) = 0
[info] [0.010]     out_vec(8) = 0
[info] [0.012] ------ read begin -------
[info] [0.013] ------- cycle 2 ---------
[info] [0.015]     out_vec(0) = 0
[info] [0.016]     out_vec(1) = 0
[info] [0.016]     out_vec(2) = 0
[info] [0.018]     out_vec(3) = 2 // Mem can read
[info] [0.018]     out_vec(4) = 3 // with a cycle
[info] [0.018]     out_vec(5) = 4
[info] [0.018]     out_vec(6) = 2 // Reg file is
[info] [0.018]     out_vec(7) = 3 // the same as
[info] [0.018]     out_vec(8) = 4 // Mem
[info] [0.020] ------- cycle 3 ---------
[info] [0.020]     out_vec(0) = 2 // Sram has to
[info] [0.020]     out_vec(1) = 3 // wait a cycle
[info] [0.020]     out_vec(2) = 4
[info] [0.020]     out_vec(3) = 2
[info] [0.020]     out_vec(4) = 3
[info] [0.020]     out_vec(5) = 4
[info] [0.020]     out_vec(6) = 2
[info] [0.020]     out_vec(7) = 3
[info] [0.020]     out_vec(8) = 4

So we'd better use a Mux:

// Read side: every output is forced to 0 unless r_or_w selects read mode.
// The memory reads themselves stay unconditional; only their result is muxed.
val readMode = io.r_or_w
(0 until 3).foreach { idx =>
  val addr = idx.U
  wire_vec(idx) := Mux(readMode, sram_te.read(addr), 0.U)
  wire_vec(idx + 3) := Mux(readMode, mem_te(addr), 0.U)
  wire_vec(idx + 6) := Mux(readMode, reg_te(addr), 0.U)
}

// Write side: while r_or_w is low, store in_vec(0..2) into all three stores.
(0 until 3).foreach { idx =>
  when (!readMode) {
    sram_te.write(idx.U, io.in_vec(idx))
    mem_te.write(idx.U, io.in_vec(idx))
    reg_te(idx.U) := io.in_vec(idx)
  }
}
[info] [0.002] ------ write begin ------
[info] [0.003] ------- cycle 1 ---------
[info] [0.004]     out_vec(0) = 0
[info] [0.004]     out_vec(1) = 0
[info] [0.004]     out_vec(2) = 0
[info] [0.004]     out_vec(3) = 0
[info] [0.004]     out_vec(4) = 0
[info] [0.005]     out_vec(5) = 0
[info] [0.005]     out_vec(6) = 0
[info] [0.005]     out_vec(7) = 0
[info] [0.005]     out_vec(8) = 0
[info] [0.005] ------- cycle 2 ---------
[info] [0.006]     out_vec(0) = 0
[info] [0.006]     out_vec(1) = 0
[info] [0.006]     out_vec(2) = 0
[info] [0.006]     out_vec(3) = 0
[info] [0.006]     out_vec(4) = 0
[info] [0.006]     out_vec(5) = 0
[info] [0.006]     out_vec(6) = 0
[info] [0.006]     out_vec(7) = 0
[info] [0.006]     out_vec(8) = 0
[info] [0.010] ------- cycle 3 ---------
[info] [0.013]     out_vec(0) = 0
[info] [0.016]     out_vec(1) = 0
[info] [0.028]     out_vec(2) = 0
[info] [0.040]     out_vec(3) = 0
[info] [0.045]     out_vec(4) = 0
[info] [0.047]     out_vec(5) = 0
[info] [0.052]     out_vec(6) = 0
[info] [0.054]     out_vec(7) = 0
[info] [0.056]     out_vec(8) = 0
[info] [0.059] ------ read begin -------
[info] [0.060] ------- cycle 4 ---------
[info] [0.062]     out_vec(0) = 14
[info] [0.063]     out_vec(1) = 7
[info] [0.070]     out_vec(2) = 9
[info] [0.080]     out_vec(3) = 2
[info] [0.086]     out_vec(4) = 3
[info] [0.087]     out_vec(5) = 4
[info] [0.088]     out_vec(6) = 2
[info] [0.090]     out_vec(7) = 3
[info] [0.095]     out_vec(8) = 4
[info] [0.099] ------- cycle 5 ---------
[info] [0.102]     out_vec(0) = 2
[info] [0.108]     out_vec(1) = 3
[info] [0.108]     out_vec(2) = 4
[info] [0.111]     out_vec(3) = 2
[info] [0.113]     out_vec(4) = 3
[info] [0.114]     out_vec(5) = 4
[info] [0.120]     out_vec(6) = 2
[info] [0.121]     out_vec(7) = 3
[info] [0.127]     out_vec(8) = 4

Reading After Reading From Another Mem

/**
  * Chained-read experiment: a value read from a register vector is used as the
  * address for a read of a SyncReadMem.
  *
  *  - out_vec(3..5): reg_te(0..2), i.e. the addresses (0 while r_or_w is low)
  *  - out_vec(0..2): sram_te read at those addresses (0 while r_or_w is low)
  *
  * Control:
  *  - rst high    : sram_te(0..2) and reg_te(0..2) are written with 0
  *  - r_or_w low  : sram_te(i) takes in_vec(i), reg_te(i) takes in_vec(i+3)
  *  - r_or_w high : nothing is written (unless rst is high)
  *
  * rst takes priority over a data write. The two cases share a single write per
  * location: issuing them as separate memory writes would put two write ports on the
  * same address in the same cycle, and the stored value is then undefined.
  */
class TheMACModule extends Module {
  val io = IO(new Bundle {
    val r_or_w  = Input(Bool())
    val rst  = Input(Bool())
    val in_vec = Input(Vec(6, UInt(4.W)))
    val out_vec = Output(Vec(6, UInt(4.W)))
  })

  val sram_te = SyncReadMem(3, UInt(4.W))
  val reg_te = RegInit(VecInit(Seq.fill(3)(0.U(4.W))))
  val wire_vec = Wire(Vec(6, UInt(4.W)))

  // A location is written on reset or whenever the module is in write mode.
  val writeEnable = io.rst || !io.r_or_w

  for (i <- 0 until 3) {
    // The address comes from the (muxed) register output three slots further along.
    wire_vec(i) := Mux(io.r_or_w, sram_te.read(wire_vec(i+3)), 0.U)
    wire_vec(i+3) := Mux(io.r_or_w, reg_te(i.U), 0.U)

    // Reset wins over incoming data, matching the last-connect behaviour the
    // register vector already had when both conditions were true.
    val sramWriteData = Mux(io.rst, 0.U, io.in_vec(i))
    val regWriteData = Mux(io.rst, 0.U, io.in_vec(i+3))
    when (writeEnable) {
      sram_te.write(i.U, sramWriteData)
      reg_te(i.U) := regWriteData
    }
  }

  io.out_vec := wire_vec
}
/**
  * Exercises the chained-read module: one reset cycle, a three-cycle write phase that
  * stores data 5, 7, 9 and addresses 0, 1, 2, then a read phase observed for five cycles.
  */
class MyMACTester(c: TheMACModule) extends PeekPokeTester(c) {
  // Values poked into in_vec(3..5).
  val in_a = 0
  val in_b = 1
  val in_c = 2

  // Values poked into in_vec(0..2).
  val in_d = 5
  val in_e = 7
  val in_f = 9

  /** Prints a banner for `cyclen` followed by every entry of out_vec; always returns true.B. */
  def print_out(cyclen: Int): Bool = {
    println(s"------- cycle ${cyclen} ---------")
    (0 until 6).foreach { idx =>
      println(s"    out_vec(${idx}) = ${peek(c.io.out_vec(idx))}")
    }
    true.B
  }

  // Reset phase: hold rst high for one cycle.
  poke(c.io.rst, true.B)
  step(1)

  // Write phase: release rst, select write mode and present all six inputs.
  println("------ write begin ------")
  poke(c.io.r_or_w, false.B)
  poke(c.io.rst, false.B)
  Seq(in_d, in_e, in_f, in_a, in_b, in_c).zipWithIndex.foreach {
    case (value, idx) => poke(c.io.in_vec(idx), value)
  }

  (1 to 3).foreach { cycle =>
    print_out(cycle)
    step(1)
  }

  // Read phase: switch to read mode and watch cycles 4 through 8.
  println("------ read begin -------")
  poke(c.io.r_or_w, true.B)
  (4 to 8).foreach { cycle =>
    print_out(cycle)
    step(1)
  }
}
// Build TheMACModule, run MyMACTester against it, and assert on the Boolean that Driver returns.
assert(Driver(() => new TheMACModule) {c => new MyMACTester(c)})
[info] [0.001] ------ write begin ------
[info] [0.001] ------- cycle 1 ---------
[info] [0.002]     out_vec(0) = 0
[info] [0.002]     out_vec(1) = 0
[info] [0.002]     out_vec(2) = 0
[info] [0.003]     out_vec(3) = 0
[info] [0.003]     out_vec(4) = 0
[info] [0.003]     out_vec(5) = 0
[info] [0.003] ------- cycle 2 ---------
[info] [0.003]     out_vec(0) = 0
[info] [0.004]     out_vec(1) = 0
[info] [0.004]     out_vec(2) = 0
[info] [0.004]     out_vec(3) = 0
[info] [0.004]     out_vec(4) = 0
[info] [0.004]     out_vec(5) = 0
[info] [0.004] ------- cycle 3 ---------
[info] [0.004]     out_vec(0) = 0
[info] [0.004]     out_vec(1) = 0
[info] [0.005]     out_vec(2) = 0
[info] [0.005]     out_vec(3) = 0
[info] [0.005]     out_vec(4) = 0
[info] [0.009]     out_vec(5) = 0
[info] [0.018] ------ read begin -------
[info] [0.018] ------- cycle 4 ---------
[info] [0.021]     out_vec(0) = 5
[info] [0.022]     out_vec(1) = 5
[info] [0.023]     out_vec(2) = 5
[info] [0.024]     out_vec(3) = 0 // get address
[info] [0.026]     out_vec(4) = 1
[info] [0.034]     out_vec(5) = 2
[info] [0.036] ------- cycle 5 ---------
[info] [0.036]     out_vec(0) = 5 // get the data
[info] [0.037]     out_vec(1) = 7
[info] [0.037]     out_vec(2) = 9
[info] [0.038]     out_vec(3) = 0
[info] [0.039]     out_vec(4) = 1
[info] [0.044]     out_vec(5) = 2

If we apply RegNext to the Mux select for the SRAM read, the output is cleaner:

// The select is r_or_w delayed by one cycle (RegNext), so this output stays 0
// during the first cycle after r_or_w goes high and only then passes the SRAM read data.
wire_vec(i) := Mux(RegNext(io.r_or_w), sram_te.read(wire_vec(i+3)), 0.U)
[info] [0.045] ------ read begin -------
[info] [0.049] ------- cycle 4 ---------
[info] [0.050]     out_vec(0) = 0
[info] [0.050]     out_vec(1) = 0
[info] [0.050]     out_vec(2) = 0
[info] [0.051]     out_vec(3) = 2
[info] [0.055]     out_vec(4) = 1
[info] [0.055]     out_vec(5) = 0
[info] [0.057] ------- cycle 5 ---------
[info] [0.057]     out_vec(0) = 9
[info] [0.058]     out_vec(1) = 7
[info] [0.059]     out_vec(2) = 5
[info] [0.059]     out_vec(3) = 2
[info] [0.060]     out_vec(4) = 1
[info] [0.069]     out_vec(5) = 0

But this is not friendly to data writes!

Read Part of UInt

		// Example: split a UInt into a hardware Vec of Bools.
		// VecInit (rather than the deprecated Vec(...) constructor form) builds the Vec
		// from existing signals; element 0 is the least-significant bit.
		val uint = 0xc.U
		val vec = VecInit(uint.toBools)
		printf(p"$vec") // Vec(0, 0, 1, 1)

		// Test: 0xc is 1100 in binary, so bits 0 and 1 are clear and bits 2 and 3 are set.
		assert(vec(0) === false.B)
		assert(vec(1) === false.B)
		assert(vec(2) === true.B)
		assert(vec(3) === true.B)

		// Example: pack a hardware Vec of Bools into a UInt.
		// VecInit (rather than the deprecated Vec(...) constructor form) builds the Vec
		// from the listed Bool literals.
		val vec = VecInit(true.B, false.B, true.B, true.B)
		val uint = vec.asUInt
		printf(p"$uint") // 13
		
		/* Test
		 *
		 * (remember leftmost Bool in Vec is low order bit)
		 * so (1, 0, 1, 1) read from bit 0 upwards is 1101 in binary = 0xd
		 */
		assert(0xd.U === uint)

// Split the packed scratch-pad word into its data and row fields.
// toBools yields bits LSB-first, whereas Cat places its FIRST argument at the MSB, so each
// slice has to be reversed before concatenation or the field comes out bit-reversed.
// NOTE(review): assumes the word is packed as {data[7:0], row[3:0]} (data in the upper
// 8 bits, row index in the lower 4 bits) — confirm against the writer of readOutData.
val iactDataCountVec: Seq[Bool] = iactDataSPad.io.commonIO.readOutData.toBools
// Upper 8 bits, restored to MSB-first order.
io.iactMatrixData := Cat(iactDataCountVec.takeRight(8).reverse).asUInt
// Lower 4 bits, restored to MSB-first order.
io.iactMatrixRow := Cat(iactDataCountVec.take(4).reverse).asUInt

Combine Two Integers as Bits

// Twelve (value, index) pairs: the even numbers 0, 2, ..., 22, each paired with its position 0..11.
val inDataWithIndex: Seq[(Int, Int)] = Seq.tabulate(12)(position => (2 * position, position))
/**
  * Renders `i` as a binary string, left-padded with '0' to at least `digits` characters.
  *
  * `digits` is a minimum width only: a value whose binary form is longer than `digits`
  * is returned in full, neither padded nor truncated.
  *
  * @param i      the integer to render
  * @param digits the minimum number of characters in the result (default 8)
  * @return the zero-padded binary representation of `i`
  */
def toBinary(i: Int, digits: Int = 8): String = {
  // Right-align in a field of `digits` characters (space-filled), then turn the fill into zeros.
  val rightAligned = s"%${digits}s".format(i.toBinaryString)
  rightAligned.replace(' ', '0')
}
// Pack each (value, index) pair into one bit string: 8 bits of value followed by 4 bits of index.
val inDataCountBinary = inDataWithIndex.map { case (value, index) =>
  toBinary(value, 8) + toBinary(index, 4)
}
// The same packed words as plain integers.
val inDataCountDec = inDataCountBinary.map(Integer.parseInt(_, 2))
// For every word, print the decimal form and then the binary form.
inDataCountDec.zip(inDataCountBinary).foreach { case (decimal, binary) =>
  println(decimal)
  println(binary)
}