[Weekly Review] 2020/01/13-19
2020/01/13-19
This week I read another paper on Eyeriss v2 and learned about the new PE architecture it uses, so I changed my design to fit it. Because of the complexity of the CSC encoding format it uses, I spent almost two days working out the CSC read-control logic and hence had no time for the software analysis system. Today I came up with a more hardware-friendly CSC format, and I will work on it. Currently, this PE project has passed two basic read and write tests.
Next week, I have to finish the PE with a test file, and then try to establish the software analysis system.
Chisel Syntax
MAC module Delay
/** Multiply-accumulate demo exposing `a * b + c` at three different register depths.
  *
  *  - `out_reg0` is driven combinationally from the inputs.
  *  - `out_reg1` adds `c` to `a_reg`, which holds the product sampled on the previous clock edge.
  *  - `out_reg2` is `b_reg`, which holds `a_reg + c` sampled on the previous clock edge.
  *
  * Every port and register is 4 bits wide, so results needing more than 4 bits do not fit.
  */
class TheMACModule extends Module {
  val io = IO(new Bundle {
    val a        = Input(UInt(4.W))
    val b        = Input(UInt(4.W))
    val c        = Input(UInt(4.W))
    val out_reg0 = Output(UInt(4.W))
    val out_reg1 = Output(UInt(4.W))
    val out_reg2 = Output(UInt(4.W))
  })

  // Both pipeline registers come out of reset holding zero.
  val a_reg = RegInit(0.U(4.W))
  val b_reg = RegInit(0.U(4.W))

  // Output taps, listed from the most-registered path to the purely combinational one.
  io.out_reg2 := b_reg
  io.out_reg1 := a_reg + io.c
  io.out_reg0 := io.a * io.b + io.c

  // Next-state logic: a_reg captures the product, b_reg captures the accumulated sum.
  b_reg := a_reg + io.c
  a_reg := io.a * io.b
}
/** Drives [[TheMACModule]] with constant inputs and observes its three outputs over three
  * clock cycles.
  *
  * Besides printing the observed values, each cycle is now checked with `expect`, so the
  * tester (and the surrounding `assert(Driver(...))`) actually fails when the pipeline
  * behaves differently from what the module's logic implies.
  *
  * @param c the module under test
  */
class MyMACTester(c: TheMACModule) extends PeekPokeTester(c) {
  val in_a = 2
  val in_b = 3
  val in_c = 4

  // Full multiply-accumulate result for the constant stimulus (2 * 3 + 4 = 10).
  private val fullMac = in_a * in_b + in_c

  /** Prints the three outputs for `clock` and checks them against the expected values. */
  private def report(clock: Int, want0: Int, want1: Int, want2: Int): Unit = {
    println(s"clock ${clock}, out_reg0 = ${peek(c.io.out_reg0)}")
    println(s" out_reg1 = ${peek(c.io.out_reg1)}")
    println(s" out_reg2 = ${peek(c.io.out_reg2)}")
    expect(c.io.out_reg0, want0, s"out_reg0 mismatch at clock ${clock}")
    expect(c.io.out_reg1, want1, s"out_reg1 mismatch at clock ${clock}")
    expect(c.io.out_reg2, want2, s"out_reg2 mismatch at clock ${clock}")
  }

  poke(c.io.a, in_a)
  poke(c.io.b, in_b)
  poke(c.io.c, in_c)

  // Before any clock edge both registers still hold 0: out_reg1 = 0 + c, out_reg2 = 0.
  report(0, fullMac, in_c, 0)
  step(1)
  // a_reg now holds a * b, and b_reg holds the old a_reg (0) + c.
  report(1, fullMac, fullMac, in_c)
  step(1)
  // b_reg has caught up: every tap shows the full result.
  report(2, fullMac, fullMac, fullMac)
}
// Elaborate TheMACModule, run MyMACTester against it, and assert on the Boolean the Driver returns.
assert(Driver(() => new TheMACModule)(c => new MyMACTester(c)))
[info] [0.007] clock 0, out_reg0 = 10 // without pipe
[info] [0.007] out_reg1 = 4
[info] [0.008] out_reg2 = 0
[info] [0.009] clock 1, out_reg0 = 10
[info] [0.010] out_reg1 = 10 // two stages pipe
[info] [0.010] out_reg2 = 4
[info] [0.012] clock 2, out_reg0 = 10
[info] [0.012] out_reg1 = 10
[info] [0.013] out_reg2 = 10 // three stages pipe
MEM Read Test
/** Side-by-side read-latency comparison of three storage kinds holding the same three values.
  *
  * `out_vec(0..2)` reads a `SyncReadMem`, `out_vec(3..5)` reads a `Mem`, and `out_vec(6..8)`
  * reads a register vector. While `r_or_w` is low, `in_vec` is written into all three; while
  * `rst` is high, zeros are written into all three.
  */
class TheMACModule extends Module {
  val io = IO(new Bundle {
    val r_or_w  = Input(Bool())
    val rst     = Input(Bool())
    //val r_after_w = Input(Bool())
    val in_vec  = Input(Vec(3, UInt(4.W)))
    val out_vec = Output(Vec(9, UInt(4.W)))
  })
  /*val a_reg = RegInit(0.U(4.W))
  val b_reg = RegInit(0.U(4.W))*/

  // The three storage elements under comparison.
  val sram_te = SyncReadMem(3, UInt(4.W))
  val mem_te  = Mem(10, UInt(4.W))
  val reg_te  = RegInit(VecInit(Seq.fill(3)(0.U(4.W))))

  // Intermediate wires collecting one read port per entry of each storage element.
  val wire_vec = Wire(Vec(9, UInt(4.W)))
  (0 until 3).foreach { slot =>
    wire_vec(slot)     := sram_te(slot.U)
    wire_vec(slot + 3) := mem_te(slot.U)
    wire_vec(slot + 6) := reg_te(slot.U)
  }

  // Write phase: r_or_w low stores in_vec into every storage element.
  when (!io.r_or_w) {
    (0 until 3).foreach { slot =>
      sram_te.write(slot.U, io.in_vec(slot))
      mem_te.write(slot.U, io.in_vec(slot))
      reg_te(slot.U) := io.in_vec(slot)
    }
  }

  // Clear phase: kept after the write block so that, for reg_te, the later connection
  // (the clear) is the one that applies when both conditions hold.
  when (io.rst) {
    (0 until 3).foreach { slot =>
      sram_te.write(slot.U, 0.U)
      mem_te.write(slot.U, 0.U)
      reg_te(slot.U) := 0.U
    }
  }

  io.out_vec <> wire_vec
}
/** Clears the storage of [[TheMACModule]], writes three values, then switches to read mode,
  * printing every `out_vec` element along the way.
  *
  * @param c the module under test
  */
class MyMACTester(c: TheMACModule) extends PeekPokeTester(c) {
  val in_a = 2
  val in_b = 3
  val in_c = 4

  /** Prints a banner for `cyclen` followed by all nine `out_vec` values; always yields `true.B`. */
  def print_out(cyclen: Int): Bool = {
    println(s"------- cycle ${cyclen} ---------")
    (0 until 9).foreach { idx =>
      println(s" out_vec(${idx}) = ${peek(c.io.out_vec(idx))}")
    }
    true.B
  }

  // Hold rst high across one clock edge to zero the storage.
  poke(c.io.rst, true.B) // reset the memory
  step(1)

  println("------ write begin ------")
  poke(c.io.r_or_w, false.B)
  poke(c.io.rst, false.B)
  //poke(c.io.r_after_w, false.B)
  Seq(in_a, in_b, in_c).zipWithIndex.foreach { case (value, idx) =>
    poke(c.io.in_vec(idx), value)
  }
  print_out(1)
  step(1)
  print_out(2)
  step(2)

  println("------ read begin -------")
  poke(c.io.r_or_w, true.B)
  // Five observations labelled 3 through 7, one clock apart.
  (3 to 7).foreach { cycle =>
    print_out(cycle)
    step(1)
  }
}
// Elaborate TheMACModule, run MyMACTester against it, and assert on the Boolean the Driver returns.
assert(Driver(() => new TheMACModule)(c => new MyMACTester(c)))
[info] [0.003] ------ write begin ------
[info] [0.003] ------- cycle 1 ---------
[info] [0.007] out_vec(0) = 0
[info] [0.008] out_vec(1) = 0
[info] [0.008] out_vec(2) = 0
[info] [0.008] out_vec(3) = 0
[info] [0.009] out_vec(4) = 0
[info] [0.009] out_vec(5) = 0
[info] [0.009] out_vec(6) = 0
[info] [0.009] out_vec(7) = 0
[info] [0.010] out_vec(8) = 0
[info] [0.012] ------ read begin -------
[info] [0.013] ------- cycle 2 ---------
[info] [0.015] out_vec(0) = 0
[info] [0.016] out_vec(1) = 0
[info] [0.016] out_vec(2) = 0
[info] [0.018] out_vec(3) = 2 // Mem can read
[info] [0.018] out_vec(4) = 3 // with a cycle
[info] [0.018] out_vec(5) = 4
[info] [0.018] out_vec(6) = 2 // Reg file is
[info] [0.018] out_vec(7) = 3 // the same as
[info] [0.018] out_vec(8) = 4 // Mem
[info] [0.020] ------- cycle 3 ---------
[info] [0.020] out_vec(0) = 2 // Sram has to
[info] [0.020] out_vec(1) = 3 // wait a cycle
[info] [0.020] out_vec(2) = 4
[info] [0.020] out_vec(3) = 2
[info] [0.020] out_vec(4) = 3
[info] [0.020] out_vec(5) = 4
[info] [0.020] out_vec(6) = 2
[info] [0.020] out_vec(7) = 3
[info] [0.020] out_vec(8) = 4
So we'd better use a `Mux`:
// Gate every read path with r_or_w: outputs show the stored value only in read mode
// and are forced to 0 otherwise.
(0 until 3).foreach { slot =>
  wire_vec(slot)     := Mux(io.r_or_w, sram_te.read(slot.U), 0.U)
  wire_vec(slot + 3) := Mux(io.r_or_w, mem_te(slot.U), 0.U)
  wire_vec(slot + 6) := Mux(io.r_or_w, reg_te(slot.U), 0.U)
}

// Write mode (r_or_w low): store in_vec into all three storage elements.
when (!io.r_or_w) {
  (0 until 3).foreach { slot =>
    sram_te.write(slot.U, io.in_vec(slot))
    mem_te.write(slot.U, io.in_vec(slot))
    reg_te(slot.U) := io.in_vec(slot)
  }
}
[info] [0.002] ------ write begin ------
[info] [0.003] ------- cycle 1 ---------
[info] [0.004] out_vec(0) = 0
[info] [0.004] out_vec(1) = 0
[info] [0.004] out_vec(2) = 0
[info] [0.004] out_vec(3) = 0
[info] [0.004] out_vec(4) = 0
[info] [0.005] out_vec(5) = 0
[info] [0.005] out_vec(6) = 0
[info] [0.005] out_vec(7) = 0
[info] [0.005] out_vec(8) = 0
[info] [0.005] ------- cycle 2 ---------
[info] [0.006] out_vec(0) = 0
[info] [0.006] out_vec(1) = 0
[info] [0.006] out_vec(2) = 0
[info] [0.006] out_vec(3) = 0
[info] [0.006] out_vec(4) = 0
[info] [0.006] out_vec(5) = 0
[info] [0.006] out_vec(6) = 0
[info] [0.006] out_vec(7) = 0
[info] [0.006] out_vec(8) = 0
[info] [0.010] ------- cycle 3 ---------
[info] [0.013] out_vec(0) = 0
[info] [0.016] out_vec(1) = 0
[info] [0.028] out_vec(2) = 0
[info] [0.040] out_vec(3) = 0
[info] [0.045] out_vec(4) = 0
[info] [0.047] out_vec(5) = 0
[info] [0.052] out_vec(6) = 0
[info] [0.054] out_vec(7) = 0
[info] [0.056] out_vec(8) = 0
[info] [0.059] ------ read begin -------
[info] [0.060] ------- cycle 4 ---------
[info] [0.062] out_vec(0) = 14
[info] [0.063] out_vec(1) = 7
[info] [0.070] out_vec(2) = 9
[info] [0.080] out_vec(3) = 2
[info] [0.086] out_vec(4) = 3
[info] [0.087] out_vec(5) = 4
[info] [0.088] out_vec(6) = 2
[info] [0.090] out_vec(7) = 3
[info] [0.095] out_vec(8) = 4
[info] [0.099] ------- cycle 5 ---------
[info] [0.102] out_vec(0) = 2
[info] [0.108] out_vec(1) = 3
[info] [0.108] out_vec(2) = 4
[info] [0.111] out_vec(3) = 2
[info] [0.113] out_vec(4) = 3
[info] [0.114] out_vec(5) = 4
[info] [0.120] out_vec(6) = 2
[info] [0.121] out_vec(7) = 3
[info] [0.127] out_vec(8) = 4
Reading After Reading From Another Mem
/** Chained read: values stored in a register vector are used as addresses into a `SyncReadMem`.
  *
  * `out_vec(3..5)` shows the addresses read from `reg_te`; `out_vec(0..2)` shows the
  * `sram_te` entries read at those addresses. Both halves are forced to 0 unless `r_or_w`
  * is high. While `r_or_w` is low, `in_vec(0..2)` is written to `sram_te` and `in_vec(3..5)`
  * to `reg_te`; while `rst` is high, zeros are written to both.
  */
class TheMACModule extends Module {
  val io = IO(new Bundle {
    val r_or_w  = Input(Bool())
    val rst     = Input(Bool())
    val in_vec  = Input(Vec(6, UInt(4.W)))
    val out_vec = Output(Vec(6, UInt(4.W)))
  })

  val sram_te  = SyncReadMem(3, UInt(4.W))
  val reg_te   = RegInit(VecInit(Seq.fill(3)(0.U(4.W))))
  val wire_vec = Wire(Vec(6, UInt(4.W)))

  (0 until 3).foreach { slot =>
    // The address for the memory read is the (gated) register value in the upper half.
    wire_vec(slot) := Mux(io.r_or_w, sram_te.read(wire_vec(slot + 3)), 0.U)
    //wire_vec(i) := Mux(false.B, sram_te.read(wire_vec(i+3)), 0.U)
    wire_vec(slot + 3) := Mux(io.r_or_w, reg_te((slot).U), 0.U)
  }

  // Write phase: data goes to the memory, addresses go to the register vector.
  when (!io.r_or_w) {
    (0 until 3).foreach { slot =>
      sram_te.write(slot.U, io.in_vec(slot))
      reg_te(slot.U) := io.in_vec(slot + 3)
    }
  }

  // Clear phase: kept after the write block so that, for reg_te, the later connection
  // (the clear) is the one that applies when both conditions hold.
  when (io.rst) {
    (0 until 3).foreach { slot =>
      sram_te.write(slot.U, 0.U)
      reg_te(slot.U) := 0.U
    }
  }

  // Element-wise hookup of the internal wires to the output port.
  io.out_vec.zip(wire_vec).foreach { case (port, value) => port := value }
}
/** Clears [[TheMACModule]], writes three data values and three addresses, then switches to
  * read mode, printing all six `out_vec` elements at each observation point.
  *
  * @param c the module under test
  */
class MyMACTester(c: TheMACModule) extends PeekPokeTester(c) {
  // in_a..in_c are poked into in_vec(3..5); in_d..in_f are poked into in_vec(0..2).
  val in_a = 0
  val in_b = 1
  val in_c = 2
  val in_d = 5
  val in_e = 7
  val in_f = 9

  /** Prints a banner for `cyclen` followed by all six `out_vec` values; always yields `true.B`. */
  def print_out(cyclen: Int): Bool = {
    println(s"------- cycle ${cyclen} ---------")
    (0 until 6).foreach { idx =>
      println(s" out_vec(${idx}) = ${peek(c.io.out_vec(idx))}")
    }
    true.B
  }

  // Hold rst high across one clock edge to zero the storage.
  poke(c.io.rst, true.B)
  step(1)

  println("------ write begin ------")
  poke(c.io.r_or_w, false.B)
  poke(c.io.rst, false.B)
  Seq(in_d, in_e, in_f, in_a, in_b, in_c).zipWithIndex.foreach { case (value, idx) =>
    poke(c.io.in_vec(idx), value)
  }
  // Three observations in write mode, one clock apart.
  (1 to 3).foreach { cycle =>
    print_out(cycle)
    step(1)
  }

  println("------ read begin -------")
  poke(c.io.r_or_w, true.B)
  // Five observations labelled 4 through 8, one clock apart.
  (4 to 8).foreach { cycle =>
    print_out(cycle)
    step(1)
  }
}
// Elaborate TheMACModule, run MyMACTester against it, and assert on the Boolean the Driver returns.
assert(Driver(() => new TheMACModule)(c => new MyMACTester(c)))
[info] [0.001] ------ write begin ------
[info] [0.001] ------- cycle 1 ---------
[info] [0.002] out_vec(0) = 0
[info] [0.002] out_vec(1) = 0
[info] [0.002] out_vec(2) = 0
[info] [0.003] out_vec(3) = 0
[info] [0.003] out_vec(4) = 0
[info] [0.003] out_vec(5) = 0
[info] [0.003] ------- cycle 2 ---------
[info] [0.003] out_vec(0) = 0
[info] [0.004] out_vec(1) = 0
[info] [0.004] out_vec(2) = 0
[info] [0.004] out_vec(3) = 0
[info] [0.004] out_vec(4) = 0
[info] [0.004] out_vec(5) = 0
[info] [0.004] ------- cycle 3 ---------
[info] [0.004] out_vec(0) = 0
[info] [0.004] out_vec(1) = 0
[info] [0.005] out_vec(2) = 0
[info] [0.005] out_vec(3) = 0
[info] [0.005] out_vec(4) = 0
[info] [0.009] out_vec(5) = 0
[info] [0.018] ------ read begin -------
[info] [0.018] ------- cycle 4 ---------
[info] [0.021] out_vec(0) = 5
[info] [0.022] out_vec(1) = 5
[info] [0.023] out_vec(2) = 5
[info] [0.024] out_vec(3) = 0 // get address
[info] [0.026] out_vec(4) = 1
[info] [0.034] out_vec(5) = 2
[info] [0.036] ------- cycle 5 ---------
[info] [0.036] out_vec(0) = 5 // get the data
[info] [0.037] out_vec(1) = 7
[info] [0.037] out_vec(2) = 9
[info] [0.038] out_vec(3) = 0
[info] [0.039] out_vec(4) = 1
[info] [0.044] out_vec(5) = 2
If we use `RegNext` in the `Mux` for the SRAM read, the result is better:
// RegNext delays the r_or_w select by one clock, so the Mux only switches over to the
// sram_te read data one cycle after r_or_w goes high (until then the output stays 0).
wire_vec(i) := Mux(RegNext(io.r_or_w), sram_te.read(wire_vec(i+3)), 0.U)
[info] [0.045] ------ read begin -------
[info] [0.049] ------- cycle 4 ---------
[info] [0.050] out_vec(0) = 0
[info] [0.050] out_vec(1) = 0
[info] [0.050] out_vec(2) = 0
[info] [0.051] out_vec(3) = 2
[info] [0.055] out_vec(4) = 1
[info] [0.055] out_vec(5) = 0
[info] [0.057] ------- cycle 5 ---------
[info] [0.057] out_vec(0) = 9
[info] [0.058] out_vec(1) = 7
[info] [0.059] out_vec(2) = 5
[info] [0.059] out_vec(3) = 2
[info] [0.060] out_vec(4) = 1
[info] [0.069] out_vec(5) = 0
But it is not friendly to data writes!
Read Part of UInt
// Example: split a UInt into a Vec of its individual bits.
// VecInit (rather than the hardware `Vec(...)` constructor) builds a Vec from existing
// hardware values; element 0 is the least-significant bit.
val uint = 0xc.U
val vec = VecInit(uint.toBools)
printf(p"$vec") // Vec(0, 0, 1, 1)
// Test: 0xc is binary 1100, so the two low-order elements are false.
assert(vec(0) === false.B)
assert(vec(1) === false.B)
assert(vec(2) === true.B)
assert(vec(3) === true.B)
// Example: pack a Vec of Bool into a UInt.
// VecInit (rather than the hardware `Vec(...)` constructor) builds a Vec from existing
// hardware values.
val vec = VecInit(true.B, false.B, true.B, true.B)
val uint = vec.asUInt
printf(p"$uint") // 13
/* Test
 *
 * (remember leftmost Bool in Vec is low order bit)
 */
assert(0xd.U === uint)
// Split the scratch-pad word into its data field and its row field.
// toBools yields the bits least-significant first, whereas Cat places the FIRST element of
// its argument in the most-significant position. Each slice is therefore reversed before
// Cat so the bits keep their original order instead of coming out mirrored.
// NOTE(review): assumes the word is packed as 8 data bits in the most-significant positions
// followed by 4 row bits in the least-significant positions; confirm against the writer side.
val iactDataCountVec: Seq[Bool] = iactDataSPad.io.commonIO.readOutData.toBools
// Top 8 bits of the word, most-significant bit first.
io.iactMatrixData := Cat(iactDataCountVec.takeRight(8).reverse).asUInt
// Bottom 4 bits of the word, most-significant bit first.
io.iactMatrixRow := Cat(iactDataCountVec.take(4).reverse).asUInt
Combine Two Integers as Bits
// Each data value is paired with its position in the sequence.
val inDataWithIndex = Seq(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22).zipWithIndex

/** Renders `i` in binary, left-padded with '0' to at least `digits` characters.
  * Values whose binary form is already longer than `digits` are returned unpadded.
  */
def toBinary(i: Int, digits: Int = 8): String =
  s"%${digits}s".format(i.toBinaryString).replace(' ', '0')

// 12-character bit strings: 8 bits of data followed by 4 bits of index.
val inDataCountBinary = inDataWithIndex.map { case (data, index) =>
  toBinary(data, 8) + toBinary(index, 4)
}

// The same packed words parsed back into integers.
val inDataCountDec = inDataCountBinary.map(Integer.parseInt(_, 2))

// Print each packed word as decimal, then as its bit string.
inDataCountDec.zip(inDataCountBinary).foreach { case (dec, bits) =>
  println(dec)
  println(bits)
}