Python 实现cholesky分解的Verilog综合
我在verilog中实现,遵循下面的python代码Python 实现cholesky分解的Verilog综合,python,verilog,synthesis,matrix-decomposition,Python,Verilog,Synthesis,Matrix Decomposition,我在verilog中实现,遵循下面的python代码 def cholesky(A): n = len(A) L = [[0.0] * n for i in xrange(n)] for i in xrange(n): for j in xrange(i+1): tmp_sum = sum(L[i][k] * L[j][k] for k in xrange(j)) if (i == j): # Diag
def cholesky(A):
    """Return the lower-triangular Cholesky factor L of a square matrix A.

    Computes L such that A == L * L^T using the Cholesky-Banachiewicz
    (row by row) scheme. Only the lower triangle of A (A[i][j] with j <= i)
    is read.

    Parameters:
        A: square matrix given as a list of n rows, each with n numbers.

    Returns:
        A new n x n list of lists of floats; entries above the diagonal
        are 0.0. An empty input yields an empty list.

    Raises:
        ValueError: from math.sqrt when a diagonal term A[i][i] - sum is
            negative (A is not positive definite).
        ZeroDivisionError: when a previously computed pivot L[j][j] is 0.
    """
    # Local import so the function is self-contained; the original snippet
    # used sqrt without bringing it into scope.
    from math import sqrt

    n = len(A)
    L = [[0.0] * n for _ in range(n)]
    for i in range(n):
        for j in range(i + 1):
            # Dot product of the already-computed parts of rows i and j.
            tmp_sum = sum(L[i][k] * L[j][k] for k in range(j))
            if i == j:  # Diagonal element
                L[i][j] = sqrt(A[i][i] - tmp_sum)
            else:
                L[i][j] = (1.0 / L[j][j] * (A[i][j] - tmp_sum))
    return L
我先尝试实现一个输入规模为 3x3 的简单版本。由于算法需要除法和平方根,我还用标准方法(从网上复制并稍作修改)编写了除法模块,并用牛顿法的一个变体编写了平方根(sqrt)模块。如下:
除法(Div)
这是我的 3x3 Cholesky 分解代码:
// 3x3 Cholesky decomposition (A = L * L^T), unrolled by hand from the
// reference loop:
//   L[i][i] = sqrt(A[i][i] - sum_k L[i][k]^2)
//   L[i][j] = (1 / L[j][j]) * (A[i][j] - sum_k L[i][k] * L[j][k])   (j < i)
//
// Ports:
//   clk, rst : clock and active-high asynchronous reset
//   g_input  : 144-bit bus; the lower triangle of A is taken from 16-bit
//              slices of it and zero-extended to 24 bits while rst is high
//   e_input  : declared but not used by this module
//   o        : {L00, L01, L02, L10, L11, L12, L20, L21, L22}, 24 bits each
//
// Fixes in this version:
//   * added the missing ';' after the tmp_A22_minus_sum declaration and the
//     missing 'begin' after 'if (rst)';
//   * the undeclared shared accumulator tmp_sum is gone: every element now
//     subtracts exactly the partial sum that belongs to it (the running
//     accumulator leaked terms of earlier elements into later ones);
//   * signals driven by Sqrt/Div instance outputs are wires and are no
//     longer also assigned in the always block (a reg may not be connected
//     to an instance output, and the double assignment created multiple
//     drivers);
//   * clocked registers use non-blocking assignments.
//
// Latency: the datapath is a chain of registers and combinational Sqrt/Div
// instances, so 'o' needs several clock edges after rst is released before
// every element has settled (five, assuming Sqrt and Div are purely
// combinational -- TODO confirm against their implementation).
//
// NOTE(review): all arithmetic is plain 24-bit unsigned with truncation. If
// Div is an integer divider, 1/L is 0 for every L > 1; confirm the
// fixed-point scaling expected by Sqrt and Div.
// NOTE(review): A is loaded from g_input inside the asynchronous reset
// branch, which asks the synthesizer for flops with a data-dependent
// asynchronous set/clear. This may be what triggers the TRANS-4 (FFGEN)
// warning; loading A synchronously is the usual alternative, but it changes
// the reset protocol, so it is left as is.
module cholesky_template(clk, rst, g_input, e_input, o);
    input          clk, rst;
    input  [143:0] g_input;
    input          e_input;
    output [215:0] o;

    // Input matrix; only the lower triangle is used.
    reg [23:0] A [0:2][0:2];

    // Diagonal of L: driven solely by the Sqrt instances.
    wire [23:0] L00, L11, L22;
    // Reciprocals of the pivots: driven solely by the Div instances.
    wire [23:0] div_1_L00, div_1_L11;
    // Off-diagonal (lower) elements of L: registered.
    reg  [23:0] L10, L20, L21;

    // Registered operands "A[i][j] - partial sum".
    reg [23:0] tmp_A00_minus_sum;
    reg [23:0] tmp_A11_minus_sum;
    reg [23:0] tmp_A22_minus_sum;
    reg [23:0] tmp_A10_minus_sum;
    reg [23:0] tmp_A20_minus_sum;
    reg [23:0] tmp_A21_minus_sum;

    // The upper triangle of L is identically zero.
    assign o = {
        L00,   24'b0, 24'b0,
        L10,   L11,   24'b0,
        L20,   L21,   L22
    };

    // Positional connections kept from the original; the last port of Sqrt
    // and Div is assumed to be the output -- TODO confirm.
    Sqrt sqrt0(tmp_A00_minus_sum, L00);
    Div  div0(1'b1, L00, div_1_L00);
    Sqrt sqrt1(tmp_A11_minus_sum, L11);
    Div  div1(1'b1, L11, div_1_L11);
    Sqrt sqrt2(tmp_A22_minus_sum, L22);

    always @(posedge clk or posedge rst) begin
        if (rst) begin
            L10 <= 24'b0;
            L20 <= 24'b0;
            L21 <= 24'b0;
            tmp_A00_minus_sum <= 24'b0;
            tmp_A10_minus_sum <= 24'b0;
            tmp_A20_minus_sum <= 24'b0;
            tmp_A11_minus_sum <= 24'b0;
            tmp_A21_minus_sum <= 24'b0;
            tmp_A22_minus_sum <= 24'b0;
            A[0][0] <= {8'b00000000, g_input[15:0]};
            A[0][1] <= 24'b0; // will not be used
            A[0][2] <= 24'b0; // will not be used
            A[1][0] <= {8'b00000000, g_input[63:48]};
            A[1][1] <= {8'b00000000, g_input[79:64]};
            A[1][2] <= 24'b0; // will not be used
            A[2][0] <= {8'b00000000, g_input[111:96]};
            A[2][1] <= {8'b00000000, g_input[127:112]};
            A[2][2] <= {8'b00000000, g_input[143:128]};
        end else begin
            // Column 0: no earlier columns, so the partial sums are zero.
            tmp_A00_minus_sum <= A[0][0];
            tmp_A10_minus_sum <= A[1][0];
            tmp_A20_minus_sum <= A[2][0];
            L10 <= div_1_L00 * tmp_A10_minus_sum;
            L20 <= div_1_L00 * tmp_A20_minus_sum;

            // Column 1: subtract the contribution of column 0.
            tmp_A11_minus_sum <= A[1][1] - L10 * L10;
            tmp_A21_minus_sum <= A[2][1] - L20 * L10;
            L21 <= div_1_L11 * tmp_A21_minus_sum;

            // Column 2: subtract the contributions of columns 0 and 1.
            tmp_A22_minus_sum <= A[2][2] - (L20 * L20 + L21 * L21);
        end
    end
endmodule
关于代码的一些解释:我没有使用 for 循环,而是把循环展开成类似 tmp_A10_minus_sum = A[1][0] - tmp_sum 的语句,应该很容易与 Python 代码一一对应。在 A 的每个元素
之前补 8 个零,是因为我之后打算把代码“升级”为 24 位运算,以获得更高的精度。这不是问题所在。
三态总线警告
问题是,当我使用 Synopsys DC 编译它时,它会输出如下警告:
“警告:在设计‘cholesky_template’中,三态总线‘tmp_A00_minus_sum[23]’具有非三态驱动器‘tmp_A00_minus_sum[23]/Q’。(LINT-34)”
这是 DC 对 LINT-34 的说明:
名称
LINT-34(警告)在设计“%s”中,三态总线“%s”具有非三态驱动器“%s”。
描述
Synopsys 库中包含对元件上三态驱动引脚的描述。如果一个网络(net)
至少被一个具有这种三态属性的引脚驱动,Synopsys 工具就会把它归类为三态网络。
通常,如果这样的网络上存在多个驱动器,则认为所有驱动引脚都应当是三态驱动器,
三态总线才能正常工作。此警告消息表示:在一个三态网络上至少存在一个非三态驱动器。
下一步
请确认这确实是您对该网络的设计意图。如果消息中指出的非三态驱动引脚
在您的 ASIC 工艺中实际上是三态驱动器,请检查工艺库中的描述是否正确。
为什么设计中会出现三态属性?我该如何修正?
目标库不包含寄存器的替换项
这是我得到的另一类警告,例如:
警告:目标库不包含寄存器“A_reg[1][0][7]”的替换项(**FFGEN**)。(TRANS-4)
这是我的库代码。我想知道它是否与三态总线警告有关?如果有关,有没有关于如何正确设计这些单元的参考资料?
library(HML){
/* 2-input AND gate: Z = A AND B (in a Liberty function string a blank
   between two operands denotes AND). */
cell(AND) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "A B";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.48;
      intrinsic_fall : 0.77;
      rise_resistance : 0.1443;
      fall_resistance : 0.0523;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.48;
      intrinsic_fall : 0.77;
      rise_resistance : 0.1443;
      fall_resistance : 0.0523;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input OR gate: Z = A OR B. */
cell(OR) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "A+B";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input XOR gate: Z = A XOR B.
   Fix: the capacitance attribute of pin B was missing its terminating ';'.
   NOTE(review): area is 0 here while the other 2-input gates use 6; kept
   unchanged on the assumption that it is a deliberate cost weighting --
   confirm. */
cell(XOR) {
  area : 0;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "A^B";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input NAND gate: Z = NOT (A AND B).
   Fix: the capacitance attribute of pin B was missing its terminating ';'. */
cell(NAND) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "(A B)'";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input NOR gate: Z = NOT (A OR B).
   Fix: the capacitance attribute of pin B was missing its terminating ';'. */
cell(NOR) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "(A+B)'";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input XNOR gate: Z = NOT (A XOR B).
   Fix: the capacitance attribute of pin B was missing its terminating ';'. */
cell(XNOR) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "(A^B)'";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* Rising-edge D flip-flop with an asynchronous load: per the ff group below,
   while RST is high the state is cleared when I is 0 and preset when I is 1;
   otherwise D is captured on the rising edge of CLK. */
cell(DFF) {
area : 9;
/* Data input, with setup/hold constraints against CLK. */
pin(D) {
direction : input;
capacitance : 1;
timing() {
timing_type : setup_rising;
intrinsic_rise : 0.85;
intrinsic_fall : 0.85;
related_pin : "CLK";
}
timing() {
timing_type : hold_rising;
intrinsic_rise : 0.4;
intrinsic_fall : 0.4;
related_pin : "CLK";
}
}
/* Value loaded while RST is high (used only in the clear/preset expressions).
   NOTE(review): it carries the same CLK setup/hold constraints as D although
   it only takes part in the asynchronous path -- confirm this is intended. */
pin(I) {
direction : input;
capacitance : 1;
timing() {
timing_type : setup_rising;
intrinsic_rise : 0.85;
intrinsic_fall : 0.85;
related_pin : "CLK";
}
timing() {
timing_type : hold_rising;
intrinsic_rise : 0.4;
intrinsic_fall : 0.4;
related_pin : "CLK";
}
}
pin(CLK) {
direction : input;
capacitance : 1;
}
pin(RST) {
direction : input;
capacitance : 2;
}
/* State element: IQ is the stored value, IQN its complement. */
ff("IQ", "IQN") {
next_state : "D";
clocked_on : "CLK";
/* clear and preset are mutually exclusive here: RST with I = 0 clears,
   RST with I = 1 presets. */
clear : "RST (I')";
preset: "RST I";
/* Values taken by IQ / IQN if clear and preset were both active. */
clear_preset_var1: L;
clear_preset_var2: H;
}
pin(Q) {
direction : output;
function : "IQ";
/* NOTE(review): internal_node is normally paired with a statetable group,
   not with an ff group plus function -- verify it is needed here. */
internal_node : "Q";
/* Clock-to-Q arc. */
timing() {
timing_type : rising_edge;
intrinsic_rise : 1.19;
intrinsic_fall : 1.37;
rise_resistance : 0.1458;
fall_resistance : 0.0523;
related_pin : "CLK";
}
/* Asynchronous clear arc from RST.
   NOTE(review): RST is active high in the clear expression, yet the arc is
   marked positive_unate -- check the sense against the Liberty reference. */
timing() {
timing_type : clear;
timing_sense : positive_unate;
intrinsic_fall : 1.29;
fall_resistance : 0.0516;
related_pin : "RST";
}
/* Asynchronous preset arc.
   NOTE(review): a preset drives Q high, but only fall delays are given, and
   the arc is related to I rather than RST -- confirm. */
timing() {
timing_type : preset;
timing_sense : positive_unate;
intrinsic_fall : 1.29;
fall_resistance : 0.0516;
related_pin : "I";
}
}
}
/* Inverter: Z = NOT A. Area is 0 and the cell carries the footprint "iv". */
cell(IV) {
  area : 0;
  cell_footprint : "iv";
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "A'";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.38;
      intrinsic_fall : 0.15;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
}
抱歉发了这么长的帖子,希望我把问题描述清楚了。回复得有些晚,但我也是刚好碰到这个问题。三态的问题我不确定,不过我刚遇到过你说的 FFGEN 错误。综合器会用库中可用的单元把代码编译成一系列门。当你在 VHDL 中描述的行为在库里没有任何单元能够实现时(在我的例子中,是带异步复位的触发器(flip-flop,FF)),综合器在生成(GEN)该元件时不知道该用哪种单元,因此报出 FFGEN 错误。不过,综合器会为该寄存器放置一个占位符,用来描述该元件的输入、输出和时钟信号(查看网表就能看到这个占位符)。我的是这样的:
\**FFGEN** \inst_clk_divider/cnt_reg[1] ( .next_state(n299), .clocked_on(clk),
.force_00(1'b0),
.force_01(rst),
.force_10(1'b0),
.force_11(1'b0),
.Q(\inst_clk_divider/cnt[1] ) );
我不确定这是否与你的问题有关,但你的 Sqrt_newton 中的异步反馈看起来很可疑。另外,即使带着 tmp_inout2 和 tmp_inout4 能够编译通过,我估计生成的电路也会非常庞大。它的 always 块敏感列表不完整;如果你跳过仿真、直接做综合,这不会成为问题,但逻辑错误在仿真中更容易发现。写得不好的敏感列表会导致仿真与综合之间的行为不一致。
@Greg,谢谢。异步反馈是不允许的吗?我已经把它改成了同步反馈,看起来可以工作了。
异步反馈很棘手。要让它正常工作,电路必须能够自行稳定(即收敛到一个稳定、不振荡的状态/数值)。门传播延迟不平衡、RC 寄生参数、温度/电压变化,以及其他任何会影响时序的因素,如果在设计中没有加以考虑,都可能使异步反馈设计产生意外的和/或振荡的输出。同步设计没有这种设计难题,这也是它更常见的原因。
谢谢,我明白了。但我看到 Div 也是异步的,会有问题吗?我该如何设计一个同步除法器?
Div 本身是一条线性链,不会反馈到自身。它确实需要一段时间才能稳定下来,这段时间可能比一个时钟周期还长;这一点需要注意,并且会在静态时序分析中反映出来。
// 3x3 Cholesky decomposition (A = L * L^T), unrolled by hand from the
// reference loop:
//   L[i][i] = sqrt(A[i][i] - sum_k L[i][k]^2)
//   L[i][j] = (1 / L[j][j]) * (A[i][j] - sum_k L[i][k] * L[j][k])   (j < i)
//
// Ports:
//   clk, rst : clock and active-high asynchronous reset
//   g_input  : 144-bit bus; the lower triangle of A is taken from 16-bit
//              slices of it and zero-extended to 24 bits while rst is high
//   e_input  : declared but not used by this module
//   o        : {L00, L01, L02, L10, L11, L12, L20, L21, L22}, 24 bits each
//
// Fixes in this version:
//   * added the missing ';' after the tmp_A22_minus_sum declaration and the
//     missing 'begin' after 'if (rst)';
//   * the undeclared shared accumulator tmp_sum is gone: every element now
//     subtracts exactly the partial sum that belongs to it (the running
//     accumulator leaked terms of earlier elements into later ones);
//   * signals driven by Sqrt/Div instance outputs are wires and are no
//     longer also assigned in the always block (a reg may not be connected
//     to an instance output, and the double assignment created multiple
//     drivers);
//   * clocked registers use non-blocking assignments.
//
// Latency: the datapath is a chain of registers and combinational Sqrt/Div
// instances, so 'o' needs several clock edges after rst is released before
// every element has settled (five, assuming Sqrt and Div are purely
// combinational -- TODO confirm against their implementation).
//
// NOTE(review): all arithmetic is plain 24-bit unsigned with truncation. If
// Div is an integer divider, 1/L is 0 for every L > 1; confirm the
// fixed-point scaling expected by Sqrt and Div.
// NOTE(review): A is loaded from g_input inside the asynchronous reset
// branch, which asks the synthesizer for flops with a data-dependent
// asynchronous set/clear. This may be what triggers the TRANS-4 (FFGEN)
// warning; loading A synchronously is the usual alternative, but it changes
// the reset protocol, so it is left as is.
module cholesky_template(clk, rst, g_input, e_input, o);
    input          clk, rst;
    input  [143:0] g_input;
    input          e_input;
    output [215:0] o;

    // Input matrix; only the lower triangle is used.
    reg [23:0] A [0:2][0:2];

    // Diagonal of L: driven solely by the Sqrt instances.
    wire [23:0] L00, L11, L22;
    // Reciprocals of the pivots: driven solely by the Div instances.
    wire [23:0] div_1_L00, div_1_L11;
    // Off-diagonal (lower) elements of L: registered.
    reg  [23:0] L10, L20, L21;

    // Registered operands "A[i][j] - partial sum".
    reg [23:0] tmp_A00_minus_sum;
    reg [23:0] tmp_A11_minus_sum;
    reg [23:0] tmp_A22_minus_sum;
    reg [23:0] tmp_A10_minus_sum;
    reg [23:0] tmp_A20_minus_sum;
    reg [23:0] tmp_A21_minus_sum;

    // The upper triangle of L is identically zero.
    assign o = {
        L00,   24'b0, 24'b0,
        L10,   L11,   24'b0,
        L20,   L21,   L22
    };

    // Positional connections kept from the original; the last port of Sqrt
    // and Div is assumed to be the output -- TODO confirm.
    Sqrt sqrt0(tmp_A00_minus_sum, L00);
    Div  div0(1'b1, L00, div_1_L00);
    Sqrt sqrt1(tmp_A11_minus_sum, L11);
    Div  div1(1'b1, L11, div_1_L11);
    Sqrt sqrt2(tmp_A22_minus_sum, L22);

    always @(posedge clk or posedge rst) begin
        if (rst) begin
            L10 <= 24'b0;
            L20 <= 24'b0;
            L21 <= 24'b0;
            tmp_A00_minus_sum <= 24'b0;
            tmp_A10_minus_sum <= 24'b0;
            tmp_A20_minus_sum <= 24'b0;
            tmp_A11_minus_sum <= 24'b0;
            tmp_A21_minus_sum <= 24'b0;
            tmp_A22_minus_sum <= 24'b0;
            A[0][0] <= {8'b00000000, g_input[15:0]};
            A[0][1] <= 24'b0; // will not be used
            A[0][2] <= 24'b0; // will not be used
            A[1][0] <= {8'b00000000, g_input[63:48]};
            A[1][1] <= {8'b00000000, g_input[79:64]};
            A[1][2] <= 24'b0; // will not be used
            A[2][0] <= {8'b00000000, g_input[111:96]};
            A[2][1] <= {8'b00000000, g_input[127:112]};
            A[2][2] <= {8'b00000000, g_input[143:128]};
        end else begin
            // Column 0: no earlier columns, so the partial sums are zero.
            tmp_A00_minus_sum <= A[0][0];
            tmp_A10_minus_sum <= A[1][0];
            tmp_A20_minus_sum <= A[2][0];
            L10 <= div_1_L00 * tmp_A10_minus_sum;
            L20 <= div_1_L00 * tmp_A20_minus_sum;

            // Column 1: subtract the contribution of column 0.
            tmp_A11_minus_sum <= A[1][1] - L10 * L10;
            tmp_A21_minus_sum <= A[2][1] - L20 * L10;
            L21 <= div_1_L11 * tmp_A21_minus_sum;

            // Column 2: subtract the contributions of columns 0 and 1.
            tmp_A22_minus_sum <= A[2][2] - (L20 * L20 + L21 * L21);
        end
    end
endmodule
library(HML){
/* 2-input AND gate: Z = A AND B (in a Liberty function string a blank
   between two operands denotes AND). */
cell(AND) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "A B";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.48;
      intrinsic_fall : 0.77;
      rise_resistance : 0.1443;
      fall_resistance : 0.0523;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.48;
      intrinsic_fall : 0.77;
      rise_resistance : 0.1443;
      fall_resistance : 0.0523;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input OR gate: Z = A OR B. */
cell(OR) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "A+B";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input XOR gate: Z = A XOR B.
   Fix: the capacitance attribute of pin B was missing its terminating ';'.
   NOTE(review): area is 0 here while the other 2-input gates use 6; kept
   unchanged on the assumption that it is a deliberate cost weighting --
   confirm. */
cell(XOR) {
  area : 0;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "A^B";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input NAND gate: Z = NOT (A AND B).
   Fix: the capacitance attribute of pin B was missing its terminating ';'. */
cell(NAND) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "(A B)'";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input NOR gate: Z = NOT (A OR B).
   Fix: the capacitance attribute of pin B was missing its terminating ';'. */
cell(NOR) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "(A+B)'";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* 2-input XNOR gate: Z = NOT (A XOR B).
   Fix: the capacitance attribute of pin B was missing its terminating ';'. */
cell(XNOR) {
  area : 6;
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(B) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "(A^B)'";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
    /* Delay arc B -> Z (same numbers as the A arc). */
    timing() {
      related_pin : "B";
      intrinsic_rise : 0.28;
      intrinsic_fall : 0.85;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
/* Rising-edge D flip-flop with an asynchronous load: per the ff group below,
   while RST is high the state is cleared when I is 0 and preset when I is 1;
   otherwise D is captured on the rising edge of CLK. */
cell(DFF) {
area : 9;
/* Data input, with setup/hold constraints against CLK. */
pin(D) {
direction : input;
capacitance : 1;
timing() {
timing_type : setup_rising;
intrinsic_rise : 0.85;
intrinsic_fall : 0.85;
related_pin : "CLK";
}
timing() {
timing_type : hold_rising;
intrinsic_rise : 0.4;
intrinsic_fall : 0.4;
related_pin : "CLK";
}
}
/* Value loaded while RST is high (used only in the clear/preset expressions).
   NOTE(review): it carries the same CLK setup/hold constraints as D although
   it only takes part in the asynchronous path -- confirm this is intended. */
pin(I) {
direction : input;
capacitance : 1;
timing() {
timing_type : setup_rising;
intrinsic_rise : 0.85;
intrinsic_fall : 0.85;
related_pin : "CLK";
}
timing() {
timing_type : hold_rising;
intrinsic_rise : 0.4;
intrinsic_fall : 0.4;
related_pin : "CLK";
}
}
pin(CLK) {
direction : input;
capacitance : 1;
}
pin(RST) {
direction : input;
capacitance : 2;
}
/* State element: IQ is the stored value, IQN its complement. */
ff("IQ", "IQN") {
next_state : "D";
clocked_on : "CLK";
/* clear and preset are mutually exclusive here: RST with I = 0 clears,
   RST with I = 1 presets. */
clear : "RST (I')";
preset: "RST I";
/* Values taken by IQ / IQN if clear and preset were both active. */
clear_preset_var1: L;
clear_preset_var2: H;
}
pin(Q) {
direction : output;
function : "IQ";
/* NOTE(review): internal_node is normally paired with a statetable group,
   not with an ff group plus function -- verify it is needed here. */
internal_node : "Q";
/* Clock-to-Q arc. */
timing() {
timing_type : rising_edge;
intrinsic_rise : 1.19;
intrinsic_fall : 1.37;
rise_resistance : 0.1458;
fall_resistance : 0.0523;
related_pin : "CLK";
}
/* Asynchronous clear arc from RST.
   NOTE(review): RST is active high in the clear expression, yet the arc is
   marked positive_unate -- check the sense against the Liberty reference. */
timing() {
timing_type : clear;
timing_sense : positive_unate;
intrinsic_fall : 1.29;
fall_resistance : 0.0516;
related_pin : "RST";
}
/* Asynchronous preset arc.
   NOTE(review): a preset drives Q high, but only fall delays are given, and
   the arc is related to I rather than RST -- confirm. */
timing() {
timing_type : preset;
timing_sense : positive_unate;
intrinsic_fall : 1.29;
fall_resistance : 0.0516;
related_pin : "I";
}
}
}
/* Inverter: Z = NOT A. Area is 0 and the cell carries the footprint "iv". */
cell(IV) {
  area : 0;
  cell_footprint : "iv";
  pin(A) {
    direction : input;
    capacitance : 1;
  }
  pin(Z) {
    direction : output;
    function : "A'";
    /* Delay arc A -> Z. */
    timing() {
      related_pin : "A";
      intrinsic_rise : 0.38;
      intrinsic_fall : 0.15;
      rise_resistance : 0.1443;
      fall_resistance : 0.0589;
      slope_rise : 0.0;
      slope_fall : 0.0;
    }
  }
}
}