Question: The following code performs matrix multiplication in c + + . float dot _ product _ f ( float * m 1 , float *

The following code performs matrix multiplication in c++.
float dot_product_f(float* m1, float* m2, int r, int c, int l)
{
float product =0;
for(int i=0; i < l; i++)
{
float a = m1[i + r * l];
float b = m2[c + i * l];
product += a *b;
//printf("%1f*%1f +", a, b);
}
//printf("
");
return product;
}
The following is the assembly for the above c++ code. The requirement is to remove redundant instructions to improve performance. Please provide new working assembly file with improved code. Thanks.
.text
.file "dp_float.cpp"
.globl _Z13dot_product_fPfS_iii # -- Begin function _Z13dot_product_fPfS_iii
.p2align 4,0x90
.type _Z13dot_product_fPfS_iii,@function
_Z13dot_product_fPfS_iii: # @_Z13dot_product_fPfS_iii
.cfi_startproc
# %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp,-16
movq %rsp,%rbp
.cfi_def_cfa_register %rbp
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movl %edx, -20(%rbp)
movl %ecx, -24(%rbp)
movl %r8d,-28(%rbp)
xorps %xmm0,%xmm0
movss %xmm0,-32(%rbp)
movl $0,-36(%rbp)
.LBB0_1: # =>This Inner Loop Header: Depth=1
movl -36(%rbp),%eax
cmpl -28(%rbp),%eax
jge .LBB0_4
# %bb.2: # in Loop: Header=BB0_1 Depth=1
movq -8(%rbp),%rax
movl -36(%rbp),%ecx
movl -20(%rbp),%edx
imull -28(%rbp),%edx
addl %edx, %ecx
movslq %ecx, %rcx
movss (%rax,%rcx,4),%xmm0 # xmm0= mem[0],zero,zero,zero
movss %xmm0,-40(%rbp)
movq -16(%rbp),%rax
movl -24(%rbp),%ecx
movl -36(%rbp),%edx
imull -28(%rbp),%edx
addl %edx, %ecx
movslq %ecx, %rcx
movss (%rax,%rcx,4),%xmm0 # xmm0= mem[0],zero,zero,zero
movss %xmm0,-44(%rbp)
movss -40(%rbp),%xmm0 # xmm0= mem[0],zero,zero,zero
movss -44(%rbp),%xmm2 # xmm2= mem[0],zero,zero,zero
movss -32(%rbp),%xmm1 # xmm1= mem[0],zero,zero,zero
mulss %xmm2,%xmm0
addss %xmm1,%xmm0
movss %xmm0,-32(%rbp)
# %bb.3: # in Loop: Header=BB0_1 Depth=1
movl -36(%rbp),%eax
addl $1,%eax
movl %eax, -36(%rbp)
jmp .LBB0_1
.LBB0_4:
movss -32(%rbp),%xmm0 # xmm0= mem[0],zero,zero,zero
popq %rbp
.cfi_def_cfa %rsp,8
retq
.Lfunc_end0:
.size _Z13dot_product_fPfS_iii, .Lfunc_end0-_Z13dot_product_fPfS_iii
.cfi_endproc
# -- End function
.ident "clang version 17.0.6(CentOS 17.0.6-5.el9)"
.section ".note.GNU-stack","",@progbits
.addrsig

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!