Question: Optimize the following dgemm matrix multiplication function by writing one or more functions that combine the techniques of loop unrolling (superscalar), blocking, and intrinsics.
Optimize the following dgemm matrix multiplication function by writing one or more functions that combine the techniques of loop unrolling (superscalar), blocking, and intrinsics.
Note: Running ./a.out corr in the terminal checks the correctness of the algorithm. A solution must pass this check in order to be counted as valid.
Note: You may not import external libraries or resources.
Note: Let this file be named dgemm.c.
/*
 * dgemm.c -- timing and correctness harness for a 1024 x 1024
 * double-precision matrix multiplication.
 *
 * NOTE(review): this listing was reconstructed from a copy in which every
 * "<...>" span had been stripped. The header names, the body of dgemm()
 * after "i <", and the opening of main() up to "argc > 1" were lost and have
 * been rebuilt from how the surviving code uses them. Confirm against the
 * original handout.
 */
#include <math.h>     /* fabs */
#include <stdio.h>    /* printf */
#include <stdlib.h>   /* rand, srand, RAND_MAX */
#include <string.h>   /* strcmp */
#include <sys/time.h> /* gettimeofday, struct timeval */
#include <time.h>     /* time, time_t */
/*
 * NOTE(review): the damaged copy had eight #include lines; only the six
 * above are required by the visible code. The other two were presumably
 * intrinsic headers (e.g. <immintrin.h>) -- add whatever optimized_dgemm()
 * needs.
 */

#define ALIGN __attribute__ ((aligned (32)))

/* Row-major n x n matrices stored flat; element (i, j) is at [i * n + j]. */
double ALIGN a[1024 * 1024];  /* left operand, filled with random values */
double ALIGN b[1024 * 1024];  /* right operand, filled with random values */
double ALIGN c[1024 * 1024];  /* compared against c1 in the correctness check */
double ALIGN c1[1024 * 1024]; /* written by the reference dgemm() */

/*
 * The function the assignment asks for. It is not defined in this listing
 * and must be supplied before the program will link.
 * NOTE(review): presumably it must leave the product of a and b in c, since
 * main() compares c against the reference result c1 -- confirm.
 */
void optimized_dgemm(int n);

/*
 * Reference (unoptimized) matrix multiply: accumulates a * b into c1.
 *
 * n: matrix dimension; the arrays are indexed with a row stride of n.
 *
 * c1 is accumulated into, not overwritten, so it must be zeroed by the
 * caller beforehand (main() does this during initialization).
 */
void dgemm(int n)
{
    int i, j, k;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            for (k = 0; k < n; k++) {
                c1[i * n + j] += a[i * n + k] * b[k * n + j];
            }
        }
    }
}

/*
 * Fills a and b with random values in [0, 1), times one call to
 * optimized_dgemm(1024), and prints the elapsed wall-clock time in
 * milliseconds. When the first command-line argument is "corr", the result
 * in c is also compared element by element with the reference dgemm().
 */
int main(int argc, char **argv)
{
    int i, j;
    time_t t;
    struct timeval start, end;
    double elapsed_time;
    int check_correctness = 0;
    int correct = 1;

    if (argc > 1) {
        if (strcmp(argv[1], "corr") == 0) {
            check_correctness = 1;
        }
    }

    /* Initialize random number generator */
    srand((unsigned) time(&t));

    /* Populate the arrays with random values */
    for (i = 0; i < 1024; i++) {
        for (j = 0; j < 1024; j++) {
            a[i * 1024 + j] = (double)rand() / (RAND_MAX + 1.0);
            b[i * 1024 + j] = (double)rand() / (RAND_MAX + 1.0);
            c[i * 1024 + j] = 0.0;
            c1[i * 1024 + j] = 0.0;
        }
    }

    gettimeofday(&start, NULL);

    /* Call your optimized function optimized_dgemm */
    optimized_dgemm(1024);

    gettimeofday(&end, NULL);

    /* For TA use only */
    if (check_correctness) {
        dgemm(1024);

        /* Stop at the first mismatch; only that element is reported. */
        for (i = 0; (i < 1024) && correct; i++) {
            for (j = 0; (j < 1024) && correct; j++) {
                if (fabs(c[i * 1024 + j] - c1[i * 1024 + j]) >= 0.0000001) {
                    printf("%f != %f\n", c[i * 1024 + j], c1[i * 1024 + j]);
                    correct = 0;
                }
            }
        }

        if (correct)
            printf("Result is correct!\n");
        else
            printf("Result is incorrect!\n");
    }

    /* Seconds and microseconds are each converted to milliseconds. */
    elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
    elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
    printf("dgemm finished in %f milliseconds.\n", elapsed_time);

    return 0;
}
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
