130 lines
3.4 KiB
C
130 lines
3.4 KiB
C
|
/*
|
||
|
* trans.c - Matrix transpose B = A^T
|
||
|
*
|
||
|
* Each transpose function must have a prototype of the form:
|
||
|
* void trans(int M, int N, int A[N][M], int B[M][N]);
|
||
|
*
|
||
|
* A transpose function is evaluated by counting the number of misses
|
||
|
* on a 1KB direct mapped cache with a block size of 32 bytes.
|
||
|
*/
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include "cachelab.h"
|
||
|
|
||
|
int is_transpose(int M, int N, int A[N][M], int B[M][N]);
|
||
|
|
||
|
/*
|
||
|
* transpose_submit - This is the solution transpose function that you
|
||
|
* will be graded on for Part B of the assignment. Do not change
|
||
|
* the description string "Transpose submission", as the driver
|
||
|
* searches for that string to identify the transpose function to
|
||
|
* be graded.
|
||
|
*/
|
||
|
char transpose_submit_desc[] = "Transpose submission";
|
||
|
void transpose_submit(int M, int N, int A[N][M], int B[M][N]) {
|
||
|
int i, j, bi, bj;
|
||
|
|
||
|
if (M == 32 && N == 32) {
|
||
|
for (bi = 0; bi < N; bi += 8) {
|
||
|
for (bj = 0; bj < M; bj += 8) {
|
||
|
for (j = bj; j < bj + 8; ++j) {
|
||
|
for (i = bi; i < bi + 8; ++i) {
|
||
|
if (i != j) {
|
||
|
B[j][i] = A[i][j];
|
||
|
}
|
||
|
}
|
||
|
if (bi == bj) {
|
||
|
B[j][j] = A[j][j];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} else if (M == 64 && N == 64) {
|
||
|
for (bi = 0; bi < N; bi += 4) {
|
||
|
for (bj = 0; bj < M; bj += 4) {
|
||
|
for (j = bj; j < bj + 4; ++j) {
|
||
|
for (i = bi; i < bi + 4; ++i) {
|
||
|
if (i != j) {
|
||
|
B[i][j] = A[j][i];
|
||
|
}
|
||
|
}
|
||
|
if (bi == bj) {
|
||
|
B[j][j] = A[j][j];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
for (bi = 0; bi < N; bi += 16) {
|
||
|
for (bj = 0; bj < M; bj += 16) {
|
||
|
for (j = bj; j < bj + 16; ++j) {
|
||
|
if (j >= M) continue;
|
||
|
for (i = bi; i < bi + 16; ++i) {
|
||
|
if (i >= N) continue;
|
||
|
if (i != j) {
|
||
|
// printf("setting B[%d][%d] to %d\n", i, j, A[j][i]);
|
||
|
B[j][i] = A[i][j];
|
||
|
}
|
||
|
}
|
||
|
if (bi == bj) {
|
||
|
B[j][j] = A[j][j];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* You can define additional transpose functions below. We've defined
|
||
|
* a simple one below to help you get started.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* trans - A simple baseline transpose function, not optimized for the cache.
|
||
|
*/
|
||
|
char trans_desc[] = "Simple row-wise scan transpose";
|
||
|
void trans(int M, int N, int A[N][M], int B[M][N]) {
|
||
|
int i, j, tmp;
|
||
|
|
||
|
for (i = 0; i < N; i++) {
|
||
|
for (j = 0; j < M; j++) {
|
||
|
tmp = A[i][j];
|
||
|
B[j][i] = tmp;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* registerFunctions - This function registers your transpose
|
||
|
* functions with the driver. At runtime, the driver will
|
||
|
* evaluate each of the registered functions and summarize their
|
||
|
* performance. This is a handy way to experiment with different
|
||
|
* transpose strategies.
|
||
|
*/
|
||
|
void registerFunctions() {
|
||
|
/* Register your solution function */
|
||
|
registerTransFunction(transpose_submit, transpose_submit_desc);
|
||
|
|
||
|
/* Register any additional transpose functions */
|
||
|
registerTransFunction(trans, trans_desc);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* is_transpose - This helper function checks if B is the transpose of
|
||
|
* A. You can check the correctness of your transpose by calling
|
||
|
* it before returning from the transpose function.
|
||
|
*/
|
||
|
int is_transpose(int M, int N, int A[N][M], int B[M][N]) {
|
||
|
int i, j;
|
||
|
|
||
|
for (i = 0; i < N; i++) {
|
||
|
for (j = 0; j < M; ++j) {
|
||
|
if (A[i][j] != B[j][i]) {
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return 1;
|
||
|
}
|