#include #include #include #include #include #include #include #include #include #include #include #include #include "mpi.h" #include "vector.h" extern "C" { unsigned short *RadKernel(int nQuads, unsigned short *ffPtr, unsigned short *radPtr, unsigned short *material, unsigned short *results); }; #define COMMUNICATION 1 #define ITERATIONS 500 #define BATCH_SIZE 8000 #define MAX_QUADS 15000 #define MAX_MATERIALS 10 #define MAX_SIMPLE_POLYS 1000 #define MAX_SIMPLE_POLY_VERTICES 5000 #define MAX_CYLINDERS 300 #define MAX_LIGHTS 64 #define SCALE_PROBLEM 0.4f unsigned short *formFactor; unsigned short baseRadiosity[MAX_QUADS][3]; unsigned short radiosity[MAX_QUADS][3]; int sock; int sockClient; int networkThreadDie; int nMaterials; int nMaterialQuads[MAX_MATERIALS]; float diffuse[MAX_MATERIALS][4]; int nQuads; VECTOR vertices[MAX_QUADS][4]; VECTOR normals[MAX_QUADS]; float areas[MAX_QUADS]; int nSimplePolys; int nTotalSimplePolyVertices; int nSimplePolyVertices[MAX_SIMPLE_POLYS]; VECTOR simplePolyNormals[MAX_SIMPLE_POLYS]; float simplePolyDistances[MAX_SIMPLE_POLYS]; VECTOR simplePolyVertices[MAX_SIMPLE_POLY_VERTICES]; int nCylinders; int cylinderPolyStart[MAX_CYLINDERS]; int cylinderPolyVertexStart[MAX_CYLINDERS]; float cylinderX[MAX_CYLINDERS]; float cylinderY[MAX_CYLINDERS]; float cylinderMinZ[MAX_CYLINDERS]; float cylinderMaxZ[MAX_CYLINDERS]; float cylinderR[MAX_CYLINDERS]; int nLights; VECTOR lightPositions[MAX_LIGHTS]; int rank, size; inline bool TestCylinder(VECTOR source, VECTOR direction, float maxDist, int i) { float x = cylinderX[i]; float y = cylinderY[i]; float minZ = cylinderMinZ[i]; float maxZ = cylinderMaxZ[i]; float r = cylinderR[i]; float a, b, c; float disc, low, high, low2, high2, temp; VECTOR dirNoZ = VECTOR(direction.x, direction.y, 0.0f); // Compute quadratic equation coefficients (a*t^2 + b*t + c = 0) a = SquareMagnitude(dirNoZ); b = 2.0f*DotProduct(source - VECTOR(x,y,0), dirNoZ); c = SquareMagnitude(source - VECTOR(x,y,source.z)) - r*r; disc = b*b - 4.0f*a*c; if (disc < 0.0f) { return false; } disc = sqrtf(disc); // Solutions: range of ray inside cylinder in X/Y a = 0.5f / a; low = a * (-b - disc); high = a * (-b + disc); // Compute range of ray inside cylinder in Z temp = 1.0f / direction.z; low2 = (minZ - source.z) * temp; high2 = (maxZ - source.z) * temp; if (low2 > high2) { temp = low2; low2 = high2; high2 = temp; } if (low2 > low) { low = low2; } if (high2 < high) { high = high2; } if ((low > maxDist-0.005f) || (high < 0.005f)) { return false; } return true; } bool TestVisibility(VECTOR source, VECTOR direction, float maxDist) { int i; int sp, spv, spvEnd; float intersect; bool c; // Test global cylinder first if (!TestCylinder(source, direction, maxDist, 0)) { return true; } for (i = 1; i < nCylinders; i++) { c = TestCylinder(source, direction, maxDist, i); if (c) { sp = cylinderPolyStart[i]; spv = cylinderPolyVertexStart[i]; // Empty cylinder means just use the cylinder and nothing more if (sp == cylinderPolyStart[i+1]) { return false; } for (; sp < cylinderPolyStart[i+1]; sp++, spv = spvEnd) { float num, den; VECTOR u, v; spvEnd = nSimplePolyVertices[sp] + spv; den = DotProduct(simplePolyNormals[sp], direction); if (den == 0) { continue; } num = simplePolyDistances[sp] - DotProduct(simplePolyNormals[sp], source); intersect = num/den; if ((intersect < 0.005f) || (intersect > maxDist-0.005f)) { continue; } VECTOR intersectPoint = source + direction*intersect; if (spvEnd == spv+4) { u = CrossProduct(intersectPoint-simplePolyVertices[spv+3], simplePolyVertices[spv+0]-simplePolyVertices[spv+3]); v = CrossProduct(intersectPoint-simplePolyVertices[spv+0], simplePolyVertices[spv+1]-simplePolyVertices[spv+0]); if (DotProduct(u, v) <= 0) { continue; } u = CrossProduct(intersectPoint-simplePolyVertices[spv+1], simplePolyVertices[spv+2]-simplePolyVertices[spv+1]); if (DotProduct(u, v) <= 0) { continue; } v = CrossProduct(intersectPoint-simplePolyVertices[spv+2], simplePolyVertices[spv+3]-simplePolyVertices[spv+2]); if (DotProduct(u, v) <= 0) { continue; } } else { u = CrossProduct(intersectPoint-simplePolyVertices[spvEnd-1], simplePolyVertices[spv]-simplePolyVertices[spvEnd-1]); for (; spv < spvEnd-1; spv++) { v = CrossProduct(intersectPoint-simplePolyVertices[spv], simplePolyVertices[spv+1]-simplePolyVertices[spv]); if (DotProduct(u, v) <= 0) { goto next; } u = v; } } return false; next: ; } } } return true; } void *NetworkProc(void *arg) { while (!networkThreadDie) { int res; res = write(sockClient, lightPositions, nLights*sizeof(VECTOR)); res = write(sockClient, baseRadiosity, nQuads*3*sizeof(unsigned short)); res = write(sockClient, radiosity, nQuads*3*sizeof(unsigned short)); } close(sockClient); } void UpdateBaseRadiosities(void) { int i, j, k; int mat, matRemaining; int start, end; start = nQuads * rank / size; end = nQuads * (rank+1) / size; mat = 0; matRemaining = nMaterialQuads[0]; for (i = 0; i < start; i++) { matRemaining--; if (matRemaining == 0) { mat++; matRemaining = nMaterialQuads[mat]; } } for (j = start; j < end; j++) { float totalBright = 0; VECTOR position; position = 0.25*(vertices[j][0] + vertices[j][1] + vertices[j][2] + vertices[j][3]); for (k = 0; k < nLights; k++) { VECTOR light; float dist, brightness; light = lightPositions[k] - position; dist = Magnitude(light); light /= dist; brightness = DotProduct(light, normals[j]) / (dist*dist); if (brightness > 0) { if (TestVisibility(position, light, dist)) { totalBright += brightness; } } } totalBright *= 65536.0f*SCALE_PROBLEM; baseRadiosity[j][0] = (int)(diffuse[mat][0]*totalBright + 0.5f); baseRadiosity[j][1] = (int)(diffuse[mat][1]*totalBright + 0.5f); baseRadiosity[j][2] = (int)(diffuse[mat][2]*totalBright + 0.5f); matRemaining--; if (matRemaining == 0) { mat++; matRemaining = nMaterialQuads[mat]; } } } int main(int argc, char **argv) { MPI_Status status, statuses[20]; MPI_Request requestsA[20], requestsB[20], requestsC[20]; int offsets[20], counts[20]; unsigned short *ffPtr, *lastRunLength; int i, j, k, res, flags; int mat, matRemaining; int start, end, runStart, runLength; double startTime, endTime; struct hostent *host; struct sockaddr_in sa, saClient; pthread_t thread; FILE *f; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); // Node zero does the communication if (rank == 0) { socklen_t addrSize; flags = MSG_WAITALL; sock = socket(AF_INET, SOCK_STREAM, 0); host = gethostbyname("beowulf.lcs.mit.edu"); memset(&sa, 0, sizeof(sa)); sa.sin_family = AF_INET; sa.sin_port = htons(5353); memcpy(&sa.sin_addr, host->h_addr_list[0], sizeof(sa.sin_addr)); bind(sock, (struct sockaddr *)&sa, sizeof(sa)); addrSize = sizeof(sa); getsockname(sock, (struct sockaddr *)&sa, &addrSize); printf("Listening for connection at %s:%d...\n", inet_ntoa(sa.sin_addr), ntohs(sa.sin_port)); listen(sock, 1); sockClient = accept(sock, (struct sockaddr *)&saClient, &addrSize); // Receive the scene from the client printf("Receiving scene... "); res = recv(sockClient, &nMaterials, sizeof(int), flags); res = recv(sockClient, nMaterialQuads, nMaterials*sizeof(int), flags); res = recv(sockClient, diffuse, nMaterials*4*sizeof(float), flags); res = recv(sockClient, &nQuads, sizeof(int), flags); for (i = 0; i < nQuads; i++) { res = recv(sockClient, &vertices[i], 4*sizeof(VECTOR), flags); res = recv(sockClient, &normals[i], sizeof(VECTOR), flags); } res = recv(sockClient, &nSimplePolys, sizeof(int), flags); res = recv(sockClient, &nTotalSimplePolyVertices, sizeof(int), flags); res = recv(sockClient, nSimplePolyVertices, nSimplePolys*sizeof(int), flags); res = recv(sockClient, simplePolyNormals, nSimplePolys*sizeof(VECTOR), flags); res = recv(sockClient, simplePolyDistances, nSimplePolys*sizeof(float), flags); res = recv(sockClient, simplePolyVertices, nTotalSimplePolyVertices*sizeof(VECTOR), flags); res = recv(sockClient, &nCylinders, sizeof(int), flags); res = recv(sockClient, cylinderPolyStart, nCylinders*sizeof(int), flags); res = recv(sockClient, cylinderPolyVertexStart, nCylinders*sizeof(int), flags); res = recv(sockClient, cylinderX, nCylinders*sizeof(float), flags); res = recv(sockClient, cylinderY, nCylinders*sizeof(float), flags); res = recv(sockClient, cylinderMinZ, nCylinders*sizeof(float), flags); res = recv(sockClient, cylinderMaxZ, nCylinders*sizeof(float), flags); res = recv(sockClient, cylinderR, nCylinders*sizeof(float), flags); res = recv(sockClient, &nLights, sizeof(int), flags); for (i = 0; i < nLights; i++) { res = recv(sockClient, &lightPositions[i], sizeof(VECTOR), flags); } printf("done (%d mats, %d quads, %d/%d simple polys, %d cyls, %d lights)\n", nMaterials, nQuads, nSimplePolys, nTotalSimplePolyVertices, nCylinders, nLights); close(sock); } // Broadcast scene to all nodes if (rank == 0) { printf("Broadcasting scene... "); } MPI_Bcast(&nMaterials, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(nMaterialQuads, nMaterials, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(diffuse, nMaterials*4, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(&nQuads, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(vertices, nQuads*4*3, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(normals, nQuads*3, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(&nSimplePolys, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nTotalSimplePolyVertices, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(nSimplePolyVertices, nSimplePolys, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(simplePolyNormals, nSimplePolys*3, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(simplePolyDistances, nSimplePolys, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(simplePolyVertices, nTotalSimplePolyVertices*3, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(&nCylinders, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(cylinderPolyStart, nCylinders, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(cylinderPolyVertexStart, nCylinders, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(cylinderX, nCylinders, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(cylinderY, nCylinders, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(cylinderMinZ, nCylinders, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(cylinderMaxZ, nCylinders, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(cylinderR, nCylinders, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(&nLights, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(lightPositions, nLights*3, MPI_FLOAT, 0, MPI_COMM_WORLD); if (rank == 0) { printf("done\n"); } cylinderPolyStart[nCylinders] = nSimplePolys; // Compute quad areas for (i = 0; i < nQuads; i++) { areas[i] = DotProduct(CrossProduct(vertices[i][1] - vertices[i][0], vertices[i][2] - vertices[i][0]), normals[i]); areas[i] += DotProduct(CrossProduct(vertices[i][2] - vertices[i][0], vertices[i][3] - vertices[i][0]), normals[i]); areas[i] = 0.5f * fabsf(areas[i]); } MPI_Barrier(MPI_COMM_WORLD); for (i = 0; i < size; i++) { offsets[i] = nQuads * i / size; } offsets[size] = nQuads; for (i = 0; i < size; i++) { counts[i] = offsets[i+1]-offsets[i]; } // Read form factor matrix from disk startTime = MPI_Wtime(); start = nQuads * rank / size; end = nQuads * (rank+1) / size; formFactor = (unsigned short *)malloc((end-start)*nQuads*sizeof(unsigned short)); f = fopen("form_factors", "rb"); for (i = 0; i < start; i++) { runStart = 0; while (runStart < nQuads) { int descriptor = fgetc(f); runLength = descriptor / 3; descriptor %= 3; fread(formFactor, descriptor, runLength, f); runStart += runLength; } } ffPtr = &formFactor[0]; for (i = 0; i < end-start; i++) { runStart = 0; lastRunLength = NULL; while (runStart < nQuads) { int descriptor = fgetc(f); runLength = descriptor / 3; descriptor %= 3; switch (descriptor) { case 0: if (lastRunLength && (lastRunLength[1] == 0)) { lastRunLength[0] += runLength; } else { lastRunLength = ffPtr; *ffPtr++ = runLength; *ffPtr++ = 0; } break; case 1: if (lastRunLength && (lastRunLength[1] == 1)) { lastRunLength[0] += runLength; } else { lastRunLength = ffPtr; *ffPtr++ = runLength; *ffPtr++ = 1; } for (j = 0; j < runLength; j++) { int ff = fgetc(f); *ffPtr++ = ff; } break; case 2: if (lastRunLength && (lastRunLength[1] == 1)) { lastRunLength[0] += runLength; } else { lastRunLength = ffPtr; *ffPtr++ = runLength; *ffPtr++ = 1; } for (j = 0; j < runLength; j++) { int ff = fgetc(f) + 256*fgetc(f); *ffPtr++ = ff; } break; } runStart += runLength; } } fclose(f); endTime = MPI_Wtime(); printf("Form factor: %.2f seconds elapsed (compression: %d / %d; %d%%)\n", endTime - startTime, ffPtr - formFactor, (end-start)*nQuads, 100*(ffPtr - formFactor)/((end-start)*nQuads)); // Start continuously updating radiosity on frontend if (rank == 0) { pthread_create(&thread, NULL, NetworkProc, NULL); } UpdateBaseRadiosities(); MPI_Barrier(MPI_COMM_WORLD); startTime = MPI_Wtime(); #if COMMUNICATION // Prime the loop for (j = 1; j < size; j++) { int to = (rank+j) % size; int from = (rank+size-j) % size; MPI_Isend(&radiosity[offsets[rank]][0], 3*counts[rank], MPI_UNSIGNED_SHORT, to, 0, MPI_COMM_WORLD, &requestsB[j]); MPI_Irecv(&radiosity[offsets[from]][0], 3*counts[from], MPI_UNSIGNED_SHORT, from, 0, MPI_COMM_WORLD, &requestsC[j]); } #endif // Iterate for (k = 0; k < ITERATIONS; k++) { unsigned short material[4]; mat = 0; matRemaining = nMaterialQuads[0]; material[0] = (int)(diffuse[mat][0] * 65536.0f); material[1] = (int)(diffuse[mat][1] * 65536.0f); material[2] = (int)(diffuse[mat][2] * 65536.0f); /* // Update lights if (rank == 0) { double curTime = MPI_Wtime() - startTime; for (i = 0; i < nLights; i++) { lightPositions[i].x = 5.0f*cosf(0.5f*curTime + 0.3f*i); lightPositions[i].y = 5.0f*sinf(0.5f*curTime + 0.3f*i); lightPositions[i].z = 2.0f; } } MPI_Bcast(lightPositions, nLights*3, MPI_FLOAT, 0, MPI_COMM_WORLD); */ UpdateBaseRadiosities(); #if COMMUNICATION for (j = 1; j < size; j++) { if (rank == 0) { MPI_Irecv(&baseRadiosity[offsets[j]][0], 3*counts[j], MPI_UNSIGNED_SHORT, j, 0, MPI_COMM_WORLD, &requestsA[j]); } else if (j == rank) { MPI_Isend(&baseRadiosity[offsets[j]][0], 3*counts[j], MPI_UNSIGNED_SHORT, 0, 0, MPI_COMM_WORLD, &requestsA[0]); } } MPI_Waitall(size-1, &requestsB[1], &statuses[1]); MPI_Waitall(size-1, &requestsC[1], &statuses[1]); #endif start = offsets[rank]; end = offsets[rank+1]; for (i = 0; i < start; i++) { matRemaining--; if (matRemaining == 0) { mat++; matRemaining = nMaterialQuads[mat]; material[0] = (int)(diffuse[mat][0] * 65536.0f); material[1] = (int)(diffuse[mat][1] * 65536.0f); material[2] = (int)(diffuse[mat][2] * 65536.0f); } } ffPtr = &formFactor[0]; for (i = 0; i < end-start; i++) { unsigned short sums[4]; ffPtr = RadKernel(nQuads, ffPtr, &radiosity[0][0], material, sums); radiosity[i+start][0] = sums[0] + baseRadiosity[i+start][0]; radiosity[i+start][1] = sums[1] + baseRadiosity[i+start][1]; radiosity[i+start][2] = sums[2] + baseRadiosity[i+start][2]; matRemaining--; if (matRemaining == 0) { mat++; matRemaining = nMaterialQuads[mat]; material[0] = (int)(diffuse[mat][0] * 65536.0f); material[1] = (int)(diffuse[mat][1] * 65536.0f); material[2] = (int)(diffuse[mat][2] * 65536.0f); } } #if COMMUNICATION for (j = 1; j < size; j++) { int to = (rank+j) % size; int from = (rank+size-j) % size; MPI_Isend(&radiosity[offsets[rank]][0], 3*counts[rank], MPI_UNSIGNED_SHORT, to, 0, MPI_COMM_WORLD, &requestsB[j]); MPI_Irecv(&radiosity[offsets[from]][0], 3*counts[from], MPI_UNSIGNED_SHORT, from, 0, MPI_COMM_WORLD, &requestsC[j]); } if (rank == 0) { MPI_Waitall(size-1, &requestsA[1], &statuses[1]); } else { MPI_Wait(&requestsA[0], &status); } #endif } #if COMMUNICATION // Finish final transfers MPI_Waitall(size-1, &requestsB[1], &statuses[1]); MPI_Waitall(size-1, &requestsC[1], &statuses[1]); #endif endTime = MPI_Wtime(); if (rank == 0) { printf("%.2f seconds elapsed\n", endTime-startTime); } networkThreadDie = 1; MPI_Finalize(); return 0; }