/*
 * vaguely derived from code vaguely derived by Pathscale from:
 * Derived from code supplied by Ohio State University:
 * Copyright (C) 2002-2003 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 * http://nowlab.cis.ohio-state.edu/projects/mpi-iba/
 */

/*
 * Overview (from the code that is still visible): an MPI program that walks
 * over pairs of ranks; for each pair the higher rank calls do_tx() and the
 * lower rank calls do_rx().  do_tx() times MPI_Send/MPI_Recv exchanges with
 * the CPU time-stamp counter and accumulates the samples in a Stats object.
 *
 * NOTE(review): this copy of the file is damaged.  Header names after every
 * #include are missing, and several spans that originally sat between a '<'
 * and a later '>' have been lost (most likely stripped as markup).  The
 * damaged spots are kept verbatim below and marked; restore them from a
 * clean copy before trying to build.
 */

/*
 * NOTE(review): the eleven header names are missing.  The code below uses
 * FILE/fprintf, gettimeofday, select, floor, FLT_MAX, va_list, strlen,
 * getopt, atoi/exit and the MPI API, so the matching headers are needed.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Size in bytes of the single message buffer used for every send/receive. */
#define BUFSIZE (4*1024*1024)
char buf[BUFSIZE];

/* Stream that result lines are written to (opened in a part of the file that is not visible here). */
FILE *fp;

/* Returns the current time of day in seconds, with microsecond resolution. */
double gtod()
{
    struct timeval tv;
    gettimeofday(&tv,0);
    return tv.tv_sec + 1e-6 * tv.tv_usec;
}

typedef unsigned long long u64;

/* Executes cpuid (leaf 0); used around rdtsc below. */
#define serialize() \
    __asm__ __volatile__("cpuid"::"a"(0):"eax","ebx","ecx","edx")

/*
 * Reads the CPU time-stamp counter, with a cpuid executed before and after
 * the read.
 * NOTE(review): the "=A" constraint means the edx:eax pair only on 32-bit
 * x86; on x86-64 it does not combine the two halves -- confirm the target.
 */
u64 rdtsc()
{
    u64 tsc;
    serialize();
    __asm__ __volatile__("rdtsc" : "=A" (tsc));
    serialize();
    return tsc;
}

/*
 * debug(...) is used like printf.  With DEBUG defined the call goes to
 * debug_printf(); without it the call sits in the else-branch of "if (1)"
 * and is never executed, but is still compiled.
 */
//#define DEBUG
#ifdef DEBUG
# define debug if (0) {} else debug_printf
#else
# define debug if (1) {} else debug_printf
#endif

/*
 * printf-style output to stderr; appends a newline when the format string
 * does not already end with one.
 * NOTE(review): an empty format string makes format[strlen(format)-1] read
 * before the start of the string.
 */
void debug_printf(char *format, ...)
{
    va_list ap;
    va_start(ap,format);
    vfprintf(stderr,format,ap);
    if (format[strlen(format)-1] != '\n')
        fputc('\n',stderr);
    va_end(ap);
}

/*
 * Sleeps for `delay` seconds by calling select() with no descriptors and a
 * timeout.
 * NOTE(review): usec is computed as 1e6*delay - sec, not 1e6*(delay - sec),
 * so for delay = 1.5 it is 1499999 instead of 500000.
 */
void mysleep(double delay)
{
    double sec = floor(delay);
    double usec = 1e6 * delay - sec;
    struct timeval tv = {(unsigned)sec,(unsigned)usec};
    select(0,0,0,0,&tv);
}

/* Blocks for 10000 microseconds (10 ms) via select(). */
void yield()
{
    struct timeval tv = {0,10000};
    select(0,0,0,0,&tv);
}

/*
 * Running statistics over a series of samples: count, sum, minimum and
 * maximum, plus a copy of the first 1000 samples stored as float.
 */
class Stats {
    unsigned n;           // number of samples seen so far
    double sum,min,max;
    float data[1000];     // first 1000 samples only; later ones are not stored
public:
    // NOTE(review): FLT_MIN is the smallest positive float, not the most
    // negative one, so max is only updated by samples greater than that.
    Stats(): n(0), sum(0), min(FLT_MAX), max(FLT_MIN) {}

    // Records one sample: updates max/min/sum, stores it in data[] while
    // there is room, and always increments the count.
    void put(double v) {
        if (v > max) max = v;
        if (v < min) min = v;
        sum += v;
        if (n < (sizeof(data)/sizeof(data[0])))
            data[n] = v;
        n++;
    }

    unsigned get_n() { return n; }

    // The three accessors below return 0 when no sample has been recorded.
    double get_min() { if (!n) return 0; return min; }
    double get_max() { if (!n) return 0; return max; }
    double get_avg() { if (!n) return 0; return sum/n; }

    // Opens `fname` for writing; the local fp shadows the global fp.
    // NOTE(review): the result of fopen is not checked in the visible part.
    double dump(char *fname) {
        FILE *fp = fopen(fname,"w");
        // NOTE(review): the text is damaged from the middle of the next
        // line on.  The rest of dump(), the end of class Stats and the
        // beginning of do_tx() are missing; what follows the gap is the
        // interior of do_tx().  Kept verbatim.
        for (int i=0; i %s (%d) done(%d)\n",name,peer_name,peer_node,done[peer_node]);
    if (!done[peer_node]) {
        // done[peer_node] = 1;
        for (isize=0; isize %s",name,bufsize,peer_name);
        // NOTE(review): the loop header above is damaged as well (text
        // lost between "isize" and the format string).

        // Send the element count for this run to the peer.
        MPI_Send(&bufsize, 1, MPI_INT,
                 peer, tag_size, MPI_COMM_WORLD);
        // buf[0] is set to 1 for all warm-up and timed messages; it is set
        // to 0 only for the final message after the timed loop.
        buf[0] = 1;
        // NOTE(review): bufsize counts MPI_INT elements while buf holds
        // BUFSIZE bytes; no range check is visible in the surviving text.
        // do a few warm-ups
        for (int times = 0; times < 10; times++) {
            MPI_Send(buf, bufsize, MPI_INT, peer, tag_data, MPI_COMM_WORLD);
            MPI_Recv(buf, bufsize, MPI_INT, peer, tag_data, MPI_COMM_WORLD, &stat);
        }
        double start = gtod();
        double end = start + bench_time;
        // Timed loop: one sample is the TSC delta across a send followed by
        // a receive, converted to seconds with ticks_to_secs.
        while (1) {
            u64 a = rdtsc();
            MPI_Send(buf, bufsize, MPI_INT, peer, tag_data, MPI_COMM_WORLD);
            MPI_Recv(buf, bufsize, MPI_INT, peer, tag_data, MPI_COMM_WORLD, &stat);
            u64 b = rdtsc();
            if (b <= a) continue; // just redo if the TSC wrapped
            st.put(ticks_to_secs * (b - a));
            // continue until either enough reps or enough time and reps
            if (st.get_n() > max_bench_times ||
                (st.get_n() > min_bench_times && gtod() > end)) break;
        }
        debug("%s tx done (%u times, avg %.3f); sending end\n",
              name,st.get_n(),1e6*st.get_avg());
        // Final message with buf[0] == 0; presumably the end-of-run marker
        // that do_rx() looks for -- do_rx() is not visible, confirm there.
        buf[0] = 0;
        MPI_Send(buf, bufsize, MPI_INT, peer, tag_data, MPI_COMM_WORLD);
    }
    // Start of the result line: both ranks, then both names.
    fprintf(fp,"%d %d ",rank,peer);
    fprintf(fp,"%s %s ",name,peer_name);
    // NOTE(review): the text is damaged from the middle of the next line
    // on.  The rest of do_tx(), all of do_rx() and the beginning of the
    // clock-calibration routine are missing; what follows the gap is the
    // tail of that routine.  Kept verbatim.
    for (isize=0; isize= 10) break;
    }
    // clock is treated as ticks per second: its reciprocal becomes the
    // ticks-to-seconds factor and clock*1e-9 is reported as GHz.
    clock = sum / times;
    ticks_to_secs = 1.0 / clock;
    debug("final %5.3f GHz",clock*1e-9);
}

/*
 * Prints a one-line usage message to stderr and exits with status 0.
 * NOTE(review): main() also accepts -o, which the message does not mention.
 */
void usage()
{
    fprintf(stderr,"checker [-s size]\n");
    exit(0);
}

/*
 * Entry point.  Options handled by the visible code:
 *   -o ARG   stored in oname (its use is in the missing part of main)
 *   -s N     appends N to the list of message sizes
 * sizes[0] is always 1; when no -s is given, 4096 is added as a second size.
 */
int main(int argc, char *argv[])
{
    int src,dst;
    char *oname = 0;
    // message sizes in 4B units.
    int sizes[10] = {1};
    int csizes = 1;
    int letter;
    // NOTE(review): csizes is not checked against the 10-element array, so
    // ten or more -s options write past the end of sizes[]; atoi() also
    // gives no way to detect a malformed number.
    while ((letter = getopt(argc,argv,"o:s:")) != -1) {
        switch(letter) {
        case 'o': oname = optarg; break;
        case 's': sizes[csizes++] = atoi(optarg); break;
        default: usage();
        }
    }
    if (csizes == 1) sizes[csizes++] = 4096;
    // NOTE(review): the text is damaged on the next line.  Everything
    // between "i" and the tail of a comment ending in "...dst sends,
    // s/nprocs/src/ here:" is missing -- presumably MPI start-up, clock
    // calibration, opening fp and the head of the outer loop over src.
    // Kept verbatim.
    for (int i=0; idst sends, s/nprocs/src/ here:
    // for (dst = 0; dst < nprocs; dst++) {
    // With dst < src every unordered pair is visited once and the higher
    // rank is the sender; "src == dst" can never be true in this form.
    for (dst = 0; dst < src; dst++) {
        if (src == dst) continue;
        // Every rank reaches the barrier before each test; only the two
        // ranks of the current pair then do any work.
        MPI_Barrier(MPI_COMM_WORLD);
        if (rank == src) do_tx(dst,csizes,sizes);
        if (rank == dst) do_rx(src);
        ntests++;
    }
    }   // closes the outer loop whose header is in the missing text
    MPI_Finalize();
    // Rank 0 appends a summary: test count, elapsed seconds, tests/second.
    if (rank == 0) {
        double elapsed = gtod() - start;
        fprintf(fp,"# %u tests in %.3f seconds (%.1f)\n",
                ntests,elapsed,ntests/elapsed);
    }
    return 0;
}