From 528f329c5972e0e50322a6255cc981bf8e71459a Mon Sep 17 00:00:00 2001 From: Florian Stecker Date: Fri, 1 Oct 2021 13:36:56 -0500 Subject: [PATCH] use mpi to run on cluster --- Makefile | 18 +- singular_values.c | 495 +++++++++++++++++++++++++++++----------------- 2 files changed, 324 insertions(+), 189 deletions(-) diff --git a/Makefile b/Makefile index 813e839..b045eeb 100644 --- a/Makefile +++ b/Makefile @@ -3,20 +3,24 @@ HEADERS=linalg.h mat.h coxeter.h #SPECIAL_OPTIONS=-O0 -g -D_DEBUG #SPECIAL_OPTIONS=-O3 -pg -funroll-loops -fno-inline SPECIAL_OPTIONS=-O3 -flto -funroll-loops -Winline +#SPECIAL_OPTIONS=-O3 -flto -funroll-loops -Winline -mavx512f -mavx512cd -mavx512er -mavx512pf # KNL #SPECIAL_OPTIONS= -OPTIONS=-m64 -march=native -mtune=native -std=gnu99 -D_GNU_SOURCE $(SPECIAL_OPTIONS) +OPTIONS=-I../mps/include -L../mps/lib -pthread -m64 -std=gnu99 -D_GNU_SOURCE $(SPECIAL_OPTIONS) -all: singular_values special_element +all: singular_values special_element convert -singular_values: singular_values.o coxeter.o linalg.o mat.o - gcc $(OPTIONS) -o singular_values coxeter.o linalg.o singular_values.o mat.o -lm -lgsl -lcblas -lgmp -lmps -lpthread +convert: convert.hs + ghc --make -dynamic convert.hs + +singular_values: singular_values.o coxeter.o mat.o + mpicc $(OPTIONS) -o singular_values coxeter.o singular_values.o mat.o -lm -lgmp -lmps special_element: special_element.o coxeter.o linalg.o mat.o - gcc $(OPTIONS) -o special_element coxeter.o linalg.o special_element.o mat.o -lm -lgsl -lcblas -lgmp -lmps -lpthread + gcc $(OPTIONS) -o special_element coxeter.o linalg.o special_element.o mat.o -lm -lgmp -lmps singular_values.o: singular_values.c $(HEADERS) - gcc $(OPTIONS) -c singular_values.c + mpicc $(OPTIONS) -c singular_values.c special_element.o: special_element.c $(HEADERS) gcc $(OPTIONS) -c special_element.c @@ -31,4 +35,4 @@ mat.o: mat.c $(HEADERS) gcc $(OPTIONS) -c mat.c clean: - rm -f singular_values special_element coxeter.o linalg.o singular_values.o mat.o special_element.o + rm -f singular_values special_element coxeter.o linalg.o singular_values.o mat.o special_element.o convert.hi convert.o convert diff --git a/singular_values.c b/singular_values.c index 25600d7..0f9e1e4 100644 --- a/singular_values.c +++ b/singular_values.c @@ -1,18 +1,65 @@ #include "coxeter.h" -#include "linalg.h" +//#include "linalg.h" #include "mat.h" -#include +//#include #include +#include +#include +#include +#include +#include +#include +#include +#define MIN(x,y) ((x)<(y)?(x):(y)) #define SWAP(t,x,y) do { t _tmp = (x); (x) = (y); (y) = _tmp; } while (0); #define DEBUG(msg, ...) do { print_time(); fprintf(stderr, msg, ##__VA_ARGS__); } while (0); //#define DEBUG(msg, ...) -//#define OUTPUT_POINTS -#define OUTPUT_SUMMARY +#define TDIV 10 +#define TFROM 1 +#define TTO 9 +#define MDIV 10 +#define MFROM 1 +#define MTO 9 +#define JOBNR(t,m) (((t)-TFROM) + ((m)-MFROM)*(TTO-TFROM+1)) +#define NJOBS ((TTO-TFROM+1)*(MTO-MFROM+1)) +#define FLUSH_INTERVAL 100 + +enum message_tag { + JOB_ORDER, + JOB_RESULT, + JOB_SHUTDOWN, +}; + +struct job { + int tparam, mparam; + int done; + double max_slope; + double time; +}; + +struct result { + mpq_t tr; + mpq_t trinv; +}; + +struct global_data { + int n; + group_t *group; + mat* matrices; + struct result *invariants; + struct result **distinct_invariants; + mps_context *solver; +}; + struct timespec starttime; +char processor_name[MPI_MAX_PROCESSOR_NAME]; +int world_rank; +int world_size; +MPI_Datatype job_datatype; void print_time() { @@ -23,14 +70,43 @@ void print_time() diff = (current.tv_sec - starttime.tv_sec) + (current.tv_nsec - starttime.tv_nsec)*1e-9; - fprintf(stderr, "[%.3f] ", diff); + fprintf(stderr, "[%04d %.3f] ", world_rank, diff); } +static struct global_data allocate_global_data(int n) +{ + struct global_data result; + result.n = n; + result.matrices = malloc(n*sizeof(mat)); + for(int i = 0; i < n; i++) + mat_init(result.matrices[i], 3); + result.invariants = malloc(n*sizeof(struct result)); + result.distinct_invariants = malloc(n*sizeof(struct result*)); + for(int i = 0; i < n; i++) { + mpq_init(result.invariants[i].tr); + mpq_init(result.invariants[i].trinv); + result.distinct_invariants[i] = &result.invariants[i]; + } + result.solver = mps_context_new(); + mps_context_set_output_prec(result.solver, 20); // relative precision + mps_context_set_output_goal(result.solver, MPS_OUTPUT_GOAL_APPROXIMATE); -struct result { - mpq_t tr; - mpq_t trinv; -}; + return result; +} + +void free_global_data(struct global_data dat) +{ + for(int i = 0; i < dat.n; i++) + mat_clear(dat.matrices[i]); + free(dat.matrices); + for(int i = 0; i < dat.n; i++) { + mpq_clear(dat.invariants[i].tr); + mpq_clear(dat.invariants[i].trinv); + } + free(dat.invariants); + free(dat.distinct_invariants); + mps_context_free(dat.solver); +} static int compare_result(const void *a_, const void *b_) { @@ -272,23 +348,57 @@ void enumerate(group_t *group, mat *matrices, mpq_t m, mpq_t t) mat_workspace_clear(ws); } -void output_invariants(group_t *group, mat *matrices, mpq_t s, mpq_t q, mps_context *solver) + +double compute_max_slope(struct global_data dat, mpq_t t, mpq_t m) { - mpq_t tr, trinv; - char buf[100]; - double evs[3]; +// mpq_set_ui(t, ttick, 100); +// mpq_set_ui(m, mtick, 100); // 414/1000 ~ sqrt(2)-1 <-> s=1 +// s = (1-mpq_get_d(m)*mpq_get_d(m))/(2*mpq_get_d(m)); + + int n = 0; + int nmax = dat.n; + int nuniq; + double max_slope; int retval; + double evs[3]; - mpq_inits(tr, trinv, NULL); + group_t *group = dat.group; + mat *matrices = dat.matrices; + struct result *invariants = dat.invariants; + struct result **distinct_invariants = dat.distinct_invariants; + mps_context *solver = dat.solver; - for(int i = 0; i < group->size; i++) { +// DEBUG("Compute matrices\n"); + enumerate(group, matrices, m, t); + +// DEBUG("Compute traces\n"); + n = 0; + for(int i = 0; i < nmax; i++) { if(group->elements[i].length % 2 != 0 || !group->elements[i].inverse) continue; - mat_trace(tr, matrices[i]); - mat_trace(trinv, matrices[group->elements[i].inverse->id]); + mat_trace(invariants[i].tr, matrices[i]); + mat_trace(invariants[i].trinv, matrices[group->elements[i].inverse->id]); - retval = solve_characteristic_polynomial(solver, tr, trinv, evs); + distinct_invariants[n++] = &invariants[i]; + } + +// DEBUG("Get unique traces\n"); + + qsort(distinct_invariants, n, sizeof(struct result*), compare_result); + + nuniq = 0; + for(int i = 0; i < n; i++) { + if(i == 0 || compare_result(&distinct_invariants[i], &distinct_invariants[nuniq-1]) != 0) + distinct_invariants[nuniq++] = distinct_invariants[i]; + } + + max_slope = 0; + int max_slope_index; + +// DEBUG("Solve characteristic polynomials\n"); + for(int i = 0; i < nuniq; i++) { + retval = solve_characteristic_polynomial(solver, distinct_invariants[i]->tr, distinct_invariants[i]->trinv, evs); if(retval == 1) { fprintf(stderr, "Error! Could not solve polynomial.\n"); continue; @@ -303,209 +413,230 @@ void output_invariants(group_t *group, mat *matrices, mpq_t s, mpq_t q, mps_cont if(fabs(evs[0]) < fabs(evs[1])) SWAP(double, evs[0], evs[1]); - gmp_printf("%d %d %s %Qd %Qd %f %f\n", i, group->elements[i].length, print_word(&group->elements[i], buf), tr, trinv, log(evs[0]), -log(evs[2])); + double x = log(fabs(evs[0])); + double y = -log(fabs(evs[2])); + + if(y/x > max_slope && (x > 0.1 || y > 0.1)) { + max_slope_index = distinct_invariants[i] - invariants; + max_slope = y/x; + } + +// gmp_printf("%Qd %Qd %f %f %f\n", distinct_invariants[i]->tr, distinct_invariants[i]->trinv, x, y, y/x); } - mpq_clears(tr, trinv, NULL); + return max_slope; } -/* -double max_slope(groupelement_t *group, mat *matrices, mpq_t s, mpq_t t, int *index) +void write_results_and_end(struct job *jobs, const char *outfile) { - double max = 0; - double slope; + DEBUG("writing output and shutting down\n"); - mpq_t tr, trinv; - char buf[100]; + FILE *f = fopen(outfile, "w"); + for(int i = 0; i < NJOBS; i++) + fprintf(f, "%d/%d %d/%d %.10f %.10f %.10f %.3f\n", + jobs[i].tparam, TDIV, jobs[i].mparam, MDIV, + (double)jobs[i].tparam/TDIV, (double)jobs[i].mparam/MDIV, jobs[i].max_slope, + jobs[i].time); + fclose(f); - mpq_inits(tr, trinv, NULL); + for(int i = 1; i < world_size; i++) + MPI_Send(NULL, 0, job_datatype, i, JOB_SHUTDOWN, MPI_COMM_WORLD); - for(int i = 0; i < MAX_ELEMENTS; i++) { - if(group[i].length % 2 != 0 || !group[i].inverse) - continue; +} - mat_trace(tr, matrices[i]); - mat_trace(trinv, matrices[group[i].inverse->id]); +void run_master_process(int nmax, const char *restart, const char *outfile) +{ + int total_jobs = NJOBS; + int completed = 0; + int queue_jobs = MIN(total_jobs, 2*world_size); + struct job current_job; + MPI_Status status; + FILE *f; + int continuing = 1; + int restartf; + struct job *alljobs; + struct job *current; - slope = log(mpq_get_d(trinv))/log(mpq_get_d(tr)); - if(slope > max) - { - *index = i; - max = slope; + restartf = open(restart, O_RDWR); + if(restartf == -1 && errno == ENOENT) { + restartf = open(restart, O_RDWR | O_CREAT, 0666); + continuing = 0; + } + if(restartf == -1) { + DEBUG("error opening restart file: %s\n", strerror(errno)); + exit(1); + } + ftruncate(restartf, total_jobs*sizeof(struct job)); + alljobs = (struct job*) mmap(0, total_jobs*sizeof(struct job), PROT_READ | PROT_WRITE, MAP_SHARED, restartf, 0); + if(alljobs == MAP_FAILED) { + DEBUG("error mapping restart file: %s\n", strerror(errno)); + exit(1); + } + + if(continuing) { + for(int i = 0; i < total_jobs; i++) + if(alljobs[i].done) + completed++; + } else { + for(int tparam = TFROM; tparam <= TTO; tparam++) { + for(int mparam = MFROM; mparam <= MTO; mparam++) { + alljobs[JOBNR(tparam,mparam)].tparam = tparam; + alljobs[JOBNR(tparam,mparam)].mparam = mparam; + alljobs[JOBNR(tparam,mparam)].done = 0; + } } } - mpq_clears(tr, trinv, NULL); + fsync(restartf); - return max; + if(continuing) { + DEBUG("continuing from restart file, %d/%d jobs completed, %d nodes\n", completed, total_jobs, world_size); + } else { + DEBUG("starting from scratch, %d jobs, %d nodes\n", total_jobs, world_size); + } + + if(completed >= total_jobs) + { + write_results_and_end(alljobs, outfile); + goto cleanup; + } + + // assign initial jobs + current = alljobs-1; + for(int i = 0; i < 2*world_size; i++) { + do { + current++; + } while(current < alljobs + total_jobs && current->done); + if(current >= alljobs + total_jobs) // all jobs are assigned + break; + MPI_Send(current, 1, job_datatype, 1 + i%(world_size-1), JOB_ORDER, MPI_COMM_WORLD); + } + + while(1) { + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); + if(status.MPI_TAG == JOB_RESULT) { + MPI_Recv(¤t_job, 1, job_datatype, MPI_ANY_SOURCE, JOB_RESULT, MPI_COMM_WORLD, &status); + completed++; + + DEBUG("job (%d,%d) completed by instance %d in %f seconds, result = %.3f, %d/%d done\n", + current_job.tparam, current_job.mparam, + status.MPI_SOURCE, current_job.time, current_job.max_slope, completed, total_jobs); + + int nr = JOBNR(current_job.tparam, current_job.mparam); + memcpy(&alljobs[nr], ¤t_job, sizeof(struct job)); + alljobs[nr].done = 1; + + if(completed % FLUSH_INTERVAL == 0) + fsync(restartf); + + // find the next unassigned job + do { + current++; + } while(current < alljobs + total_jobs && current->done); + + if(current < alljobs + total_jobs) { + MPI_Send(current, 1, job_datatype, status.MPI_SOURCE, JOB_ORDER, MPI_COMM_WORLD); + } + + if(completed >= total_jobs) { + write_results_and_end(alljobs, outfile); + goto cleanup; + } + } + } + +cleanup: + + munmap(alljobs, total_jobs*sizeof(struct job)); + close(restartf); } -*/ int main(int argc, char *argv[]) { - mpq_t m, t, tmp; - double s; - mat *matrices; - group_t *group; - int index; - mps_context *solver; - int acc = 100; - int n, nuniq, nmax; - int retval; - double evs[3]; - double max_slope; - char buf[100]; - char buf2[100]; + int name_len; - struct result *invariants; - struct result **distinct_invariants; + MPI_Status status; + + mpq_t m, t; + double s; + struct job current_job; + int nmax; + double max_slope; + struct global_data dat; + double jobtime; clock_gettime(CLOCK_REALTIME, &starttime); + if(argc < 4) { + fprintf(stderr, "Usage: mpirun -n --hostfile %s \n", argv[0]); + return 0; + } nmax = atoi(argv[1]); - DEBUG("Allocate\n"); + MPI_Init(NULL, NULL); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + MPI_Get_processor_name(processor_name, &name_len); - mpq_inits(m, t, tmp, NULL); - matrices = malloc(nmax*sizeof(mat)); - for(int i = 0; i < nmax; i++) - mat_init(matrices[i], 3); - invariants = malloc(nmax*sizeof(struct result)); - distinct_invariants = malloc(nmax*sizeof(struct result)); - for(int i = 0; i < nmax; i++) { - mpq_init(invariants[i].tr); - mpq_init(invariants[i].trinv); - distinct_invariants[i] = &invariants[i]; +// DEBUG("instance %d/%d started on %s\n", world_rank, world_size, processor_name); + + int blocklengths[2] = {3, 2}; + MPI_Datatype types[2] = {MPI_INT, MPI_DOUBLE}; + MPI_Aint displacements[2] = {(size_t)&((struct job*)0)->tparam, (size_t)&((struct job*)0)->max_slope}; + MPI_Type_create_struct(2, blocklengths, displacements, types, &job_datatype); + MPI_Type_commit(&job_datatype); + + if(world_rank == 0) { // master processor + run_master_process(nmax, argv[2], argv[3]); + MPI_Finalize(); + return 0; } - solver = mps_context_new(); - mps_context_set_output_prec(solver, 20); // relative precision - mps_context_set_output_goal(solver, MPS_OUTPUT_GOAL_APPROXIMATE); +// DEBUG("Allocate & generate group\n"); + mpq_inits(m, t, NULL); + dat = allocate_global_data(nmax); + dat.group = coxeter_init_triangle(4, 4, 4, nmax); - /* - DEBUG("Approximate parameters\n"); +// fprintf(stderr, "max word length = %d\n", dat.group->elements[nmax-1].length); - // get approximate s and q values - sapprox = atof(argv[2]); - tapprox = atof(argv[3]); - tqfactor = pow((sapprox*sapprox-sapprox+1)*(sapprox*sapprox-sapprox+1)*(sapprox*sapprox+1), 1/6.0); - qapprox = tapprox/tqfactor; - - for(int i = 0; ; i++) { - continued_fraction_approximation(tmp, sapprox, i); - if(fabs(mpq_get_d(t)-sapprox) < 1e-10 - || (mpz_cmpabs_ui(mpq_numref(tmp),acc) > 0 && mpz_cmpabs_ui(mpq_denref(tmp),acc) > 0)) + while(1) { + MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); +// MPI_Recv(¤t_job, 1, job_datatype, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); + if(status.MPI_TAG == JOB_SHUTDOWN) { +// DEBUG("instance %d shutting down\n", world_rank); break; - mpq_set(s, tmp); - } - mpq_canonicalize(s); + } + else if(status.MPI_TAG == JOB_ORDER) { + MPI_Recv(¤t_job, 1, job_datatype, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); + DEBUG("instance %d starting order (%d,%d)\n", world_rank, current_job.tparam, current_job.mparam); - for(int i = 0; ; i++) { - continued_fraction_approximation(tmp, qapprox, i); - if(fabs(mpq_get_d(t)-qapprox) < 1e-10 - || (mpz_cmpabs_ui(mpq_numref(tmp),acc) > 0 && mpz_cmpabs_ui(mpq_denref(tmp),acc) > 0)) - break; - mpq_set(q, tmp); - } - mpq_canonicalize(q); + jobtime = -MPI_Wtime(); - tqfactor = pow((mpq_get_d(s)*mpq_get_d(s)-mpq_get_d(s)+1)*(mpq_get_d(s)*mpq_get_d(s)-mpq_get_d(s)+1)*(mpq_get_d(s)*mpq_get_d(s)+1), 1/6.0); - -#ifdef OUTPUT_POINTS - gmp_fprintf(stdout, "\"s = %Qd = %.3f, q = %Qd, t = %.3f\"\n", s, mpq_get_d(s), q, mpq_get_d(q)*tqfactor); -#endif - */ - - // group - DEBUG("Generate group\n"); - group = coxeter_init_triangle(4, 4, 4, nmax); - - fprintf(stderr, "max word length = %d\n", group->elements[nmax-1].length); - - for(int ttick = 45; ttick <= 65; ttick++) { - for(int mtick = 45; mtick < 65; mtick++) { - mpq_set_ui(t, ttick, 100); - mpq_set_ui(m, mtick, 100); // 414/1000 ~ sqrt(2)-1 <-> s=1 + // do the actual work + mpq_set_ui(t, current_job.tparam, TDIV); + mpq_set_ui(m, current_job.mparam, MDIV); s = (1-mpq_get_d(m)*mpq_get_d(m))/(2*mpq_get_d(m)); - DEBUG("Compute matrices\n"); - enumerate(group, matrices, m, t); + max_slope = compute_max_slope(dat, t, m); - n = 0; - DEBUG("Compute traces\n"); - for(int i = 0; i < nmax; i++) { - if(group->elements[i].length % 2 != 0 || !group->elements[i].inverse) - continue; + jobtime += MPI_Wtime(); - mat_trace(invariants[i].tr, matrices[i]); - mat_trace(invariants[i].trinv, matrices[group->elements[i].inverse->id]); +// fprintf(stdout, "%.5f %.5f %.5f %f\n", +// mpq_get_d(t), mpq_get_d(m), s, max_slope); + current_job.max_slope = max_slope; + current_job.time = jobtime; - distinct_invariants[n++] = &invariants[i]; + DEBUG("instance %d finished order (%d,%d) in %f seconds\n", world_rank, current_job.tparam, current_job.mparam, jobtime); -// gmp_printf("%Qd %Qd %d %s\n", invariants[i].tr, invariants[i].trinv, i, print_word(&group->elements[i], buf)); - } - - DEBUG("Get unique traces\n"); - - qsort(distinct_invariants, n, sizeof(struct result*), compare_result); - - nuniq = 0; - for(int i = 0; i < n; i++) { - if(i == 0 || compare_result(&distinct_invariants[i], &distinct_invariants[nuniq-1]) != 0) - distinct_invariants[nuniq++] = distinct_invariants[i]; - } - - max_slope = 0; - int max_slope_index; - - DEBUG("Solve characteristic polynomials\n"); - for(int i = 0; i < nuniq; i++) { - retval = solve_characteristic_polynomial(solver, distinct_invariants[i]->tr, distinct_invariants[i]->trinv, evs); - if(retval == 1) { - fprintf(stderr, "Error! Could not solve polynomial.\n"); - continue; - } else if(retval == 2) { - continue; - } - - if(fabs(evs[0]) < fabs(evs[1])) - SWAP(double, evs[0], evs[1]); - if(fabs(evs[1]) < fabs(evs[2])) - SWAP(double, evs[1], evs[2]); - if(fabs(evs[0]) < fabs(evs[1])) - SWAP(double, evs[0], evs[1]); - - double x = log(fabs(evs[0])); - double y = -log(fabs(evs[2])); - - if(y/x > max_slope && (x > 0.1 || y > 0.1)) { - max_slope_index = distinct_invariants[i] - invariants; - max_slope = y/x; - } - -#ifdef OUTPUT_POINTS - gmp_printf("%Qd %Qd %f %f %f\n", distinct_invariants[i]->tr, distinct_invariants[i]->trinv, x, y, y/x); -#endif - } - -#ifdef OUTPUT_SUMMARY -// fprintf(stdout, "%.5f %.5f %.5f %f %s\n", mpq_get_d(t), mpq_get_d(m), s, max_slope, print_word(&group->elements[max_slope_index], buf)); - fprintf(stdout, "%.5f %.5f %.5f %f %s\n", mpq_get_d(t), mpq_get_d(m), s, max_slope, print_word(&group->elements[max_slope_index], buf)); -#endif + MPI_Send(¤t_job, 1, job_datatype, 0, JOB_RESULT, MPI_COMM_WORLD); } } - DEBUG("Clean up\n"); - for(int i = 0; i < nmax; i++) { - mpq_clear(invariants[i].tr); - mpq_clear(invariants[i].trinv); - } - free(invariants); - free(distinct_invariants); - for(int i = 0; i < nmax; i++) - mat_clear(matrices[i]); - free(matrices); - coxeter_clear(group); - mpq_clears(m, t, tmp, NULL); - mps_context_free(solver); +// DEBUG("Clean up\n"); + coxeter_clear(dat.group); + free_global_data(dat); + mpq_clears(m, t, NULL); + + MPI_Type_free(&job_datatype); + MPI_Finalize(); }