#include "coxeter.h" //#include "linalg.h" #include "mat.h" //#include #include #include #include #include #include #include #include #include #define MIN(x,y) ((x)<(y)?(x):(y)) #define SWAP(t,x,y) do { t _tmp = (x); (x) = (y); (y) = _tmp; } while (0); #define DEBUG(msg, ...) do { print_time(); fprintf(stderr, msg, ##__VA_ARGS__); } while (0); //#define DEBUG(msg, ...) #define TDIV 10 #define TFROM 1 #define TTO 9 #define MDIV 10 #define MFROM 1 #define MTO 9 #define JOBNR(t,m) (((t)-TFROM) + ((m)-MFROM)*(TTO-TFROM+1)) #define NJOBS ((TTO-TFROM+1)*(MTO-MFROM+1)) #define FLUSH_INTERVAL 100 enum message_tag { JOB_ORDER, JOB_RESULT, JOB_SHUTDOWN, }; struct job { int tparam, mparam; int done; double max_slope; double time; }; struct result { mpq_t tr; mpq_t trinv; }; struct global_data { int n; group_t *group; mat* matrices; struct result *invariants; struct result **distinct_invariants; mps_context *solver; }; struct timespec starttime; char processor_name[MPI_MAX_PROCESSOR_NAME]; int world_rank; int world_size; MPI_Datatype job_datatype; void print_time() { double diff; struct timespec current; clock_gettime(CLOCK_REALTIME, ¤t); diff = (current.tv_sec - starttime.tv_sec) + (current.tv_nsec - starttime.tv_nsec)*1e-9; fprintf(stderr, "[%04d %.3f] ", world_rank, diff); } static struct global_data allocate_global_data(int n) { struct global_data result; result.n = n; result.matrices = malloc(n*sizeof(mat)); for(int i = 0; i < n; i++) mat_init(result.matrices[i], 3); result.invariants = malloc(n*sizeof(struct result)); result.distinct_invariants = malloc(n*sizeof(struct result*)); for(int i = 0; i < n; i++) { mpq_init(result.invariants[i].tr); mpq_init(result.invariants[i].trinv); result.distinct_invariants[i] = &result.invariants[i]; } result.solver = mps_context_new(); mps_context_set_output_prec(result.solver, 20); // relative precision mps_context_set_output_goal(result.solver, MPS_OUTPUT_GOAL_APPROXIMATE); return result; } void free_global_data(struct global_data dat) { for(int i = 0; i < dat.n; i++) mat_clear(dat.matrices[i]); free(dat.matrices); for(int i = 0; i < dat.n; i++) { mpq_clear(dat.invariants[i].tr); mpq_clear(dat.invariants[i].trinv); } free(dat.invariants); free(dat.distinct_invariants); mps_context_free(dat.solver); } static int compare_result(const void *a_, const void *b_) { int d = 0; struct result **a = (struct result **)a_; struct result **b = (struct result **)b_; d = mpq_cmp((*a)->tr,(*b)->tr); if(d == 0) d = mpq_cmp((*a)->trinv, (*b)->trinv); return d; } int solve_characteristic_polynomial(mps_context *solv, mpq_t tr, mpq_t trinv, double *eigenvalues) { mpq_t coeff1, coeff2, zero; cplx_t *roots; double radii[3]; double *radii_p[3]; mps_monomial_poly *poly; mps_boolean errors; int result = 0; mpq_inits(coeff1, coeff2, zero, NULL); mpq_set(coeff1, trinv); mpq_sub(coeff2, zero, tr); poly = mps_monomial_poly_new(solv, 3); mps_monomial_poly_set_coefficient_int(solv, poly, 0, -1, 0); mps_monomial_poly_set_coefficient_q(solv, poly, 1, coeff1, zero); mps_monomial_poly_set_coefficient_q(solv, poly, 2, coeff2, zero); mps_monomial_poly_set_coefficient_int(solv, poly, 3, 1, 0); mps_context_set_input_poly(solv, (mps_polynomial*)poly); mps_mpsolve(solv); roots = cplx_valloc(3); for(int i = 0; i < 3; i++) radii_p[i] = &(radii[i]); mps_context_get_roots_d(solv, &roots, radii_p); errors = mps_context_has_errors(solv); if(errors) { result = 1; } else { for(int i = 0; i < 3; i++) { eigenvalues[i] = cplx_Re(roots[i]); if(fabs(cplx_Im(roots[i])) > radii[i]) // non-real root result = 2; } } cplx_vfree(roots); mpq_clears(coeff1, coeff2, zero, NULL); return result; } void continued_fraction_approximation(mpq_t out, double in, int level) { mpq_t tmp; if(in < 0) { mpq_init(tmp); mpq_set_ui(tmp, 0, 1); continued_fraction_approximation(out, -in, level); mpq_sub(out, tmp, out); mpq_clear(tmp); return; } if(level == 0) { mpq_set_si(out, (signed long int)round(in), 1); // floor(in) } else { continued_fraction_approximation(out, 1/(in - floor(in)), level - 1); mpq_init(tmp); mpq_set_ui(tmp, 1, 1); mpq_div(out, tmp, out); // out -> 1/out mpq_set_si(tmp, (signed long int)in, 1); // floor(in) mpq_add(out, out, tmp); mpq_clear(tmp); } } void quartic(mpq_t out, mpq_t in, int a, int b, int c, int d, int e) { mpq_t tmp; mpq_init(tmp); mpq_set_si(out, a, 1); mpq_mul(out, out, in); mpq_set_si(tmp, b, 1); mpq_add(out, out, tmp); mpq_mul(out, out, in); mpq_set_si(tmp, c, 1); mpq_add(out, out, tmp); mpq_mul(out, out, in); mpq_set_si(tmp, d, 1); mpq_add(out, out, tmp); mpq_mul(out, out, in); mpq_set_si(tmp, e, 1); mpq_add(out, out, tmp); mpq_clear(tmp); } // this version is only for the (4,4,4) group void initialize_triangle_generators(mat_workspace *ws, mat *gen, mpq_t m, mpq_t t) { mpq_t s,sinv,q,x,y; mpq_t zero, one, two; mpq_t tmp; mpq_inits(s,sinv,q,x,y,zero,one,two,tmp,NULL); mpq_set_ui(zero, 0, 1); mpq_set_ui(one, 1, 1); mpq_set_ui(two, 2, 1); // s = (1-m^2)/2m mpq_mul(s, m, m); mpq_sub(s, one, s); mpq_div(s, s, m); mpq_div(s, s, two); mpq_div(sinv, one, s); // q = (1+m^2)/(1-m^2) = 2/(1-m^2) - 1 mpq_mul(q, m, m); mpq_sub(q, one, q); mpq_div(q, two, q); mpq_sub(q, q, one); // x = -tq, y = -q/t mpq_mul(x, q, t); mpq_sub(x, zero, x); mpq_div(y, q, t); mpq_sub(y, zero, y); // q^2 = xy = 1 + 1/s^2 // [ -s s*y 0] // [ -s*x s*x*y - 1/s 0] // [ -s*y s*y^2 - x 1] LOOP(i,3) { mat_zero(gen[i]); mpq_sub(tmp, zero, s); mat_set(gen[i%3], i%3, i%3, tmp); mpq_mul(tmp, s, y); mat_set(gen[i%3], i%3, (i+1)%3, tmp); mpq_mul(tmp, s, x); mpq_sub(tmp, zero, tmp); mat_set(gen[i%3], (i+1)%3, i%3, tmp); mpq_mul(tmp, s, x); mpq_mul(tmp, tmp, y); mpq_sub(tmp, tmp, sinv); mat_set(gen[i%3], (i+1)%3, (i+1)%3, tmp); mpq_mul(tmp, s, y); mpq_sub(tmp, zero, tmp); mat_set(gen[i%3], (i+2)%3, i%3, tmp); mpq_mul(tmp, s, y); mpq_mul(tmp, tmp, y); mpq_sub(tmp, tmp, x); mat_set(gen[i%3], (i+2)%3, (i+1)%3, tmp); mat_set(gen[i%3], (i+2)%3, (i+2)%3, one); } LOOP(i,3) mat_pseudoinverse(ws, gen[i+3], gen[i]); // debug output /* gmp_printf("m = %Qd, s = %Qd, t = %Qd, q = %Qd, x = %Qd, y = %Qd\n", m, s, t, q, x, y); mat_print(gen[0]); mat_print(gen[1]); mat_print(gen[2]); */ mpq_inits(s,sinv,q,x,y,zero,one,two,tmp,NULL); } char *print_word(groupelement_t *g, char *str) { int i = g->length - 1; str[g->length] = 0; while(g->parent) { str[i--] = 'a' + g->letter; g = g->parent; } return str; } void enumerate(group_t *group, mat *matrices, mpq_t m, mpq_t t) { mat_workspace *ws; mat tmp; mat gen[6]; char buf[100], buf2[100], buf3[100]; // allocate stuff ws = mat_workspace_init(3); for(int i = 0; i < 6; i++) mat_init(gen[i], 3); mat_init(tmp, 3); initialize_triangle_generators(ws, gen, m, t); mat_identity(matrices[0]); for(int i = 1; i < group->size; i++) { if(group->elements[i].length % 2 != 0) continue; if(!group->elements[i].inverse) continue; int parent = group->elements[i].parent->id; int grandparent = group->elements[i].parent->parent->id; int letter; if(group->elements[parent].letter == 1 && group->elements[i].letter == 2) letter = 0; // p = bc else if(group->elements[parent].letter == 2 && group->elements[i].letter == 0) letter = 1; // q = ca else if(group->elements[parent].letter == 0 && group->elements[i].letter == 1) letter = 2; // r = ab if(group->elements[parent].letter == 2 && group->elements[i].letter == 1) letter = 3; // p^{-1} = cb else if(group->elements[parent].letter == 0 && group->elements[i].letter == 2) letter = 4; // q^{-1} = ac else if(group->elements[parent].letter == 1 && group->elements[i].letter == 0) letter = 5; // r^{-1} = ba mat_multiply(ws, matrices[i], matrices[grandparent], gen[letter]); } // free stuff for(int i = 0; i < 6; i++) mat_clear(gen[i]); mat_clear(tmp); mat_workspace_clear(ws); } double compute_max_slope(struct global_data dat, mpq_t t, mpq_t m) { // mpq_set_ui(t, ttick, 100); // mpq_set_ui(m, mtick, 100); // 414/1000 ~ sqrt(2)-1 <-> s=1 // s = (1-mpq_get_d(m)*mpq_get_d(m))/(2*mpq_get_d(m)); int n = 0; int nmax = dat.n; int nuniq; double max_slope; int retval; double evs[3]; group_t *group = dat.group; mat *matrices = dat.matrices; struct result *invariants = dat.invariants; struct result **distinct_invariants = dat.distinct_invariants; mps_context *solver = dat.solver; // DEBUG("Compute matrices\n"); enumerate(group, matrices, m, t); // DEBUG("Compute traces\n"); n = 0; for(int i = 0; i < nmax; i++) { if(group->elements[i].length % 2 != 0 || !group->elements[i].inverse) continue; mat_trace(invariants[i].tr, matrices[i]); mat_trace(invariants[i].trinv, matrices[group->elements[i].inverse->id]); distinct_invariants[n++] = &invariants[i]; } // DEBUG("Get unique traces\n"); qsort(distinct_invariants, n, sizeof(struct result*), compare_result); nuniq = 0; for(int i = 0; i < n; i++) { if(i == 0 || compare_result(&distinct_invariants[i], &distinct_invariants[nuniq-1]) != 0) distinct_invariants[nuniq++] = distinct_invariants[i]; } max_slope = 0; int max_slope_index; // DEBUG("Solve characteristic polynomials\n"); for(int i = 0; i < nuniq; i++) { retval = solve_characteristic_polynomial(solver, distinct_invariants[i]->tr, distinct_invariants[i]->trinv, evs); if(retval == 1) { fprintf(stderr, "Error! Could not solve polynomial.\n"); continue; } else if(retval == 2) { continue; } if(fabs(evs[0]) < fabs(evs[1])) SWAP(double, evs[0], evs[1]); if(fabs(evs[1]) < fabs(evs[2])) SWAP(double, evs[1], evs[2]); if(fabs(evs[0]) < fabs(evs[1])) SWAP(double, evs[0], evs[1]); double x = log(fabs(evs[0])); double y = -log(fabs(evs[2])); if(y/x > max_slope && (x > 0.1 || y > 0.1)) { max_slope_index = distinct_invariants[i] - invariants; max_slope = y/x; } // gmp_printf("%Qd %Qd %f %f %f\n", distinct_invariants[i]->tr, distinct_invariants[i]->trinv, x, y, y/x); } return max_slope; } void write_results_and_end(struct job *jobs, const char *outfile) { DEBUG("writing output and shutting down\n"); FILE *f = fopen(outfile, "w"); for(int i = 0; i < NJOBS; i++) fprintf(f, "%d/%d %d/%d %.10f %.10f %.10f %.3f\n", jobs[i].tparam, TDIV, jobs[i].mparam, MDIV, (double)jobs[i].tparam/TDIV, (double)jobs[i].mparam/MDIV, jobs[i].max_slope, jobs[i].time); fclose(f); for(int i = 1; i < world_size; i++) MPI_Send(NULL, 0, job_datatype, i, JOB_SHUTDOWN, MPI_COMM_WORLD); } void run_master_process(int nmax, const char *restart, const char *outfile) { int total_jobs = NJOBS; int completed = 0; int queue_jobs = MIN(total_jobs, 2*world_size); struct job current_job; MPI_Status status; FILE *f; int continuing = 1; int restartf; struct job *alljobs; struct job *current; restartf = open(restart, O_RDWR); if(restartf == -1 && errno == ENOENT) { restartf = open(restart, O_RDWR | O_CREAT, 0666); continuing = 0; } if(restartf == -1) { DEBUG("error opening restart file: %s\n", strerror(errno)); exit(1); } ftruncate(restartf, total_jobs*sizeof(struct job)); alljobs = (struct job*) mmap(0, total_jobs*sizeof(struct job), PROT_READ | PROT_WRITE, MAP_SHARED, restartf, 0); if(alljobs == MAP_FAILED) { DEBUG("error mapping restart file: %s\n", strerror(errno)); exit(1); } if(continuing) { for(int i = 0; i < total_jobs; i++) if(alljobs[i].done) completed++; } else { for(int tparam = TFROM; tparam <= TTO; tparam++) { for(int mparam = MFROM; mparam <= MTO; mparam++) { alljobs[JOBNR(tparam,mparam)].tparam = tparam; alljobs[JOBNR(tparam,mparam)].mparam = mparam; alljobs[JOBNR(tparam,mparam)].done = 0; } } } fsync(restartf); if(continuing) { DEBUG("continuing from restart file, %d/%d jobs completed, %d nodes\n", completed, total_jobs, world_size); } else { DEBUG("starting from scratch, %d jobs, %d nodes\n", total_jobs, world_size); } if(completed >= total_jobs) { write_results_and_end(alljobs, outfile); goto cleanup; } // assign initial jobs current = alljobs-1; for(int i = 0; i < 2*world_size; i++) { do { current++; } while(current < alljobs + total_jobs && current->done); if(current >= alljobs + total_jobs) // all jobs are assigned break; MPI_Send(current, 1, job_datatype, 1 + i%(world_size-1), JOB_ORDER, MPI_COMM_WORLD); } while(1) { MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); if(status.MPI_TAG == JOB_RESULT) { MPI_Recv(¤t_job, 1, job_datatype, MPI_ANY_SOURCE, JOB_RESULT, MPI_COMM_WORLD, &status); completed++; DEBUG("job (%d,%d) completed by instance %d in %f seconds, result = %.3f, %d/%d done\n", current_job.tparam, current_job.mparam, status.MPI_SOURCE, current_job.time, current_job.max_slope, completed, total_jobs); int nr = JOBNR(current_job.tparam, current_job.mparam); memcpy(&alljobs[nr], ¤t_job, sizeof(struct job)); alljobs[nr].done = 1; if(completed % FLUSH_INTERVAL == 0) fsync(restartf); // find the next unassigned job do { current++; } while(current < alljobs + total_jobs && current->done); if(current < alljobs + total_jobs) { MPI_Send(current, 1, job_datatype, status.MPI_SOURCE, JOB_ORDER, MPI_COMM_WORLD); } if(completed >= total_jobs) { write_results_and_end(alljobs, outfile); goto cleanup; } } } cleanup: munmap(alljobs, total_jobs*sizeof(struct job)); close(restartf); } int main(int argc, char *argv[]) { int name_len; MPI_Status status; mpq_t m, t; double s; struct job current_job; int nmax; double max_slope; struct global_data dat; double jobtime; clock_gettime(CLOCK_REALTIME, &starttime); if(argc < 4) { fprintf(stderr, "Usage: mpirun -n --hostfile %s \n", argv[0]); return 0; } nmax = atoi(argv[1]); MPI_Init(NULL, NULL); MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Get_processor_name(processor_name, &name_len); // DEBUG("instance %d/%d started on %s\n", world_rank, world_size, processor_name); int blocklengths[2] = {3, 2}; MPI_Datatype types[2] = {MPI_INT, MPI_DOUBLE}; MPI_Aint displacements[2] = {(size_t)&((struct job*)0)->tparam, (size_t)&((struct job*)0)->max_slope}; MPI_Type_create_struct(2, blocklengths, displacements, types, &job_datatype); MPI_Type_commit(&job_datatype); if(world_rank == 0) { // master processor run_master_process(nmax, argv[2], argv[3]); MPI_Finalize(); return 0; } // DEBUG("Allocate & generate group\n"); mpq_inits(m, t, NULL); dat = allocate_global_data(nmax); dat.group = coxeter_init_triangle(4, 4, 4, nmax); // fprintf(stderr, "max word length = %d\n", dat.group->elements[nmax-1].length); while(1) { MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); // MPI_Recv(¤t_job, 1, job_datatype, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); if(status.MPI_TAG == JOB_SHUTDOWN) { // DEBUG("instance %d shutting down\n", world_rank); break; } else if(status.MPI_TAG == JOB_ORDER) { MPI_Recv(¤t_job, 1, job_datatype, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); DEBUG("instance %d starting order (%d,%d)\n", world_rank, current_job.tparam, current_job.mparam); jobtime = -MPI_Wtime(); // do the actual work mpq_set_ui(t, current_job.tparam, TDIV); mpq_set_ui(m, current_job.mparam, MDIV); s = (1-mpq_get_d(m)*mpq_get_d(m))/(2*mpq_get_d(m)); max_slope = compute_max_slope(dat, t, m); jobtime += MPI_Wtime(); // fprintf(stdout, "%.5f %.5f %.5f %f\n", // mpq_get_d(t), mpq_get_d(m), s, max_slope); current_job.max_slope = max_slope; current_job.time = jobtime; DEBUG("instance %d finished order (%d,%d) in %f seconds\n", world_rank, current_job.tparam, current_job.mparam, jobtime); MPI_Send(¤t_job, 1, job_datatype, 0, JOB_RESULT, MPI_COMM_WORLD); } } // DEBUG("Clean up\n"); coxeter_clear(dat.group); free_global_data(dat); mpq_clears(m, t, NULL); MPI_Type_free(&job_datatype); MPI_Finalize(); }