create MPI library; not very optimized yet

parent 2735281300
commit d2d91d68b3

.gitignore (vendored): 2 changed lines
@@ -12,3 +12,5 @@ billiard_words
 *.png
 *.hi
 gmon.out
+restart
+core

Makefile: 25 changed lines

@@ -1,14 +1,14 @@
-HEADERS=linalg.h mat.h coxeter.h enumerate_triangle_group.h
+HEADERS=linalg.h mat.h coxeter.h enumerate_triangle_group.h parallel.h
 
 #SPECIAL_OPTIONS=-O0 -g -D_DEBUG
-SPECIAL_OPTIONS=-O3 -pg -g -funroll-loops -fno-inline
+#SPECIAL_OPTIONS=-O3 -pg -g -funroll-loops -fno-inline
-#SPECIAL_OPTIONS=-O3 -flto -funroll-loops -Winline
+SPECIAL_OPTIONS=-O3 -flto -funroll-loops -Winline
 #SPECIAL_OPTIONS=-O3 -flto -funroll-loops -Winline -mavx512f -mavx512cd -mavx512er -mavx512pf # KNL
 #SPECIAL_OPTIONS=
 
 OPTIONS=-I../mps/include -L../mps/lib -pthread -m64 -std=gnu99 -D_GNU_SOURCE $(SPECIAL_OPTIONS)
 
-all: singular_values special_element singular_values_mpi convert billiard_words
+all: singular_values special_element convert billiard_words
 
 convert: convert.hs
 	ghc --make -dynamic convert.hs
@@ -16,11 +16,11 @@ convert: convert.hs
 billiard_words: billiard_words.hs
 	ghc --make -dynamic billiard_words.hs
 
-singular_values: singular_values.o coxeter.o mat.o enumerate_triangle_group.o
-	gcc $(OPTIONS) -o singular_values coxeter.o singular_values.o mat.o enumerate_triangle_group.o -lm -lgmp -lmps
+singular_values: singular_values.o coxeter.o mat.o enumerate_triangle_group.o parallel.o
+	mpicc $(OPTIONS) -o singular_values coxeter.o singular_values.o mat.o enumerate_triangle_group.o parallel.o -lm -lgmp -lmps
 
-singular_values_mpi: singular_values_mpi.o coxeter.o mat.o
-	mpicc $(OPTIONS) -o singular_values_mpi coxeter.o singular_values_mpi.o mat.o -lm -lgmp -lmps
+#singular_values_mpi: singular_values_mpi.o coxeter.o mat.o
+# 	mpicc $(OPTIONS) -o singular_values_mpi coxeter.o singular_values_mpi.o mat.o -lm -lgmp -lmps
 
 special_element: special_element.o coxeter.o linalg.o mat.o enumerate_triangle_group.o
 	gcc $(OPTIONS) -o special_element coxeter.o linalg.o special_element.o mat.o enumerate_triangle_group.o -lm -lgmp -lmps -lgsl -lcblas
@@ -28,8 +28,8 @@ special_element: special_element.o coxeter.o linalg.o mat.o enumerate_triangle_g
 singular_values.o: singular_values.c $(HEADERS)
 	gcc $(OPTIONS) -c singular_values.c
 
-singular_values_mpi.o: singular_values_mpi.c $(HEADERS)
-	mpicc $(OPTIONS) -c singular_values_mpi.c
+#singular_values_mpi.o: singular_values_mpi.c $(HEADERS)
+# 	mpicc $(OPTIONS) -c singular_values_mpi.c
 
 special_element.o: special_element.c $(HEADERS)
 	gcc $(OPTIONS) -c special_element.c
@@ -46,5 +46,8 @@ coxeter.o: coxeter.c $(HEADERS)
 mat.o: mat.c $(HEADERS)
 	gcc $(OPTIONS) -c mat.c
 
+parallel.o: parallel.c $(HEADERS)
+	gcc $(OPTIONS) -c parallel.c
+
 clean:
-	rm -f singular_values special_element singular_values_mpi coxeter.o linalg.o singular_values.o singular_values_mpi.o mat.o special_element.o convert.hi convert.o convert billiard_words.hi billiard_words.o billiard_words enumerate_triangle_group.o
+	rm -f singular_values special_element singular_values_mpi coxeter.o linalg.o singular_values.o singular_values_mpi.o mat.o special_element.o convert.hi convert.o convert billiard_words.hi billiard_words.o billiard_words enumerate_triangle_group.o parallel.o

parallel.c: 349 lines (new file)

@@ -0,0 +1,349 @@
#include "parallel.h"

#include <mpi.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <malloc.h>
#include <stdlib.h>

#define DEBUG(msg, ...) fprintf(stderr, "[%003d%10.3f] " msg, mpi_rank(0), runtime(), ##__VA_ARGS__)
//#define DEBUG(msg, ...) fprintf(stderr, "[ %10.3f] " msg, runtime(), ##__VA_ARGS__)
//#define DEBUG_MPI(msg, node, ...) fprintf(stderr, "[%003d%10.3f] " msg, node, runtime(), ##__VA_ARGS__)
#define DONE(x) *((int*)(x))

enum message_tag {
	PARALLEL_ORDER,
	PARALLEL_RESULT,
	PARALLEL_SHUTDOWN,
	PARALLEL_GLOBAL_DATA
};

struct timespec starttime;

int mpi_rank(int activate_mpi)
{
	static int active = 0;
	if(activate_mpi)
		active = 1;

	if(!active)
		return 0;
	else {
		int rank;
		MPI_Comm_rank(MPI_COMM_WORLD, &rank);
		return rank;
	}
}

void start_timer()
{
	clock_gettime(CLOCK_MONOTONIC, &starttime);
}

double runtime()
{
	struct timespec curtime;
	double diff;
	clock_gettime(CLOCK_MONOTONIC, &curtime);
	return (curtime.tv_sec - starttime.tv_sec) + (curtime.tv_nsec - starttime.tv_nsec) / 1e9;
}

parallel_context *parallel_init()
{
	parallel_context *ctx = malloc(sizeof(parallel_context));

	if(!getenv("OMPI_COMM_WORLD_SIZE")) {
		ctx->mpi_mode = 0;
		DEBUG("Running standalone.\n");
		return ctx;
	}

	ctx->mpi_mode = 1;
	int result = MPI_Init(NULL, NULL);
	MPI_Comm_size(MPI_COMM_WORLD, &ctx->size);
	MPI_Comm_rank(MPI_COMM_WORLD, &ctx->rank);
	MPI_Get_processor_name(ctx->processor_name, &ctx->processor_name_len);

	mpi_rank(1); // display the rank in debug output from now on

	if(ctx->rank == 0)
		DEBUG("Running in mpi mode, %d nodes.\n", ctx->size);

	return ctx;
}

void parallel_destroy(parallel_context* ctx)
{
	if(ctx->mpi_mode) {
		MPI_Type_free(&ctx->order_datatype);
		MPI_Type_free(&ctx->result_datatype);
		MPI_Finalize();
	}

	free(ctx);
}

void parallel_set_datasize_and_callbacks(parallel_context *ctx, parallel_callback_init init, parallel_callback_job job, parallel_callback_destroy destroy, int global_data_size, int node_data_size, int input_size, int output_size)
{
	ctx->init = init;
	ctx->destroy = destroy;
	ctx->job = job;
	ctx->global_data_size = global_data_size;
	ctx->node_data_size = node_data_size;
	ctx->input_size = input_size;
	ctx->output_size = output_size;

	if(ctx->mpi_mode) {
		// create a datatype for job orders, consisting of an integer (the job id) and a user-defined section
		int order_blocklengths[2] = {1, input_size};
		MPI_Aint order_displacements[2] = {0, sizeof(int)};
		MPI_Datatype order_types[2] = {MPI_INT, MPI_BYTE};
		MPI_Type_create_struct(2, order_blocklengths, order_displacements, order_types, &ctx->order_datatype);
		MPI_Type_commit(&ctx->order_datatype);

		int result_blocklengths[2] = {1, output_size};
		MPI_Aint result_displacements[2] = {0, sizeof(int)};
		MPI_Datatype result_types[2] = {MPI_INT, MPI_BYTE};
		MPI_Type_create_struct(2, result_blocklengths, result_displacements, result_types, &ctx->result_datatype);
		MPI_Type_commit(&ctx->result_datatype);
	}
}

int parallel_work(parallel_context *ctx)
{
	// do nothing in non-mpi mode
	if(ctx->mpi_mode == 0)
		return 0;

	MPI_Status status;
	void *global_data = malloc(ctx->global_data_size);
	void *node_data = malloc(ctx->node_data_size);
	void *input_and_job_nr = malloc(ctx->input_size + sizeof(int));
	void *input = input_and_job_nr + sizeof(int);
	int *job_nr = (int *)input_and_job_nr;
	void *output_and_job_nr = malloc(ctx->output_size + sizeof(int));
	void *output = output_and_job_nr + sizeof(int);
	int *output_job_nr = (int *)output_and_job_nr;
	double jobtime;

	// wait for global data
	MPI_Bcast(global_data, ctx->global_data_size, MPI_BYTE, 0, MPI_COMM_WORLD);

	DEBUG("Global data received\n");

	// initialize node_data (and do once-per-node computation)
	ctx->init(global_data, node_data);

	DEBUG("Initialization completed\n");

	while(1) {
		MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD,
			&status);

		DEBUG("Message received: source = %d, tag = %d\n", status.MPI_SOURCE, status.MPI_TAG);

		if(status.MPI_TAG == PARALLEL_SHUTDOWN) {
			DEBUG("Shutting down\n");
			break;
		} else if(status.MPI_TAG == PARALLEL_ORDER) {
			MPI_Recv(input_and_job_nr,
				1, ctx->order_datatype,
				0, PARALLEL_ORDER, MPI_COMM_WORLD,
				&status);

			DEBUG("Working on job %d\n", *job_nr);

			jobtime = -MPI_Wtime();

			// do the actual work
			ctx->job(global_data, node_data, input, output);

			jobtime += MPI_Wtime();

			DEBUG("Finished job %d in %f seconds\n", *job_nr, jobtime);

			*output_job_nr = *job_nr;
			MPI_Send(output_and_job_nr,
				1, ctx->result_datatype,
				0, PARALLEL_RESULT, MPI_COMM_WORLD);
		}
	}

	ctx->destroy(global_data, node_data);

	free(global_data);
	free(node_data);
	free(input_and_job_nr);
	free(output_and_job_nr);

	return 0;
}

int parallel_run(parallel_context *ctx, const void *global_data, const void *input_array, void *output_array, unsigned int njobs, const char *_restart_filename)
{
	// in non-mpi-mode, just run init1, init2, forall(jobs) job
	if(ctx->mpi_mode == 0) {
		int result;
		void *node_data = malloc(ctx->node_data_size);
		result = ctx->init(global_data, node_data);
		if(result != 0)
			goto cleanup_standalone;

		for(int i = 0; i < njobs; i++) {
			result = ctx->job(
				global_data,
				node_data,
				input_array + ctx->input_size*i,
				output_array + ctx->output_size*i);
			if(result != 0)
				goto cleanup_standalone;
		}

	cleanup_standalone:
		ctx->destroy(global_data, node_data);
		return result;
	} else {
		// if no restart file was specified, pick a filename
		char *restart_filename;
		char buffer[128];
		int restartf;
		if(_restart_filename == NULL) {
			time_t t = time(NULL);
			struct tm *loctm = localtime(&t);
			strftime(buffer, sizeof(buffer), "restart/restart_%y%m%d_%H%M%S", loctm);
			restart_filename = buffer;
		} else {
			restart_filename = (char *)_restart_filename;
		}

		// open restart file if it exists, otherwise create it
		int continuing = 1;
		restartf = open(restart_filename, O_RDWR);
		if(restartf == -1 && errno == ENOENT) {
			restartf = open(restart_filename, O_RDWR | O_CREAT, 0666);
			continuing = 0;
		}
		if(restartf == -1) {
			DEBUG("Error opening restart file: %s\n", strerror(errno));
			exit(1);
		}

		// map restart file
		int itemsize = (ctx->output_size + sizeof(int)); // for every job, store output, and completed flag
		ftruncate(restartf, njobs*itemsize);
		void *alljobs = mmap(0, njobs*itemsize, PROT_READ | PROT_WRITE, MAP_SHARED, restartf, 0);
		if(alljobs == MAP_FAILED) {
			DEBUG("Error mapping restart file: %s\n", strerror(errno));
			exit(1);
		}

		// count completed jobs, or initialize jobs
		int completed = 0;
		if(continuing) {
			for(int i = 0; i < njobs; i++)
				if(DONE(alljobs + i*itemsize))
					completed++;
		} else {
			for(int i = 0; i < njobs; i++) {
				DONE(alljobs + i*itemsize) = 0;
				memcpy(alljobs + i*itemsize + sizeof(int), input_array + i*ctx->input_size, ctx->input_size); // copy input data
			}
		}

		fsync(restartf);

		if(continuing) {
			DEBUG("Continuing from restart file, %d/%d jobs completed, %d nodes\n", completed, njobs, ctx->size);
		} else {
			DEBUG("Starting from scratch, %d jobs, %d nodes\n", njobs, ctx->size);
		}

		if(completed >= njobs)
			goto cleanup_mpi;

		/* Send global data */
		MPI_Bcast((void*)global_data, ctx->global_data_size, MPI_BYTE, 0, MPI_COMM_WORLD);

		DEBUG("Global data sent\n");

		void *input_message_buffer = malloc(ctx->input_size + sizeof(int));
		void *output_message_buffer = malloc(ctx->output_size + sizeof(int));

		// find next unfinished job
		int current = 0;
		while(current < njobs && DONE(alljobs + current*itemsize))
			current++;

		// assign initial jobs, 2 for each worker thread
		for(int i = 0; i < 2*(ctx->size-1); i++) {
			if(current >= njobs) // all jobs are assigned
				break;

			// send job id and input data
			// send to all nodes except ourself (node 0)
			*((int*)input_message_buffer) = current;
			memcpy(input_message_buffer + sizeof(int), input_array + current*ctx->input_size, ctx->input_size);
			MPI_Send(input_message_buffer, 1, ctx->order_datatype,
				i%(ctx->size-1)+1, PARALLEL_ORDER, MPI_COMM_WORLD);

			DEBUG("Job %d sent to node %d\n", current, i%(ctx->size-1)+1);
			current++;
		}

		MPI_Status status;
		int active_worker_nodes = ctx->size - 1;
		while(1) {
			MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
			if(status.MPI_TAG == PARALLEL_RESULT) {
				MPI_Recv(output_message_buffer, 1, ctx->result_datatype,
					MPI_ANY_SOURCE, PARALLEL_RESULT, MPI_COMM_WORLD, &status);

				int id = *((int*)output_message_buffer);
				memcpy(alljobs + id*itemsize + sizeof(int), output_message_buffer + sizeof(int), ctx->output_size);
				DONE(alljobs + id*itemsize) = 1;
				completed++;

				DEBUG("job %d completed by node %d\n", id, status.MPI_SOURCE);

				// todo: deal with unresponsive nodes
				// strategy: when no jobs left, go through unfinished list again, incrementing oversubscribe counter
				// if oversubscribe counter is at limit, shut node down instead
				//

				if(current >= njobs) { // all jobs are assigned, shut down node
					MPI_Send(NULL, 0, MPI_BYTE, status.MPI_SOURCE, PARALLEL_SHUTDOWN, MPI_COMM_WORLD);
					active_worker_nodes--;
					if(active_worker_nodes)
						continue;
					else
						break;
				}

				*((int*)input_message_buffer) = current;
				memcpy(input_message_buffer + sizeof(int), input_array + current*ctx->input_size, ctx->input_size);
				MPI_Send(input_message_buffer, 1, ctx->order_datatype,
					status.MPI_SOURCE, PARALLEL_ORDER, MPI_COMM_WORLD);

				DEBUG("Job %d sent to node %d\n", current, status.MPI_SOURCE);
				current++;
			}
		}

		for(int i = 0; i < njobs; i++) {
			memcpy(output_array + i*ctx->output_size, alljobs + i*itemsize + sizeof(int), ctx->output_size);
		}

		free(input_message_buffer);
		free(output_message_buffer);

	cleanup_mpi:
		munmap(alljobs, njobs*itemsize);
		close(restartf);
	}

	return 0;
}

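For orientation, and not part of the commit: the restart file that parallel_run() mmaps is simply an array of njobs fixed-size records, each one an int "completed" flag (accessed through the DONE() macro) followed by output_size bytes of result data, so itemsize = sizeof(int) + output_size. The toy program below uses made-up sizes and lays out the same records in an ordinary buffer, printing where each field lives.

#include <stdio.h>
#include <string.h>

int main(void)
{
	enum { OUTPUT_SIZE = 16, NJOBS = 3 };            /* invented sizes, for illustration only */
	int itemsize = sizeof(int) + OUTPUT_SIZE;        /* same itemsize formula as parallel_run() */
	unsigned char file_image[(sizeof(int) + OUTPUT_SIZE) * NJOBS];
	memset(file_image, 0, sizeof(file_image));

	/* mark job 1 as completed, just like DONE(alljobs + i*itemsize) = 1 */
	*(int *)(file_image + 1*itemsize) = 1;

	for(int i = 0; i < NJOBS; i++)
		printf("job %d: done=%d, output bytes start at offset %d\n",
			i, *(int *)(file_image + i*itemsize), (int)(i*itemsize + sizeof(int)));
	return 0;
}
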
parallel.h: 118 lines (new file)

@@ -0,0 +1,118 @@
#ifndef PARALLEL_H
#define PARALLEL_H

/*
	this is a library to parallelize workloads which can be split up naturally
	into a sequence of independent jobs, using MPI. A program will usually

	- do precomputation
	- fill array with input data
	- do the parallel work
	- print the output data

	we want to enable restarts, so that only unfinished jobs need to be repeated.
	Further, we want to be resilient to slow/unreliable network and to losing
	nodes. There is a main node and a number of workers. The main node does the
	precomputation and then retires to do administrative work, and the workers
	do the actual jobs. We also want to switch to serial mode if the program is
	called without MPI.

	The following data has to be transmitted between nodes:
	- results of the precomputation (read-only, shared between nodes)
	- job-specific input data, generated by main node before parallel part
	- output data for each job

	the parallel work shall be given as a callback function which takes
	input data and precomputation data as parameters

	the above program will look like this for us:

	- parallel_init
	- if we are a worker, do parallel_work(init_callback, job_callback), exit
	- do precomputation
	- fill array with input data
	- output_array = parallel_run(input_array)
	- print the output data

	parallel_init:
	- check if we're running as an mpi program
	- init mpi, check what kind of node we are

	parallel_work(init_callback1, init_callback2, job_callback):
	- receive global_precomp (???)
	- worker_precomp = init_callback2(global_precomp, worker_precomp)
	- infinite loop:
		- wait for job on network, receive input
		- output = job_callback(global_precomp, worker_precomp, input)
		- send output on network
		- exit loop on shutdown signal

	parallel_run(global_precomp, input_array, restart file, callbacks):
	- check if we're running as an MPI program
	- send global_precomp to all nodes (if MPI)
	- if(restart file given and exists) read restart file
	- else create new restart file
	- until(all jobs finished):
		- if MPI:
			- send next job & input to appropriate node
			- if all jobs are in work, reassign unfinished ones (up to limit)
			- collect outputs
		- if no MPI:
			- worker_precomp = init_callback1
			- worker_precomp = init_callback2(global_precomp, worker_precomp)
			- for(j in jobs)
				- output(j) = job_callback(global_precomp, worker_precomp, input(j))
	- delete restart file
	- return array of outputs

	parallel_destroy():
	- free everything

	have a context? probably yes: parallel_context

	plan:
	- make interface
	- implement no-MPI part
	- restructure singular_values.c to use interface
	- implement MPI part
*/

#include <mpi.h>
#include <time.h>

typedef void (*parallel_callback_destroy)(const void*, void*);
typedef int (*parallel_callback_init)(const void*,void*);
typedef int (*parallel_callback_job)(const void*,void*,const void*,void*);

typedef struct {
	int mpi_mode;
	struct timespec starttime;
	char processor_name[MPI_MAX_PROCESSOR_NAME];
	int processor_name_len;
	int rank;
	int size;
	MPI_Datatype order_datatype;
	MPI_Datatype result_datatype;
	parallel_callback_init init;
	parallel_callback_job job;
	parallel_callback_destroy destroy;
	void *global_data;
	void *node_data;
	int global_data_size;
	int node_data_size;
	int input_size;
	int output_size;
} parallel_context;

parallel_context *parallel_init();
void parallel_set_datasize_and_callbacks(parallel_context *ctx, parallel_callback_init init, parallel_callback_job job, parallel_callback_destroy destroy, int global_data_size, int node_data_size, int input_size, int output_size);
int parallel_work(parallel_context *ctx);
int parallel_run(parallel_context *ctx, const void *global_data, const void *input_array, void *output_array, unsigned int njobs, const char *restart_filename);
void parallel_destroy(parallel_context* ctx);

int mpi_rank();
void start_timer();
double runtime();

#endif

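To make the calling convention concrete, here is a minimal usage sketch against the interface declared above. It is not part of the commit: the struct names and the trivial "double an integer" job are invented for illustration, it assumes linking with parallel.c, and when run under mpirun it relies on the library's default restart path (a restart/ directory). The structure mirrors what singular_values.c does below: worker ranks branch into parallel_work() and exit, the main node fills an input array and calls parallel_run().

#include <stdio.h>
#include "parallel.h"

struct in  { int value; };
struct out { int doubled; };

static int  my_init(const void *global, void *node) { (void)global; (void)node; return 0; }
static void my_destroy(const void *global, void *node) { (void)global; (void)node; }

static int my_job(const void *global, void *node, const void *input, void *output)
{
	(void)global; (void)node;
	((struct out *)output)->doubled = 2 * ((const struct in *)input)->value;
	return 0;
}

int main(void)
{
	parallel_context *ctx = parallel_init();
	parallel_set_datasize_and_callbacks(ctx, my_init, my_job, my_destroy,
		sizeof(int),                       /* global data: a single dummy int */
		1,                                 /* node data: unused, 1 byte */
		sizeof(struct in), sizeof(struct out));

	if(ctx->mpi_mode && ctx->rank != 0) {      /* worker ranks only ever run jobs */
		parallel_work(ctx);
		parallel_destroy(ctx);
		return 0;
	}

	int global = 0;
	struct in  inputs[4]  = {{1}, {2}, {3}, {4}};
	struct out outputs[4];
	parallel_run(ctx, &global, inputs, outputs, 4, NULL);   /* NULL: auto-named restart file */

	for(int i = 0; i < 4; i++)
		printf("%d -> %d\n", inputs[i].value, outputs[i].doubled);

	parallel_destroy(ctx);
	return 0;
}
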
(shell run script; filename not shown in this view)

@@ -1,6 +1,9 @@
 #!/bin/bash
 
-nmax=895882 # up to reflection group word length 22
+# nmax=895882 # up to reflection group word length 22 ( 555 group)
+nmax=700000 # up to reflection group word length 22 ( 444 group)
 # nmax=11575 # up to reflection group word length 14
 
-time mpirun --mca opal_warn_on_missing_libcuda 0 -x LD_LIBRARY_PATH=/home/stecker/svmpi/libs ./singular_values $nmax ejp_trg_restart test.out
+#time mpirun --mca opal_warn_on_missing_libcuda 0 -x LD_LIBRARY_PATH=/home/stecker/svmpi/libs ./singular_values $nmax ejp_trg_restart test.out
+
+time mpirun --mca opal_warn_on_missing_libcuda 0 --mca mpi_yield_when_idle 1 -np 4 ./singular_values 700000 4 4 4 1 10 100 1 10 100

singular_values.c

@@ -2,21 +2,55 @@
 #include "linalg.h"
 #include "mat.h"
 #include "enumerate_triangle_group.h"
+#include "parallel.h"
+
 #include <time.h>
 
 #define SWAP(t,x,y) do { t _tmp = (x); (x) = (y); (y) = _tmp; } while (0);
 
-#define DEBUG(msg, ...) fprintf(stderr, "[%10.3f] " msg, runtime(), ##__VA_ARGS__);
+#define DEBUG(msg, ...) fprintf(stderr, "[%003d%10.3f] " msg, mpi_rank(0), runtime(), ##__VA_ARGS__)
 //#define DEBUG(msg, ...)
 
 struct result {
 	int id;
-	int count;
 	mpq_t tr;
 	mpq_t trinv;
 	double x;
 	double y;
+	double slope;
 };
 
+// we want as much as possible to be node data, except if it is only known to the main node
+// (command line arguments) or should only be computed once (id list)
+
+struct global_data {
+	// command line arguments
+	unsigned int nmax;
+	unsigned int p1, p2, p3;
+	unsigned int sstart, send, sdenom;
+	unsigned int qstart, qend, qdenom;
+
+	unsigned int *id_list;
+	unsigned int id_list_length;
+};
+
+struct node_data {
+	group_t *group;
+	mat* matrices;
+	struct result *invariants;
+	struct result **distinct_invariants;
+	int distinct_invariants_length;
+	mps_context *solver;
+};
+
+struct input_data {
+	unsigned int snum, sden;
+	unsigned int qnum, qden;
+};
+
+struct output_data {
+	int max_slope_id;
+	double max_slope;
+};
+
 static int compare_result(const void *a_, const void *b_)
@@ -75,20 +109,6 @@ static int compare_result_by_slope(const void *a_, const void *b_)
 	return slopea > slopeb ? -1 : slopea < slopeb ? 1 : 0;
 }
 
-struct timespec starttime;
-
-static void start_timer()
-{
-	clock_gettime(CLOCK_MONOTONIC, &starttime);
-}
-
-static double runtime()
-{
-	struct timespec curtime;
-	double diff;
-	clock_gettime(CLOCK_MONOTONIC, &curtime);
-	return (curtime.tv_sec - starttime.tv_sec) + (curtime.tv_nsec - starttime.tv_nsec) / 1e9;
-}
-
 int compute_invariants(group_t *group, mat *matrices, struct result **invariants, int *n, int unique)
 {
 	mpq_t tmp;
@@ -120,10 +140,8 @@ int compute_invariants(group_t *group, mat *matrices, struct result **invariants
 	for(int i = 0; i < ntraces; i++) {
 		if(i == 0 || compare_result(&invariants[i], &invariants[nuniq-1]) != 0) {
 			invariants[nuniq] = invariants[i];
-			invariants[nuniq]->count = 1;
 			nuniq++;
 		} else {
-			invariants[nuniq-1]->count++;
 			int oldlength = group->elements[invariants[nuniq-1]->id].length;
 			int newlength = group->elements[invariants[i]->id].length;
 			if(newlength < oldlength)
@@ -141,7 +159,7 @@ int compute_invariants(group_t *group, mat *matrices, struct result **invariants
 	max_slope = 0;
 	for(int i = 0; i < nuniq; i++) {
 		retval = solve_characteristic_polynomial(solver, poly, invariants[i]->tr, invariants[i]->trinv, evs);
-		retval = 0;evs[0] = 2;evs[1] = 1;evs[2] = 0.5; // fake solving the polynomial for memory leak test
 		if(retval == 1) {
 			fprintf(stderr, "Error! Could not solve polynomial.\n");
 			continue;
@@ -161,6 +179,7 @@ int compute_invariants(group_t *group, mat *matrices, struct result **invariants
 
 		invariants[i]->x = x;
 		invariants[i]->y = y;
+		invariants[i]->slope = y/x;
 
 		if(y/x > max_slope + 1e-12 && (x > 0.1 || y > 0.1)) {
 			max_slope_id = invariants[i]->id;
@@ -194,180 +213,249 @@ long check_memory_usage(mat *matrices, int n)
 	return total;
 }
 
-int main(int argc, char *argv[])
+void destroy_node(const void *_g, void *_n)
 {
-	mpq_t s, q, t, tmp;
-	int p1, p2, p3;
-	int sstart, send, sdenom, qstart, qend, qdenom;
-	mat *matrices;
-	group_t *group;
-	int nmax, n;
-	int max_slope_id;
-	char buf[100];
-	char buf2[100];
-	struct result *invariants;
-	struct result **distinct_invariants;
-
-	start_timer();
-
-	mpq_inits(s, q, t, tmp, NULL);
-	if(argc < 11) {
-		fprintf(stderr, "Usage: %s <N> <p1> <p2> <p3> <s start> <s end> <s denom> <q start> <q end> <q denom>\n", argv[0]);
-		exit(1);
-	}
-	nmax = atoi(argv[1]);
-	p1 = atoi(argv[2]);
-	p2 = atoi(argv[3]);
-	p3 = atoi(argv[4]);
-	sstart = atoi(argv[5]);
-	send = atoi(argv[6]);
-	sdenom = atoi(argv[7]);
-	qstart = atoi(argv[8]);
-	qend = atoi(argv[9]);
-	qdenom = atoi(argv[10]);
+	struct global_data *g = (struct global_data *)_g;
+	struct node_data *n = (struct node_data *)_n;
+
+	for(int i = 0; i < g->nmax; i++) {
+		mpq_clear(n->invariants[i].tr);
+		mpq_clear(n->invariants[i].trinv);
+	}
+	free(n->invariants);
+	free(n->distinct_invariants);
+	for(int i = 0; i < g->nmax; i++)
+		mat_clear(n->matrices[i]);
+	free(n->matrices);
+	coxeter_clear(n->group);
+}
+
+int init_node(const void *_g, void *_n)
+{
+	struct global_data *g = (struct global_data *)_g;
+	struct node_data *n = (struct node_data *)_n;
 
 	DEBUG("Allocate\n");
-	matrices = malloc(nmax*sizeof(mat));
-	for(int i = 0; i < nmax; i++)
-		mat_init(matrices[i], 3);
-	invariants = malloc(nmax*sizeof(struct result));
-	distinct_invariants = malloc(nmax*sizeof(struct result));
-	for(int i = 0; i < nmax; i++) {
-		mpq_init(invariants[i].tr);
-		mpq_init(invariants[i].trinv);
+	g->id_list = (int*)(g+1); // pointers get scrambled by transmission, reconstruct
+	n->matrices = malloc(g->nmax*sizeof(mat));
+	for(int i = 0; i < g->nmax; i++)
+		mat_init(n->matrices[i], 3);
+	n->invariants = malloc(g->nmax*sizeof(struct result));
+	n->distinct_invariants = malloc(g->nmax*sizeof(struct result)); // we won't need that many, but just in case
+	for(int i = 0; i < g->nmax; i++) {
+		mpq_init(n->invariants[i].tr);
+		mpq_init(n->invariants[i].trinv);
+		n->invariants[i].id = i;
 	}
 
 	// order of the triangle reflection generators: a, b, c
 	// order of the rotation orders: bc, ac, ab
 	DEBUG("Generate group\n");
-	group = coxeter_init_triangle(p1, p2, p3, nmax);
+	n->group = coxeter_init_triangle(g->p1, g->p2, g->p3, g->nmax);
 
-	// first run; compute all matrices
-	for(int i = 0; i < group->size; i++)
-		group->elements[i].need_to_compute = 1;
+	return 0;
+}
+
+int do_computation(const void *_g, void *_n, const void *_in, void *_out)
+{
+	struct global_data *g = (struct global_data *)_g;
+	struct node_data *n = (struct node_data *)_n;
+	struct input_data *in = (struct input_data *)_in;
+	struct output_data *out = (struct output_data *)_out;
+
+	mpq_t s, q;
+
+	mpq_inits(s, q, NULL);
+	mpq_set_ui(s, in->snum, in->sden);
+	mpq_set_ui(q, in->qnum, in->qden);
+
+	DEBUG("Computing max slope element for s = %d/%d and q = %d/%d.\n",
+		in->snum, in->sden,
+		in->qnum, in->qden);
+
+	for(int i = 0; i < n->group->size; i++)
+		n->group->elements[i].need_to_compute = 0;
+	n->group->elements[0].need_to_compute = 1;
+
+	int needed_elements = 1;
+	for(int i = 0; i < g->id_list_length; i++)
+	{
+		int id = g->id_list[i];
+		n->distinct_invariants[i] = &n->invariants[id];
+		groupelement_t *cur = &n->group->elements[id];
+		while(cur->need_to_compute == 0) {
+			cur->need_to_compute = 1;
+			needed_elements++;
+			cur = cur->parent->parent; // also need to compute its even-length ancestors
+		}
+		cur = n->group->elements[id].inverse;
+		while(cur->need_to_compute == 0) {
+			cur->need_to_compute = 1;
+			needed_elements++;
+			cur = cur->parent->parent;
+		}
+	}
+	n->distinct_invariants_length = g->id_list_length;
+
+	DEBUG("Need to compute %d elements to get %d traces up to reflection length %d\n",
+		needed_elements, g->id_list_length, n->group->elements[n->group->size-1].length);
+
+	DEBUG("Compute matrices\n");
+	enumerate(n->group, n->matrices, g->p1, g->p2, g->p3, s, q);
+
+	DEBUG("Compute invariants\n");
+	out->max_slope_id = compute_invariants(
+		n->group, n->matrices,
+		n->distinct_invariants, &n->distinct_invariants_length, 1);
+	out->max_slope = n->invariants[out->max_slope_id].slope;
+
+	mpq_clears(s, q, NULL);
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	char buf[100];
+	char buf2[100];
+	struct global_data *g;
+	struct node_data n;
+
+	start_timer();
+
+	// parse command line arguments
+	if(argc < 11) {
+		fprintf(stderr, "Usage: %s <N> <p1> <p2> <p3> <s start> <s end> <s denom> <q start> <q end> <q denom>\n", argv[0]);
+		exit(1);
+	}
+	int nmax = atoi(argv[1]);
+	g = (struct global_data*)malloc(sizeof(struct global_data) + nmax*sizeof(int));
+	g->id_list = (int*)(g+1);
+	g->nmax = nmax;
+	g->p1 = atoi(argv[2]);
+	g->p2 = atoi(argv[3]);
+	g->p3 = atoi(argv[4]);
+	g->sstart = atoi(argv[5]);
+	g->send = atoi(argv[6]);
+	g->sdenom = atoi(argv[7]);
+	g->qstart = atoi(argv[8]);
+	g->qend = atoi(argv[9]);
+	g->qdenom = atoi(argv[10]);
+
+	// initialize
+	parallel_context *ctx = parallel_init();
+	parallel_set_datasize_and_callbacks(ctx, init_node, do_computation, destroy_node,
+		sizeof(struct global_data) + g->nmax*sizeof(int),
+		sizeof(struct node_data),
+		sizeof(struct input_data),
+		sizeof(struct output_data));
+	if(ctx->mpi_mode == 1 && ctx->rank != 0) {
+		// worker mode
+		parallel_work(ctx);
+		parallel_destroy(ctx);
+		exit(0);
+	}
+
+	init_node(g, &n);
 
 	// use very generic values for the pilot run unless sstart=send and qstart=qend
-	if(sstart == send && qstart == qend) {
-		mpq_set_ui(s, sstart, sdenom);
-		mpq_set_ui(q, qstart, qdenom);
-		DEBUG("Single run for s = %d/%d, q = %d/%d\n", sstart, sdenom, qstart, qdenom);
+	struct input_data pilot_input;
+	struct output_data pilot_output;
+	if(g->sstart == g->send && g->qstart == g->qend) {
+		pilot_input.snum = g->sstart;
+		pilot_input.sden = g->sdenom;
+		pilot_input.qnum = g->qstart;
+		pilot_input.qden = g->qdenom;
+		DEBUG("Single run for s = %d/%d, q = %d/%d\n", g->sstart, g->sdenom, g->qstart, g->qdenom);
 	} else {
-		mpq_set_ui(s, 4, 100);
-		mpq_set_ui(q, 7, 100);
+		pilot_input.snum = 4;
+		pilot_input.sden = 100;
+		pilot_input.qnum = 7;
+		pilot_input.qden = 100;
 		DEBUG("Initial run for s = %d/%d, q = %d/%d\n", 4, 100, 7, 100);
 	}
 
-	DEBUG("Compute matrices\n");
-	enumerate(group, matrices, p1, p2, p3, s, q);
-
-	// prepare array of ids
-	n = 0;
-	for(int i = 0; i < group->size; i++)
-	{
-		if(group->elements[i].length % 2 != 0 || !group->elements[i].inverse)
-			continue;
-		invariants[i].id = i;
-		distinct_invariants[n++] = &invariants[i];
-	}
-
-	DEBUG("Compute invariants\n");
-	max_slope_id = compute_invariants(group, matrices, distinct_invariants, &n, 1);
-
-	// prepare for next time; don't need to change ids in distinct_invariants!
-	for(int i = 0; i < group->size; i++)
-		group->elements[i].need_to_compute = 0;
-	group->elements[0].need_to_compute = 1;
-	int multiplication_count = 1;
-	for(int i = 0; i < n; i++) {
-		groupelement_t *cur = &group->elements[distinct_invariants[i]->id];
-		while(cur->need_to_compute == 0) {
-			cur->need_to_compute = 1;
-			multiplication_count++;
-			cur = cur->parent->parent; // also need to compute its even-length ancestors
-		}
-		cur = group->elements[distinct_invariants[i]->id].inverse;
-		while(cur->need_to_compute == 0) {
-			cur->need_to_compute = 1;
-			multiplication_count++;
-			cur = cur->parent->parent;
-		}
-	}
-
-	DEBUG("Would have needed %d matrix multiplications for %d unique traces up to reflection length %d\n", multiplication_count, n, group->elements[group->size-1].length);
-
-	if(sstart != send || qstart != qend) {
-		for(int sloop = sstart; sloop <= send; sloop++) {
-			for(int qloop = qstart; qloop <= qend; qloop++) {
-				DEBUG("Loop for s = %d/%d, q = %d/%d\n", sloop, sdenom, qloop, qdenom);
-				mpq_set_ui(s, sloop, sdenom);
-				mpq_set_ui(q, qloop, qdenom);
-				DEBUG("Compute matrices\n");
-				enumerate(group, matrices, p1, p2, p3, s, q);
-				DEBUG("Compute invariants\n");
-				max_slope_id = compute_invariants(group, matrices, distinct_invariants, &n, 0);
-				// output
-				gmp_printf("%Qd %Qd %s\n", s, q,
-					print_word(&group->elements[max_slope_id], buf));
-				fflush(stdout);
-			}
-		}
+	g->id_list_length = 0;
+	for(int i = 0; i < n.group->size; i++)
+		if(n.group->elements[i].length % 2 == 0 && n.group->elements[i].inverse)
+			g->id_list[g->id_list_length++] = i;
+
+	do_computation(g, &n, &pilot_input, &pilot_output);
+
+	for(int i = 0; i < n.distinct_invariants_length; i++)
+		g->id_list[i] = n.distinct_invariants[i]->id;
+	g->id_list_length = n.distinct_invariants_length;
+
+	if(g->sstart != g->send || g->qstart != g->qend) {
+		struct input_data *inputs = malloc((g->send - g->sstart + 1)*(g->qend - g->qstart + 1)*sizeof(struct input_data));
+		struct output_data *outputs = malloc((g->send - g->sstart + 1)*(g->qend - g->qstart + 1)*sizeof(struct input_data));
+
+		int njobs = 0;
+		for(int sloop = g->sstart; sloop <= g->send; sloop++) {
+			for(int qloop = g->qstart; qloop <= g->qend; qloop++) {
+				inputs[njobs].sden = g->sdenom;
+				inputs[njobs].qden = g->qdenom;
+				inputs[njobs].snum = sloop;
+				inputs[njobs].qnum = qloop;
+				njobs++;
+			}
+		}
+
+		parallel_run(ctx, g, inputs, outputs, njobs, NULL);
+
+		// DEBUG("Loop for s = %d/%d, q = %d/%d\n", sloop, g->sdenom, qloop, g->qdenom);
+
+		for(int i = 0; i < njobs; i++)
+		{
+			gmp_printf("%d/%d %d/%d %s %f\n",
+				inputs[i].snum, inputs[i].sden, inputs[i].qnum, inputs[i].qden,
+				print_word(&n.group->elements[outputs[i].max_slope_id], buf),
+				outputs[i].max_slope);
+		}
+
+		free(inputs);
+		free(outputs);
 	} else {
 		// output
-		for(int i = 0; i < n; i++) {
-			double slope = distinct_invariants[i]->y/distinct_invariants[i]->x;
-
+		for(int i = 0; i < n.distinct_invariants_length; i++) {
 			// exclude tr = trinv = 2/1/0/-1/3
+			mpq_t tmp;
+			mpq_init(tmp);
 			mpq_set_si(tmp, 2, 1);
-			if(mpq_cmp(distinct_invariants[i]->tr, tmp) == 0 &&
-				mpq_cmp(distinct_invariants[i]->trinv, tmp) == 0)
+			if(mpq_cmp(n.distinct_invariants[i]->tr, tmp) == 0 &&
+				mpq_cmp(n.distinct_invariants[i]->trinv, tmp) == 0)
 				continue;
 			mpq_set_si(tmp, 1, 1);
-			if(mpq_cmp(distinct_invariants[i]->tr, tmp) == 0 &&
-				mpq_cmp(distinct_invariants[i]->trinv, tmp) == 0)
+			if(mpq_cmp(n.distinct_invariants[i]->tr, tmp) == 0 &&
+				mpq_cmp(n.distinct_invariants[i]->trinv, tmp) == 0)
 				continue;
 			mpq_set_si(tmp, 0, 1);
-			if(mpq_cmp(distinct_invariants[i]->tr, tmp) == 0 &&
-				mpq_cmp(distinct_invariants[i]->trinv, tmp) == 0)
+			if(mpq_cmp(n.distinct_invariants[i]->tr, tmp) == 0 &&
+				mpq_cmp(n.distinct_invariants[i]->trinv, tmp) == 0)
 				continue;
 			mpq_set_si(tmp, -1, 1);
-			if(mpq_cmp(distinct_invariants[i]->tr, tmp) == 0 &&
-				mpq_cmp(distinct_invariants[i]->trinv, tmp) == 0)
+			if(mpq_cmp(n.distinct_invariants[i]->tr, tmp) == 0 &&
+				mpq_cmp(n.distinct_invariants[i]->trinv, tmp) == 0)
 				continue;
 			mpq_set_si(tmp, 3, 1);
-			if(mpq_cmp(distinct_invariants[i]->tr, tmp) == 0 &&
-				mpq_cmp(distinct_invariants[i]->trinv, tmp) == 0)
+			if(mpq_cmp(n.distinct_invariants[i]->tr, tmp) == 0 &&
+				mpq_cmp(n.distinct_invariants[i]->trinv, tmp) == 0)
 				continue;
+			mpq_clear(tmp);
 
-			gmp_printf("%d %d %s %f\n",
-				distinct_invariants[i]->id, distinct_invariants[i]->count,
-				print_word(&group->elements[distinct_invariants[i]->id], buf),
-				slope
-				);
-
-			/*
-			gmp_printf("%d %d %d %Qd %Qd %f %f %f %f %f %s\n",
-				distinct_invariants[i]->id, distinct_invariants[i]->count, cumulative,
-				distinct_invariants[i]->tr, distinct_invariants[i]->trinv,
-				log(fabs(mpq_get_d(distinct_invariants[i]->tr))), log(fabs(mpq_get_d(distinct_invariants[i]->trinv))),
-				distinct_invariants[i]->x, distinct_invariants[i]->y, slope,
-				print_word(&group->elements[distinct_invariants[i]->id], buf)
-				);
-			*/
+			double slope = n.distinct_invariants[i]->y/n.distinct_invariants[i]->x;
+			gmp_printf("%d %s %f\n",
+				n.distinct_invariants[i]->id,
+				print_word(&n.group->elements[n.distinct_invariants[i]->id], buf),
+				slope);
 		}
 	}
 
-	DEBUG("Clean up\n");
-	for(int i = 0; i < nmax; i++) {
-		mpq_clear(invariants[i].tr);
-		mpq_clear(invariants[i].trinv);
-	}
-	free(invariants);
-	free(distinct_invariants);
-	for(int i = 0; i < nmax; i++)
-		mat_clear(matrices[i]);
-	free(matrices);
-	coxeter_clear(group);
-	mpq_clears(s, q, t, tmp, NULL);
+	destroy_node(g, &n);
+	free(g);
+	parallel_destroy(ctx);
 }
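One detail of the new singular_values.c worth spelling out (sketch only, not from the commit): struct global_data is broadcast as a single flat block of sizeof(struct global_data) + nmax*sizeof(int) bytes, with the id list stored directly behind the struct. The embedded id_list pointer is therefore meaningless on the receiving side, which is why init_node() re-points it with g->id_list = (int*)(g+1). The toy program below shows the same trick with an invented packed_header struct.

#include <stdio.h>
#include <stdlib.h>

struct packed_header { int nmax; int *ids; };   /* invented for illustration */

int main(void)
{
	int nmax = 4;
	struct packed_header *h = malloc(sizeof(*h) + nmax * sizeof(int));
	h->nmax = nmax;
	h->ids = (int *)(h + 1);                    /* trailing array lives right after the struct */
	for(int i = 0; i < nmax; i++)
		h->ids[i] = 10 * i;

	/* after memcpy-ing or broadcasting the whole block, the receiver must redo: */
	/* h->ids = (int *)(h + 1); */
	printf("ids[2] = %d\n", h->ids[2]);
	free(h);
	return 0;
}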