Some random changes to parallelism

I don't really remember what these changes were about, but it seems to run;
I'll have to look at the parallelism code again in the future to get decent
performance.
Florian Stecker 2022-04-11 11:22:13 -05:00
parent 5b3dbe9e65
commit 7f6ad68f53
4 changed files with 34 additions and 11 deletions


@@ -3,16 +3,18 @@
 trap 'exit 130' INT
 wordlength=30
-sdenom=100
+sdenom=1
 sstart=1
-send=100
+send=1
 qdenom=100
 qstart=1
-qend=100 # 1/sqrt(2) = 0.7071...
-#words="$(./billiard_words $wordlength | awk '{print $1}')"
+qend=200 # 1/sqrt(2) = 0.7071...
+words="$(./billiard_words $wordlength | awk '{print $1}')"
 #words="cbabcabacabcacbcab cabacabcacbcabcbab cabcacbcabcbabcaba"
-words="abcb acbc baca"
+#words="abcabc abcb cbabcacbcacbab"
+#words="abcabc abcbcabcbc"
+#words="abcabc bcbab bcbabcac"
 for s in $(seq $sstart $send); do
 for q in $(seq $qstart $qend); do

mat.h

@@ -12,7 +12,7 @@
 needed features:
 x multiply matrices
 - inverse
-- pseudoinverse
+x pseudoinverse
 x set
 - eigenvalues
 */


@ -10,7 +10,8 @@
#include <malloc.h> #include <malloc.h>
#include <stdlib.h> #include <stdlib.h>
#define DEBUG INFO //#define DEBUG INFO
#define DEBUG(msg, ...)
#define INFO(msg, ...) fprintf(stderr, "[%003d%10.3f] " msg, mpi_rank(0), runtime(), ##__VA_ARGS__) #define INFO(msg, ...) fprintf(stderr, "[%003d%10.3f] " msg, mpi_rank(0), runtime(), ##__VA_ARGS__)
//#define DEBUG(msg, ...) fprintf(stderr, "[ %10.3f] " msg, runtime(), ##__VA_ARGS__) //#define DEBUG(msg, ...) fprintf(stderr, "[ %10.3f] " msg, runtime(), ##__VA_ARGS__)
//#define DEBUG_MPI(msg, node, ...) fprintf(stderr, "[%003d%10.3f] " msg, node, runtime(), ##__VA_ARGS__) //#define DEBUG_MPI(msg, node, ...) fprintf(stderr, "[%003d%10.3f] " msg, node, runtime(), ##__VA_ARGS__)
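
The macro change above turns DEBUG into a no-op while the new TIMING output goes through INFO. A minimal, self-contained sketch of that pattern, assuming a simplified INFO without the mpi_rank()/runtime() prefix of the real macro:

#include <stdio.h>

/* DEBUG expands to nothing, so its arguments are never evaluated or printed;
   INFO still forwards everything to fprintf. ##__VA_ARGS__ is the same
   GNU/clang extension the real code relies on for empty argument lists. */
#define DEBUG(msg, ...)
#define INFO(msg, ...) fprintf(stderr, "[info] " msg, ##__VA_ARGS__)

int main(void)
{
    DEBUG("compiled away: %d\n", 1);
    INFO("still printed: %d\n", 2);
    return 0;
}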
@@ -128,8 +129,12 @@ int parallel_job(parallel_context *ctx, const void *global_data, void *node_data
     int message_present;
     if(block) {
+        jobtime = -MPI_Wtime();
         MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD,
                   &status);
+        jobtime += MPI_Wtime();
+        INFO("TIMING: Probe() took %f seconds\n", jobtime);
         message_present = 1;
     } else {
         MPI_Iprobe(0, MPI_ANY_TAG, MPI_COMM_WORLD,
@@ -157,12 +162,16 @@ int parallel_job(parallel_context *ctx, const void *global_data, void *node_data
         jobtime += MPI_Wtime();
-        DEBUG("Finished job %d in %f seconds\n", *job_nr, jobtime);
+        INFO("TIMING: job %d took %f seconds\n", *job_nr, jobtime);
         *output_job_nr = *job_nr;
+        jobtime = -MPI_Wtime();
         MPI_Send(output_and_job_nr,
                  1, ctx->result_datatype,
                  0, PARALLEL_RESULT, MPI_COMM_WORLD);
+        jobtime += MPI_Wtime();
+        INFO("TIMING: Send() took %f seconds\n", jobtime);
     }
 } else {
     result = 2;
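
All of the TIMING lines added in this hunk and the previous one use the same idiom: start from the negated MPI_Wtime(), add MPI_Wtime() again after the call, and the sum is the elapsed wall time. A stand-alone sketch of just that idiom; the MPI_Barrier is only a placeholder for the Probe()/Send() being measured:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    double t = -MPI_Wtime();     /* t = -(start time) */
    MPI_Barrier(MPI_COMM_WORLD); /* placeholder for the call being timed */
    t += MPI_Wtime();            /* t = end - start */

    fprintf(stderr, "TIMING: Barrier() took %f seconds\n", t);

    MPI_Finalize();
    return 0;
}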
@@ -359,13 +368,25 @@ int parallel_run(parallel_context *ctx, const void *global_data, const void *inp
                 INFO("job %d completed by node %d, shut down, %d workers remaining\n", id, node, active_worker_nodes);
             }
         } else {
-            INFO("job %d completed by node %d, continues with %d\n", id, node, current);
             *((int*)input_message_buffer) = current;
             memcpy(input_message_buffer + sizeof(int), input_array + current*ctx->input_size, ctx->input_size);
             MPI_Send(input_message_buffer, 1, ctx->order_datatype,
                      node, PARALLEL_ORDER, MPI_COMM_WORLD);
             active_jobs[node]++;
             current++;
+            if(active_jobs[node] < 3) {
+                *((int*)input_message_buffer) = current;
+                memcpy(input_message_buffer + sizeof(int), input_array + current*ctx->input_size, ctx->input_size);
+                MPI_Send(input_message_buffer, 1, ctx->order_datatype,
+                         node, PARALLEL_ORDER, MPI_COMM_WORLD);
+                active_jobs[node]++;
+                current++;
+                INFO("job %d completed by node %d, continues with %d and %d\n", id, node, current-1, current-2);
+            } else {
+                INFO("job %d completed by node %d, continues with %d\n", id, node, current-1);
+            }
         }
     }
 }
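
This hunk is the substantive change: when a worker reports a result, the master no longer hands it exactly one replacement job but tops it up to at most three outstanding jobs, so the next order is already queued before the current one finishes. Below is a condensed sketch of that refill policy, not the author's code: the buffer packing and MPI_Send are replaced by a printing stub, and the while loop generalizes the at-most-two explicit sends in the diff.

#include <stdio.h>

enum { MAX_OUTSTANDING = 3, NJOBS = 10, NODES = 4 };

static int current = 0;        /* index of the next job to hand out */
static int active_jobs[NODES]; /* outstanding jobs per worker node */

/* stand-in for packing input_message_buffer and sending a PARALLEL_ORDER message */
static void send_job(int node, int job)
{
    printf("order job %d to node %d\n", job, node);
}

/* called when a result arrives from `node` */
static void on_result(int node)
{
    active_jobs[node]--;
    while (active_jobs[node] < MAX_OUTSTANDING && current < NJOBS) {
        send_job(node, current);
        active_jobs[node]++;
        current++;
    }
}

int main(void)
{
    send_job(1, current); active_jobs[1]++; current++; /* initial order */
    on_result(1); /* node 1 reports back and is refilled up to 3 outstanding jobs */
    return 0;
}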


@@ -6,4 +6,4 @@ nmax=700000 # up to reflection group word length 22 ( 444 group)
 #time mpirun --mca opal_warn_on_missing_libcuda 0 -x LD_LIBRARY_PATH=/home/stecker/svmpi/libs ./singular_values $nmax ejp_trg_restart test.out
-time mpirun --mca opal_warn_on_missing_libcuda 0 --mca mpi_yield_when_idle 1 -np 4 ./singular_values 700000 4 4 4 1 10 100 1 10 100
+time mpirun --mca opal_warn_on_missing_libcuda 0 --mca mpi_yield_when_idle 1 -np 4 ./singular_values 700000 4 4 4 1 1 100 1 100 100 $1