new simpler approach to parallelization

2022-06-15 15:17:54 +02:00
parent 429f0890d6
commit ac80bc9f3f
13 changed files with 76 additions and 824 deletions
--- a/parallelization/runjobs.py
+++ b/parallelization/runjobs.py
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+
+from mpi4py import MPI
+import os
+import re
+import math
+import subprocess
+import time
+
+# Handle on the world communicator shared by all processes of this MPI job.
+comm = MPI.COMM_WORLD
+# Index of this process within the communicator; selects its slice of work.
+rank = comm.Get_rank()
+# Size of the communicator, i.e. the number of processes the work is split over.
+nodes = comm.Get_size()
+
+# print(os.path.abspath(os.curdir))
+
+# Collect the indices of commands that earlier runs already completed. Each
+# done_<rank> file in the working directory holds one command index per line.
+done = set()
+for f in os.listdir('.'):
+	if re.search('^done_[0-9]+', f):
+		# "with" closes the file even if a line fails to parse.
+		with open(f, "r") as fp:
+			# Ignore blank lines; int() on one would otherwise abort the scan.
+			done.update(int(x) for x in fp if x.strip())
+
+# Pair every line of the "commands" file with its 0-based line number and keep
+# the ones whose number is not recorded as done. The line number is the
+# identifier written to the done_* files, so it is assigned before filtering.
+with open("commands", "r") as f:
+	todo = [(idx, c) for idx, c in enumerate(f) if idx not in done]
+
+# Give this process the contiguous slice todo[start:end]. Pure integer
+# arithmetic keeps the boundaries exact (float division could round a boundary
+# down), and rank+1 == nodes yields end == len(todo) without a special case.
+start = len(todo) * rank // nodes
+end = len(todo) * (rank + 1) // nodes
+
+print("{n:d} commands awaiting execution, {nnode:d} of them in node {rank:d}".format(n=len(todo),nnode=end-start,rank=rank))
+
+# Wait until every process has finished reading the done_* files and
+# "commands" before any of them starts appending new results. A fixed sleep
+# cannot guarantee this: a process that starts late could see extra done
+# entries, build a different todo list, and the slices would no longer match.
+comm.Barrier()
+
+# Each process appends to its own result_<rank> and done_<rank> files, so
+# no two processes write to the same file.
+outfilename = "result_{node:003d}".format(node=rank)
+donefilename = "done_{node:003d}".format(node=rank)
+
+# "with" closes both files even if launching a command raises.
+with open(outfilename, "a") as outfile, open(donefilename, "a") as donefile:
+	for idx, cmd in todo[start:end]:
+		# The command line is handed to the shell exactly as written, so the
+		# "commands" file must come from a trusted source.
+		result = subprocess.call(cmd, stdout=outfile, shell=True)
+		if result == 0:
+			# Record the index only on success so that a failed command is
+			# picked up again by the next run.
+			donefile.write(str(idx) + '\n')
+		else:
+			# Drop the newline read from the file to avoid a blank log line.
+			print("Command failed: {cmd}".format(cmd=cmd.rstrip('\n')))
+		# Flush after every command so recorded progress survives an abort.
+		outfile.flush()
+		donefile.flush()