I am working on adapting some C code to Python. In general, I am very inexperienced with C, and the code was written in C90, and when compiled on a local machine will not provide accurate results.
Because of this, I wanted to "future-proof" the program that I am using. While I have a good grasp on what the program does, it runs extraordinarily slow compared to its C equivalent. I would like to see if I can optimize this code without having to add Cython to the mix.
To give an idea of what this code is doing. It begins at a starting point, surrounded by a small 5x5x5 cube. Within the cube, travel times to the central point at each cell are calculated (denoted as time0[index]). From there, the cube expands by 1 cell in each dimension, until calculating travel times for a predefined area (400x400x53 in my case).
The problem with this is that calculating the travel time relies on the neighbouring cells, calculated in previous steps in a for loop (which can be up to 160000 iterations per cell expansion). Each wall of the cube is expanded upon by a for loop, which each contain 49 if statements. This totals 294 if statements for all 6 loops, which run for every expansion of the dimensions of the cube.
To make a long story short, there are a LOT of if statements, being executed here. I have included a sample of the code below for the expanding top portion of the cube. I would definitely appreciate some advice here, since this code is beyond any previous project that I have taken on.
if dz1 > 0:
ii = 0
for j in range(y1+1,y2):
for i in range(x1+1,x2):
sort.time[ii] = T0[i,j,z1+1]
sort.i1[ii] = i
sort.i2[ii] = j
ii += 1
print(j,i)
sort_index = np.argsort(sort.time[0:ii])
sort.time[0:ii] = sort.time[sort_index]
sort.i1[0:ii] = sort.i1[sort_index]
sort.i2[0:ii] = sort.i2[sort_index]
for i in range(0,ii):
X1 = int(sort.i1[i])
X2 = int(sort.i2[i])
index = int(z1*nxy + X2*nx + X1)
lasti = int((z1+1)*nxy + X2*nx + X1)
fhead = 0
guess = time0[index]
if time0[index+1] < 1.e9 and time0[index+nx+1] < 1.e9 and time0[index+nx] < 1.e9 and X2 < ny-1 and X1 < nx-1:
attempt = fdh3d(T0[X1,X2,z1+1],T0[X1+1,X2,z1+1],T0[X1+1,X2+1,z1+1],T0[X1,X2+1,z1+1],T0[X1+1,X2,z1],T0[X1+1,X2+1,z1],T0[X1,X2+1,z1],S0[X1,X2,z1],S0[X1,X2,z1+1],S0[X1+1,X2,z1+1],S0[X1+1,X2+1,z1+1],S0[X1,X2+1,z1+1],S0[X1+1,X2,z1],S0[X1+1,X2+1,z1],S0[X1,X2+1,z1])
if attempt < guess:
guess = attempt
if time0[index-1] < 1.e9 and time0[index+nx-1] < 1.e9 and time0[index+nx] < 1.e9 and X2<ny-1 and X1>0:
attempt = fdh3d(T0[X1,X2,z1+1],T0[X1-1,X2,z1+1],T0[X1-1,X2+1,z1+1],T0[X1,X2+1,z1+1],T0[X1-1,X2,z1],T0[X1-1,X2+1,z1],T0[X1,X2+1,z1],S0[X1,X2,z1],S0[X1,X2,z1+1],S0[X1-1,X2,z1+1],S0[X1-1,X2+1,z1+1],S0[X1,X2+1,z1+1],S0[X1-1,X2,z1],S0[X1-1,X2+1,z1],S0[X1,X2+1,z1])
if attempt<guess:
guess = attempt
if time0[index+1] < 1.e9 and time0[index-nx+1] < 1.e9 and time0[index-nx] < 1.e9 and X2>0 and X1<nx-1:
attempt = fdh3d(T0[X1,X2,z1+1],T0[X1+1,X2,z1+1],T0[X1+1,X2-1,z1+1],T0[X1,X2-1,z1+1],T0[X1+1,X2,z1],T0[X1+1,X2-1,z1],T0[X1,X2-1,z1],S0[X1,X2,z1],S0[X1,X2,z1+1],S0[X1+1,X2,z1+1],S0[X1+1,X2-1,z1+1],S0[X1,X2-1,z1+1],S0[X1+1,X2,z1 ],S0[X1+1,X2-1,z1],S0[X1,X2-1,z1])
if attempt<guess:
guess = attempt
if time0[index-1] < 1.e9 and time0[index-nx-1] < 1.e9 and time0[index-nx] < 1.e9 and X2>0 and X1>0:
attempt = fdh3d(T0[X1,X2,z1+1],T0[X1-1,X2,z1+1],T0[X1-1,X2-1,z1+1],T0[X1,X2-1,z1+1],T0[X1-1,X2,z1 ],T0[X1-1,X2-1,z1],T0[X1,X2-1,z1],S0[X1,X2,z1],S0[X1,X2,z1+1],S0[X1-1,X2,z1+1],S0[X1-1,X2-1,z1+1],S0[X1,X2-1,z1+1],S0[X1-1,X2,z1 ],S0[X1-1,X2-1,z1],S0[X1,X2-1,z1])
if attempt<guess:
guess = attempt
if guess > 1.0e9:
if time0[index+1] < 1.e9 and X1<nx-1 and X2>y1+1 and X2<y2-1:
attempt = fdhne(T0[X1,X2,z1+1],T0[X1+1,X2,z1+1],T0[X1+1,X2,z1],T0[X1+1,X2-1,z1+1],T0[X1+1,X2+1,z1+1], S0[X1,X2,z1],S0[X1,X2,z1+1],S0[X1+1,X2,z1+1],S0[X1+1,X2,z1] )
if attempt<guess:
guess = attempt
if time0[index-1] < 1.e9 and X1>0 and X2>y1+1 and X2<y2-1:
attempt = fdhne(T0[X1,X2,z1+1],T0[X1-1,X2,z1+1],T0[X1-1,X2,z1],T0[X1-1,X2-1,z1+1],T0[X1-1,X2+1,z1+1], S0[X1,X2,z1],S0[X1,X2,z1+1],S0[X1-1,X2,z1+1],S0[X1-1,X2,z1] )
if attempt<guess:
guess = attempt
if time0[index+nx] < 1.e9 and X2<ny-1 and X1>x1+1 and X1<x2-1:
attempt = fdhne(T0[X1,X2,z1+1],T0[X1,X2+1,z1+1],T0[X1,X2+1,z1],T0[X1-1,X2+1,z1+1],T0[X1+1,X2+1,z1+1], S0[X1,X2,z1],S0[X1,X2,z1+1],S0[X1,X2+1,z1+1],S0[X1,X2+1,z1] )
if attempt<guess:
guess = attempt
if time0[index-nx] < 1.e9 and X2>0 and X1>x1+1 and X1<x2-1:
attempt = fdhne(T0[X1,X2,z1+1],T0[X1,X2-1,z1+1],T0[X1,X2-1,z1],T0[X1-1,X2-1,z1+1],T0[X1+1,X2-1,z1+1], S0[X1,X2,z1],S0[X1,X2,z1+1],S0[X1,X2-1,z1+1],S0[X1,X2-1,z1])
if attempt<guess:
guess = attempt
if time0[index+1] < 1.e9 and X1<nx-1:
attempt = fdh2d(T0[X1,X2,z1+1],T0[X1+1,X2,z1+1],T0[X1+1,X2,z1],S0[X1,X2,z1], S0[X1,X2,z1+1],S0[X1+1,X2,z1+1],S0[X1+1,X2,z1])
if attempt<guess:
guess = attempt
if time0[index-1] < 1.e9 and X1>0:
attempt = fdh2d(T0[X1,X2,z1+1],T0[X1-1,X2,z1+1],T0[X1-1,X2,z1],S0[X1,X2,z1], S0[X1,X2,z1+1],S0[X1-1,X2,z1+1],S0[X1-1,X2,z1])
if attempt<guess:
guess = attempt
if time0[index+nx] < 1.e9 and X2<ny-1:
attempt = fdh2d(T0[X1,X2,z1+1],T0[X1,X2+1,z1+1],T0[X1,X2+1,z1],S0[X1,X2,z1], S0[X1,X2,z1+1],S0[X1,X2+1,z1+1],S0[X1,X2+1,z1])
if attempt<guess:
guess = attempt
if time0[index-nx] < 1.e9 and X2>0:
attempt = fdh2d(T0[X1,X2,z1+1],T0[X1,X2-1,z1+1],T0[X1,X2-1,z1],S0[X1,X2,z1], S0[X1,X2,z1+1],S0[X1,X2-1,z1+1],S0[X1,X2-1,z1])
if attempt<guess:
guess = attempt
if time0[index+1] < 1.e9 and time0[index+nx+1] < 1.e9 and time0[index+nx] < 1.e9 and X2<ny-1 and X1<nx-1:
attempt = fdh2d(T0[X1+1,X2,z1],T0[X1+1,X2+1,z1],T0[X1,X2+1,z1],S0[X1,X2,z1], S0[X1+1,X2,z1],S0[X1+1,X2+1,z1],S0[X1,X2+1,z1])
if attempt<guess:
fhead=(guess-attempt)/slow0[index]
guess=attempt
if time0[index+1] < 1.e9 and time0[index-nx+1] < 1.e9 and time0[index-nx] < 1.e9 and X2>0 and X1<nx-1:
attempt = fdh2d(T0[X1+1,X2,z1],T0[X1+1,X2-1,z1],T0[X1,X2-1,z1],S0[X1,X2,z1], S0[X1+1,X2,z1],S0[X1+1,X2-1,z1],S0[X1,X2-1,z1])
if attempt<guess:
fhead=(guess-attempt)/slow0[index]
guess=attempt
if time0[index-1] < 1.e9 and time0[index+nx-1] < 1.e9 and time0[index+nx] < 1.e9 and X2<ny-1 and X1>0:
attempt = fdh2d(T0[X1-1,X2,z1],T0[X1-1,X2+1,z1],T0[X1,X2+1,z1],S0[X1,X2,z1],S0[X1-1,X2,z1],S0[X1-1,X2+1,z1],S0[X1,X2+1,z1])
if attempt<guess:
fhead=(guess-attempt)/slow0[index]
guess=attempt
if time0[index-1] < 1.e9 and time0[index-nx-1] < 1.e9 and time0[index-nx] < 1.e9 and X2>0 and X1>0:
attempt = fdh2d(T0[X1-1,X2,z1],T0[X1-1,X2-1,z1],T0[X1,X2-1,z1],S0[X1,X2,z1],S0[X1-1,X2,z1],S0[X1-1,X2-1,z1],S0[X1,X2-1,z1])
if attempt<guess:
fhead=(guess-attempt)/slow0[index]
guess=attempt
if guess > 1.0e9:
if X1>x1+1 and X1<x2-1 and X2>y1+1 and X2<y2-1:
attempt = fdhnf(T0[X1,X2,z1+1],T0[X1+1,X2,z1+1],T0[X1,X2+1,z1+1],T0[X1-1,X2,z1+1],T0[X1,X2-1,z1+1],S0[X1,X2,z1],S0[X1,X2,z1+1] )
if attempt<guess:
guess = attempt
attempt = T0[X1,X2,z1+1] + .5*(S0[X1,X2,z1]+S0[X1,X2,z1+1])
if attempt<guess:
guess = attempt
if time0[index+1]<1.e9 and X1<nx-1:
attempt = T0[X1+1,X2,z1] + .5*(S0[X1,X2,z1]+S0[X1+1,X2,z1])
if attempt<guess:
fhead=(guess-attempt)/slow0[index]
guess=attempt
if time0[index-1]<1.e9 and X1>0:
attempt = T0[X1-1,X2,z1] + .5*(S0[X1,X2,z1]+S0[X1-1,X2,z1])
if attempt<guess:
fhead=(guess-attempt)/slow0[index]
guess=attempt
if time0[index+nx]<1.e9 and X2<ny-1:
attempt = T0[X1,X2+1,z1] + .5*(S0[X1,X2,z1]+S0[X1,X2+1,z1])
if attempt<guess:
fhead=(guess-attempt)/slow0[index]
guess=attempt
if time0[index-nx]<1.e9 and X2>0:
attempt = T0[X1,X2-1,z1] + .5*(S0[X1,X2,z1]+S0[X1,X2-1,z1]);
if attempt<guess:
fhead=(guess-attempt)/slow0[index]
guess=attempt
if guess < time0[index]:
time0[index] = guess
T0[X1,X2,z1] = guess
if fhead > headtest:
headw[5]+=1
if z1 == 0:
dz1 = 0
z1-=1
I did also attempt to speed up the code by reducing the size of the for loops, however this actually slowed down the code! Here is an example of what I did:
if dz1 > 0:
ii = 0
sort.time = np.ndarray.flatten(T0[x1+1:x2,y1+1:y2,z1+1])
sort.i1 = np.array([(i) for i, j in product(range(x1+1,x2), range(y1+1,y2))])
sort.i2 = np.array([(j) for i, j in product(range(x1+1,x2), range(y1+1,y2))])
ii = sort.time.size
sort_index = np.argsort(sort.time)
sort.time = sort.time[sort_index]
sort.i1 = sort.i1[sort_index]
sort.i2 = sort.i2[sort_index]
for X1,X2 in zip(sort.i1,sort.i2):
index = z1*nxy + X2*nx + X1
Aucun commentaire:
Enregistrer un commentaire