Commit a693e7dc authored by Ryan Gutenkunst's avatar Ryan Gutenkunst
Browse files

Complete 4D GPU integration

parent 410bb61b
...@@ -118,7 +118,7 @@ def _inject_mutations_4D(phi, dt, xx, yy, zz, aa, theta0, frozen1, frozen2, froz ...@@ -118,7 +118,7 @@ def _inject_mutations_4D(phi, dt, xx, yy, zz, aa, theta0, frozen1, frozen2, froz
phi[0,0,0,1] += dt/aa[1] * theta0/2 * 16/((aa[2] - aa[0]) * xx[1] * yy[1] * zz[1]) phi[0,0,0,1] += dt/aa[1] * theta0/2 * 16/((aa[2] - aa[0]) * xx[1] * yy[1] * zz[1])
return phi return phi
def _inject_mutations_5D(phi, dt, xx, yy, zz, aa, theta0, frozen1, frozen2, frozen3, frozen4): def _inject_mutations_5D(phi, dt, xx, yy, zz, aa, bb, theta0, frozen1, frozen2, frozen3, frozen4, frozen5):
""" """
Inject novel mutations for a timestep. Inject novel mutations for a timestep.
""" """
...@@ -136,6 +136,7 @@ def _inject_mutations_5D(phi, dt, xx, yy, zz, aa, theta0, frozen1, frozen2, froz ...@@ -136,6 +136,7 @@ def _inject_mutations_5D(phi, dt, xx, yy, zz, aa, theta0, frozen1, frozen2, froz
# Population 4 # Population 4
if not frozen4: if not frozen4:
phi[0,0,0,1,0] += dt/aa[1] * theta0/2 * 32/((aa[2] - aa[0]) * xx[1] * yy[1] * zz[1] * bb[1]) phi[0,0,0,1,0] += dt/aa[1] * theta0/2 * 32/((aa[2] - aa[0]) * xx[1] * yy[1] * zz[1] * bb[1])
# Population 5
if not frozen5: if not frozen5:
phi[0,0,0,0,1] += dt/aa[1] * theta0/2 * 32/((bb[2] - bb[0]) * xx[1] * yy[1] * zz[1] * aa[1]) phi[0,0,0,0,1] += dt/aa[1] * theta0/2 * 32/((bb[2] - bb[0]) * xx[1] * yy[1] * zz[1] * aa[1])
return phi return phi
...@@ -736,7 +737,7 @@ def five_pops(phi, xx, T, nu1=1, nu2=1, nu3=1, nu4=1, nu5=1, ...@@ -736,7 +737,7 @@ def five_pops(phi, xx, T, nu1=1, nu2=1, nu3=1, nu4=1, nu5=1,
_compute_dt(dy,nu2,[m21,m23,m24,m25],gamma2,h2), _compute_dt(dy,nu2,[m21,m23,m24,m25],gamma2,h2),
_compute_dt(dz,nu3,[m31,m32,m34,m35],gamma3,h3), _compute_dt(dz,nu3,[m31,m32,m34,m35],gamma3,h3),
_compute_dt(da,nu4,[m41,m42,m43,m45],gamma4,h4), _compute_dt(da,nu4,[m41,m42,m43,m45],gamma4,h4),
_compute_dt(da,nu4,[m51,m52,m53,m54],gamma5,h5)) _compute_dt(db,nu5,[m51,m52,m53,m54],gamma5,h5))
this_dt = min(dt, T - current_t) this_dt = min(dt, T - current_t)
next_t = current_t + this_dt next_t = current_t + this_dt
......
...@@ -39,7 +39,7 @@ def _inject_mutations_4D_valcalc(dt, xx, yy, zz, aa, theta0, frozen1, frozen2, f ...@@ -39,7 +39,7 @@ def _inject_mutations_4D_valcalc(dt, xx, yy, zz, aa, theta0, frozen1, frozen2, f
# Population 1 # Population 1
# Normalization based on the multi-dimensional trapezoid rule is # Normalization based on the multi-dimensional trapezoid rule is
# implemented ************** here *************** # implemented ************** here ***************
val1000, val0100, val0010, val0001 = 0, 0, 0 val1000, val0100, val0010, val0001 = 0, 0, 0, 0
if not frozen1: if not frozen1:
val1000 = dt/xx[1] * theta0/2 * 16/((xx[2] - xx[0]) * yy[1] * zz[1] * aa[1]) val1000 = dt/xx[1] * theta0/2 * 16/((xx[2] - xx[0]) * yy[1] * zz[1] * aa[1])
# Population 2 # Population 2
...@@ -548,20 +548,20 @@ def _four_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f, nu4_f ...@@ -548,20 +548,20 @@ def _four_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f, nu4_f
V_gpu = gpuarray.empty(L, np.float64) V_gpu = gpuarray.empty(L, np.float64)
VInt_gpu = gpuarray.empty(L-1, np.float64) VInt_gpu = gpuarray.empty(L-1, np.float64)
a_gpu = gpuarray.empty((L,L*L), np.float64) a_gpu = gpuarray.empty((L,M*N*O), np.float64)
b_gpu = gpuarray.empty((L,L*L), np.float64) b_gpu = gpuarray.empty((L,M*N*O), np.float64)
c_gpu = gpuarray.empty((L,L*L), np.float64) c_gpu = gpuarray.empty((L,M*N*O), np.float64)
bsize_int = cusparseDgtsvInterleavedBatch_bufferSizeExt( bsize_int = cusparseDgtsvInterleavedBatch_bufferSizeExt(
cusparse_handle, 0, L, a_gpu.gpudata, b_gpu.gpudata, cusparse_handle, 0, L, a_gpu.gpudata, b_gpu.gpudata,
c_gpu.gpudata, phi_gpu.gpudata, L**2) c_gpu.gpudata, phi_gpu.gpudata, M*N*O)
pBuffer = pycuda.driver.mem_alloc(bsize_int) pBuffer = pycuda.driver.mem_alloc(bsize_int)
while current_t < T: while current_t < T:
dt = min(dadi.Integration._compute_dt(dx, nu1, [m12, m13, m14], gamma1, h1), dt = min(dadi.Integration._compute_dt(dx, nu1, [m12, m13, m14], gamma1, h1),
dadi.Integration._compute_dt(dy, nu2, [m21, m23, m24], gamma2, h2), dadi.Integration._compute_dt(dy, nu2, [m21, m23, m24], gamma2, h2),
dadi.Integration._compute_dt(dz, nu3, [m31, m32, m34], gamma3, h3), dadi.Integration._compute_dt(dz, nu3, [m31, m32, m34], gamma3, h3),
dadi.Integration._compute_dt(dz, nu4, [m41, m42, m43], gamma4, h4)) dadi.Integration._compute_dt(da, nu4, [m41, m42, m43], gamma4, h4))
this_dt = np.float64(min(dt, T - current_t)) this_dt = np.float64(min(dt, T - current_t))
next_t = current_t + this_dt next_t = current_t + this_dt
...@@ -658,7 +658,7 @@ def _four_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f, nu4_f ...@@ -658,7 +658,7 @@ def _four_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f, nu4_f
grid=_grid(N), block=_block()) grid=_grid(N), block=_block())
kernels._Vfunc(xInt_gpu, nu3, np.int32(N-1), VInt_gpu, kernels._Vfunc(xInt_gpu, nu3, np.int32(N-1), VInt_gpu,
grid=_grid(N-1), block=_block()) grid=_grid(N-1), block=_block())
kernels._Mfunc3D(xInt_gpu, xx_gpu, xx_gpu, m34, m31, m32, gamma3, h3, kernels._Mfunc4D(xInt_gpu, xx_gpu, xx_gpu, xx_gpu, m34, m31, m32, gamma3, h3,
np.int32(N-1), O, L, M, MInt_gpu, np.int32(N-1), O, L, M, MInt_gpu,
grid=_grid((N-1)*M*L*O), block=_block()) grid=_grid((N-1)*M*L*O), block=_block())
...@@ -681,15 +681,15 @@ def _four_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f, nu4_f ...@@ -681,15 +681,15 @@ def _four_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f, nu4_f
a_gpu.gpudata, b_gpu.gpudata, c_gpu.gpudata, phi_gpu.gpudata, a_gpu.gpudata, b_gpu.gpudata, c_gpu.gpudata, phi_gpu.gpudata,
L*M*O, pBuffer) L*M*O, pBuffer)
transpose_gpuarray(phi_gpu, c_gpu.reshape(L*N*O,M)) transpose_gpuarray(phi_gpu, c_gpu.reshape(L*M*O,N))
phi_gpu, c_gpu = c_gpu.reshape(O,L*M*N), phi_gpu.reshape(O,L*M*N) phi_gpu, c_gpu = c_gpu.reshape(O,L*M*N), phi_gpu.reshape(O,L*M*N)
MInt_gpu = c_gpu MInt_gpu = c_gpu
if not frozen4: if not frozen4:
kernels._Vfunc(xx_gpu, nu4, N, V_gpu, kernels._Vfunc(xx_gpu, nu4, O, V_gpu,
grid=_grid(O), block=_block()) grid=_grid(O), block=_block())
kernels._Vfunc(xInt_gpu, nu4, np.int32(O-1), VInt_gpu, kernels._Vfunc(xInt_gpu, nu4, np.int32(O-1), VInt_gpu,
grid=_grid(O-1), block=_block()) grid=_grid(O-1), block=_block())
kernels._Mfunc3D(xInt_gpu, xx_gpu, xx_gpu, m41, m42, m43, gamma4, h4, kernels._Mfunc4D(xInt_gpu, xx_gpu, xx_gpu, xx_gpu, m41, m42, m43, gamma4, h4,
np.int32(O-1), L, M, N, MInt_gpu, np.int32(O-1), L, M, N, MInt_gpu,
grid=_grid((O-1)*M*L*N), block=_block()) grid=_grid((O-1)*M*L*N), block=_block())
...@@ -712,7 +712,7 @@ def _four_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f, nu4_f ...@@ -712,7 +712,7 @@ def _four_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f, nu4_f
a_gpu.gpudata, b_gpu.gpudata, c_gpu.gpudata, phi_gpu.gpudata, a_gpu.gpudata, b_gpu.gpudata, c_gpu.gpudata, phi_gpu.gpudata,
L*M*N, pBuffer) L*M*N, pBuffer)
transpose_gpuarray(phi_gpu, c_gpu.reshape(M*N*O,L)) transpose_gpuarray(phi_gpu, c_gpu.reshape(L*M*N,O))
phi_gpu, c_gpu = c_gpu.reshape(L,M*N*O), phi_gpu.reshape(L,M*N*O) phi_gpu, c_gpu = c_gpu.reshape(L,M*N*O), phi_gpu.reshape(L,M*N*O)
current_t += this_dt current_t += this_dt
......
...@@ -7,9 +7,11 @@ mod = SourceModule(open(sourcefile).read()) ...@@ -7,9 +7,11 @@ mod = SourceModule(open(sourcefile).read())
_inject_mutations_2D_vals = mod.get_function("inject_mutations_2D") _inject_mutations_2D_vals = mod.get_function("inject_mutations_2D")
_inject_mutations_3D_vals = mod.get_function("inject_mutations_3D") _inject_mutations_3D_vals = mod.get_function("inject_mutations_3D")
_inject_mutations_4D_vals = mod.get_function("inject_mutations_4D")
_Vfunc = mod.get_function("Vfunc") _Vfunc = mod.get_function("Vfunc")
_Mfunc2D = mod.get_function("Mfunc2D") _Mfunc2D = mod.get_function("Mfunc2D")
_Mfunc3D = mod.get_function("Mfunc3D") _Mfunc3D = mod.get_function("Mfunc3D")
_Mfunc4D = mod.get_function("Mfunc4D")
_cx0 = mod.get_function("cx0") _cx0 = mod.get_function("cx0")
_compute_ab_nobc = mod.get_function("compute_ab_nobc") _compute_ab_nobc = mod.get_function("compute_ab_nobc")
_compute_bc_nobc = mod.get_function("compute_bc_nobc") _compute_bc_nobc = mod.get_function("compute_bc_nobc")
......
...@@ -63,7 +63,7 @@ void implicit_4Dx(double *phi, double *xx, double *yy, double *zz, double *aa, ...@@ -63,7 +63,7 @@ void implicit_4Dx(double *phi, double *xx, double *yy, double *zz, double *aa,
if((yy[jj]==0) && (zz[kk]==0) && (aa[ll]==0) && (Mfirst <= 0)) if((yy[jj]==0) && (zz[kk]==0) && (aa[ll]==0) && (Mfirst <= 0))
b[0] += (0.5/nu1 - Mfirst)*2./dx[0]; b[0] += (0.5/nu1 - Mfirst)*2./dx[0];
if((yy[jj]==1) && (zz[kk]==1) && (aa[ll]==0) && (Mlast >= 0)) if((yy[jj]==1) && (zz[kk]==1) && (aa[ll]==1) && (Mlast >= 0))
b[L-1] += -(-0.5/nu1 - Mlast)*2./dx[L-2]; b[L-1] += -(-0.5/nu1 - Mlast)*2./dx[L-2];
tridiag_premalloc(a, b, c, r, temp, L); tridiag_premalloc(a, b, c, r, temp, L);
...@@ -142,7 +142,7 @@ void implicit_4Dy(double *phi, double *xx, double *yy, double *zz, double *aa, ...@@ -142,7 +142,7 @@ void implicit_4Dy(double *phi, double *xx, double *yy, double *zz, double *aa,
if((xx[ii]==0) && (zz[kk]==0) && (aa[ll]==0) && (Mfirst <= 0)) if((xx[ii]==0) && (zz[kk]==0) && (aa[ll]==0) && (Mfirst <= 0))
b[0] += (0.5/nu2 - Mfirst)*2./dy[0]; b[0] += (0.5/nu2 - Mfirst)*2./dy[0];
if((xx[ii]==1) && (zz[kk]==1) && (aa[ll]==0) && (Mlast >= 0)) if((xx[ii]==1) && (zz[kk]==1) && (aa[ll]==1) && (Mlast >= 0))
b[M-1] += -(-0.5/nu2 - Mlast)*2./dy[M-2]; b[M-1] += -(-0.5/nu2 - Mlast)*2./dy[M-2];
tridiag_premalloc(a, b, c, r, temp, M); tridiag_premalloc(a, b, c, r, temp, M);
...@@ -221,7 +221,7 @@ void implicit_4Dz(double *phi, double *xx, double *yy, double *zz, double *aa, ...@@ -221,7 +221,7 @@ void implicit_4Dz(double *phi, double *xx, double *yy, double *zz, double *aa,
if((xx[ii]==0) && (yy[jj]==0) && (aa[ll] == 0) && (Mfirst <= 0)) if((xx[ii]==0) && (yy[jj]==0) && (aa[ll] == 0) && (Mfirst <= 0))
b[0] += (0.5/nu3 - Mfirst)*2./dz[0]; b[0] += (0.5/nu3 - Mfirst)*2./dz[0];
if((xx[ii]==1) && (yy[jj]==1) && (aa[ll] == 0) && (Mlast >= 0)) if((xx[ii]==1) && (yy[jj]==1) && (aa[ll] == 1) && (Mlast >= 0))
b[N-1] += -(-0.5/nu3 - Mlast)*2./dz[N-2]; b[N-1] += -(-0.5/nu3 - Mlast)*2./dz[N-2];
tridiag_premalloc(a, b, c, r, temp, N); tridiag_premalloc(a, b, c, r, temp, N);
...@@ -299,7 +299,7 @@ void implicit_4Da(double *phi, double *xx, double *yy, double *zz, double *aa, ...@@ -299,7 +299,7 @@ void implicit_4Da(double *phi, double *xx, double *yy, double *zz, double *aa,
if((xx[ii]==0) && (yy[jj]==0) && (zz[kk] == 0) && (Mfirst <= 0)) if((xx[ii]==0) && (yy[jj]==0) && (zz[kk] == 0) && (Mfirst <= 0))
b[0] += (0.5/nu4 - Mfirst)*2./da[0]; b[0] += (0.5/nu4 - Mfirst)*2./da[0];
if((xx[ii]==1) && (yy[jj]==1) && (zz[kk] == 0) && (Mlast >= 0)) if((xx[ii]==1) && (yy[jj]==1) && (zz[kk] == 1) && (Mlast >= 0))
b[O-1] += -(-0.5/nu4 - Mlast)*2./da[O-2]; b[O-1] += -(-0.5/nu4 - Mlast)*2./da[O-2];
tridiag_premalloc(a, b, c, r, &phi[ii*M*N*O + jj*N*O + kk*O], O); tridiag_premalloc(a, b, c, r, &phi[ii*M*N*O + jj*N*O + kk*O], O);
......
...@@ -65,7 +65,7 @@ void implicit_5Dx(double *phi, double *xx, double *yy, double *zz, double *aa, d ...@@ -65,7 +65,7 @@ void implicit_5Dx(double *phi, double *xx, double *yy, double *zz, double *aa, d
if((yy[jj]==0) && (zz[kk]==0) && (aa[ll]==0) && (bb[mm]==0) && (Mfirst <= 0)) if((yy[jj]==0) && (zz[kk]==0) && (aa[ll]==0) && (bb[mm]==0) && (Mfirst <= 0))
b[0] += (0.5/nu1 - Mfirst)*2./dx[0]; b[0] += (0.5/nu1 - Mfirst)*2./dx[0];
if((yy[jj]==1) && (zz[kk]==1) && (aa[ll]==0) && (bb[mm]==0) && (Mlast >= 0)) if((yy[jj]==1) && (zz[kk]==1) && (aa[ll]==1) && (bb[mm]==1) && (Mlast >= 0))
b[L-1] += -(-0.5/nu1 - Mlast)*2./dx[L-2]; b[L-1] += -(-0.5/nu1 - Mlast)*2./dx[L-2];
tridiag_premalloc(a, b, c, r, temp, L); tridiag_premalloc(a, b, c, r, temp, L);
...@@ -147,7 +147,7 @@ void implicit_5Dy(double *phi, double *xx, double *yy, double *zz, double *aa, d ...@@ -147,7 +147,7 @@ void implicit_5Dy(double *phi, double *xx, double *yy, double *zz, double *aa, d
if((xx[ii]==0) && (zz[kk]==0) && (aa[ll]==0) && (bb[mm]==0) && (Mfirst <= 0)) if((xx[ii]==0) && (zz[kk]==0) && (aa[ll]==0) && (bb[mm]==0) && (Mfirst <= 0))
b[0] += (0.5/nu2 - Mfirst)*2./dy[0]; b[0] += (0.5/nu2 - Mfirst)*2./dy[0];
if((xx[ii]==1) && (zz[kk]==1) && (aa[ll]==0) && (bb[mm]==0) && (Mlast >= 0)) if((xx[ii]==1) && (zz[kk]==1) && (aa[ll]==1) && (bb[mm]==1) && (Mlast >= 0))
b[M-1] += -(-0.5/nu2 - Mlast)*2./dy[M-2]; b[M-1] += -(-0.5/nu2 - Mlast)*2./dy[M-2];
tridiag_premalloc(a, b, c, r, temp, M); tridiag_premalloc(a, b, c, r, temp, M);
...@@ -229,7 +229,7 @@ void implicit_5Dz(double *phi, double *xx, double *yy, double *zz, double *aa, d ...@@ -229,7 +229,7 @@ void implicit_5Dz(double *phi, double *xx, double *yy, double *zz, double *aa, d
if((xx[ii]==0) && (yy[jj]==0) && (aa[ll] == 0) && (bb[mm]==0) && (Mfirst <= 0)) if((xx[ii]==0) && (yy[jj]==0) && (aa[ll] == 0) && (bb[mm]==0) && (Mfirst <= 0))
b[0] += (0.5/nu3 - Mfirst)*2./dz[0]; b[0] += (0.5/nu3 - Mfirst)*2./dz[0];
if((xx[ii]==1) && (yy[jj]==1) && (aa[ll] == 0) && (bb[mm]==0) && (Mlast >= 0)) if((xx[ii]==1) && (yy[jj]==1) && (aa[ll] == 1) && (bb[mm]==1) && (Mlast >= 0))
b[N-1] += -(-0.5/nu3 - Mlast)*2./dz[N-2]; b[N-1] += -(-0.5/nu3 - Mlast)*2./dz[N-2];
tridiag_premalloc(a, b, c, r, temp, N); tridiag_premalloc(a, b, c, r, temp, N);
...@@ -311,7 +311,7 @@ void implicit_5Da(double *phi, double *xx, double *yy, double *zz, double *aa, d ...@@ -311,7 +311,7 @@ void implicit_5Da(double *phi, double *xx, double *yy, double *zz, double *aa, d
if((xx[ii]==0) && (yy[jj]==0) && (zz[kk] == 0) && (bb[mm] == 0) && (Mfirst <= 0)) if((xx[ii]==0) && (yy[jj]==0) && (zz[kk] == 0) && (bb[mm] == 0) && (Mfirst <= 0))
b[0] += (0.5/nu4 - Mfirst)*2./da[0]; b[0] += (0.5/nu4 - Mfirst)*2./da[0];
if((xx[ii]==1) && (yy[jj]==1) && (zz[kk] == 0) && (bb[mm] == 0) && (Mlast >= 0)) if((xx[ii]==1) && (yy[jj]==1) && (zz[kk] == 1) && (bb[mm] == 1) && (Mlast >= 0))
b[O-1] += -(-0.5/nu4 - Mlast)*2./da[O-2]; b[O-1] += -(-0.5/nu4 - Mlast)*2./da[O-2];
tridiag_premalloc(a, b, c, r, temp, O); tridiag_premalloc(a, b, c, r, temp, O);
...@@ -392,7 +392,7 @@ void implicit_5Db(double *phi, double *xx, double *yy, double *zz, double *aa, d ...@@ -392,7 +392,7 @@ void implicit_5Db(double *phi, double *xx, double *yy, double *zz, double *aa, d
if((xx[ii]==0) && (yy[jj]==0) && (zz[kk] == 0) && (aa[ll] == 0) && (Mfirst <= 0)) if((xx[ii]==0) && (yy[jj]==0) && (zz[kk] == 0) && (aa[ll] == 0) && (Mfirst <= 0))
b[0] += (0.5/nu5 - Mfirst)*2./db[0]; b[0] += (0.5/nu5 - Mfirst)*2./db[0];
if((xx[ii]==1) && (yy[jj]==1) && (zz[kk] == 0) && (aa[ll] == 0) && (Mlast >= 0)) if((xx[ii]==1) && (yy[jj]==1) && (zz[kk] == 1) && (aa[ll] == 1) && (Mlast >= 0))
b[P-1] += -(-0.5/nu5 - Mlast)*2./db[O-2]; b[P-1] += -(-0.5/nu5 - Mlast)*2./db[O-2];
tridiag_premalloc(a, b, c, r, &phi[ii*M*N*O*P + jj*N*O*P + kk*O*P + ll], P); tridiag_premalloc(a, b, c, r, &phi[ii*M*N*O*P + jj*N*O*P + kk*O*P + ll], P);
......
...@@ -156,7 +156,7 @@ class CUDATestCase(unittest.TestCase): ...@@ -156,7 +156,7 @@ class CUDATestCase(unittest.TestCase):
def test_4d_integration(self): def test_4d_integration(self):
pts = 10 pts = 10
nu1 = lambda t: 0.5 + 5*t nu1 = lambda t: 0.5 + 5*t
nu2 = lambda t: 10-50*t nu2 = lambda t: 10-20*t
nu3, nu4 = 0.3, 0.9 nu3, nu4 = 0.3, 0.9
m12, m13, m14 = 2.0, 0.1, 3.2 m12, m13, m14 = 2.0, 0.1, 3.2
m21, m23, m24 = lambda t: 0.5+3*t, 0.2, 1.2 m21, m23, m24 = lambda t: 0.5+3*t, 0.2, 1.2
...@@ -168,6 +168,8 @@ class CUDATestCase(unittest.TestCase): ...@@ -168,6 +168,8 @@ class CUDATestCase(unittest.TestCase):
h2 = lambda t: 0.9-t h2 = lambda t: 0.9-t
h3, h4 = 0.3, 0.5 h3, h4 = 0.3, 0.5
theta0 = lambda t: 1 + 2*t theta0 = lambda t: 1 + 2*t
f1,f2,f3,f4 = False, False, False, False
T = 0.1
xx = dadi.Numerics.default_grid(pts) xx = dadi.Numerics.default_grid(pts)
phi = dadi.PhiManip.phi_1D(xx) phi = dadi.PhiManip.phi_1D(xx)
...@@ -175,21 +177,43 @@ class CUDATestCase(unittest.TestCase): ...@@ -175,21 +177,43 @@ class CUDATestCase(unittest.TestCase):
phi = dadi.PhiManip.phi_2D_to_3D(phi, 0, xx,xx,xx) phi = dadi.PhiManip.phi_2D_to_3D(phi, 0, xx,xx,xx)
phi = dadi.PhiManip.phi_3D_to_4D(phi, 0, 0, xx,xx,xx,xx) phi = dadi.PhiManip.phi_3D_to_4D(phi, 0, 0, xx,xx,xx,xx)
dadi.cuda_enabled(False) dadi.cuda_enabled(True)
phi_cpu = dadi.Integration.four_pops(phi.copy(), xx, T=0.1, nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4, phi_gpu = dadi.Integration.four_pops(phi.copy(), xx, T=T, nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4,
m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24, m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24,
m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43, m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43,
gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4, gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4,
h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0) h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0,
frozen1=f1, frozen2=f2, frozen3=f3, frozen4=f4)
dadi.cuda_enabled(True) dadi.cuda_enabled(False)
phi_gpu = dadi.Integration.four_pops(phi.copy(), xx, T=0.1, nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4, phi_cpu = dadi.Integration.four_pops(phi.copy(), xx, T=T, nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4,
m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24, m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24,
m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43, m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43,
gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4, gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4,
h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0) h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0,
frozen1=f1, frozen2=f2, frozen3=f3, frozen4=f4)
self.assertTrue(np.allclose(phi_cpu, phi_gpu)) self.assertTrue(np.allclose(phi_cpu, phi_gpu))
m12, m13, m14, m21, m23, m24, m31, m32, m34, m41, m42, m43 = [0]*12
for f1 in [True, False]:
for f2 in [True, False]:
for f3 in [True, False]:
for f4 in [True, False]:
dadi.cuda_enabled(True)
phi_gpu = dadi.Integration.four_pops(phi.copy(), xx, T=T, nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4,
m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24,
m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43,
gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4,
h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0,
frozen1=f1, frozen2=f2, frozen3=f3, frozen4=f4)
dadi.cuda_enabled(False)
phi_cpu = dadi.Integration.four_pops(phi.copy(), xx, T=T, nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4,
m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24,
m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43,
gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4,
h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0,
frozen1=f1, frozen2=f2, frozen3=f3, frozen4=f4)
if dadi.cuda_enabled(True): if dadi.cuda_enabled(True):
suite = unittest.TestLoader().loadTestsFromTestCase(CUDATestCase) suite = unittest.TestLoader().loadTestsFromTestCase(CUDATestCase)
else: else:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment