Commit 843c4896 authored Jun 03, 2020 by Ryan Gutenkunst

Add 4D integration CUDA code

parent 51d90205
Changes 3
dadi/cuda/Integration.py
...
...
@@ -32,6 +32,27 @@ def _inject_mutations_3D_valcalc(dt, xx, yy, zz, theta0, frozen1, frozen2, froze
    val001 = dt/zz[1] * theta0/2 * 8/((zz[2] - zz[0]) * xx[1] * yy[1])
    return np.float64(val100), np.float64(val010), np.float64(val001)

def _inject_mutations_4D_valcalc(dt, xx, yy, zz, aa, theta0,
                                 frozen1, frozen2, frozen3, frozen4):
    """
    Calculate mutations that need to be injected.
    """
    # Population 1
    # Normalization based on the multi-dimensional trapezoid rule is
    # implemented ************** here ***************
    val1000, val0100, val0010, val0001 = 0, 0, 0, 0
    if not frozen1:
        val1000 = dt/xx[1] * theta0/2 * 16/((xx[2] - xx[0]) * yy[1] * zz[1] * aa[1])
    # Population 2
    if not frozen2:
        val0100 = dt/yy[1] * theta0/2 * 16/((yy[2] - yy[0]) * xx[1] * zz[1] * aa[1])
    # Population 3
    if not frozen3:
        val0010 = dt/zz[1] * theta0/2 * 16/((zz[2] - zz[0]) * xx[1] * yy[1] * aa[1])
    # Population 4
    if not frozen4:
        val0001 = dt/aa[1] * theta0/2 * 16/((aa[2] - aa[0]) * xx[1] * yy[1] * zz[1])
    return np.float64(val1000), np.float64(val0100), np.float64(val0010),\
        np.float64(val0001)
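
Because the four injection values are symmetric in the populations, a quick consistency check is possible on a shared grid. The following is a hypothetical sketch (not part of the commit), assuming only NumPy and that the function above is in scope:

# Hypothetical sanity check: on a common grid the four injection values
# coincide, and each scales linearly with dt (and with theta0).
import numpy as np

xx = np.linspace(0, 1, 11)   # stand-in for a dadi grid
dt, theta0 = 1e-3, 1.0

vals = _inject_mutations_4D_valcalc(dt, xx, xx, xx, xx, theta0,
                                    False, False, False, False)
assert np.allclose(vals, vals[0])

vals2 = _inject_mutations_4D_valcalc(2*dt, xx, xx, xx, xx, theta0,
                                     False, False, False, False)
assert np.allclose(np.asarray(vals2), 2*np.asarray(vals))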
import pycuda
import pycuda.gpuarray as gpuarray
from skcuda.cusparse import cusparseDgtsvInterleavedBatch_bufferSizeExt, \
    cusparseDgtsvInterleavedBatch
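
The two cuSPARSE routines imported here solve many small tridiagonal systems in one call, stored "interleaved": element (i, j) of a row-major (system_size, n_systems) array is unknown i of system j, which is exactly the layout phi takes below once it is reshaped to (L, M*N*O). As a point of reference, here is a CPU sketch of the same batched solve (hypothetical, assuming SciPy; not part of the commit):

# Hypothetical CPU reference: solve n_batch independent tridiagonal systems
# of size L stored interleaved, i.e. a[i, j] is the sub-diagonal entry of
# row i in system j.
import numpy as np
from scipy.linalg import solve_banded

def batched_tridiag_solve(a, b, c, d):
    """a, b, c, d have shape (L, n_batch); returns x of the same shape."""
    L, n_batch = b.shape
    x = np.empty_like(d)
    for j in range(n_batch):
        ab = np.zeros((3, L))
        ab[0, 1:] = c[:-1, j]    # super-diagonal (c[L-1] unused)
        ab[1, :] = b[:, j]       # main diagonal
        ab[2, :-1] = a[1:, j]    # sub-diagonal (a[0] unused)
        x[:, j] = solve_banded((1, 1), ab, d[:, j])
    return x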
...
...
@@ -493,3 +514,207 @@ def _three_pops_temporal_params(phi, xx, T, initial_t, nu1_f, nu2_f, nu3_f,
        current_t += this_dt
    return phi_gpu.get().reshape(L, M, N)

def _four_pops_temporal_params(phi, xx, T, initial_t,
                               nu1_f, nu2_f, nu3_f, nu4_f,
                               m12_f, m13_f, m14_f, m21_f, m23_f, m24_f,
                               m31_f, m32_f, m34_f, m41_f, m42_f, m43_f,
                               gamma1_f, gamma2_f, gamma3_f, gamma4_f,
                               h1_f, h2_f, h3_f, h4_f, theta0_f,
                               frozen1, frozen2, frozen3, frozen4):
    if dadi.Integration.use_delj_trick:
        raise ValueError("delj trick not currently supported in CUDA execution")
    t = current_t = initial_t
    nu1, nu2, nu3, nu4 = nu1_f(current_t), nu2_f(current_t), nu3_f(current_t), nu4_f(current_t)
    gamma1, gamma2, gamma3, gamma4 = gamma1_f(current_t), gamma2_f(current_t), gamma3_f(current_t), gamma4_f(current_t)
    h1, h2, h3, h4 = h1_f(current_t), h2_f(current_t), h3_f(current_t), h4_f(current_t)
    m12, m13, m14 = m12_f(current_t), m13_f(current_t), m14_f(current_t)
    m21, m23, m24 = m21_f(current_t), m23_f(current_t), m24_f(current_t)
    m31, m32, m34 = m31_f(current_t), m32_f(current_t), m34_f(current_t)
    m41, m42, m43 = m41_f(current_t), m42_f(current_t), m43_f(current_t)

    L = M = N = O = np.int32(len(xx))
    phi_gpu = gpuarray.to_gpu(phi.reshape(L, M*N*O))

    aa = yy = zz = xx
    da = dx = dy = dz = np.diff(xx)
    dfactor = dadi.Integration._compute_dfactor(dx)
    xInt = (xx[:-1] + xx[1:]) * 0.5

    xx_gpu = gpuarray.to_gpu(xx)
    dx_gpu = gpuarray.to_gpu(dx)
    dfactor_gpu = gpuarray.to_gpu(dfactor)
    xInt_gpu = gpuarray.to_gpu(xInt)

    V_gpu = gpuarray.empty(L, np.float64)
    VInt_gpu = gpuarray.empty(L-1, np.float64)

    # Work buffers for the batched tridiagonal solves. Each ADI sweep solves
    # M*N*O systems of size L, so the diagonals need L*M*N*O entries (not the
    # L*L*L used in the 3D code).
    a_gpu = gpuarray.empty((L, M*N*O), np.float64)
    b_gpu = gpuarray.empty((L, M*N*O), np.float64)
    c_gpu = gpuarray.empty((L, M*N*O), np.float64)

    bsize_int = cusparseDgtsvInterleavedBatch_bufferSizeExt(cusparse_handle,
            0, L, a_gpu.gpudata, b_gpu.gpudata, c_gpu.gpudata,
            phi_gpu.gpudata, M*N*O)
    pBuffer = pycuda.driver.mem_alloc(bsize_int)

    while current_t < T:
        dt = min(dadi.Integration._compute_dt(dx, nu1, [m12, m13, m14], gamma1, h1),
                 dadi.Integration._compute_dt(dy, nu2, [m21, m23, m24], gamma2, h2),
                 dadi.Integration._compute_dt(dz, nu3, [m31, m32, m34], gamma3, h3),
                 dadi.Integration._compute_dt(da, nu4, [m41, m42, m43], gamma4, h4))
        this_dt = np.float64(min(dt, T - current_t))
        next_t = current_t + this_dt

        nu1, nu2, nu3, nu4 = nu1_f(next_t), nu2_f(next_t), nu3_f(next_t), nu4_f(next_t)
        gamma1, gamma2, gamma3, gamma4 = gamma1_f(next_t), gamma2_f(next_t), gamma3_f(next_t), gamma4_f(next_t)
        h1, h2, h3, h4 = h1_f(next_t), h2_f(next_t), h3_f(next_t), h4_f(next_t)
        m12, m13, m14 = m12_f(next_t), m13_f(next_t), m14_f(next_t)
        m21, m23, m24 = m21_f(next_t), m23_f(next_t), m24_f(next_t)
        m31, m32, m34 = m31_f(next_t), m32_f(next_t), m34_f(next_t)
        m41, m42, m43 = m41_f(next_t), m42_f(next_t), m43_f(next_t)
        theta0 = theta0_f(next_t)

        if np.any(np.less([T, nu1, nu2, nu3, nu4,
                           m12, m13, m14, m21, m23, m24,
                           m31, m32, m34, m41, m42, m43, theta0], 0)):
            raise ValueError('A time, population size, migration rate, or '
                             'theta0 is < 0. Has the model been mis-specified?')
        if np.any(np.equal([nu1, nu2, nu3, nu4], 0)):
            raise ValueError('A population size is 0. Has the model been '
                             'mis-specified?')

        val1000, val0100, val0010, val0001 = \
            _inject_mutations_4D_valcalc(this_dt, xx, yy, zz, aa, theta0,
                                         frozen1, frozen2, frozen3, frozen4)
        kernels._inject_mutations_4D_vals(phi_gpu, L,
                                          val0001, val0010, val0100, val1000,
                                          block=(1, 1, 1))

        # I can use the c_gpu buffer for the MInt_gpu buffer, to save GPU memory.
        # Note that I have to reassign this after each transpose operation I do.
        MInt_gpu = c_gpu

        if not frozen1:
            kernels._Vfunc(xx_gpu, nu1, L, V_gpu,
                           grid=_grid(L), block=_block())
            kernels._Vfunc(xInt_gpu, nu1, np.int32(L-1), VInt_gpu,
                           grid=_grid(L-1), block=_block())
            kernels._Mfunc4D(xInt_gpu, xx_gpu, xx_gpu, xx_gpu,
                             m12, m13, m14, gamma1, h1,
                             np.int32(L-1), M, N, O, MInt_gpu,
                             grid=_grid((L-1)*M*N*O), block=_block())

            b_gpu.fill(1./this_dt)
            kernels._compute_ab_nobc(dx_gpu, dfactor_gpu, MInt_gpu, V_gpu,
                                     this_dt, L, M*N*O, a_gpu, b_gpu,
                                     grid=_grid((L-1)*M*N*O), block=_block())
            kernels._compute_bc_nobc(dx_gpu, dfactor_gpu, MInt_gpu, V_gpu,
                                     this_dt, L, M*N*O, b_gpu, c_gpu,
                                     grid=_grid((L-1)*M*N*O), block=_block())
            kernels._include_bc(dx_gpu, nu1, gamma1, h1, L, M*N*O, b_gpu,
                                block=(1, 1, 1))
            kernels._cx0(c_gpu, L, M*N*O,
                         grid=_grid(M*N*O), block=_block())

            phi_gpu /= this_dt
            cusparseDgtsvInterleavedBatch(cusparse_handle, 0, L,
                                          a_gpu.gpudata, b_gpu.gpudata,
                                          c_gpu.gpudata, phi_gpu.gpudata,
                                          M*N*O, pBuffer)

        transpose_gpuarray(phi_gpu, c_gpu.reshape(M*N*O, L))
        phi_gpu, c_gpu = c_gpu.reshape(M, L*N*O), phi_gpu.reshape(M, L*N*O)
        MInt_gpu = c_gpu

        if not frozen2:
            kernels._Vfunc(xx_gpu, nu2, M, V_gpu,
                           grid=_grid(M), block=_block())
            kernels._Vfunc(xInt_gpu, nu2, np.int32(M-1), VInt_gpu,
                           grid=_grid(M-1), block=_block())
            # Note the order of the m arguments here.
            kernels._Mfunc4D(xInt_gpu, xx_gpu, xx_gpu, xx_gpu,
                             m23, m24, m21, gamma2, h2,
                             np.int32(M-1), N, O, L, MInt_gpu,
                             grid=_grid((M-1)*L*N*O), block=_block())

            b_gpu.fill(1./this_dt)
            kernels._compute_ab_nobc(dx_gpu, dfactor_gpu, MInt_gpu, V_gpu,
                                     this_dt, M, L*N*O, a_gpu, b_gpu,
                                     grid=_grid((M-1)*L*N*O), block=_block())
            kernels._compute_bc_nobc(dx_gpu, dfactor_gpu, MInt_gpu, V_gpu,
                                     this_dt, M, L*N*O, b_gpu, c_gpu,
                                     grid=_grid((M-1)*L*N*O), block=_block())
            kernels._include_bc(dx_gpu, nu2, gamma2, h2, M, L*N*O, b_gpu,
                                block=(1, 1, 1))
            kernels._cx0(c_gpu, M, L*N*O,
                         grid=_grid(L*N*O), block=_block())

            phi_gpu /= this_dt
            cusparseDgtsvInterleavedBatch(cusparse_handle, 0, M,
                                          a_gpu.gpudata, b_gpu.gpudata,
                                          c_gpu.gpudata, phi_gpu.gpudata,
                                          L*N*O, pBuffer)

        transpose_gpuarray(phi_gpu, c_gpu.reshape(L*N*O, M))
        phi_gpu, c_gpu = c_gpu.reshape(N, L*M*O), phi_gpu.reshape(N, L*M*O)
        MInt_gpu = c_gpu

        if not frozen3:
            kernels._Vfunc(xx_gpu, nu3, N, V_gpu,
                           grid=_grid(N), block=_block())
            kernels._Vfunc(xInt_gpu, nu3, np.int32(N-1), VInt_gpu,
                           grid=_grid(N-1), block=_block())
            kernels._Mfunc4D(xInt_gpu, xx_gpu, xx_gpu, xx_gpu,
                             m34, m31, m32, gamma3, h3,
                             np.int32(N-1), O, L, M, MInt_gpu,
                             grid=_grid((N-1)*M*L*O), block=_block())

            b_gpu.fill(1./this_dt)
            kernels._compute_ab_nobc(dx_gpu, dfactor_gpu, MInt_gpu, V_gpu,
                                     this_dt, N, L*M*O, a_gpu, b_gpu,
                                     grid=_grid((N-1)*L*M*O), block=_block())
            kernels._compute_bc_nobc(dx_gpu, dfactor_gpu, MInt_gpu, V_gpu,
                                     this_dt, N, L*M*O, b_gpu, c_gpu,
                                     grid=_grid((N-1)*L*M*O), block=_block())
            kernels._include_bc(dx_gpu, nu3, gamma3, h3, N, L*M*O, b_gpu,
                                block=(1, 1, 1))
            kernels._cx0(c_gpu, N, L*M*O,
                         grid=_grid(L*M*O), block=_block())

            phi_gpu /= this_dt
            cusparseDgtsvInterleavedBatch(cusparse_handle, 0, N,
                                          a_gpu.gpudata, b_gpu.gpudata,
                                          c_gpu.gpudata, phi_gpu.gpudata,
                                          L*M*O, pBuffer)

        transpose_gpuarray(phi_gpu, c_gpu.reshape(L*M*O, N))
        phi_gpu, c_gpu = c_gpu.reshape(O, L*M*N), phi_gpu.reshape(O, L*M*N)
        MInt_gpu = c_gpu

        if not frozen4:
            kernels._Vfunc(xx_gpu, nu4, O, V_gpu,
                           grid=_grid(O), block=_block())
            kernels._Vfunc(xInt_gpu, nu4, np.int32(O-1), VInt_gpu,
                           grid=_grid(O-1), block=_block())
            kernels._Mfunc4D(xInt_gpu, xx_gpu, xx_gpu, xx_gpu,
                             m41, m42, m43, gamma4, h4,
                             np.int32(O-1), L, M, N, MInt_gpu,
                             grid=_grid((O-1)*M*L*N), block=_block())

            b_gpu.fill(1./this_dt)
            kernels._compute_ab_nobc(dx_gpu, dfactor_gpu, MInt_gpu, V_gpu,
                                     this_dt, O, L*M*N, a_gpu, b_gpu,
                                     grid=_grid((O-1)*L*M*N), block=_block())
            kernels._compute_bc_nobc(dx_gpu, dfactor_gpu, MInt_gpu, V_gpu,
                                     this_dt, O, L*M*N, b_gpu, c_gpu,
                                     grid=_grid((O-1)*L*M*N), block=_block())
            kernels._include_bc(dx_gpu, nu4, gamma4, h4, O, L*M*N, b_gpu,
                                block=(1, 1, 1))
            kernels._cx0(c_gpu, O, L*M*N,
                         grid=_grid(L*M*N), block=_block())

            phi_gpu /= this_dt
            cusparseDgtsvInterleavedBatch(cusparse_handle, 0, O,
                                          a_gpu.gpudata, b_gpu.gpudata,
                                          c_gpu.gpudata, phi_gpu.gpudata,
                                          L*M*N, pBuffer)

        transpose_gpuarray(phi_gpu, c_gpu.reshape(L*M*N, O))
        phi_gpu, c_gpu = c_gpu.reshape(L, M*N*O), phi_gpu.reshape(L, M*N*O)

        current_t += this_dt
    return phi_gpu.get().reshape(L, M, N, O)
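
Each of the four sweeps above solves along the current leading axis of phi and then transposes so that the next population's axis becomes the leading one. A small NumPy sketch (hypothetical, not part of the commit) shows that four transpose/reshape steps return the array to its original (L, M, N, O) ordering, which is what makes the final reshape valid:

import numpy as np

# Use distinct sizes so the axis bookkeeping is actually exercised.
sizes = [2, 3, 4, 5]                      # stand-ins for L, M, N, O
phi = np.arange(np.prod(sizes), dtype=float)
work = phi.reshape(sizes[0], -1).copy()   # (L, M*N*O), as on the GPU

for i in range(4):
    # (one ADI sweep along axis 0 of `work` would happen here)
    work = np.ascontiguousarray(work.T)          # mirrors transpose_gpuarray
    work = work.reshape(sizes[(i + 1) % 4], -1)  # next population's axis leads

assert np.array_equal(work.ravel(), phi)         # back to (L, M, N, O) order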
dadi/cuda/kernels.cu
...
...
@@ -9,6 +9,13 @@ __global__ void inject_mutations_3D(double *phi, int L, double val001, double va
    phi[L*L] += val100;
}

__global__ void inject_mutations_4D(double *phi, int L,
        double val0001, double val0010, double val0100, double val1000){
    phi[1] += val0001;
    phi[L] += val0010;
    phi[L*L] += val0100;
    phi[L*L*L] += val1000;
}
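
The kernel touches only four entries, so the Python side launches it with a single thread (block=(1, 1, 1)). A CPU equivalent in NumPy (hypothetical, not part of the commit) makes the flat offsets explicit:

# On a row-major (L, L, L, L) array, flat offsets 1, L, L*L, and L*L*L are the
# four entries adjacent to the phi[0, 0, 0, 0] corner, one per population,
# where newly arising mutations enter the density.
import numpy as np

L = 5
phi = np.zeros((L, L, L, L))
flat = phi.reshape(-1)
val0001, val0010, val0100, val1000 = 1.0, 2.0, 3.0, 4.0

flat[1]     += val0001   # phi[0, 0, 0, 1]
flat[L]     += val0010   # phi[0, 0, 1, 0]
flat[L*L]   += val0100   # phi[0, 1, 0, 0]
flat[L*L*L] += val1000   # phi[1, 0, 0, 0]

assert phi[0, 0, 0, 1] == val0001 and phi[1, 0, 0, 0] == val1000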

__global__ void Vfunc(double *x, double nu, int L, double *output){
    int ii = blockIdx.x * blockDim.x + threadIdx.x;
    if (ii < L){
...
...
@@ -42,6 +49,21 @@ __global__ void Mfunc3D(double *x, double *y, double *z, double mxy, double mxz,
    }
}

__device__ double _Mfunc4D(double x, double y, double z, double a,
        double mxy, double mxz, double mxa, double gamma, double h){
    return mxy*(y-x) + mxz*(z-x) + mxa*(a-x)
        + gamma * 2*(h + (1.-2*h)*x) * x*(1.-x);
}
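
In equation form, the device function returns the mean (advection) term for a focal population at frequency x with migration from partners at frequencies y, z, a and selection with coefficient gamma and dominance h:

    M(x) = m_{xy}(y - x) + m_{xz}(z - x) + m_{xa}(a - x) + 2\gamma\,[h + (1 - 2h)\,x]\,x(1 - x)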

__global__ void Mfunc4D(double *x, double *y, double *z, double *a,
        double mxy, double mxz, double mxa, double gamma, double h,
        int L, int M, int N, int O, double *output){
    int ii = (blockIdx.x * blockDim.x + threadIdx.x) / (M*N*O);
    int jj = ((blockIdx.x * blockDim.x + threadIdx.x) / (N*O)) % M;
    int kk = ((blockIdx.x * blockDim.x + threadIdx.x) / O) % N;
    int ll = (blockIdx.x * blockDim.x + threadIdx.x) % O;
    if (ii < L){
        output[ii*M*N*O + jj*N*O + kk*O + ll] =
            _Mfunc4D(x[ii], y[jj], z[kk], a[ll], mxy, mxz, mxa, gamma, h);
    }
}
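
The kernel recovers four grid indices from a single 1-D thread index with integer division and modulus. A quick check of that arithmetic (hypothetical, not part of the commit), written in Python for brevity:

# Verify that the Mfunc4D index arithmetic inverts the row-major flattening
# of an (L, M, N, O) array.
L, M, N, O = 3, 4, 5, 6
for ii in range(L):
    for jj in range(M):
        for kk in range(N):
            for ll in range(O):
                t = ii*M*N*O + jj*N*O + kk*O + ll   # flat thread index
                recovered = (t // (M*N*O), (t // (N*O)) % M, (t // O) % N, t % O)
                assert recovered == (ii, jj, kk, ll)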
// We need an additional simple kernel to zero out the necessary
// values of the c array, because the Interleaved tridiagonal
// solver alters the c array.
...
...
tests/test_CUDA.py
...
...
@@ -152,6 +152,43 @@ class CUDATestCase(unittest.TestCase):
                frozen3)
        self.assertTrue(np.allclose(phi_cpu, phi_gpu))

    def test_4d_integration(self):
        pts = 10

        nu1 = lambda t: 0.5 + 5*t
        nu2 = lambda t: 10 - 50*t
        nu3, nu4 = 0.3, 0.9
        m12, m13, m14 = 2.0, 0.1, 3.2
        m21, m23, m24 = (lambda t: 0.5 + 3*t), 0.2, 1.2
        m31, m32, m34 = 0.9, 1.7, 0.9
        m41, m42, m43 = 0.3, 0.4, 1.9
        gamma1 = lambda t: -2*t
        gamma2, gamma3, gamma4 = 3.0, -1, 0.5
        h1 = lambda t: 0.2 + t
        h2 = lambda t: 0.9 - t
        h3, h4 = 0.3, 0.5
        theta0 = lambda t: 1 + 2*t

        xx = dadi.Numerics.default_grid(pts)
        phi = dadi.PhiManip.phi_1D(xx)
        phi = dadi.PhiManip.phi_1D_to_2D(xx, phi)
        phi = dadi.PhiManip.phi_2D_to_3D(phi, 0, xx, xx, xx)
        phi = dadi.PhiManip.phi_3D_to_4D(phi, 0, 0, xx, xx, xx, xx)

        dadi.cuda_enabled(False)
        phi_cpu = dadi.Integration.four_pops(phi.copy(), xx, T=0.1,
                nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4,
                m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24,
                m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43,
                gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4,
                h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0)

        dadi.cuda_enabled(True)
        phi_gpu = dadi.Integration.four_pops(phi.copy(), xx, T=0.1,
                nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4,
                m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24,
                m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43,
                gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4,
                h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0)

        self.assertTrue(np.allclose(phi_cpu, phi_gpu))
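
Outside the test suite, the same cuda_enabled toggle makes it easy to compare runtimes of the CPU and GPU 4D paths. A hypothetical sketch (not part of this commit), assuming the same parameter definitions as in test_4d_integration are in scope:

import time

def _run_four_pops():
    # identical call to the one used in the test above
    return dadi.Integration.four_pops(phi.copy(), xx, T=0.1,
            nu1=nu1, nu2=nu2, nu3=nu3, nu4=nu4,
            m12=m12, m13=m13, m14=m14, m21=m21, m23=m23, m24=m24,
            m31=m31, m32=m32, m34=m34, m41=m41, m42=m42, m43=m43,
            gamma1=gamma1, gamma2=gamma2, gamma3=gamma3, gamma4=gamma4,
            h1=h1, h2=h2, h3=h3, h4=h4, theta0=theta0)

dadi.cuda_enabled(False)
start = time.time(); _run_four_pops(); cpu_s = time.time() - start
dadi.cuda_enabled(True)
start = time.time(); _run_four_pops(); gpu_s = time.time() - start
print('CPU: {0:.2f} s   GPU: {1:.2f} s'.format(cpu_s, gpu_s))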

if dadi.cuda_enabled(True):
    suite = unittest.TestLoader().loadTestsFromTestCase(CUDATestCase)
...
...