Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
khalid
radsex
Commits
5b71a432
Commit
5b71a432
authored
Feb 06, 2018
by
Romain Feron
Browse files
Optimized multithreading
parent
071a21c0
Changes
7
Hide whitespace changes
Inline
Side-by-side
stacks_replacement/extract.py
0 → 100644
View file @
5b71a432
"""Extract loci that are covered in individuals of both sexes.

Inputs
------
* A tab-separated coverage table. Its first line is a header; in every
  following line, column 0 is written to the output under 'Locus', column 1
  under 'Sequence', and every column whose header name appears in the
  population map is parsed as an integer.
* A tab-separated population map: individual name, then sex ('M' or 'F').

Output
------
A tab-separated file with the columns Locus, Males, Females, Sequence, holding
one line per locus that passes the thresholds below.

Run as a script, this uses the same hard-coded paths and thresholds as before
('./test.tsv', './popmap.tsv', './haplotypes.tsv').
"""

# Default thresholds. Every comparison below is strict ('>'), exactly as in the
# original script, so a locus needs MORE than this many individuals of a sex.
# NOTE(review): the "min_" names suggest '>=' may have been intended - confirm.
min_males = 1
min_females = 1
min_cov = 0


def _read_popmap(popmap_path):
    """Return {individual name: sex} parsed from a two-column TSV file."""
    popmap = {}
    with open(popmap_path) as popmap_file:
        for line in popmap_file:
            # Strip only the line terminator: the previous `line[:-1]` ate a
            # real character when the last line had no trailing newline.
            line = line.rstrip('\r\n')
            if not line:
                continue
            fields = line.split('\t')
            popmap[fields[0]] = fields[1]
    return popmap


def _count_covered(fields, columns, min_cov):
    """Count the `columns` whose integer value in `fields` exceeds `min_cov`.

    Column indices beyond the end of a short row are ignored, which matches
    the original enumerate-and-filter behaviour.
    """
    n_fields = len(fields)
    return sum(1 for j in columns if j < n_fields and int(fields[j]) > min_cov)


def extract_haplotypes(input_path='./test.tsv', popmap_path='./popmap.tsv',
                       output_path='./haplotypes.tsv', min_males=min_males,
                       min_females=min_females, min_cov=min_cov):
    """Write the loci present in more than `min_males` males and more than
    `min_females` females, where "present" means a value greater than `min_cov`.

    Prints the female and male counts found in the population map, then a
    progress counter every 1,000,000 input lines.

    Returns the number of loci written to `output_path`.
    Raises IndexError on a population-map line with fewer than two columns and
    ValueError when a per-individual cell is not an integer.
    """
    popmap = _read_popmap(popmap_path)
    sexes = list(popmap.values())
    n_males = sexes.count('M')
    n_females = sexes.count('F')
    print(n_females, n_males)

    n_written = 0
    # Context managers guarantee both files are closed (and the output is
    # flushed) even if a malformed line raises part-way through.
    with open(input_path) as input_file, open(output_path, 'w') as output_file:
        header = input_file.readline().rstrip('\r\n').split('\t')
        # Compare by value: `is 'M'` tested object identity, which only works
        # by accident of string interning.
        males_col = [i for i, h in enumerate(header) if popmap.get(h) == 'M']
        females_col = [i for i, h in enumerate(header) if popmap.get(h) == 'F']

        output_file.write('\t'.join(('Locus', 'Males', 'Females', 'Sequence')) + '\n')

        for i, line in enumerate(input_file):
            if i % 1000000 == 0:
                print(str(i))
            temp = line.rstrip('\r\n').split('\t')
            females = _count_covered(temp, females_col, min_cov)
            males = _count_covered(temp, males_col, min_cov)
            if males > min_males and females > min_females:
                output_file.write('\t'.join((str(temp[0]), str(males), str(females), temp[1])) + '\n')
                n_written += 1
    return n_written


if __name__ == '__main__':
    extract_haplotypes()
stacks_replacement/src/analysis.cpp
View file @
5b71a432
...
...
@@ -33,6 +33,7 @@ void file_processor(std::vector<InputFile>& input_files, std::unordered_map<std:
if
(
not
it
->
processed
)
{
it
->
processed
=
true
;
remaining_files
=
true
;
files_mutex
.
unlock
();
process_file
(
*
it
,
results
,
results_mutex
);
break
;
}
else
{
...
...
stacks_replacement/src/process_file.cpp
View file @
5b71a432
...
...
@@ -3,9 +3,9 @@
void
process_file
(
InputFile
file
,
std
::
unordered_map
<
std
::
string
,
std
::
unordered_map
<
std
::
string
,
uint16_t
>>&
results
,
std
::
mutex
&
results_mutex
)
{
std
::
unordered_map
<
std
::
string
,
uint16_t
>
temp_results
;
igzstream
input_file
(
file
.
path
.
c_str
());
std
::
string
line
;
bool
write_next_line
=
false
;
while
(
std
::
getline
(
input_file
,
line
))
{
switch
(
line
[
0
])
{
...
...
@@ -17,13 +17,19 @@ void process_file(InputFile file, std::unordered_map<std::string, std::unordered
break
;
default:
if
(
write_next_line
)
{
results_mutex
.
lock
();
++
results
[
line
][
file
.
individual_name
];
results_mutex
.
unlock
();
++
temp_results
[
line
];
}
break
;
}
}
results_mutex
.
lock
();
for
(
auto
sequence
:
temp_results
)
{
results
[
sequence
.
first
][
file
.
individual_name
]
+=
sequence
.
second
;
}
results_mutex
.
unlock
();
std
::
cout
<<
" - Finished processing individual : "
+
file
.
individual_name
+
"."
<<
std
::
endl
;
return
;
}
stacks_replacement/src/process_file.h
View file @
5b71a432
...
...
@@ -2,6 +2,7 @@
#include <mutex>
#include <unordered_map>
#include "gzstream.h"
#include "zlib.h"
#include "input_file.h"
#include "utils.h"
...
...
stacks_replacement/stacks_replacement.pro
View file @
5b71a432
...
...
@@ -7,7 +7,7 @@ SOURCES += \
src
/
main
.
cpp
\
src
/
arg_parser
.
cpp
\
src
/
utils
.
cpp
\
analysis
.
cpp
\
src
/
analysis
.
cpp
\
src
/
analysis
.
cpp
\
src
/
input_dir
.
cpp
\
src
/
process_file
.
cpp
\
...
...
@@ -18,7 +18,7 @@ HEADERS += \
src
/
arg_parser
.
h
\
src
/
parameters
.
h
\
src
/
utils
.
h
\
analysis
.
h
\
src
/
analysis
.
h
\
src
/
analysis
.
h
\
src
/
input_dir
.
h
\
src
/
process_file
.
h
\
...
...
stacks_replacement/time.sh
0 → 100644
View file @
5b71a432
# Wall-clock benchmark of stacks_replacement on the test samples.
# Runs the same command once per thread count and brackets each run with
# `date`, so start and end timestamps can be copied into times.txt.
for threads in 1 3; do
    date
    time ./bin/stacks_replacement -i ./test/samples -o test/test.tsv -t "$threads"
    date
done
stacks_replacement/times.txt
0 → 100644
View file @
5b71a432
## Current implementation
Threads: 1
Start: 08:36:36
End: 08:40:36
--> 240s
Threads: 3
Start: 08:40:36
End: 08:44:36
--> 240s
## Just reading the files
Threads: 1
Start: 08:45:28
End: 08:47:16
--> 108s
Threads: 3
Start: 08:47:16
End: 08:48:00
--> 44s
## Copy map at the end
Threads: 1
Start: 09:27:20
End: 09:31:15
--> 235s
Threads: 3
Start: 09:31:15
End: 09:34:07
--> 172s
## Buffer 2048 + Copy map at the end
Threads: 1
Start: 10:11:01
End: 10:15:23
--> 262s
Threads: 3
Start: 10:15:23
End: 10:18:32
--> 189s
## Buffer 65536 + Copy map at the end
Threads: 1
Start: 10:20:32
End: 10:24:49
--> 257s
Threads: 3
Start: 10:24:49
End: 10:27:41
--> 172s
## Buffer 1048576 + Copy map at the end
Threads: 1
Start: 10:35:58
End: 10:40:17
--> 259s
Threads: 3
Start: 10:40:17
End: 10:43:15
--> 178s
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment