Skip to content

Commit 013f230

Browse files
committed
add tool to parse metadata
1 parent 6ca6368 commit 013f230

10 files changed

Lines changed: 47 additions & 129 deletions

json-parser/Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ FROM python:3.7.5-slim-buster
22

33
LABEL org.opencontainers.image.source https://github.com/icgc-argo-workflows/data-processing-utility-tools
44

5-
ENV PATH="/tools:${PATH}"
5+
RUN apt-get update && apt-get install -y procps jq
66

7-
COPY *.py /tools/
7+
ENV PATH="/tools:${PATH}"
88

9-
ENTRYPOINT ["/usr/bin/env"]
9+
WORKDIR /tools
1010

11-
CMD ["/bin/bash"]
11+
COPY . .

json-parser/main.nf

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ params.publish_dir = "" // set to empty string will disable publishDir
4444

4545

4646
// tool specific params go here, add / change as needed
47-
params.input_file = ""
48-
params.output_pattern = "*" // output file name pattern
47+
params.metadata_analysis = ""
4948

5049

5150
process jsonParser {
@@ -55,22 +54,29 @@ process jsonParser {
5554
cpus params.cpus
5655
memory "${params.mem} GB"
5756

58-
input: // input, make update as needed
59-
path input_file
57+
input:
58+
path metadata_analysis
6059

61-
output: // output, make update as needed
62-
path "output_dir/${params.output_pattern}", emit: output_file
60+
output:
61+
env STUDY_ID, emit: study_id
62+
env DONOR_ID, emit: donor_id
63+
env EXP, emit: experimental_strategy
64+
env PAIRED, emit: paired
65+
env ANALYSIS_TOOLS, emit: analysis_tools
66+
env STRAND, emit: library_strandedness
6367

6468
script:
65-
// add and initialize variables here as needed
66-
6769
"""
68-
mkdir -p output_dir
69-
70-
main.py \
71-
-i ${input_file} \
72-
-o output_dir
73-
70+
set -euxo pipefail
71+
VARIABLE1=`cat ${metadata_analysis} | jq -r 'if ([.read_groups[]?] | length) >0 then [.read_groups[] | .is_paired_end] | all | tostring else null end' | tr -d '\\n'`
72+
PAIRED=\${VARIABLE1:-'null'}
73+
VARIABLE2=`cat ${metadata_analysis} | jq -r '[.files[] | .info? | .analysis_tools[]?] | unique | join(",")' | tr -d '\\n'`
74+
ANALYSIS_TOOLS=\${VARIABLE2:-'null'}
75+
VARIABLE3=`cat ${metadata_analysis} | jq -r '.experiment | .library_strandedness?' | tr -d '\\n'`
76+
STRAND=\${VARIABLE3:-'null'}
77+
STUDY_ID=`cat ${metadata_analysis} | jq -er '.studyId' | tr -d '\\n'`
78+
DONOR_ID=`cat ${metadata_analysis} | jq -er '.samples[0].donor.donorId' | tr -d '\\n'`
79+
EXP=`cat ${metadata_analysis} | jq -er '.experiment | .experimental_strategy? // .library_strategy' | tr -d '\\n'`
7480
"""
7581
}
7682

@@ -79,6 +85,6 @@ process jsonParser {
7985
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
8086
workflow {
8187
jsonParser(
82-
file(params.input_file)
88+
file(params.metadata_analysis)
8389
)
8490
}

json-parser/main.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

json-parser/pkg.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
{
22
"name": "json-parser",
33
"version": "0.1.0",
4-
"description": "python:3.7.5-slim-buster",
4+
"description": "tool to parse JSON metadata",
55
"main": "main.nf",
66
"deprecated": false,
77
"keywords": [
8-
"tool to parse metadata"
8+
"metadata",
9+
"JSON"
910
],
1011
"repository": {
1112
"type": "git",

json-parser/tests/checker.nf

Lines changed: 11 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -43,60 +43,31 @@ params.container_version = ""
4343
params.container = ""
4444

4545
// tool specific params go here, add / change as needed
46-
params.input_file = ""
46+
params.metadata_analysis = ""
4747
params.expected_output = ""
4848

4949
include { jsonParser } from '../main'
5050

51-
52-
process file_smart_diff {
53-
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
54-
55-
input:
56-
path output_file
57-
path expected_file
58-
59-
output:
60-
stdout()
61-
62-
script:
63-
"""
64-
# Note: this is only for demo purpose, please write your own 'diff' according to your own needs.
65-
# in this example, we need to remove date field before comparison eg, <div id="header_filename">Tue 19 Jan 2021<br/>test_rg_3.bam</div>
66-
# sed -e 's#"header_filename">.*<br/>test_rg_3.bam#"header_filename"><br/>test_rg_3.bam</div>#'
67-
68-
cat ${output_file[0]} \
69-
| sed -e 's#"header_filename">.*<br/>#"header_filename"><br/>#' > normalized_output
70-
71-
([[ '${expected_file}' == *.gz ]] && gunzip -c ${expected_file} || cat ${expected_file}) \
72-
| sed -e 's#"header_filename">.*<br/>#"header_filename"><br/>#' > normalized_expected
73-
74-
diff normalized_output normalized_expected \
75-
&& ( echo "Test PASSED" && exit 0 ) || ( echo "Test FAILED, output file mismatch." && exit 1 )
76-
"""
77-
}
78-
79-
8051
workflow checker {
8152
take:
82-
input_file
83-
expected_output
53+
metadata_analysis
8454

8555
main:
8656
jsonParser(
87-
input_file
88-
)
89-
90-
file_smart_diff(
91-
jsonParser.out.output_file,
92-
expected_output
57+
metadata_analysis
9358
)
59+
jsonParser.out.study_id.set{study_id}
60+
study_id.view()
61+
jsonParser.out.donor_id.view()
62+
jsonParser.out.experimental_strategy.view()
63+
jsonParser.out.analysis_tools.view()
64+
jsonParser.out.paired.view()
65+
jsonParser.out.library_strandedness.view()
9466
}
9567

9668

9769
workflow {
9870
checker(
99-
file(params.input_file),
100-
file(params.expected_output)
71+
file(params.metadata_analysis)
10172
)
10273
}

json-parser/tests/input/README.md

Lines changed: 0 additions & 1 deletion
This file was deleted.
-14.6 KB
Binary file not shown.

json-parser/tests/test-job-1.json

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
{
2-
"input_file": "input/test_rg_3.bam",
3-
"expected_output": "expected/expected.test_rg_3.bam",
4-
"publish_dir": "outdir",
5-
"cpus": 1,
6-
"mem": 0.5
2+
"metadata_analysis": "data/40054341-119d-492e-8543-41119dc92ea1.sequencing_experiment.4.analysis.json",
3+
"cpus": 1,
4+
"mem": 0.5
75
}

json-parser/tests/test-job-2.json

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
{
2-
"input_file": "input/test_rg_3.bam",
3-
"expected_output": "expected/expected.test_rg_3.bam",
4-
"publish_dir": "outdir",
5-
"container_registry": "ghcr.io",
6-
"cpus": 1,
7-
"mem": 0.5
2+
"metadata_analysis": "data/89176bbc-ffca-4d73-984a-4a92ed931a98.sequencing_experiment.4.analysis.json",
3+
"cpus": 1,
4+
"mem": 0.5
85
}

tests/data/40054341-119d-492e-8543-41119dc92ea1.sequencing_experiment.4.analysis.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@
4949
"sequencing_date": "2014-12-12",
5050
"library_strategy": "WGS",
5151
"sequencing_center": "EXT",
52-
"submitter_sequencing_experiment_id": "TEST_EXP"
52+
"submitter_sequencing_experiment_id": "TEST_EXP",
53+
"library_strandedness": "Unstrained"
5354
},
5455
"read_groups": [
5556
{

0 commit comments

Comments
 (0)