Commit 81ca2b5a authored by Glenn Hickey
Browse files

send all gafs through gaffilter to remove query-overlaps

parent 475e2cd7
Pipeline #1548 failed with stage
in 5 minutes and 21 seconds
......@@ -163,7 +163,7 @@ fi
cd ${pangenomeBuildDir}
git clone https://github.com/ComparativeGenomicsToolkit/cactus-gfa-tools.git
cd cactus-gfa-tools
git checkout f543c8110cbc5b3647f9fdf6b382d3f28012493e
git checkout 93cab435cab8bde5fbc0e9242f5cf09f48f2cfd7
make -j ${numcpu}
if [[ $STATIC_CHECK -ne 1 || $(ldd paf2lastz | grep so | wc -l) -eq 0 ]]
then
......@@ -183,6 +183,12 @@ then
else
exit 1
fi
if [[ $STATIC_CHECK -ne 1 || $(ldd gaffilter | grep so | wc -l) -eq 0 ]]
then
mv gaffilter ${binDir}
else
exit 1
fi
if [[ $STATIC_CHECK -ne 1 || $(ldd rgfa-split | grep so | wc -l) -eq 0 ]]
then
mv rgfa-split ${binDir}
......
......@@ -301,6 +301,7 @@
<!-- minigraphConstructOptions: flags to pass to minigraph for construction -->
<!-- minMAPQ: ignore minigraph alignments with mapping quality less than this -->
<!-- minGAFBlockLength: ignore minigraph alignments with block length less than this -->
<!-- queryFilterRatio: filter gaf records that overlap in query coordinates. a record is kept only if its MAPQ or query length is <ratio> times bigger than that of the overlapping interval (see gaffilter -r) [0=disable] -->
<!-- maskFilter: any softmasked sequence intervals longer than this many bp will be hardmasked before being read by the minigraph mapper [negative value = disable] -->
<!-- delFilter: any deletions implied by split-read mappings greater than this are removed from the paf (by removing all lines of the smallest block bordering the deletion) -->
<!-- delFilterThreshold: only remove deletion if it costs < delFilterThreshold * deletion-size matches. must be in range (0, 1] -->
......@@ -313,6 +314,7 @@
minigraphConstructOptions="-c -xggs"
minMAPQ="5"
minGAFBlockLength="50000"
queryFilterRatio="2"
maskFilter="-1"
delFilter="-1"
delFilterThreshold="0.01"
......
......@@ -353,8 +353,13 @@ def minigraph_map_one(job, config, event_name, fa_path, fa_file_id, gfa_file_id)
# note: the gfa needs to be uncompressed for this tool to work
mg_lengths_path = gfa_path + '.node_lengths.tsv'
unstable_gaf_path = gaf_path + '.unstable'
cactus_call(parameters=['gaf2unstable', gaf_path, '-g', gfa_path, '-o', mg_lengths_path],
outfile=unstable_gaf_path)
cmd = ['gaf2unstable', gaf_path, '-g', gfa_path, '-o', mg_lengths_path]
# optional (but recommended) gaf overlap filter
gaf_filter_ratio = getOptionalAttrib(xml_node, "queryFilterRatio", int, default=None)
if gaf_filter_ratio:
cmd = [cmd, ['gaffilter', '-', '-r', str(gaf_filter_ratio)]]
cactus_call(parameters=cmd, outfile=unstable_gaf_path)
# convert the unstable gaf into unstable paf, which is what cactus expects
# also tack on the unique id to the target column
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment