From 3570d36cfdcdcfbba7d9e556058dbd8d6b8896bb Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 19 Jun 2026 12:04:31 -0700 Subject: [PATCH 1/4] Add wrapper for Kracken2 --- .../SequenceAnalysisModule.java | 3 +- .../run/preprocessing/Kracken2Step.java | 195 ++++++++++++++++++ 2 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kracken2Step.java diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 93688d270..7e19a9743 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -152,6 +152,7 @@ import org.labkey.sequenceanalysis.run.preprocessing.FastqcProcessingStep; import org.labkey.sequenceanalysis.run.preprocessing.FilterReadsStep; import org.labkey.sequenceanalysis.run.preprocessing.FlashPipelineStep; +import org.labkey.sequenceanalysis.run.preprocessing.Kracken2Step; import org.labkey.sequenceanalysis.run.preprocessing.PrintReadsContainingStep; import org.labkey.sequenceanalysis.run.preprocessing.TagPcrSummaryStep; import org.labkey.sequenceanalysis.run.preprocessing.TrimmomaticWrapper; @@ -291,7 +292,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new CutadaptWrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new FastqcProcessingStep.Provider()); SequencePipelineService.get().registerPipelineStep(new CutadaptCropWrapper.Provider()); - //SequencePipelineService.get().registerPipelineStep(new BlastFilterPipelineStep.Provider()); + SequencePipelineService.get().registerPipelineStep(new Kracken2Step.Provider()); //ref library SequencePipelineService.get().registerPipelineStep(new DNAReferenceLibraryStep.Provider()); diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kracken2Step.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kracken2Step.java new file mode 100644 index 000000000..dea2ab40b --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kracken2Step.java @@ -0,0 +1,195 @@ +package org.labkey.sequenceanalysis.run.preprocessing; + +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; +import org.json.JSONObject; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.PipelineJobService; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PreprocessingStep; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.util.FileUtil; +import org.labkey.api.util.Pair; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class Kracken2Step extends AbstractCommandPipelineStep implements PreprocessingStep +{ + private static final String DB_PARAM = "db"; + private static final String MODE_PARAM = "mode"; + + public Kracken2Step(PipelineStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new Kracken2Wrapper(ctx.getLogger())); + } + + public static class Provider extends AbstractPipelineStepProvider + { + public 
Provider() + { + super("Kracken2", "Kracken2", "Kracken2", "This step aligns input reads against a reference using BWA-mem and will only return read pairs without a passing hit in either read.", Arrays.asList( + ToolParameterDescriptor.create(DB_PARAM, "Database", "This determines the DB for positive or negative selection", "ldk-simplecombo", new JSONObject(){{ + put("storeValues", "bacteria-viral"); + put("multiSelect", false); + put("allowBlank", false); + put("joinReturnValue", true); + put("delimiter", ";"); + }}, "bacteria-viral"), + ToolParameterDescriptor.create(MODE_PARAM, "Reads To Retain", "This determines which set of reads is passed to the next step. If 'Retain Classified' is selected, then reads matching the DB are retained. if 'Retain Unclassified' is selected, then reads that do not match the DB are retained", "ldk-simplecombo", new JSONObject(){{ + put("storeValues", "Classified;Unclassified"); + put("multiSelect", false); + put("allowBlank", false); + put("joinReturnValue", true); + put("delimiter", ";"); + }}, null), + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--minimum-hit-groups"), "minimumHitGroups", "Minimum Hit Groups", "Minimum number of hit groups (overlapping k-mers sharing the same minimizer) needed to make a call", "ldk-integerfield", new JSONObject(){{ + put("minValue", 0); + }}, 2), + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--confidence"), "confidence", "Confidence", "Confidence score threshold (0-1)", "ldk-numberfield", new JSONObject(){{ + put("minValue", 0); + put("maxValue", 1); + put("decimalPrecision", 2); + }}, 0) + ), null, "https://github.com/DerrickWood/kraken2"); + } + + @Override + public Kracken2Step create(PipelineContext context) + { + return new Kracken2Step(this, context); + } + } + + @Override + public Output processInputFile(File inputFile, @Nullable File inputFile2, File outputDir) throws PipelineJobException + { + PreprocessingOutputImpl output = new 
PreprocessingOutputImpl(inputFile, inputFile2); + + List args = new ArrayList<>(); + args.add(getWrapper().getExe().getPath()); + + if (inputFile2 != null) + { + args.add("--paired"); + } + + if (inputFile.getName().toLowerCase().endsWith(".gz")) + { + args.add("--gzip-compressed"); + } + + Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); + if (threads != null) + { + args.add("--threads"); + args.add(threads.toString()); + } + + String dbName = getProvider().getParameterByName(DB_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + if (dbName == null) + { + throw new PipelineJobException("Missing DB name"); + } + + File binDir = FileUtil.appendName(new File(PipelineJobService.get().getAppProperties().getToolsDirectory()), "kracken2_dbs"); + if (!binDir.exists()) + { + throw new PipelineJobException("Unable to find kracken2 DB dir, expected: " + binDir.getAbsolutePath()); + } + + File dbDir = FileUtil.appendName(binDir, dbName); + if (!dbDir.exists()) + { + throw new PipelineJobException("Unable to find kracken2 DB dir, expected: " + dbDir.getAbsolutePath()); + } + + args.add("--use-names"); + + args.add("--db"); + args.add(dbDir.getAbsolutePath()); + + args.addAll(getClientCommandArgs()); + + String mode = getProvider().getParameterByName(MODE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + + File unclassifiedOutputBase = FileUtil.appendName(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(inputFile.getName()) + ".unclassified"); + args.add("--unclassified-out"); + args.add(unclassifiedOutputBase.getPath() + "#.fq.gz"); + + File classifiedOutputBase = FileUtil.appendName(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(inputFile.getName()) + ".classified"); + args.add("--classified-out"); + args.add(classifiedOutputBase.getPath() + "#.fq.gz"); + + File reportFile = FileUtil.appendName(outputDir, 
SequencePipelineService.get().getUnzippedBaseName(inputFile.getName()) + ".kracken2.report.txt"); + args.add("--report"); + args.add(reportFile.getPath()); + + args.add(inputFile.getPath()); + if (inputFile2 != null) + { + args.add(inputFile2.getPath()); + } + + getWrapper().execute(args); + + File unclassified1 = new File(unclassifiedOutputBase.getPath() + "_1.fq.gz"); + File unclassified2 = inputFile2 == null ? null : new File(unclassifiedOutputBase.getPath() + "_2.fq.gz"); + + File classified1 = new File(classifiedOutputBase.getPath() + "_1.fq.gz"); + File classified2 = inputFile2 == null ? null : new File(classifiedOutputBase.getPath() + "_2.fq.gz"); + if ("Classified".equals(mode)) + { + if (!classified1.exists()) + { + throw new PipelineJobException("Classified file does not exist: " + classified1.getAbsolutePath()); + } + + output.setProcessedFastq(Pair.of(classified1, classified2)); + output.addIntermediateFile(unclassified1); + if (unclassified2 != null) + { + output.addIntermediateFile(unclassified2); + } + } + else + { + if (!unclassified1.exists()) + { + throw new PipelineJobException("Unclassified file does not exist: " + unclassified1.getAbsolutePath()); + } + + output.setProcessedFastq(Pair.of(unclassified1, unclassified2)); + output.addIntermediateFile(classified1); + if (classified2 != null) + { + output.addIntermediateFile(classified2); + } + } + + return output; + } + + public static class Kracken2Wrapper extends AbstractCommandWrapper + { + public Kracken2Wrapper(Logger log) + { + super(log); + } + + public File getExe() + { + return SimpleScriptWrapper.resolveFileInPath("kracken2", null, true); + } + } +} From bf29255eb0f3770c46c59d854cbc73360be8f287 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 20 Jun 2026 16:37:57 -0700 Subject: [PATCH 2/4] Test fix --- .../labkey/test/tests/external/labModules/SequenceTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java b/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java index 6c8f67114..64de96f2a 100644 --- a/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java +++ b/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java @@ -628,6 +628,9 @@ private void analysisPanelTest() throws Exception waitAndClick(Locator.id(fieldsetMap.get("Head Crop").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a"))); waitForElementToDisappear(Locator.id(fieldsetMap.get("Head Crop").getId())); + waitAndClick(Locator.id(fieldsetMap.get("Kracken2").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a"))); + waitForElementToDisappear(Locator.id(fieldsetMap.get("Kracken2").getId())); + Integer overlapLength = 6; Double errorRate = 0.2; Integer cropLength = 500; From 63c04e567da8ef233a767d39ffa9e8c8d3a5fc72 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 20 Jun 2026 16:42:55 -0700 Subject: [PATCH 3/4] Kracken2 -> Kraken2 --- .../SequenceAnalysisModule.java | 4 +-- .../{Kracken2Step.java => Kraken2Step.java} | 26 +++++++++---------- .../external/labModules/SequenceTest.java | 4 +-- 3 files changed, 17 insertions(+), 17 deletions(-) rename SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/{Kracken2Step.java => Kraken2Step.java} (86%) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 7e19a9743..4442068da 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -152,7 +152,7 @@ import org.labkey.sequenceanalysis.run.preprocessing.FastqcProcessingStep; import 
org.labkey.sequenceanalysis.run.preprocessing.FilterReadsStep; import org.labkey.sequenceanalysis.run.preprocessing.FlashPipelineStep; -import org.labkey.sequenceanalysis.run.preprocessing.Kracken2Step; +import org.labkey.sequenceanalysis.run.preprocessing.Kraken2Step; import org.labkey.sequenceanalysis.run.preprocessing.PrintReadsContainingStep; import org.labkey.sequenceanalysis.run.preprocessing.TagPcrSummaryStep; import org.labkey.sequenceanalysis.run.preprocessing.TrimmomaticWrapper; @@ -292,7 +292,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new CutadaptWrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new FastqcProcessingStep.Provider()); SequencePipelineService.get().registerPipelineStep(new CutadaptCropWrapper.Provider()); - SequencePipelineService.get().registerPipelineStep(new Kracken2Step.Provider()); + SequencePipelineService.get().registerPipelineStep(new Kraken2Step.Provider()); //ref library SequencePipelineService.get().registerPipelineStep(new DNAReferenceLibraryStep.Provider()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kracken2Step.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kraken2Step.java similarity index 86% rename from SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kracken2Step.java rename to SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kraken2Step.java index dea2ab40b..6f22f595f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kracken2Step.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/Kraken2Step.java @@ -24,21 +24,21 @@ import java.util.Arrays; import java.util.List; -public class Kracken2Step extends AbstractCommandPipelineStep implements PreprocessingStep +public class Kraken2Step extends AbstractCommandPipelineStep implements PreprocessingStep { private static final String DB_PARAM = "db"; 
private static final String MODE_PARAM = "mode"; - public Kracken2Step(PipelineStepProvider provider, PipelineContext ctx) + public Kraken2Step(PipelineStepProvider provider, PipelineContext ctx) { - super(provider, ctx, new Kracken2Wrapper(ctx.getLogger())); + super(provider, ctx, new Kraken2Wrapper(ctx.getLogger())); } public static class Provider extends AbstractPipelineStepProvider { public Provider() { - super("Kracken2", "Kracken2", "Kracken2", "This step aligns input reads against a reference using BWA-mem and will only return read pairs without a passing hit in either read.", Arrays.asList( + super("Kraken2", "Kraken2", "Kraken2", "This step classifies input reads against the selected Kraken2 database and passes either the classified or the unclassified reads to the next step.", Arrays.asList( ToolParameterDescriptor.create(DB_PARAM, "Database", "This determines the DB for positive or negative selection", "ldk-simplecombo", new JSONObject(){{ put("storeValues", "bacteria-viral"); put("multiSelect", false); @@ -65,9 +65,9 @@ public Provider() } @Override - public Kracken2Step create(PipelineContext context) + public Kraken2Step create(PipelineContext context) { - return new Kracken2Step(this, context); + return new Kraken2Step(this, context); } } @@ -102,16 +102,16 @@ public Output processInputFile(File inputFile, @Nullable File inputFile2, File o throw new PipelineJobException("Missing DB name"); } - File binDir = FileUtil.appendName(new File(PipelineJobService.get().getAppProperties().getToolsDirectory()), "kracken2_dbs"); + File binDir = FileUtil.appendName(new File(PipelineJobService.get().getAppProperties().getToolsDirectory()), "kraken2_dbs"); if (!binDir.exists()) { - throw new PipelineJobException("Unable to find kracken2 DB dir, expected: " + binDir.getAbsolutePath()); + throw new PipelineJobException("Unable to find kraken2 DB dir, expected: " + binDir.getAbsolutePath()); } File dbDir = FileUtil.appendName(binDir, dbName); if (!dbDir.exists()) { - throw new
PipelineJobException("Unable to find kracken2 DB dir, expected: " + dbDir.getAbsolutePath()); + throw new PipelineJobException("Unable to find kraken2 DB dir, expected: " + dbDir.getAbsolutePath()); } args.add("--use-names"); @@ -131,7 +131,7 @@ public Output processInputFile(File inputFile, @Nullable File inputFile2, File o args.add("--classified-out"); args.add(classifiedOutputBase.getPath() + "#.fq.gz"); - File reportFile = FileUtil.appendName(outputDir, SequencePipelineService.get().getUnzippedBaseName(inputFile.getName()) + ".kracken2.report.txt"); + File reportFile = FileUtil.appendName(outputDir, SequencePipelineService.get().getUnzippedBaseName(inputFile.getName()) + ".kraken2.report.txt"); args.add("--report"); args.add(reportFile.getPath()); @@ -180,16 +180,16 @@ public Output processInputFile(File inputFile, @Nullable File inputFile2, File o return output; } - public static class Kracken2Wrapper extends AbstractCommandWrapper + public static class Kraken2Wrapper extends AbstractCommandWrapper { - public Kracken2Wrapper(Logger log) + public Kraken2Wrapper(Logger log) { super(log); } public File getExe() { - return SimpleScriptWrapper.resolveFileInPath("kracken2", null, true); + return SimpleScriptWrapper.resolveFileInPath("kraken2", null, true); } } } diff --git a/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java b/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java index 64de96f2a..23552a0d5 100644 --- a/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java +++ b/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java @@ -628,8 +628,8 @@ private void analysisPanelTest() throws Exception waitAndClick(Locator.id(fieldsetMap.get("Head Crop").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a"))); waitForElementToDisappear(Locator.id(fieldsetMap.get("Head Crop").getId())); - 
waitAndClick(Locator.id(fieldsetMap.get("Kracken2").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a"))); - waitForElementToDisappear(Locator.id(fieldsetMap.get("Kracken2").getId())); + waitAndClick(Locator.id(fieldsetMap.get("Kraken2").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a"))); + waitForElementToDisappear(Locator.id(fieldsetMap.get("Kraken2").getId())); Integer overlapLength = 6; Double errorRate = 0.2; From 0babf7f528f5edc15c92ad41cc905da483a832bd Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 21 Jun 2026 09:20:00 -0700 Subject: [PATCH 4/4] Test fix --- .../org/labkey/test/tests/external/labModules/SequenceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java b/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java index 23552a0d5..a8c49a0c7 100644 --- a/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java +++ b/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java @@ -611,7 +611,7 @@ private void analysisPanelTest() throws Exception waitForElementToDisappear(Ext4Helper.Locators.window("Add Steps")); Map fieldsetMap = new HashMap<>(); - String[] setNames = {"Adapter Trimming (Trimmomatic)", "Average Quality Filter", "Crop Reads", "Downsample Reads", "Filter Reads Matching Reference", "Head Crop", "Quality Trimming (Adaptive)", "Quality Trimming (Sliding Window)", "Read Length Filter"}; + String[] setNames = {"Adapter Trimming (Trimmomatic)", "Average Quality Filter", "Crop Reads", "Downsample Reads", "Filter Reads Matching Reference", "Head Crop", "Quality Trimming (Adaptive)", "Quality Trimming (Sliding Window)", "Read Length Filter", "Kraken2"}; isPresentInThisOrder(setNames); for (String name : setNames)