Skip to content

Commit eb5fb13

Browse files
committed
Shoehorned that use case of hell
1 parent 7dce37d commit eb5fb13

8 files changed

Lines changed: 151 additions & 86 deletions

File tree

com.workflowconversion.knime2grid/src/com/workflowconversion/knime2grid/export/node/impl/DefaultKnimeNodeConverter.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,7 @@ public Job convert(final NativeNodeContainer nativeNodeContainer, final Workflow
169169
// input.setMultiFile(true);
170170
LOGGER.info("Creating FileInput");
171171
nodeFactory = new MimeFileImporterNodeFactory();
172-
inputSettings.add(new VariableSetting("FILENAME", inputFileKey, "tmpfile.txt"));
173172
final String extensionKey = "extension" + currentInput;
174-
inputSettings.add(new VariableSetting("FILE_EXTENSION", extensionKey));
175173
final NodeContainer sourceNode = workflowManager.getNodeContainer(sourceNodeId);
176174
final NodeOutPort sourcePort = sourceNode.getOutPort(connectionContainer.getSourcePort());
177175
// make sure that the origin is indeed a IURIPortObject!
@@ -184,11 +182,15 @@ public Job convert(final NativeNodeContainer nativeNodeContainer, final Workflow
184182
} else {
185183
throw new RuntimeException("The port types of the source and destination port do not match");
186184
}
185+
inputSettings.add(new VariableSetting("FILE_EXTENSION", extensionKey));
186+
// [hacking intensifies]
187+
inputSettings.add(new VariableSetting("FILENAME", inputFileKey + extension, "tmpfile.txt"));
187188
} else {
188189
// not sure what the hell should we do here...
189190
// TODO: is it ok to assume that model writer is fine?
190191
LOGGER.info("PortType " + inPortObjectClass.getName());
191192
nodeFactory = new PortObjectReaderNodeFactory(portType);
193+
// [hacking intensifies]
192194
inputSettings.add(new VariableSetting("filename", inputFileKey));
193195
}
194196
// an extension might have been added

com.workflowconversion.knime2grid/src/com/workflowconversion/knime2grid/export/node/impl/GenericKnimeNodeConverter.java

Lines changed: 47 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
import java.util.ArrayList;
1111
import java.util.Arrays;
1212
import java.util.Collection;
13+
import java.util.HashMap;
1314
import java.util.LinkedList;
1415
import java.util.List;
16+
import java.util.Map;
1517
import java.util.Set;
1618
import java.util.TreeSet;
1719

@@ -78,14 +80,17 @@ public Job convert(final NativeNodeContainer nativeNodeContainer, final Workflow
7880

7981
// input/output ports are also handled as parameters in GKN, so we need to keep track of
8082
// the ports that we've processed
81-
final Set<String> processedPortNames = new TreeSet<String>();
83+
final Set<String> processedPortNames = new TreeSet<>();
84+
// since we will name converted job inputs using extensions, we need a map to relate GKN port names to converted
85+
// inputs
86+
final Map<String, com.workflowconversion.knime2grid.model.Port> gknPortToConvertedPort = new HashMap<>();
8287

8388
// all inputs and outputs of GKNs are URIs, so this simplifies the conversion
8489
// we need to know how to actually execute this job on a command line environment
8590
// so we need to access the command generator... however, at this point, there
8691
// is no guarantee that all jobs have been converted so we need to use the WorkflowManager to
8792
// gather information about other nodes
88-
createInputsAndOutputsFromKnimeWorkflow(nativeNodeContainer, workflowManager, nodeConfiguration, job, processedPortNames);
93+
createInputsAndOutputsFromKnimeWorkflow(nativeNodeContainer, workflowManager, nodeConfiguration, job, processedPortNames, gknPortToConvertedPort);
8994

9095
// process all non-input/non-output parameters AND the CTD, if any.
9196
// this must be AFTER inputs/outputs have been processed, because we are adding
@@ -98,7 +103,7 @@ public Job convert(final NativeNodeContainer nativeNodeContainer, final Workflow
98103
throw new ApplicationException(
99104
"This job already has a CTD file. Only one CTD file per job is allowed. This is probably a bug and should be reported.");
100105
}
101-
addCTDInputPort(workflowManager, (CommandLineCTDFile) element, job, nodeConfiguration, nativeNodeContainer);
106+
addCTDInputPort(workflowManager, (CommandLineCTDFile) element, job, nodeConfiguration, nativeNodeContainer, gknPortToConvertedPort);
102107
ctdFound = true;
103108
} else if (element instanceof ParametrizedCommandLineElement && !processedPortNames.contains(element.getKey())) {
104109
// we need to process only true parameters, not flags or option identifiers
@@ -124,18 +129,20 @@ public Job convert(final NativeNodeContainer nativeNodeContainer, final Workflow
124129
}
125130

126131
private void createInputsAndOutputsFromKnimeWorkflow(final NativeNodeContainer nativeNodeContainer, final WorkflowManager workflowManager,
127-
final INodeConfiguration nodeConfiguration, final Job job, final Set<String> processedPortNames) {
132+
final INodeConfiguration nodeConfiguration, final Job job, final Set<String> processedPortNames,
133+
final Map<String, com.workflowconversion.knime2grid.model.Port> gknPortToConvertedPort) {
128134
for (final ConnectionContainer connectionContainer : workflowManager.getIncomingConnectionsFor(nativeNodeContainer.getID())) {
129135
final NodeID sourceNodeId = connectionContainer.getSource();
130136
final Node sourceNode = ((NativeNodeContainer) workflowManager.getNodeContainer(connectionContainer.getSource())).getNode();
131137
final int destPortNr = connectionContainer.getDestPort();
132138
final Port destPort = nodeConfiguration.getInputPorts().get(ConverterUtils.convertFromKnimePort(destPortNr));
133-
final Input input = new Input();
134-
input.setSourceId(sourceNodeId);
135-
input.setOriginalPortNr(destPortNr);
136-
input.setName(destPort.getName() + getExtensionForPort(sourceNode, connectionContainer.getSourcePort()));
139+
final Input convertedInput = new Input();
140+
convertedInput.setSourceId(sourceNodeId);
141+
convertedInput.setOriginalPortNr(destPortNr);
142+
convertedInput.setName(destPort.getName() + getExtensionForPort(sourceNode, connectionContainer.getSourcePort()));
143+
gknPortToConvertedPort.put(destPort.getName(), convertedInput);
137144
// input.setMultiFile(destPort.isMultiFile());
138-
job.addInput(input);
145+
job.addInput(convertedInput);
139146

140147
processedPortNames.add(destPort.getName());
141148
}
@@ -144,12 +151,13 @@ private void createInputsAndOutputsFromKnimeWorkflow(final NativeNodeContainer n
144151
if (job.getOutputByOriginalPortNr(sourcePortNr) == null) {
145152
// first time we see the output, we need to add it
146153
final Port sourcePort = nodeConfiguration.getOutputPorts().get(ConverterUtils.convertFromKnimePort(sourcePortNr));
147-
final Output newOutput = new Output();
154+
final Output convertedOutput = new Output();
148155
final Node sourceNode = ((NativeNodeContainer) workflowManager.getNodeContainer(connectionContainer.getSource())).getNode();
149156
// newOutput.setMultiFile(sourcePort.isMultiFile());
150-
newOutput.setName(sourcePort.getName() + getExtensionForPort(sourceNode, connectionContainer.getSourcePort()));
151-
newOutput.setOriginalPortNr(sourcePortNr);
152-
job.addOutput(newOutput);
157+
convertedOutput.setName(sourcePort.getName() + getExtensionForPort(sourceNode, connectionContainer.getSourcePort()));
158+
convertedOutput.setOriginalPortNr(sourcePortNr);
159+
gknPortToConvertedPort.put(sourcePort.getName(), convertedOutput);
160+
job.addOutput(convertedOutput);
153161
processedPortNames.add(sourcePort.getName());
154162
}
155163
}
@@ -158,7 +166,8 @@ private void createInputsAndOutputsFromKnimeWorkflow(final NativeNodeContainer n
158166
// when executing GKN in KNIME, each GKN generates an "on the fly" CTD file and uses it to
159167
// execute the associated binary, but what we need here is to add a new input containing a CTD
160168
private void addCTDInputPort(final WorkflowManager workflowManager, final CommandLineCTDFile element, final Job job,
161-
final INodeConfiguration nodeConfiguration, final NativeNodeContainer nativeNodeContainer) throws IOException, InvalidCTDFileException {
169+
final INodeConfiguration nodeConfiguration, final NativeNodeContainer nativeNodeContainer,
170+
final Map<String, com.workflowconversion.knime2grid.model.Port> gknPortToConvertedPort) throws IOException, InvalidCTDFileException {
162171
final Input ctdInput = new Input();
163172
ctdInput.setName(CommandLineCTDFile.CTD_FILE_KEY);
164173
ctdInput.setConnectionType(ConnectionType.UserProvided);
@@ -171,8 +180,21 @@ private void addCTDInputPort(final WorkflowManager workflowManager, final Comman
171180
dumpConfiguration(clonedNodeConfiguration).getCanonicalPath());
172181
ctdInput.setAssociatedFileParameter(ctdFileParameter);
173182
job.addInput(ctdInput);
174-
// fix the command line element!
183+
// fix the command line element
175184
element.setValue(ctdFileParameter);
185+
// fix converted Input/Outputs
186+
transferToConvertedPorts(clonedNodeConfiguration, gknPortToConvertedPort);
187+
}
188+
189+
private void transferToConvertedPorts(final INodeConfiguration clonedNodeConfiguration,
190+
final Map<String, com.workflowconversion.knime2grid.model.Port> gknPortToConvertedPort) {
191+
for (final Map.Entry<String, com.workflowconversion.knime2grid.model.Port> entry : gknPortToConvertedPort.entrySet()) {
192+
final Parameter<?> parameter = clonedNodeConfiguration.getParameter(entry.getKey());
193+
if (parameter == null || !(parameter instanceof IFileParameter)) {
194+
throw new ApplicationException("Invalid contents of map relating GKN ports to converted inputs. This is a bug and should be reported.");
195+
}
196+
entry.getValue().setAssociatedFileParameter((IFileParameter) parameter);
197+
}
176198
}
177199

178200
private File dumpConfiguration(final INodeConfiguration nodeConfiguration) throws IOException {
@@ -270,21 +292,18 @@ private void fixFilePathsFromAssociatedParameters(final WorkflowManager workflow
270292
processedPorts.add(parameterName);
271293
}
272294
} else if (fileNames.size() > 1) {
273-
if (associatedParameter.getValue() != null) {
274-
// multifile
275-
final List<String> fixedFilenames = new LinkedList<String>();
276-
int fileNumber = 0;
277-
for (final String fileName : fileNames) {
278-
final String extension = FilenameUtils.getExtension(fileName);
279-
fixedFilenames.add(ConverterUtils.generateFileNameForExport(parameterName, extension, fileNumber));
280-
fileNumber++;
281-
}
282-
((FileListParameter) associatedParameter).setValue(fixedFilenames);
283-
processedPorts.add(parameterName);
295+
final List<String> fixedFilenames = new LinkedList<String>();
296+
int fileNumber = 0;
297+
for (final String fileName : fileNames) {
298+
final String extension = FilenameUtils.getExtension(fileName);
299+
fixedFilenames.add(ConverterUtils.generateFileNameForExport(parameterName, extension, fileNumber));
300+
fileNumber++;
284301
}
302+
((FileListParameter) associatedParameter).setValue(fixedFilenames);
303+
processedPorts.add(parameterName);
285304
} else {
286-
// 0 inputs?
287-
throw new RuntimeException("Invalid association between parameters and input files. This is probably a bug, please report it.");
305+
// 0 inputs!
306+
// TODO: decide how to handle a parameter with no associated input files; it is currently silently ignored
288307
}
289308
}
290309
}

com.workflowconversion.knime2grid/src/com/workflowconversion/knime2grid/export/workflow/impl/guse/GuseKnimeWorkflowExporter.java

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import com.workflowconversion.knime2grid.model.ConnectionType;
6868
import com.workflowconversion.knime2grid.model.Input;
6969
import com.workflowconversion.knime2grid.model.Job;
70+
import com.workflowconversion.knime2grid.model.JobType;
7071
import com.workflowconversion.knime2grid.model.Output;
7172
import com.workflowconversion.knime2grid.model.Port;
7273
import com.workflowconversion.knime2grid.model.Workflow;
@@ -84,6 +85,8 @@ public class GuseKnimeWorkflowExporter implements KnimeWorkflowExporter {
8485
private static final String INPUT_PORTS_WITH_FILELIST_SCRIPT_KEY = "@@INPUT_PORTS_WITH_FILELIST@@";
8586
private static final String OUTPUT_PORTS_WITH_FILELIST_SCRIPT_KEY = "@@OUTPUT_PORTS_WITH_FILELIST@@";
8687
private static final String COMMAND_LINE_PARAMETERS_SCRIPT_KEY = "@@COMMAND_LINE_PARAMETERS@@";
88+
private static final String INPUT_FILENAME_TRANSLATION_SCRIPT_KEY = "@@INPUT_FILENAME_TRANSLATION@@";
89+
private static final String FILENAME_TRANSLATION_VAR_PREFIX = "KNIME2GRID_VAR_";
8790
private static final String QUOTE_REGEX = "\"";
8891
private static final String QUOTE_REPLACEMENT_FOR_BASH_SCRIPT = "\\\"";
8992
private static final String LOCAL_EXECUTOR_TYPE = "local";
@@ -251,7 +254,7 @@ private void fixDuplicateJobName(final Map<String, Integer> nameOccurrenceMap, f
251254
}
252255

253256
private void fixCollectorGeneratorJob(final Job job) {
254-
// set to use the local executor
257+
// hard-coded: always use the local executor
255258
switch (job.getJobType()) {
256259
case Generator :
257260
case Collector :
@@ -311,40 +314,64 @@ private void writeExecuteBin(final String rootEntryName, final ZipOutputStream z
311314
}
312315

313316
private String generateGeneratorScript(final Job job) throws IOException {
314-
return loadScript("zip_loop_start.sh", "@@PORT_NAME@@", job.getInputByPortNr(0).getName());
317+
// TODO: this is hackish, we know (assume) that generator jobs have one input and one output
318+
final Input input = job.getInputs().iterator().next();
319+
final Output output = job.getOutputs().iterator().next();
320+
return loadScript("zip_loop_start.sh", "@@INPUT_PORT_NAME@@", fixPortName(input), "@@OUTPUT_BASE_NAME@@", fixPortName(output));
315321
}
316322

317323
private String generateCollectorScript(final Job job) throws IOException {
318-
return loadScript("zip_loop_end.sh", "@@BASE_PORT_NAME@@", job.getInputByPortNr(0).getName());
324+
// TODO: this is hackish, we know (assume) that collector jobs have one input and one output
325+
final Input input = job.getInputs().iterator().next();
326+
final Output output = job.getOutputs().iterator().next();
327+
return loadScript("zip_loop_end.sh", "@@INPUT_BASE_NAME@@", fixPortName(input), "@@OUTPUT_PORT_NAME@@", fixPortName(output));
319328
}
320329

321330
private String generateDefaultScript(final Job job) throws IOException {
322331
final StringBuilder fileListInputs = new StringBuilder();
323332
final StringBuilder fileListOutputs = new StringBuilder();
333+
final StringBuilder fileNameTranslation = new StringBuilder();
334+
// see comment on job_wrapper.sh
335+
final StringBuilder majorHackett = new StringBuilder();
324336

337+
// not related to port number, this is just a hack for the wrapper script
338+
int scriptPortIndex = 0;
325339
for (final Input input : job.getInputs()) {
326340
if (input.isMultiFile()) {
327341
if (fileListInputs.length() > 0) {
328342
// not the first element, we can prepend a space
329343
fileListInputs.append(' ');
330344
}
331-
fileListInputs.append(input.getName());
345+
fileListInputs.append(fixPortName(input));
346+
fileNameTranslation.append(FILENAME_TRANSLATION_VAR_PREFIX).append(scriptPortIndex).append("=\"");
347+
// only place outside method fixPortName where we use the "base name" of the port!
348+
fileNameTranslation.append(input.getName());
349+
// gUSE runs on Linux, it might be incorrect to use System.getProperty("line.separator")
350+
fileNameTranslation.append("\"\n");
351+
scriptPortIndex++;
332352
}
333353
}
334354
for (final Output output : job.getOutputs()) {
335355
if (output.isMultiFile()) {
336356
if (fileListOutputs.length() > 0) {
337357
fileListOutputs.append(' ');
338358
}
339-
fileListOutputs.append(output.getName());
359+
fileListOutputs.append(fixPortName(output));
360+
}
361+
if (job.getJobType() == JobType.KnimeInternal) {
362+
if (majorHackett.length() > 0) {
363+
majorHackett.append(' ');
364+
}
365+
majorHackett.append(fixPortName(output));
340366
}
341367
}
342368

343369
// script handles empty variables for input/output ports with filelist, make sure to escape the command line,
344370
// which is not 100% under the control of this class
345371
return loadScript("job_wrapper.sh", EXECUTABLE_SCRIPT_KEY, job.getRemoteApplication().getPath(), INPUT_PORTS_WITH_FILELIST_SCRIPT_KEY,
346372
fileListInputs.toString(), OUTPUT_PORTS_WITH_FILELIST_SCRIPT_KEY, fileListOutputs.toString(), COMMAND_LINE_PARAMETERS_SCRIPT_KEY,
347-
generateCommandLine(job).replace(QUOTE_REGEX, QUOTE_REPLACEMENT_FOR_BASH_SCRIPT));
373+
generateCommandLine(job).replace(QUOTE_REGEX, QUOTE_REPLACEMENT_FOR_BASH_SCRIPT), INPUT_FILENAME_TRANSLATION_SCRIPT_KEY,
374+
fileNameTranslation.toString(), "@@MAJOR_HACKETT@@", majorHackett.toString());
348375
}
349376

350377
// loads a script from file,
@@ -565,15 +592,15 @@ private void addConcreteInputPortProperties(final Element inputElement, final In
565592
addConcretePortProperty(inputElement, "file", "knime2grid.file");
566593
addConcretePortProperty(inputElement, "eparam", "0");
567594
addConcretePortProperty(inputElement, "pequaltype", "0");
568-
addConcretePortProperty(inputElement, "intname", input.getName());
595+
addConcretePortProperty(inputElement, "intname", fixPortName(input));
569596
addConcretePortProperty(inputElement, "dpid", Integer.toString(input.getPortNr()));
570597
break;
571598
case Collector :
572599
addConcretePortProperty(inputElement, "waitingtmp", "all");
573600
addConcretePortProperty(inputElement, "waiting", "all");
574601
addConcretePortProperty(inputElement, "eparam", "1");
575602
addConcretePortProperty(inputElement, "pequaltype", "0");
576-
addConcretePortProperty(inputElement, "intname", input.getName());
603+
addConcretePortProperty(inputElement, "intname", fixPortName(input));
577604
addConcretePortProperty(inputElement, "dpid", Integer.toString(input.getPortNr()));
578605
break;
579606
default :

com.workflowconversion.knime2grid/src/com/workflowconversion/knime2grid/export/workflow/impl/guse/job_wrapper.sh

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,63 @@
33

44
# contains names of input ports that take filelists, separated by whitespace
55
INPUT_PORTS_WITH_FILELIST="@@INPUT_PORTS_WITH_FILELIST@@"
6+
7+
# we cannot assume that the contents of the archive contain the input files named exactly as expected
8+
# this section relates input port names to a (base) filename to use to change the names of the extracted files, e.g.,
9+
# KNIME2GRID_VAR_N="bar"
10+
# signifies that the contents of the N-th input will be extracted and renamed to 0_bar, 1_bar, ...
11+
##### start filename translation variables
12+
@@INPUT_FILENAME_TRANSLATION@@
13+
##### end
14+
615
# contains names of output ports that generate filelists, separated by whitespace
716
OUTPUT_PORTS_WITH_FILELIST="@@OUTPUT_PORTS_WITH_FILELIST@@"
817
EXECUTABLE="@@EXECUTABLE@@"
918
COMMAND_LINE_PARAMETERS="@@COMMAND_LINE_PARAMETERS@@"
1019

20+
# KNIME 3.6 seems to have a race condition, sometimes KNIME reports that the node "Table Reader" is not available, but it is...
21+
# This is probably not best practice, not regular practice, hell, this should not be practice, but basically what we do here
22+
# is to wait for ALL outputs...
23+
# Major Hackett made me do it, I am sorry. The chain of command is sacred.
24+
MAJOR_HACKETT="@@MAJOR_HACKETT@@"
25+
# Hackett out!
26+
27+
ARCHIVE_INDEX=0
1128
if [ -n "$INPUT_PORTS_WITH_FILELIST" ]; then
1229
for input_port in ${INPUT_PORTS_WITH_FILELIST}; do
1330
echo "expanding ${input_port}"
14-
echo tar xfz ${input_port}
31+
# extract files individually and rename them using the base name stored in KNIME2GRID_VAR_<archive index>
32+
FILE_INDEX=0
33+
BASENAME_VARIABLE_NAME="KNIME2GRID_VAR_${ARCHIVE_INDEX}"
34+
for input_file in `tar tfz ${input_port}`; do
35+
# use basename and index to rename the file as it is written to stdout
36+
tar xOfz ${input_port} ${input_file} > ${FILE_INDEX}_${!BASENAME_VARIABLE_NAME}
37+
FILE_INDEX=$(expr ${FILE_INDEX} + 1)
38+
done
39+
ARCHIVE_INDEX=$(expr ${ARCHIVE_INDEX} + 1)
1540
done
1641
fi
1742

18-
# execute the tool
19-
echo "Executing: ${EXECUTABLE} ${COMMAND_LINE_PARAMETERS}"
20-
${EXECUTABLE} ${COMMAND_LINE_PARAMETERS}
43+
# execute the tool using Major Hackett's approach
44+
N_ATTEMPTS=1
45+
HACKETT_OUT=""
46+
while [ -z "${HACKETT_OUT}" ]; do
47+
echo "Executing(${N_ATTEMPTS}): ${EXECUTABLE} ${COMMAND_LINE_PARAMETERS}"
48+
N_ATTEMPTS=$(expr ${N_ATTEMPTS} + 1)
49+
${EXECUTABLE} ${COMMAND_LINE_PARAMETERS}
50+
HACKETT_OUT="yes"
51+
for hackettinno in ${MAJOR_HACKETT}; do
52+
if [ ! -s ${hackettinno} ]; then
53+
# expected output not found or size zero
54+
HACKETT_OUT=""
55+
sleep 2
56+
break;
57+
fi
58+
done
59+
done
60+
2161

22-
# compress the multi-file outputs and make sure to name the archive using port name and .tar.gz as extension
23-
# see: com.workflowconversion.knime2grid.export.workflow.ConverterUtils.generateFileNameForExport(String, String, int) and
24-
# com.workflowconversion.knime2grid.export.workflow.impl.guse.GuseKnimeWorkflowExporter.fixPortName(Port)
62+
# compress the multi-file outputs
2563
if [ -n "$OUTPUT_PORTS_WITH_FILELIST" ]; then
2664
for output_port in ${OUTPUT_PORTS_WITH_FILELIST}; do
2765
echo "compressing outputs for ${output_port}"

0 commit comments

Comments
 (0)