Add CWL Parser, import helper from common activities
Project: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/commit/6d6dc200 Tree: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/tree/6d6dc200 Diff: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/diff/6d6dc200 Branch: refs/heads/cwlparser Commit: 6d6dc20086ca9b8e0f1c0ff0eccd9b58b936e452 Parents: ecc3b67 Author: Majdi Haouech <[email protected]> Authored: Mon Jun 4 10:33:46 2018 +0100 Committer: Majdi Haouech <[email protected]> Committed: Mon Jun 4 10:33:46 2018 +0100 ---------------------------------------------------------------------- .../org/apache/taverna/scufl2/cwl/Parser.java | 262 ++++------ .../apache/taverna/scufl2/cwl/YAMLHelper.java | 484 +++++++++++++++++++ 2 files changed, 566 insertions(+), 180 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/6d6dc200/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/Parser.java ---------------------------------------------------------------------- diff --git a/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/Parser.java b/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/Parser.java index 65844b9..c0ef438 100644 --- a/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/Parser.java +++ b/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/Parser.java @@ -1,212 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.taverna.scufl2.cwl; -import java.io.*; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; +import java.util.*; +import org.apache.taverna.scufl2.api.core.Workflow; +import org.apache.taverna.scufl2.api.core.Processor; +import org.apache.taverna.scufl2.api.port.InputWorkflowPort; +import org.apache.taverna.scufl2.api.port.OutputWorkflowPort; +import org.apache.taverna.scufl2.api.port.InputProcessorPort; +import org.apache.taverna.scufl2.api.port.OutputProcessorPort; -class InputField { +import com.fasterxml.jackson.databind.JsonNode; - public String key; - public String type; - public int position; - public String prefix; - - public InputField(String _key) { - key = _key; - type = ""; - position = -1; - prefix = ""; - } +public class Parser { - public InputField(String _key, String _type) { - key = _key; - type = _type; - position = -1; - prefix = ""; - } + private JsonNode cwlFile; + private YAMLHelper yamlHelper; + private Workflow workflow; - public InputField(String _key, String _type, int pos) { - key = _key; - type = _type; - position = pos; - prefix = ""; + public Parser(JsonNode cwlFile) { + this.cwlFile = cwlFile; + this.yamlHelper = new YAMLHelper(); + this.workflow = new Workflow(); + this.workflow.setInputPorts(parseInputs()); + this.workflow.setOutputPorts(parseOutputs()); } - public InputField(String _key, String _type, int pos, String _prefix) { - key = _key; - type = _type; - position = pos; - prefix = _prefix; + public Workflow getWorkflow() { + return this.workflow; } -} - - -public class Parser { - - private String yamlLine; - private int fileLength; - - private Map<Integer, String> yamlFile = null; - - public Parser(File file) { - int counter = 0; - - yamlFile = new HashMap<>(); - - FileReader yamlFileDescriptor = null; - - try { - yamlFileDescriptor = new FileReader(file); - BufferedReader bufferedReader = new BufferedReader(yamlFileDescriptor); - String parent = null; - int parentDepth = 0; - while((yamlLine = bufferedReader.readLine()) != null) { - - yamlFile.put(counter, yamlLine); - counter = counter + 1; - } - - bufferedReader.close(); - fileLength = counter; - } catch (IOException e) { - System.err.println("Parser init error: " + e ); - } + public Set<Step> parseSteps() { + return yamlHelper.processSteps(cwlFile); } - public ArrayList<InputField> parseInputs() { - int startIndex = 0; - int endIndex = -1; - int depth = -1; - - /** - * Search for start and end of inputs section - */ - for(Map.Entry<Integer, String> entry: yamlFile.entrySet()) { - int index = entry.getKey(); - String line = entry.getValue(); - String key = getKeyFromLine(line); - if(key.equals("inputs")) { - startIndex = index; - endIndex = index; - depth = getDepth(line); - } else if(!line.equals("") && getDepth(line) <= depth) { - break; - } else { - endIndex++; - } + public Processor convertStepToProcessor(Step step) { + Processor processor = new Processor(null, step.getId()); + // Convert input ports + Set<InputProcessorPort> processorInputs = new HashSet<>(); + Set<StepInput> inputs = step.getInputs(); + for(StepInput input: inputs) { + InputProcessorPort port = new InputProcessorPort(processor, input.getId()); + processorInputs.add(port); } - /** - * Parse each input - */ - ArrayList<InputField> result = new ArrayList<>(); - for(int i = startIndex+1; i <= endIndex; i++) { - int curDepth = getDepth(yamlFile.get(i)); - // If current element is a child of inputs key - if(curDepth == depth + 1) { - result.add(parseInputField(i)); - } + processor.setInputPorts(processorInputs); + // Convert output ports + Set<OutputProcessorPort> processorOutputs = new HashSet<>(); + Set<StepOutput> outputs = step.getOutputs(); + for(StepOutput output: outputs) { + OutputProcessorPort port = new OutputProcessorPort(processor, output.getId()); + processorOutputs.add(port); } + processor.setOutputPorts(processorOutputs); - return result; + return processor; } - public InputField parseInputField(int startIndex) { - String line = yamlFile.get(startIndex); - int depth = getDepth(line); - String id = getKeyFromLine(line); - String value = getValueFromLine(line); + public Set<InputWorkflowPort> parseInputs() { + Map<String, PortDetail> inputs = yamlHelper.processInputDetails(cwlFile); + Map<String, Integer> inputDepths = yamlHelper.processInputDepths(cwlFile); - if(!value.equals("")) { - return new InputField(id, value); + if(inputs == null || inputDepths == null) { + return null; } - - InputField field = new InputField(id); - for(int i = startIndex+1; i < length; i++) { - String curLine = yamlFile.get(i); - if(curLine.equals("")) { - // Ignore empty lines - continue; - } - if(getDepth(curLine) <= depth) { - // Out of input section - break; - } - String key = getKeyFromLine(curLine); - value = getValueFromLine(curLine); - - if(key.trim().equals("type")) { - field.type = value; - } else if(key.trim().equals("inputBinding")) { - - int curDepth = getDepth(curLine); - int nextIndex = getNextLineIndex(i); - String nextLine = yamlFile.get(nextIndex); - String nextKey = getKeyFromLine(nextLine); - String nextValue = getValueFromLine(nextLine); - - if(nextKey.equals("position")){ - field.position = Integer.parseInt(nextValue); - } else if(nextKey.equals("prefix")){ - field.prefix = nextValue; - } - - // Check if we have another inputBinding property - nextIndex = getNextLineIndex(nextIndex); - nextLine = yamlFile.get(nextIndex); - if(getDepth(nextLine) == curDepth + 1) { - nextKey = getKeyFromLine(nextLine); - nextValue = getValueFromLine(nextLine); - if(nextKey.equals("position")){ - field.position = Integer.parseInt(nextValue); - } else if(nextKey.equals("prefix")){ - field.prefix = nextValue.trim(); - } - } - } + Set<InputWorkflowPort> result = new HashSet<InputWorkflowPort>(); + for(String id: inputs.keySet()) { + PortDetail detail = inputs.get(id); + int depth = inputDepths.get(id); + InputWorkflowPort port = new InputWorkflowPort(); + port.setName(id); + port.setDepth(depth); + result.add(port); } - return field; + return result; } - private int getNextLineIndex(int index) { - index++; + public Set<OutputWorkflowPort> parseOutputs() { + Map<String, PortDetail> inputs = yamlHelper.processOutputDetails(cwlFile); - while(yamlFile.get(index).equals("")) { - index++; + if(inputs == null) { + return null; } - - return index; - } - - public static int getDepth(String line) { - int count = 0; - int idx = 0; - while(idx < line.length()) { - if(line.charAt(idx) != ' ') { - break; - } - count++; - idx++; + Set<OutputWorkflowPort> result = new HashSet<OutputWorkflowPort>(); + for(String id: inputs.keySet()) { + PortDetail detail = inputs.get(id); + OutputWorkflowPort port = new OutputWorkflowPort(); + port.setName(id); + result.add(port); } - assert count % 2 == 0; - return count / 2; - } - - public static String getKeyFromLine(String line) { - int commaIndex = line.indexOf(':'); - assert commaIndex != -1; - return line.substring(0, commaIndex).trim(); + return result; } - public static String getValueFromLine(String line) { - int commaIndex = line.indexOf(':'); - assert commaIndex != -1; - - return line.substring(commaIndex + 1).trim(); - } } http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/6d6dc200/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/YAMLHelper.java ---------------------------------------------------------------------- diff --git a/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/YAMLHelper.java b/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/YAMLHelper.java new file mode 100644 index 0000000..dacdc4f --- /dev/null +++ b/taverna-scufl2-cwl/src/main/java/org/apache/taverna/scufl2/cwl/YAMLHelper.java @@ -0,0 +1,484 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.taverna.scufl2.cwl; + + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Set; +import java.util.Map; +import java.util.Map.Entry; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.databind.node.TextNode; + +public class YAMLHelper { + + public static final String ARRAY_SPLIT_BRACKETS = "\\[\\]"; + public static final String ARRAY_SIGNATURE_BRACKETS = "\\[\\]$"; + private static final String INPUTS = "inputs"; + private static final String OUTPUTS = "outputs"; + private static final String STEPS = "steps"; + private static final String ID = "id"; + private static final String TYPE = "type"; + private static final String ARRAY = "array"; + private static final String DESCRIPTION = "description"; + private static final int DEPTH_0 = 0; + private static final int DEPTH_1 = 1; + private static final int DEPTH_2 = 2; + + private static final String FLOAT = "float"; + private static final String NULL = "null"; + private static final String BOOLEAN = "boolean"; + private static final String INT = "int"; + private static final String DOUBLE = "double"; + private static final String STRING = "string"; + private static final String LABEL = "label"; + private static final String FILE = "file"; + private static final String DIRECTORY = "directory"; + private static final String FORMAT = "format"; + private static final String RUN = "run"; + private static final String SOURCE = "source"; + + private JsonNode nameSpace; + + public YAMLHelper() { + this.nameSpace = null; + } + + public JsonNode getNameSpace() { + return nameSpace; + } + + /** + * This method is processing the CWL NameSpace for later use such as + * figuring out the Format of a input or output + */ + public void processNameSpace(JsonNode file) { + + if (file != null && file.has("$namespaces")) { + nameSpace = file.path("$namespaces"); + } + + } + + public Map<String, Integer> processInputDepths(JsonNode file) { + return process(file.get(INPUTS)); + } + + public Map<String, Integer> processOutputDepths(JsonNode file) { + return process(file.get(OUTPUTS)); + } + + public Map<String, PortDetail> processInputDetails(JsonNode file) { + return processdetails(file.get(INPUTS)); + } + + public Map<String, PortDetail> processOutputDetails(JsonNode file) { + return processdetails(file.get(OUTPUTS)); + } + + /** + * + */ + public Set<Step> processSteps(JsonNode file) { + Set<Step> result = new HashSet<>(); + + if(file == null) { + return result; + } + + if(file.has(STEPS)) { + JsonNode steps = file.get(STEPS); + if(steps.isArray()) { + for (JsonNode stepNode : steps) { + Step step = new Step(); + String id = stepNode.get(ID).asText(); + + String run = stepNode.get(RUN).asText(); + Set<StepInput> inputs = processStepInput(stepNode.get(INPUTS)); + step.setId(id); + step.setRun(run); + step.setInputs(inputs); + result.add(step); + } + } else if(steps.isObject()) { + Iterator<Entry<String, JsonNode>> iterator = steps.fields(); + while(iterator.hasNext()) { + Entry<String, JsonNode> entry = iterator.next(); + Step step = new Step(); + + String id = entry.getKey(); + JsonNode value = entry.getValue(); + if(value.has(RUN)) { + String run = entry.getValue().get(RUN).asText(); + step.setRun(run); + } + Set<StepInput> inputs = processStepInput(value.get(INPUTS)); + step.setId(id); + step.setInputs(inputs); + + result.add(step); + } + } + } + + return result; + } + + private Set<StepInput> processStepInput(JsonNode inputs) { + + Set<StepInput> result = new HashSet<>(); + if(inputs == null) { + return result; + } + if (inputs.getClass() == ArrayNode.class) { + + for (JsonNode input : inputs) { + String id = input.get(ID).asText(); + String source = input.get(SOURCE).asText(); + + result.add(new StepInput(id, source)); + } + } else if (inputs.getClass() == ObjectNode.class) { + Iterator<Entry<String, JsonNode>> iterator = inputs.fields(); + while (iterator.hasNext()) { + Entry<String, JsonNode> entry = iterator.next(); + + String id = entry.getKey(); + String source = entry.getValue().get(SOURCE).asText(); + + result.add(new StepInput(id, source)); + } + } + return result; + } + + /** + * This method will go through CWL tool input or out puts and figure outs + * their IDs and the respective depths + * + * @param inputs + * This is JsonNode object which contains the Inputs or outputs + * of the respective CWL tool + * @return This the respective, ID and the depth of the input or output + */ + public Map<String, Integer> process(JsonNode inputs) { + + Map<String, Integer> result = new HashMap<>(); + + if (inputs == null) + return result; + + if (inputs.getClass() == ArrayNode.class) { + Iterator<JsonNode> iterator = inputs.iterator(); + + while (iterator.hasNext()) { + JsonNode input = iterator.next(); + String currentInputId = input.get(ID).asText(); + + JsonNode typeConfigurations; + try { + + typeConfigurations = input.get(TYPE); + // if type :single argument + if (typeConfigurations.getClass() == TextNode.class) { + // inputs: + /// -id: input_1 + //// type: int[] + if (isValidArrayType(typeConfigurations.asText())) + result.put(currentInputId, DEPTH_1); + // inputs: + /// -id: input_1 + //// type: int or int? + else + result.put(currentInputId, DEPTH_0); + // type : defined as another map which contains type: + } else if (typeConfigurations.getClass() == ObjectNode.class) { + // inputs: + /// -id: input_1 + //// type: + ///// type: array or int[] + String inputType = typeConfigurations.get(TYPE).asText(); + if (inputType.equals(ARRAY) || isValidArrayType(inputType)) { + result.put(currentInputId, DEPTH_1); + + } + // inputs: + // -id: input_1 + // type: + // type: ["null",int] + } else if (typeConfigurations.getClass() == ArrayNode.class) { + if (isValidDataType(typeConfigurations)) { + result.put(currentInputId, DEPTH_0); + } + + } + + } catch (ClassCastException e) { + + System.out.println("Class cast exception !!!"); + } + + } + } else if (inputs.getClass() == ObjectNode.class) { + + Iterator<Entry<String, JsonNode>> iterator = inputs.fields(); + + while (iterator.hasNext()) { + Entry<String, JsonNode> entry = iterator.next(); + String currentInputId = entry.getKey(); + JsonNode typeConfigurations = entry.getValue(); + + if (typeConfigurations.getClass() == TextNode.class) { + if (typeConfigurations.asText().startsWith("$")) { + System.out.println("Exception"); + } + // inputs: + /// input_1: int[] + else if (isValidArrayType(typeConfigurations.asText())) + result.put(currentInputId, DEPTH_1); + // inputs: + /// input_1: int or int? + else + result.put(currentInputId, DEPTH_0); + + } else if (typeConfigurations.getClass() == ObjectNode.class) { + + if (typeConfigurations.has(TYPE)) { + JsonNode inputType = typeConfigurations.get(TYPE); + // inputs: + /// input_1: + //// type: [int,"null"] + if (inputType.getClass() == ArrayNode.class) { + if (isValidDataType(inputType)) + result.put(currentInputId, DEPTH_0); + } else { + // inputs: + /// input_1: + //// type: array or int[] + if (inputType.asText().equals(ARRAY) || isValidArrayType(inputType.asText())) + result.put(currentInputId, DEPTH_1); + // inputs: + /// input_1: + //// type: int or int? + else + result.put(currentInputId, DEPTH_0); + } + } + } + } + + } + return result; + } + + /** + * This method is used for extracting details of the CWL tool inputs or + * outputs. ex:Label, Format, Description + * + * @param inputs + * This is JsonNode object which contains the Inputs or outputs + * of the respective CWL tool + * @return + */ + private Map<String, PortDetail> processdetails(JsonNode inputs) { + + Map<String, PortDetail> result = new HashMap<>(); + if(inputs == null) { + return result; + } + if (inputs.getClass() == ArrayNode.class) { + + for (JsonNode input : inputs) { + PortDetail detail = new PortDetail(); + String currentInputId = input.get(ID).asText(); + + getParamDetails(result, input, detail, currentInputId); + + } + } else if (inputs.getClass() == ObjectNode.class) { + Iterator<Entry<String, JsonNode>> iterator = inputs.fields(); + while (iterator.hasNext()) { + PortDetail detail = new PortDetail(); + Entry<String, JsonNode> entry = iterator.next(); + getParamDetails(result, entry.getValue(), detail, entry.getKey()); + } + } + return result; + } + + private void getParamDetails(Map<String, PortDetail> result, JsonNode input, PortDetail detail, + String currentInputId) { + extractDescription(input, detail); + + extractFormat(input, detail); + + extractLabel(input, detail); + + result.put(currentInputId, detail); + } + + /** + * This method is used for extracting the Label of a CWL input or Output + * + * @param input + * Single CWL input or output as a JsonNode + * @param detail + * respective PortDetail Object to hold the extracted Label + */ + public void extractLabel(JsonNode input, PortDetail detail) { + if (input != null) + if (input.has(LABEL)) { + detail.setLabel(input.get(LABEL).asText()); + } else { + detail.setLabel(null); + } + } + + /** + * + * @param input + * Single CWL input or output as a JsonNode + * @param detail + * respective PortDetail Object to hold the extracted Label + */ + public void extractDescription(JsonNode input, PortDetail detail) { + if (input != null) + if (input.has(DESCRIPTION)) { + detail.setDescription(input.get(DESCRIPTION).asText()); + } else { + detail.setDescription(null); + } + } + + /** + * This method is used for extracting the Formats of a CWL input or Output + * Single argument(Input or Output) can have multiple Formats. + * + * @param input + * Single CWL input or output as a JsonNode + * @param detail + * respective PortDetail Object to hold the extracted Label + */ + public void extractFormat(JsonNode input, PortDetail detail) { + if (input != null) + if (input.has(FORMAT)) { + + JsonNode formatInfo = input.get(FORMAT); + + ArrayList<String> format = new ArrayList<>(); + detail.setFormat(format); + + if (formatInfo.getClass() == TextNode.class) { + + figureOutFormats(formatInfo.asText(), detail); + } else if (formatInfo.getClass() == ArrayNode.class) { + for (JsonNode eachFormat : formatInfo) { + figureOutFormats(eachFormat.asText(), detail); + } + } + + } + } + + /** + * Re Format the CWL format using the NameSpace in CWL Tool if possible + * otherwise it doesn't change the current nameSpace => edam:http://edam.org + * format : edam :1245 => http://edamontology.org/1245 + * + * @param formatInfoString + * Single Format + * @param detail + * respective PortDetail Object to hold the extracted Label + */ + public void figureOutFormats(String formatInfoString, PortDetail detail) { + if (formatInfoString.startsWith("$")) { + + detail.addFormat(formatInfoString); + } else if (formatInfoString.contains(":")) { + String format[] = formatInfoString.split(":"); + String namespaceKey = format[0]; + String urlAppednd = format[1]; + + if (nameSpace.has(namespaceKey)) + detail.addFormat(nameSpace.get(namespaceKey).asText() + urlAppednd); + else + // can't figure out the format + detail.addFormat(formatInfoString); + + } else { + // can't figure out the format + detail.addFormat(formatInfoString); + } + } + + /** + * This method is used to check whether the input/output is valid CWL TYPE + * when the type is represented as type: ["null","int"] + * + * @param typeConfigurations + * Type of the CWl input or output + * @return + */ + public boolean isValidDataType(JsonNode typeConfigurations) { + if (typeConfigurations == null) + return false; + for (JsonNode type : typeConfigurations) { + if (!(type.asText().equals(FLOAT) || type.asText().equals(NULL) || type.asText().equals(BOOLEAN) + || type.asText().equals(INT) || type.asText().equals(STRING) || type.asText().equals(DOUBLE) + || type.asText().equals(FILE)||type.asText().equals(DIRECTORY))) + return false; + } + return true; + } + + /** + * + * This method is for figure out whether the parameter is an array or not. + * As from CWL document v1.0, array can be defined as "TYPE[]". For Example + * : int[] This method will look for "[]" sequence of characters in the end + * of the type and is provided type is a valid CWL TYPE or not + * + * @param type + * type of the CWL parameter + * @return + */ + public boolean isValidArrayType(String type) { + if (type == null) + return false; + Pattern pattern = Pattern.compile(ARRAY_SIGNATURE_BRACKETS); + Matcher matcher = pattern.matcher(type); + ObjectMapper mapper = new ObjectMapper(); + ArrayNode node = mapper.createArrayNode(); + node.add(type.split(ARRAY_SPLIT_BRACKETS)[0]); + if (matcher.find() && isValidDataType(node)) + return true; + else + return false; + } +} \ No newline at end of file
