Add files via upload

Project: 
http://git-wip-us.apache.org/repos/asf/incubator-taverna-databundle-viewer/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-taverna-databundle-viewer/commit/877aa41b
Tree: 
http://git-wip-us.apache.org/repos/asf/incubator-taverna-databundle-viewer/tree/877aa41b
Diff: 
http://git-wip-us.apache.org/repos/asf/incubator-taverna-databundle-viewer/diff/877aa41b

Branch: refs/heads/master
Commit: 877aa41be15534a0a51fb4ed63dcb816dec8778b
Parents: 5f01fa1
Author: PCStefan <[email protected]>
Authored: Mon Jun 6 12:53:31 2016 +0100
Committer: PCStefan <[email protected]>
Committed: Mon Jun 6 12:53:31 2016 +0100

----------------------------------------------------------------------
 app/models/provenance.rb | 606 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 606 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-taverna-databundle-viewer/blob/877aa41b/app/models/provenance.rb
----------------------------------------------------------------------
diff --git a/app/models/provenance.rb b/app/models/provenance.rb
new file mode 100644
index 0000000..5ec3a40
--- /dev/null
+++ b/app/models/provenance.rb
@@ -0,0 +1,606 @@
+require 'sparql'  # query the graph
+require 'uri'     # used to decode urls
+
+class Provenance
+
+  # TODO: try to read the prefixes from the file
+  @@prefixes = "PREFIX dc:  <http://purl.org/dc/elements/1.1/>
+        PREFIX prov:  <http://www.w3.org/ns/prov#>
+        PREFIX cnt:  <http://www.w3.org/2011/content#>
+        PREFIX foaf:  <http://xmlns.com/foaf/0.1/>
+        PREFIX dcmitype:  <http://purl.org/dc/dcmitype/>
+        PREFIX wfprov:  <http://purl.org/wf4ever/wfprov#>
+        PREFIX dcam:  <http://purl.org/dc/dcam/>
+        PREFIX xml:  <http://www.w3.org/XML/1998/namespace>
+        PREFIX vs:  <http://www.w3.org/2003/06/sw-vocab-status/ns#>
+        PREFIX dcterms:  <http://purl.org/dc/terms/>
+        PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
+        PREFIX wot:  <http://xmlns.com/wot/0.1/>
+        PREFIX wfdesc:  <http://purl.org/wf4ever/wfdesc#>
+        PREFIX dct:  <http://purl.org/dc/terms/>
+        PREFIX tavernaprov:  <http://ns.taverna.org.uk/2012/tavernaprov/>
+        PREFIX owl:  <http://www.w3.org/2002/07/owl#>
+        PREFIX xsd:  <http://www.w3.org/2001/XMLSchema#>
+        PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+        PREFIX skos:  <http://www.w3.org/2004/02/skos/core#>
+        PREFIX scufl2:  <http://ns.taverna.org.uk/2010/scufl2#>
+        "
+  cattr_reader :prefixes
+  attr_reader :graph
+
+  @file = ''
+
+  #constructor
+  def initialize(filepath)
+    @file = filepath
+
+    @graph = RDF::Graph.new
+
+    RDF::Reader.open("#{@file}") do |reader|
+          reader.each_statement do |statement|
+            @graph.insert(statement)
+      end
+    end
+  end
+
+  #Extract all the workflows and their parent workflow
+  def getAllWorkflowRuns
+    # create the query
+    sparql_query = SPARQL.parse("#{Provenance.prefixes}
+      SELECT *
+      WHERE
+      { 
+        ?workflowRun  rdf:type  wfprov:WorkflowRun ;
+                      rdfs:label ?workflowRunLabel .
+        OPTIONAL
+        { 
+          ?workflowRun  wfprov:wasPartOfWorkflowRun  ?wasPartOfWorkflowRun  .
+          ?wasPartOfWorkflowRun  rdfs:label ?wasPartOfWorkflowRunLabel  .
+          FILTER NOT EXISTS { ?something  foaf:primaryTopic  
?wasPartOfWorkflowRun }
+        }
+        OPTIONAL
+        {
+          {
+            ?workflowRun  wfprov:usedInput  ?usedDictionaryInput .
+            ?usedDictionaryInput  rdf:type  prov:Dictionary
+          }
+          UNION
+          {
+            ?workflowRun  wfprov:usedInput  ?usedArtifactInput
+            FILTER NOT EXISTS { ?usedArtifactInput  rdf:type  prov:Dictionary }
+          }
+        }
+        FILTER NOT EXISTS { ?something  foaf:primaryTopic  ?workflowRun }
+      }")
+
+    #return the result of the performing the query
+    sparql_query.execute(graph)
+  end
+ 
+  # Get all the ProcessRuns and their outlinks
+  def getAllProcessRuns
+    sparql_query = SPARQL.parse("#{Provenance.prefixes}   
+      SELECT *
+      WHERE
+      { 
+        ?processURI  rdf:type          wfprov:ProcessRun ;
+                  prov:startedAtTime   ?startedAtTime ;
+                  prov:endedAtTime     ?endedAtTime ;
+                  wfprov:wasEnactedBy  ?engineUsed ;
+                  rdfs:label            ?processLabel 
+        OPTIONAL
+          { 
+            ?processURI  wfprov:wasPartOfWorkflowRun  ?wasPartOfWorkflow  .
+            ?wasPartOfWorkflow  rdfs:label  ?wasPartOfWorkflowLabel .
+            FILTER NOT EXISTS { ?something  foaf:primaryTopic  
?wasPartOfWorkflow }
+          }
+        OPTIONAL
+        { 
+          {
+          ?processURI  wfprov:usedInput  ?usedDictionaryInput .
+          ?usedDictionaryInput  rdf:type  prov:Dictionary
+          }
+          UNION
+          {         
+            ?processURI  wfprov:usedInput  ?usedArtifactInput
+            FILTER NOT EXISTS { ?usedArtifactInput  rdf:type  prov:Dictionary }
+          }        
+        }
+      }")
+
+    # return the processes that were used
+    sparql_query.execute(graph)
+  end
+
+  #Extract all the workflows and their parent workflow
+  def getAllArtifacts
+    # create the query
+    sparql_query = SPARQL.parse("#{Provenance.prefixes}
+      SELECT *
+      WHERE
+      {
+        { 
+          ?artifactURI  rdf:type  wfprov:Artifact  ;
+                        wfprov:describedByParameter  ?describedByParameter  .
+          ?describedByParameter  rdfs:comment  ?comment
+          OPTIONAL
+          {
+            ?artifactURI  tavernaprov:content  ?filepath
+          }
+          OPTIONAL
+          { 
+            ?artifactURI  wfprov:wasOutputFrom  ?outputFromWorkflowRun .
+            ?outputFromWorkflowRun  rdf:type  wfprov:WorkflowRun ;
+                                    rdfs:label  ?outputFromWorkflowRunLabel .
+            FILTER NOT EXISTS { ?something  foaf:primaryTopic  
?outputFromWorkflowRun }
+          }
+          OPTIONAL
+          { 
+            ?artifactURI  wfprov:wasOutputFrom  ?outputFromProcessRun .
+            ?outputFromProcessRun  rdf:type  wfprov:ProcessRun  ;
+                                   prov:startedAtTime  ?startedAtTime ;
+                                   prov:endedAtTime    ?endedAtTime ;
+                                   rdfs:label ?outputFromProcessRunLabel
+          }
+          FILTER NOT EXISTS { ?artifactURI  rdf:type  prov:Dictionary }
+        }
+        UNION
+        { 
+          ?dictionary  rdf:type  prov:Dictionary
+          OPTIONAL
+          {
+            ?dictionary  tavernaprov:content  ?filepath
+          }
+          OPTIONAL
+          { 
+            {
+              ?dictionary  prov:hadMember  ?hadMemberDictionary  .
+              ?hadMemberDictionary  rdf:type  prov:Dictionary .
+            }
+            UNION
+            {
+              ?dictionary  prov:hadMember  ?hadMemberArtifact  .
+              ?hadMemberArtifact wfprov:describedByParameter 
?describedByParameter  .
+              ?describedByParameter rdfs:comment ?comment  .
+              FILTER NOT EXISTS { ?hadMemberArtifact  rdf:type  
prov:Dictionary }
+            }
+          }
+          OPTIONAL
+          { 
+            ?dictionary  wfprov:wasOutputFrom  ?outputFromWorkflowRun .
+            ?outputFromWorkflowRun  rdf:type  wfprov:WorkflowRun ; 
+                                    rdfs:label ?outputFromWorkflowRunLabel .
+            FILTER NOT EXISTS { ?something  foaf:primaryTopic  
?outputFromWorkflowRun }
+          }
+          OPTIONAL
+          { 
+            ?dictionary  wfprov:wasOutputFrom  ?outputFromProcessRun .
+            ?outputFromProcessRun  rdf:type  wfprov:ProcessRun  ;
+                                   prov:startedAtTime  ?startedAtTime ;
+                                   prov:endedAtTime    ?endedAtTime ;
+                                   rdfs:label ?outputFromProcessRunLabel
+          }
+        }
+      }")
+
+    # return the result of the performing the query
+    sparql_query.execute(graph)
+  end
+
+  def getContentOf(extractedFilepath)
+    content = ""
+
+    if File.directory?(extractedFilepath)
+      content = "[" 
+
+      # ffs = Files or Folders
+      #for each folder/file inside this folder do
+      ffs = Dir.glob(extractedFilepath + "/*")
+      for file in ffs
+        content = content + getContentOf("#{file}") + ", "
+      end
+
+      content = content[0...-2] + "]"
+
+    elsif File.file?(extractedFilepath)
+      content = File.read(extractedFilepath)
+    end
+
+    content
+  end
+
+  def to_dataHashObject(bundle_filepath)
+
+    nodes = []
+    links = []
+
+    linkValue = 50
+    processorTrimCount = "Processor execution ".length
+    workflowRunTrimCount = "Workflow run of ".length
+
+    # get all the workflows
+    getAllWorkflowRuns.each do |result|
+
+      # get the name
+      workflowRunURI = result["workflowRun"].to_s
+      workflowRunLabel = result["workflowRunLabel"].to_s
+      if workflowRunLabel[0] == "W"
+        workflowRunLabel = workflowRunLabel[workflowRunTrimCount, 
workflowRunLabel.length]
+      elsif workflowRunLabel[0] == "P"
+        workflowRunLabel = workflowRunLabel[processorTrimCount, 
workflowRunLabel.length]
+      end
+
+      # a temp node for current (Decide whether to be added or not)
+      workflowRun = {:name => workflowRunURI, :type => "Workflow Run", 
+                     :label => workflowRunLabel}
+
+      # see if exists
+      indexSource = nodes.find_index(workflowRun)
+
+      # check
+      if indexSource.blank?
+        indexSource = nodes.count
+        nodes << workflowRun
+      end
+
+      # check if has property wasPartOfWorkflowRun 
+      if result["wasPartOfWorkflowRun"].present?
+
+        secondWorkflowRunLabel = result["wasPartOfWorkflowRunLabel"].to_s
+        if secondWorkflowRunLabel[0] == "W"
+          secondWorkflowRunLabel = 
secondWorkflowRunLabel[workflowRunTrimCount, secondWorkflowRunLabel.length]
+        elsif secondWorkflowRunLabel[0] == "P"
+          secondWorkflowRunLabel = secondWorkflowRunLabel[processorTrimCount, 
secondWorkflowRunLabel.length]
+        end
+        secondWorkflowRun = {:name => result["wasPartOfWorkflowRun"].to_s, 
:type => "Workflow Run",
+                             :label => secondWorkflowRunLabel}
+
+        indexTarget = nodes.find_index(secondWorkflowRun)
+
+        if indexTarget.blank?
+          indexTarget = nodes.count
+          nodes << secondWorkflowRun
+        end
+
+        # add the link
+        linkWfToWf = {:source => indexTarget, :target => indexSource, :value 
=> linkValue}
+        if links.find_index(linkWfToWf).blank?
+          links << linkWfToWf
+        end
+      end
+
+      # check if has property usedInput 
+      if result["usedArtifactInput"].present?
+        artifact = {:name => result["usedArtifactInput"].to_s, :type => 
"Artifact" }
+
+        indexTarget = nodes.find_index(artifact)
+
+        if indexTarget.blank?
+          indexTarget = nodes.count
+          nodes << artifact
+        end
+
+        # add the link
+        linkProcessToArtifact = {:source => indexTarget, :target => 
indexSource, :value => linkValue}
+        if links.find_index(linkProcessToArtifact).blank?
+          links << linkProcessToArtifact
+        end
+      end
+
+      # check if has property usedInput 
+      if result["usedDictionaryInput"].present?
+        dictionary = {:name => result["usedDictionaryInput"].to_s, :type => 
"Dictionary" }
+
+        indexTarget = nodes.find_index(artifact)
+
+        if indexTarget.blank?
+          indexTarget = nodes.count
+          nodes << dictionary
+        end
+
+        # add the link
+        linkProcessToArtifact = {:source => indexTarget, :target => 
indexSource, :value => linkValue}
+        if links.find_index(linkProcessToArtifact).blank?
+          links << linkProcessToArtifact
+        end
+      end
+
+    end
+
+    # get all the processes
+    # get all the workflows
+    getAllProcessRuns.each do |result|
+      # get the name
+      processRunURI = result["processURI"].to_s
+      processRunLabel = result["processLabel"].to_s
+
+      # a temp node for current (Decide whether to be added or not)
+      processRun = {:name => processRunURI, :type => "Process Run", 
+                    :startedAtTime => result["startedAtTime"].to_s, 
:endedAtTime =>result["endedAtTime"].to_s,
+                    :label => processRunLabel[processorTrimCount, 
processRunLabel.length]}
+                    
+
+      # see if exists
+      indexSource = nodes.find_index(processRun)
+
+      # check
+      if indexSource.blank?
+        indexSource = nodes.count
+        nodes << processRun
+      end
+
+      # check if has property wasPartOfWorkflow
+      if result["wasPartOfWorkflow"].present?
+
+        workflowRunLabel = result["wasPartOfWorkflowLabel"].to_s
+        if workflowRunLabel[0] == "W"
+          workflowRunLabel = workflowRunLabel[workflowRunTrimCount, 
workflowRunLabel.length]
+        elsif workflowRunLabel[0] == "P"
+          workflowRunLabel = workflowRunLabel[processorTrimCount, 
workflowRunLabel.length]
+        end
+
+
+        workflowRun = {:name => result["wasPartOfWorkflow"].to_s, :type => 
"Workflow Run", 
+                       :label => workflowRunLabel}
+
+        indexTarget = nodes.find_index(workflowRun)
+
+        if indexTarget.blank?
+          indexTarget = nodes.count
+          nodes << workflowRun
+        end
+
+        # add the link
+        linkProcessToWf = {:source => indexTarget, :target => indexSource, 
:value => linkValue}
+        if links.find_index(linkProcessToWf).blank?
+          links << linkProcessToWf
+        end
+      end
+
+      # check if has property usedInput 
+      if result["usedArtifactInput"].present?
+        artifact = {:name => result["usedArtifactInput"].to_s, :type => 
"Artifact" }
+
+        indexTarget = nodes.find_index(artifact)
+
+        if indexTarget.blank?
+          indexTarget = nodes.count
+          nodes << artifact
+        end
+
+        # add the link
+        linkProcessToArtifact = {:source => indexTarget, :target => 
indexSource, :value => linkValue}
+        if links.find_index(linkProcessToArtifact).blank?
+          links << linkProcessToArtifact
+        end
+      end
+
+      # check if has property usedInput 
+      if result["usedDictionaryInput"].present?
+        dictionary = {:name => result["usedDictionaryInput"].to_s, :type => 
"Dictionary" }
+
+        indexTarget = nodes.find_index(artifact)
+
+        if indexTarget.blank?
+          indexTarget = nodes.count
+          nodes << dictionary
+        end
+
+        # add the link
+        linkProcessToArtifact = {:source => indexTarget, :target => 
indexSource, :value => linkValue}
+        if links.find_index(linkProcessToArtifact).blank?
+          links << linkProcessToArtifact
+        end
+      end
+
+
+
+      # # check if has property engineUsed which represents the 
wfprov:wasEnactedBy 
+      # if result["engineUsed"].present?
+      #   engine = {:name => result["engineUsed"].to_s, :type => "Engine"}
+
+      #   indexTarget = nodes.find_index(engine)
+
+      #   if indexTarget.blank?
+      #     indexTarget = nodes.count
+      #     nodes << engine
+      #   end
+
+    #     # add the link
+      #   linkProcessToEngine = {:source => indexTarget, :target => 
indexSource, :value => linkValue}
+      #   if links.find_index(linkProcessToEngine).blank?
+      #     links << linkProcessToEngine
+      #   end
+      # end
+
+    end
+
+
+    # get all the nodes and links related to the artifact
+    getAllArtifacts.each do |result|
+      
+      if result["artifactURI"].present?
+        # get the name
+        artifactURI = result["artifactURI"].to_s
+
+        # the node that needs to be added to the nodes
+        artifact = {:name => artifactURI, :type => "Artifact"}
+      else
+        # get the name
+        artifactURI = result["dictionary"].to_s
+
+        # the node that needs to be added to the nodes
+        artifact = {:name => artifactURI, :type => "Dictionary"}
+      end
+
+      # get the index of the artifact if present otherwise nil
+      indexSource = -1
+
+      nodes.each_with_index do |node, index|
+        if node[:type].to_s == artifact[:type].to_s
+          if node[:name].to_s == artifact[:name].to_s
+            indexSource = index
+            artifactLabel = "List"
+            if result["comment"].present?
+              artifactLabel = result["comment"].to_s
+            end
+
+            if node[:label].present? and node[:label] != "List"
+              node[:label] = node[:label] + "\\n" + artifactLabel
+              
+            else
+              node.merge!(:label => artifactLabel)
+            end
+
+            if !(node[:content].present?) and result["filepath"].present?
+                artifactContent = 
getContentOf("#{bundle_filepath}#{result["filepath"].to_s}")
+                node[:content] = artifactContent
+            end
+          end
+        end
+      end
+
+      # check if is already in the list if not add to nodes
+      if indexSource == -1
+        indexSource = nodes.count
+        artifactLabel = "List"
+        if result["comment"].present?
+          artifactLabel = result["comment"].to_s
+        end
+        artifact[:label] = artifactLabel
+
+        artifactContent = ""
+        if result["filepath"].present?
+          artifactContent = 
getContentOf("#{bundle_filepath}#{result["filepath"].to_s}")
+          artifact[:content] = artifactContent
+        end
+        nodes << artifact
+      end
+
+      # check if it has the property wasOutputFrom a process Run and add a 
link entity-process
+      if result["outputFromProcessRun"].present?
+        processRunLabel = result["outputFromProcessRunLabel"].to_s
+
+        processRun = {:name => result["outputFromProcessRun"].to_s, :type => 
"Process Run", 
+                      :startedAtTime => result["startedAtTime"].to_s, 
:endedAtTime =>result["endedAtTime"].to_s,
+                      :label => processRunLabel[processorTrimCount, 
processRunLabel.length]}
+    
+        indexTarget = nodes.find_index(processRun)
+
+        if indexTarget.blank?
+          indexTarget = nodes.count
+          nodes << processRun
+        end
+
+        # add the link
+        linkArtifactToProcess = {:source => indexTarget, :target => 
indexSource, :value => linkValue}
+        if links.find_index(linkArtifactToProcess).blank?
+          links << linkArtifactToProcess
+        end
+      end
+
+      if result["outputFromWorkflowRun"].present?
+        workflowRunLabel = result["outputFromWorkflowRunLabel"].to_s
+
+        if workflowRunLabel[0] == "W"
+          workflowRunLabel = workflowRunLabel[workflowRunTrimCount, 
workflowRunLabel.length]
+        elsif workflowRunLabel[0] == "P"
+          workflowRunLabel = workflowRunLabel[processorTrimCount, 
workflowRunLabel.length]
+        end
+
+        workflowRun = {:name => result["outputFromWorkflowRun"].to_s, :type => 
"Workflow Run",
+                       :label => workflowRunLabel}
+    
+        indexTarget = nodes.find_index(workflowRun)
+
+        if indexTarget.blank?
+          indexTarget = nodes.count
+          nodes << workflowRun
+        end
+
+        # add the link
+        linkArtifactToWorkflow = {:source => indexTarget, :target => 
indexSource, :value => linkValue}
+        if links.find_index(linkArtifactToWorkflow).blank?
+          links << linkArtifactToWorkflow
+        end
+      end
+
+      if result["hadMemberArtifact"].present?
+        memberArtifact = {:name => result["hadMemberArtifact"].to_s, :type => 
"Artifact"}
+        
+        indexTarget = -1
+
+        nodes.each_with_index do |node, index|
+          if node[:type].to_s == memberArtifact[:type].to_s
+            if node[:name].to_s == memberArtifact[:name].to_s
+              indexTarget = index
+            end
+          end
+        end
+
+        if indexTarget == -1
+          if result["comment"].present?
+            artifactLabel = result["comment"].to_s
+            memberArtifact.merge!(:label => artifactLabel)
+          end
+          indexTarget = nodes.count
+          nodes << memberArtifact
+        end
+
+        # add the link
+        if result["outputFromProcessRun"].present?
+          linkDictToArtifact = {:source => indexSource, :target => 
indexTarget, :value => linkValue}
+          if links.find_index(linkDictToArtifact).blank?
+            links << linkDictToArtifact
+          end
+        else
+          linkDictToArtifact = {:source => indexTarget, :target => 
indexSource, :value => linkValue}
+          if links.find_index(linkDictToArtifact).blank?
+            links << linkDictToArtifact
+          end
+        end
+      
+      end
+
+      if result["hadMemberDictionary"].present?
+        dictionary = {:name => result["hadMemberDictionary"].to_s, :type => 
"Dictionary"}
+    
+        indexTarget = -1
+
+        nodes.each_with_index do |node, index|
+          if node[:type].to_s == dictionary[:type].to_s
+            if node[:name].to_s == dictionary[:name].to_s
+              indexTarget = index
+            end
+          end
+        end
+
+        if indexTarget == -1
+          # if result["comment"].present?
+          #   artifactLabel = result["comment"].to_s
+          #   memberArtifact.merge!(:label => artifactLabel)
+          # end
+          indexTarget = nodes.count
+          nodes << dictionary
+        end
+
+        # add the link
+        linkDictToDict = {:source => indexTarget, :target => indexSource, 
:value => linkValue}
+        if links.find_index(linkDictToDict).blank?
+          links << linkDictToDict
+        end
+      end
+    end
+
+    # make a hash to return
+    stream = {:nodes => nodes, :links => links }
+
+    # return stream
+    stream
+  end
+
+  # persisted is important not to get "undefined method `to_key' for" error
+  def persisted?
+    false
+  end
+end
\ No newline at end of file

Reply via email to