[Wtr-development] XPath Support for Frame child elements

Kunal Kumar Mon, 15 Feb 2010 22:29:15 -0800

Hello,

I previously posted a question about XPath support for frames in this group
(http://groups.google.com/group/watir-general/browse_thread/thread/947df24fa1b27f85/e23489b0cd281220?lnk=gst&q=xpath#e23489b0cd281220).



The web apps for which I am trying to write Watir test scripts contain
frames, and it was critical for me to have XPath support in frames (as the
elements in the frames have generated ids).

I tried my hand at fixing the Frame class to solve the issue by referring to
the IE class. The solution I took from IE works pretty well in Frame as well,
provided the frame and the IE page are in the same domain (that is, the frame
is loading content from the same domain). If the frame's domain is different,
it simply throws an "Access Denied" error.

A few things about the changes:

1) Modified the locate function to make this_frame a member variable.

2) Added the functions html_source, tokenize_tagline, all_tag_attributes,
element_by_xpath, elements_by_xpath, rexml_document_object,
create_rexml_document_object, output_rexml_document, xml_escape and
element_by_absolute_xpath from ie-class and modified them accordingly.

It would probably be good for Frame to inherit from the IE class, since most
of the functionality is the same. What do you guys think?

===================================================================================================

module Watir
  class Frame
    include Container
    include PageContainer

    # Find the frame denoted by @how and @what in the container and return its
    # ole_object.
    #
    # Supported values of @how:
    #   :index - 1-based position in the container's frames collection
    #   :name  - frame name, tested with what.matches
    #   :id    - id attribute of the i-th FRAME or IFRAME tag
    #   :src   - src attribute of the i-th FRAME or IFRAME tag
    #
    # The frame currently being examined is kept in @this_frame, so after a
    # successful call @this_frame is the frame that was returned.
    #
    # Raises ArgumentError for any other value of @how and
    # UnknownFrameException when no frame matches.
    def locate
      how = @how
      what = @what
      frames = @container.document.frames

      (0...frames.length).each do |i|
        @this_frame = frames.item(i)
        case how
        when :index
          # what is 1-based, the frames collection is 0-based.
          return @this_frame if i + 1 == what
        when :name
          begin
            return @this_frame if what.matches(@this_frame.name)
          rescue
            # Deliberately ignored (access denied?): treat the frame as a
            # non-match and keep looking.
          end
        when :id
          # We assume that pages contain frames or iframes, but not both.
          this_frame_tag = @container.document.getElementsByTagName("FRAME").item(i)
          return @this_frame if this_frame_tag && what.matches(this_frame_tag.invoke("id"))
          this_iframe_tag = @container.document.getElementsByTagName("IFRAME").item(i)
          return @this_frame if this_iframe_tag && what.matches(this_iframe_tag.invoke("id"))
        when :src
          this_frame_tag = @container.document.getElementsByTagName("FRAME").item(i)
          return @this_frame if this_frame_tag && what.matches(this_frame_tag.src)
          this_iframe_tag = @container.document.getElementsByTagName("IFRAME").item(i)
          return @this_frame if this_iframe_tag && what.matches(this_iframe_tag.src)
        else
          raise ArgumentError, "Argument #{how} not supported"
        end
      end

      raise UnknownFrameException, "Unable to locate a frame with #{how.to_s} #{what}"
    end

    # Builds a Frame inside +container+, identified by +how+ / +what+.
    # The frame is located immediately (see locate), so construction raises
    # if no matching frame exists; the result is stored in @o.
    def initialize(container, how, what)
      set_container(container)
      @how, @what = how, what
      @o = locate
      copy_test_config(container)
    end

    # Returns the document of the located frame object (@o, set in initialize).
    def document
      return @o.document
    end

    # Returns the page container's attach command with a ".frame(how, what)"
    # call appended, using the inspected forms of @how and @what.
    def attach_command
      @container.page_container.attach_command + ".frame(#{@how.inspect}, #{@what.inspect})"
    end

    # Serialises the DOM subtree rooted at +element+ and returns +htmlString+
    # with that markup appended.
    #
    # element:     current DOM node
    # htmlString:  markup generated so far
    # spaceString: only handed on to the recursive calls (the indentation
    #              code that used it is disabled)
    #
    # Nodes whose tagName cannot be read are treated as text and appended in
    # escaped form. Comments, script and style elements, and tags whose name
    # does not start with a word character, "_" or ":" contribute nothing.
    # Any other error is printed and whatever has been generated so far is
    # returned.
    def html_source(element, htmlString, spaceString)
      begin
        begin
          tag = element.tagName.downcase
          tag = EMPTY_TAG_NAME if tag == ""
          # Mismatched tag: leave the output untouched.
          return htmlString unless tag =~ /^(\w|_|:)(.*)$/
        rescue
          # Text node.
          htmlString += xml_escape(element.toString)
          return htmlString
        end

        # Skip comment, script and style tags.
        return htmlString if tag =~ /^!/ || tag == "script" || tag == "style"

        attributes = tag != EMPTY_TAG_NAME ? all_tag_attributes(element.outerHtml) : ""
        has_children = element.canHaveChildren
        # Elements that cannot have children are emitted self-closing.
        htmlString += "<#{tag} #{attributes}" + (has_children ? ">" : "/>")

        element.childnodes.each do |child|
          htmlString = html_source(child, htmlString, spaceString)
        end

        htmlString += "</" + tag + ">" if has_children
        return htmlString
      rescue => e
        puts e.to_s
      end
      return htmlString
    end

    private :html_source

    # Tokenizes the opening tag at the start of +outerHtml+ and returns an
    # array of tokens.
    # A token is either the tag name, "=", an attribute name or an attribute
    # value. An attribute value is either a quoted string (quotes kept) or a
    # single word.
    #
    # Scanning stops at the first ">" or "<" outside a quoted value, i.e. when
    # the tag is closed or the next element starts. A quoted value that is
    # never terminated is closed with its own quote character. Returns an
    # empty array when +outerHtml+ does not start with "<".
    def tokenize_tagline(outerHtml)
      flattened = outerHtml.gsub(/\n|\r/, " ")
      # Drop the "<" that opens the current tag.
      opening = flattened.match(/^\s*<(.*)$/)
      return [] unless opening

      text = opening[1]
      tokens = []
      length = text.length
      i = 0
      # True right after "=": the next bare word is a value and may contain "=".
      parsing_value = false

      while i < length
        i += 1 while i < length && text[i, 1] =~ /\s/
        break if i == length
        current = text[i, 1]

        # Either the current tag has been closed, or the tag was never closed
        # and the next element has started.
        break if current =~ /<|>/

        if current == "\"" || current == "'"
          # Quoted value: scan to the matching quote that is not preceded by
          # a backslash.
          parsing_value = false
          start = i
          i += 1
          i += 1 while i < length && (text[i, 1] != current || text[i - 1, 1] == "\\")
          if i == length
            # Unterminated value: supply the missing closing quote.
            tokens.push text[start..i - 1] + current
          else
            tokens.push text[start..i]
          end
        elsif current == "="
          tokens.push "="
          parsing_value = true
        else
          start = i
          stop = parsing_value ? /\s|<|>/ : /\s|=|<|>/
          i += 1 while i < length && text[i, 1] !~ stop
          parsing_value = false
          i -= 1
          tokens.push text[start..i]
        end
        i += 1
      end
      tokens
    end
    private :tokenize_tagline


    # Gets and cleans all the attributes of the tag that opens +outerHtml+.
    #
    # Returns a string of ' name="value"' pairs built from the tokens of the
    # tag (the tag name itself is skipped):
    # * tokens that are not valid attribute names are dropped;
    # * unquoted values are wrapped in double quotes;
    # * an attribute without a value gets its own name as value,
    #   e.g. selected="selected";
    # * a trailing "=" with nothing after it yields an empty value.
    def all_tag_attributes(outerHtml)
      tokens = tokenize_tagline(outerHtml)
      tagLine = ""
      count = 1 # tokens[0] is the tag name
      tokensLength = tokens.length
      expectedEqualityOP = false
      while count < tokensLength
        if !expectedEqualityOP
          # Emit the attribute name if it is valid.
          # Refer: http://www.w3.org/TR/REC-xml/#NT-Name
          if tokens[count] =~ /^(\w|_|:)(.*)$/
            tagLine += " #{tokens[count]}"
            expectedEqualityOP = true
          end
        elsif tokens[count] == "="
          count += 1
          if count == tokensLength
            tagLine += "=\"\""
          elsif tokens[count][0, 1] == "\"" || tokens[count][0, 1] == "'"
            tagLine += "=#{tokens[count]}"
          else
            tagLine += "=\"#{tokens[count]}\""
          end
          expectedEqualityOP = false
        else
          # Equality was expected but it is not there.
          # Set the value to the attribute name, e.g. selected="selected",
          # then look at the current token again as a possible attribute name.
          tagLine += "=\"#{tokens[count - 1]}\""
          expectedEqualityOP = false
          next
        end
        count += 1
      end
      # The last token was a valueless attribute name.
      tagLine += "=\"#{tokens[count - 1]}\" " if expectedEqualityOP
      tagLine
    end
    private :all_tag_attributes



    # Returns the first element matching +xpath+, or whatever falsy value
    # elements_by_xpath returned when nothing matched.
    def element_by_xpath(xpath)
      matches = elements_by_xpath(xpath)
      return matches unless matches
      matches[0]
    end

    # Runs +xpath+ against the REXML copy of the page and returns an array of
    # the corresponding DOM/COM elements, or nil when there are none.
    def elements_by_xpath(xpath)
      found = []
      rexml_document_object.elements.each(xpath) do |node|
        # node is a REXML element; its absolute path is used to find the
        # matching DOM/COM element.
        dom_element = element_by_absolute_xpath(node.xpath)
        found << dom_element unless dom_element.nil?
      end
      found.empty? ? nil : found
    end

    # Returns the cached REXML document, building it on first use.
    def rexml_document_object
      create_rexml_document_object if @rexmlDomobject.nil?
      @rexmlDomobject
    end

    # Create the REXML object if it is nil. This method is private so it can
    # be called only from the rexml_document_object method.
    #
    # The body of @this_frame's document is serialised with html_source,
    # wrapped in an <HTML> root and parsed into @rexmlDomobject.
    #
    # Raises RuntimeError when the loaded REXML is older than 3.1.4. If the
    # generated source cannot be parsed it is written to "error.xml" and the
    # parser's exception is re-raised.
    def create_rexml_document_object
      # Use our modified rexml libraries
      require 'rexml/document'
      # Compare version components numerically; comparing the version strings
      # themselves would rank e.g. "10.0.0" below "3.1.4".
      actual_version = REXML::Version.split('.').map { |part| part.to_i }
      if (actual_version <=> [3, 1, 4]) < 0
        raise "Requires REXML version of at least 3.1.4. Actual: #{REXML::Version}"
      end
      return unless @rexmlDomobject.nil?

      htmlSource = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<HTML>\n"
      htmlSource = html_source(@this_frame.document.body, htmlSource, " ")
      htmlSource += "\n</HTML>\n"
      # Angrez: Resolving Jira issue WTR-114
      htmlSource = htmlSource.gsub(/&nbsp;/, '&#160;')
      begin
        @rexmlDomobject = REXML::Document.new(htmlSource)
      rescue => e
        # Keep the source that failed to parse for inspection.
        output_rexml_document("error.xml", htmlSource)
        raise e
      end
    end
    private :create_rexml_document_object

    # Writes +text+ to the file called +name+, replacing any existing
    # content. The block form of File.open closes the file even when the
    # write raises.
    def output_rexml_document(name, text)
      File.open(name, "w") { |file| file.print(text) }
    end
    private :output_rexml_document

    # Escapes the characters that are not valid in XML data.
    # "&" is replaced first so that the entities produced for "<", ">" and
    # '"' are not escaped a second time. nil is treated as an empty string.
    def xml_escape(str)
      str = str.to_s.gsub(/&/, '&amp;')
      str = str.gsub(/</, '&lt;')
      str = str.gsub(/>/, '&gt;')
      str = str.gsub(/"/, '&quot;')
      str
    end
    private :xml_escape

    # Walks the IE DOM and returns the element addressed by the given
    # absolute xpath.
    #
    # Only the part of +xpath+ after ".../body/" is used. Each step is a tag
    # name with an optional 1-based index, e.g. "div[2]"; the index counts
    # children of the current element that have that tag name.
    #
    # Returns nil when the path has no steps, when any step cannot be found,
    # or when the element reached does not carry the last step's tag name.
    # Prints a message and returns nil when +xpath+ has no "/body/" part.
    def element_by_absolute_xpath(xpath)
      curElem = document.getElementsByTagName("body")["0"]
      xpath =~ /^.*\/body\[?\d*\]?\/(.*)/
      xpath = $1

      if xpath.nil?
        puts "Function Requires absolute XPath."
        return
      end

      arr = xpath.split(/\//)
      return nil if arr.empty?

      # The last step is like tagName[number] or just tagName. For the first
      # case we need to separate tagName and number.
      lastTagName = arr.last.to_s.upcase[/(\w*)\[?\d*\]?/, 1]

      arr.each do |step|
        parts = step.match(/(\w*)\[?(\d*)\]?/)
        tagname = parts[1].upcase
        # Number of same-tag siblings to skip before the wanted one.
        index = parts[2].empty? ? 0 : parts[2].to_i - 1

        children = curElem.childnodes
        if children.nil? || children.length == 0
          puts "#{step} is null"
          # Nothing below the current element: the path cannot be resolved.
          return nil
        end

        match = nil
        children.each do |child|
          begin
            curTag = child.tagName
            curTag = EMPTY_TAG_NAME if curTag == ""
          rescue
            next # nodes without a readable tagName (e.g. text) never match
          end
          next unless curTag == tagname
          index -= 1
          if index < 0
            match = child
            break
          end
        end

        # Stop as soon as a step is missing rather than continuing the walk
        # from the wrong element.
        return nil if match.nil?
        curElem = match
      end

      begin
        curElem.tagName == lastTagName ? curElem : nil
      rescue
        nil
      end
    end
    private :element_by_absolute_xpath


    # Placeholder tag name used for DOM elements whose tagName is empty.
    # Frozen so the shared constant cannot be modified in place.
    EMPTY_TAG_NAME = "DUMMY".freeze


  end
end

===================================================================================================

Thanks!
Kunal

------------------------------------------------
Kunal Kumar
Email: [email protected]
Blog: http://www.kspace.in/blog

_______________________________________________
Wtr-development mailing list
[email protected]
http://rubyforge.org/mailman/listinfo/wtr-development

[Wtr-development] XPath Support for Frame child elements

Reply via email to