'''
userDefinedRegex.py

This file is part of w3af, w3af.sourceforge.net .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

'''

import core.controllers.outputManager as om
from core.controllers.w3afException import w3afException

# options
from core.data.options.option import option
from core.data.options.optionList import optionList

from core.controllers.basePlugin.baseGrepPlugin import baseGrepPlugin
import core.data.parsers.urlParser as urlParser

import core.data.kb.knowledgeBase as kb
import core.data.kb.vuln as vuln
import core.data.kb.info as info
import core.data.constants.severity as severity

from core.data.db.temp_persist import disk_list

import re
import os


class userDefinedRegex(baseGrepPlugin):
    '''
    Grep every response for user defined regex.

    The regexes come from two user options: a single regex string and/or a
    file holding one regex per line. They are compiled once in setOptions()
    and then applied to every text/HTML response body in grep().
      
    @author: floyd fuh ( floyd_fuh@yahoo.de )
    '''

    # Flags applied to every user supplied regex.
    _REGEX_FLAGS = re.IGNORECASE | re.DOTALL

    def __init__(self):
        baseGrepPlugin.__init__(self)
        
        # User configured values, see getOptions() / setOptions()
        self._singleRegex = ''
        self._regexlistPath = ''
        self._isVulnerability = False
        
        # Added performance by compiling all the regular expressions
        # before using them (see setOptions method)
        self._regexlistCompiled = []

    def grep(self, request, response):
        '''
        Plugin entry point, search for the user defined regex.

        For every compiled regex that matches the response body a vuln object
        (when isVulnerability is enabled) or an info object is created,
        reported through the output manager and appended to the knowledge base.

        @parameter request: The HTTP request object (not read by this method).
        @parameter response: The HTTP response object
        @return: None
        '''
        # Nothing configured, nothing to do.
        if not self._regexlistCompiled:
            return
        
        if not response.is_text_or_html():
            return

        html_string = response.getBody()
        for regex in self._regexlistCompiled:
            matchObject = regex.search( html_string )
            if not matchObject:
                continue

            pattern = str(regex.pattern)
            if self._isVulnerability:
                problem = vuln.vuln()
                problem.setSeverity(severity.LOW)
                om.out.vulnerability('User defined regex "'+pattern+'" matched!', newLine = True, severity=severity.LOW )
            else:
                problem = info.info()
                om.out.information('User defined regex "'+pattern+'" matched!')

            problem.setURL( response.getURL() )
            msg = 'The string below matches the user defined regex "'+pattern+'":\n'
            # Only the first match found by search() is reported.
            msg += str(matchObject.group(0))
            msg += '\n'
            problem.setDesc( msg )
            problem.setId( response.id )
            problem.setName( 'User defined regex - ' + pattern )
            kb.kb.append( self , 'userDefinedRegex' , problem )

    def _compileRegex( self, regex, errorMessage ):
        '''
        Compile a single user supplied regex with the plugin wide flags.

        @parameter regex: The regex string to compile.
        @parameter errorMessage: Message for the w3afException raised when the
        regex can not be compiled.
        @return: The compiled regex object.
        '''
        try:
            return re.compile(regex, self._REGEX_FLAGS)
        except (re.error, OverflowError):
            # OverflowError is raised by the re module for out of range
            # repetition counts; treat it like any other invalid regex.
            raise w3afException(errorMessage)

    def _compileRegexFile( self, path ):
        '''
        Read the file at path and compile every non-empty line as a regex.

        @parameter path: Path to a file with one regex per line.
        @return: A list with the compiled regex objects, in file order.
        '''
        # Open outside of the try/finally below: if opening fails there is
        # no file object to close.
        try:
            regexFile = open( path )
        except IOError:
            raise w3afException('Unable to open the regexlist file: '+path)

        compiled = []
        try:
            for line in regexFile:
                currentRegex = line.strip()
                # An empty pattern matches every response, so blank lines
                # (e.g. a trailing newline at the end of the file) are skipped.
                if not currentRegex:
                    continue
                compiled.append( self._compileRegex(currentRegex,
                                 'Invalid regex in the regexList: '+currentRegex) )
        except IOError:
            raise w3afException('Unable to read the regexlist file: '+path)
        finally:
            regexFile.close()
        return compiled
    
    def setOptions( self, optionsMap ):
        '''
        Read the user configuration and compile the configured regexes.

        The regexlist file is only used when the configured path is not empty
        and exists; the single regex is only used when it is not empty.

        @parameter optionsMap: Map with the options for this plugin; the
        'isVulnerability', 'regexlist' and 'singleRegex' entries are read.
        @return: None. A w3afException is raised when the regex file can not
        be read or when any of the regexes is invalid.
        '''
        self._isVulnerability = optionsMap['isVulnerability'].getValue()
        
        # Drop any previously compiled regexes. The new ones are collected in
        # a local list and only stored once everything compiled, so an error
        # never leaves a half loaded list behind.
        self._regexlistCompiled = []
        compiled = []

        regexlist = optionsMap['regexlist'].getValue()
        if regexlist != '' and os.path.exists( regexlist ):
            self._regexlistPath = regexlist
            compiled.extend( self._compileRegexFile( self._regexlistPath ) )

        self._singleRegex = optionsMap['singleRegex'].getValue()
        if self._singleRegex != '':
            compiled.append( self._compileRegex(self._singleRegex,
                             'Invalid regex in the singleRegex field!') )

        self._regexlistCompiled = compiled
    
    def getOptions( self ):
        '''
        @return: A list of option objects for this plugin.
        '''    
        optionsList = optionList()
        
        description1 = 'Single regex to use in the grep process.'
        help1 = description1
        option1 = option('singleRegex', self._singleRegex , description1, 'string', help=help1)
        optionsList.add(option1)
        
        description2 = 'Path to file with regexes to use in the grep process.'
        help2 = description2+' Attention: The hole file will be loaded into memory, because the regex will be precompiled (better performance).'
        option2 = option('regexlist', self._regexlistPath , description2, 'string', help=help2)
        optionsList.add(option2)
        
        description3 = 'Findings will be marked as vulnerabilites instead of information.'
        help3 = description3
        option3 = option('isVulnerability', self._isVulnerability , description3, 'boolean', help=help3)
        optionsList.add(option3)
        
        return optionsList
        
    def end(self):
        '''
        This method is called when the plugin wont be used anymore.
        This plugin has nothing to clean up, so it does nothing.
        '''
        pass
            
    def getPluginDeps( self ):
        '''
        @return: A list with the names of the plugins that should be runned before the
        current one. This plugin has no dependencies, so the list is empty.
        '''
        return []
    
    def getLongDesc( self ):
        '''
        @return: A DETAILED description of the plugin functions and features.
        '''
        return '''
        This plugin greps every response for a user defined regex.
        You can specify a single regex or an entire file of regexes (each line one regex).
        If you enable isVulnerability, the found strings will be marked as vulnerabilities
        and not only as information.
        '''
