Andy Wokula schrieb:
> A.Politz schrieb:
>> todo.txt :
>> 9   HTML indenting can be slow.  Caused by using searchpair().  Can
>>     search() be used instead?
>>
>> (Is this up to date ? If not you can skip the rest ;-) )
>>
>> In fact, HTML indenting can be dramatically slow. For example, try to
>> indent this page: http://www.weather.com/weather/local/USPA0372 . It
>> takes 10 minutes on my machine, for ~4000 lines.  As the todo item
>> states, this is caused by the 4 searchpair() calls in
>> indent/html.vim, of which 3 are called for nearly every line. In the
>> (very likely) worst case the whole buffer is searched multiple times,
>> and the cost grows roughly quadratically, because every additional
>> line both has to be searched and triggers a new search.  It looks like
>> using searchpair() in this way in an indent file is a very bad idea.
>>
>> I attached a file, which is not a patch, but contains 2 different
>> indent functions (the old one and a new one) and a mapping on <f3> to
>> toggle between them. Instead of using searchpair(), the 2nd function
>> pre-scans the buffer for <script> and <pre> tags and stores the lines
>> in a dict, which is later used to determine the indent for these tags.
>> It also indents these tags homogeneously.
>>
>> I also thought about whether searchpair() could benefit from some
>> memory (a cache). This would obviously be very volatile: a single
>> change of the buffer and it could not be trusted anymore.  Well, I
>> thought I'd make a quick implementation in Vim script, and if it were
>> at least as quick as without it, it could be worth the trouble. But it
>> didn't work out, though I got a cache hit 1/3 of the time (using
>> htmlindent).
>>
>> -ap
> 
> Another idea:
> 
> The indent function is called in succession for every line to be
> indented.  This fact should be considered --
> 
> After the indent for a line is calculated, a state should be remembered
> (line number, line is within a <pre> block yes/no, etc.; also for
> safety: computed indent, the first word of the line, etc.).
> 
> A call to the indent function should first check if it is a
> "continuation call":
>     IF there is a state
>         and if remembered_lnum+1 == v:lnum
>         and (maybe for safety)
>             if remembered_indent == indent(remembered_lnum)
>         and              ...
> 
>     THEN start calculation at the remembered state, else start from
>         scratch (as it works now).
> 
> Such a mechanism could make sure that searchpair() is only called when
> really needed.
> 
> Problem: I haven't tried it yet ...

Last problem solved, here is a try (see attachment).

I found using search() is possible, because <pre>, <script> and <style>
don't nest.  There is only a problem with comments ...

Using search() gives the biggest speedup; keeping a state and using some
Vim7 features also help a lot.

Indenting the weather page now takes around 2 sec on a rather old machine.

-- 
Andy

--~--~---------~--~----~------------~-------~--~----~
You received this message from the "vim_dev" maillist.
For more information, visit http://www.vim.org/maillist.php
-~----------~----~----~----~------~----~------~--~---

" Vim indent script
" Comments:  "{{{
" File:         html.vim
" Author:       Andy Wokula, anwoku#yahoo*de (#* -> @.)
" Last Change:  2007 Nov 17
" Version:      0.1 (still experimental)
" Description:
" - Speedup: uses state and benefits of Vim7
" - more exact: checks all the tags in a line
" - uses search() instead of searchpair(), because the tags <pre>, <script>
"   and <style> ("blocktags") do not nest.
" - comments: ignore content
" - no extra indent after <html>, <head>, <body>, <tbody>
" - <script> content starts with zero indent
" - no syntax dependencies
" Based On:
" - the distributed script from J. Zellner (last change 2006 Jun 05)
" Example:
"   2900 non-blank lines of http://www.weather.com/weather/local/USPA0372
"   without state: 11.75 sec (already much faster than the old script)
"   with state: 2.27 sec (again 5 times faster) (machine 1.2 GHz Athlon)
" Problems:
" - natural state problem:
"       indent line N with "=="
"       change line N with ">>"     (no update of state)
"       indent line N+1 with "=="   (wrong indent)
"       indent line N+1 with "=="   (workaround to get correct indent)
" - quite bloated
" - attributes spanning over several lines
" - doesn't ignore a commented blocktag (unless comment is part of the
"   range)
" Hmm:
" ? use of the term "blocktag"
" ? call "<!--" and "-->" tags
 "}}}

" Init Folklore: "{{{
" Only set up once per buffer.
if exists('b:did_indent')
    finish
endif
let b:did_indent = 1

" Buffer-local indent options: indent is computed by HtmlIndent().
let &l:indentexpr = 'HtmlIndent(v:lnum)'
let &l:indentkeys = 'o,O,*<Return>,<>>,{,},!^F'

" Remember 'cpoptions' (restored at the end of the script) and drop the
" C flag for the rest of the script.
let s:cpo_save = &cpo
set cpo-=C
 "}}}

" Script Variables  "{{{
" s:endtags    closing delimiter per block type, indexed by (type - 2)
" s:newstate   state changes collected while indenting the current line
" s:countonly  1 while tags are only counted (state must not be changed)
" s:usestate   1: continue from the remembered state when possible
let [s:endtags, s:newstate, s:countonly, s:usestate] = [[], {}, 0, 1]
 "}}}
function! s:IndAdder(tag, ...) "{{{
    " Register a tag (and its closing counterpart) in g:html_indent_tags.
    " a:tag
    "   name of the tag that changes the indent
    " a:1 (optional, default 1)
    "   1: plain tag, adds one indent step for the next line
    "   2,3,4: block types <pre>, <script>, <style>
    "   5: comment <!-- -->
    "   The closing tag is stored with the negated value.
    " a:2 (optional)
    "   explicit closing delimiter; default is '/' . a:tag
    let l:step = get(a:000, 0, 1)
    let l:explicit = a:0 >= 2
    let l:closer = l:explicit ? a:2 : '/' . a:tag

    let g:html_indent_tags[a:tag] = l:step
    let g:html_indent_tags[l:closer] = -l:step

    if l:step < 2
        " plain tag: no entry in s:endtags needed
        return
    endif

    " Block types also record the literal end delimiter at index step-2;
    " grow the list first when it is too short.
    let l:slot = l:step - 2
    if len(s:endtags) <= l:slot
        call extend(s:endtags, range(2, l:step - len(s:endtags)))
    endif
    let s:endtags[l:slot] = l:explicit ? l:closer : '<' . l:closer . '>'
endfunction "}}}
" IndAdder {{{
" Table shared with the user: tag name -> indent value (see s:IndAdder).
if !exists("g:html_indent_tags")
    let g:html_indent_tags = {}
endif

" Plain tags: each one adds a single indent step for the following line.
for s:tagname in [
            \ 'a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big',
            \ 'blockquote', 'button', 'caption', 'center', 'cite', 'code',
            \ 'colgroup', 'del', 'dfn', 'dir', 'div', 'dl', 'em',
            \ 'fieldset', 'font', 'form', 'frameset',
            \ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            \ 'i', 'iframe', 'ins', 'kbd', 'label', 'legend', 'map', 'menu',
            \ 'noframes', 'noscript', 'object', 'ol', 'optgroup', 'q', 's',
            \ 'samp', 'select', 'small', 'span', 'strong', 'sub', 'sup',
            \ 'table', 'textarea', 'title', 'tt', 'u', 'ul', 'var',
            \ ]
    call s:IndAdder(s:tagname)
endfor
unlet s:tagname
"}}}
" Block Tags: contain alien content "{{{
" Block types 2, 3 and 4: <pre>, <script>, <style>
for [s:tagname, s:blocktype] in [['pre', 2], ['script', 3], ['style', 4]]
    call s:IndAdder(s:tagname, s:blocktype)
endfor
unlet s:tagname s:blocktype
" Exception: handle comment delimiters <!--...--> like block tags
call s:IndAdder("<!--", 5, '-->')
" if !exists('g:html_indent_strict')
"     call s:IndAdder('body')
"     call s:IndAdder('head')
"     call s:IndAdder('tbody')
" endif

" Table parts are registered unless g:html_indent_strict_table exists.
if !exists('g:html_indent_strict_table')
    for s:tagname in ['th', 'td', 'tr', 'tfoot', 'thead']
        call s:IndAdder(s:tagname)
    endfor
    unlet s:tagname
endif "}}}

func! s:CountITags(...) "{{{
    " Count the indent-changing tags of one line.
    "
    " Without argument: scan s:curline starting from s:newstate.block and
    " write the resulting block type (and, for <script>, the script type)
    " back to s:newstate.
    " With any argument: scan s:altline in count-only mode, starting outside
    " of a block; s:newstate is not touched.
    "
    " Results (both modes):
    "   s:curind    relative indent steps for the current line [unit &sw]
    "   s:nextrel   relative indent steps for the next line [unit &sw]
    "   s:block     block type in effect at the end of the scanned text
    "
    " Note: the mail-wrapped continuation lines of the posted script were
    " syntax errors; each call is now on a single line.

    " relative indent steps for current line [unit &sw]:
    let s:curind = 0
    " relative indent steps for next line [unit &sw]:
    let s:nextrel = 0

    " A tag name (with optional leading slash) right after "<", or a
    " comment delimiter.  substitute() is only used to call s:CheckTag()
    " for every match.
    let tagpat = '<\zs\/\=\a\+\>\|<!--\|-->'
    let tagsub = '\=s:CheckTag(submatch(0))'

    if a:0==0
        let s:block = s:newstate.block
        let tmpline = substitute(s:curline, tagpat, tagsub, 'g')
        if s:block == 3
            " s:Blocktag() marked the opening script tag in uppercase;
            " the text up to the next ">" holds its attributes
            let s:newstate.scripttype = s:GetScriptType(matchstr(tmpline, '\C.*<SCRIPT\>\zs[^>]*'))
        endif
        let s:newstate.block = s:block
    else
        let s:block = 0         " assume starting outside of a block
        let s:countonly = 1     " don't change state
        try
            call substitute(s:altline, tagpat, tagsub, 'g')
        finally
            " never leave count-only mode switched on
            let s:countonly = 0
        endtry
    endif
endfunc "}}}
function! s:CheckTag(itag) "{{{
    " Callback for every tag found by s:CountITags().
    " a:itag is "tag", "/tag", "<!--" or "-->".
    " Updates s:curind / s:nextrel (plain tags) or hands over to
    " s:Blocktag() (block tags).  The return value replaces the match.
    let l:kind = get(g:html_indent_tags, a:itag)

    if l:kind != -1 && l:kind != 1
        if l:kind != 0
            " block-tag (opening or closing)
            return s:Blocktag(a:itag, l:kind)
        endif
        " unknown tag: keep indent
        return "foo"
    endif

    " plain opening or closing tag
    if s:block != 0
        " tags within a block are ignored
        return "foo"
    endif

    if l:kind == 1
        " opening tag: indent the next line one step more
        let s:nextrel += 1
    elseif s:nextrel == 0
        " closing tag with nothing opened on this line: dedent this line
        let s:curind -= 1
    else
        " closing tag cancels an opening tag of the same line
        let s:nextrel -= 1
    endif
    return "foo"   " no matter
endfunction "}}}
func! s:Blocktag(blocktag, ind) "{{{
    " Handle an opening or closing block tag found by s:CheckTag().
    " a:blocktag  the tag text (currently unused)
    " a:ind       block type from g:html_indent_tags: positive for the
    "             opening delimiter, negative for the closing one
    " Returns the replacement text for the match: "SCRIPT" for an opening
    " script tag that starts a block (unless in count-only mode), "foo"
    " otherwise.
    if a:ind > 0
        " a block starts here
        if s:block != 0
            " already in a block (nesting) - ignore
            " especially ignore comments after other blocktags
            return "foo"
        endif
        let s:block = a:ind             " block type
        if s:countonly
            return "foo"
        endif
        " line of the opening tag; read by s:Alien3()
        let s:newstate.blocklnr = s:lnum
        " save overall indent for the endtag
        let s:newstate.blocktagind = b:indent.baseindent + (s:nextrel + s:curind) * &shiftwidth
        if a:ind == 3
            return "SCRIPT"    " all except this must be lowercase
            " line is to be checked again for the type attribute
        endif
    elseif s:block == -a:ind
        " Closing delimiter of the block we are in (opening and closing
        " block-tag on the same line).  A closing delimiter of a different
        " block type, e.g. "</pre>" within a comment, is block content and
        " must not end the block.
        let s:block = 0
    endif
    return "foo"
endfunc "}}}
function! s:GetScriptType(str) "{{{
    " a:str: attribute text of a script tag.
    " Returns "java" when a:str is empty or contains "java", else "".
    return (a:str == "" || a:str =~ "java") ? "java" : ""
endfunction "}}}

func! s:FreshState(lnum) "{{{
    " Look back in the file (lines 1 to a:lnum-1) to calc a state for line
    " a:lnum.  A state is to know ALL relevant details about the lines
    " 1..a:lnum-1, initial calculating (here!) can be slow, but updating is
    " fast (incremental).
    " Returns a Dictionary with these items:
    "   lnum            last indented line == prevnonblank(a:lnum - 1)
    "   block = 0       a:lnum located within special tag: 0:none, 2:<pre>,
    "                   3:<script>, 4:<style>, 5:<!--
    "   baseindent      use this indent for line a:lnum as a start - kind of
    "                   autoindent (if block==0)
    "   scripttype = '' type attribute of a script tag (if block==3)
    "   blocktagind     indent for current opening (get) and closing (set)
    "                   blocktag (if block!=0)
    "   blocklnr        lnum of starting blocktag (if block!=0)
    " Side effects: moves the cursor; sets s:altline and (through
    " s:CountITags()) s:curind, s:nextrel and s:block.
    let state = {}
    let state.lnum = prevnonblank(a:lnum - 1)
    let state.scripttype = ""
    let state.blocktagind = -1
    let state.block = 0
    let state.baseindent = 0
    let state.blocklnr = 0

    if state.lnum == 0
        " no non-blank line above a:lnum
        return state
    endif

    " Simplified Rules:
    " remember startline a:lnum-1
    " look back for <script, </script, <style, </style, <pre, </pre tags
    " remember stopline
    " if opening tag found,
    "   assume a:lnum within block
    " else
    "   look back in result range (stopline, startline) for comment
    "       \ delimiters (<!--, -->)
    "   if comment opener found,
    "       assume a:lnum within comment
    "   else
    "       assume usual html for a:lnum
    "       if a:lnum-1 has a closing comment
    "           look back to get indent of comment opener
    " FI

    " look back for blocktag; stopcol is the column of the tag name (or of
    " its leading slash), i.e. the column after the "<"
    call cursor(a:lnum, 1)
    let [stopline, stopcol] = searchpos('\c<\zs\/\=\%(pre\>\|script\>\|style\>\)', "bW")
    " fugly ... why isn't there searchstr()
    let tagline = tolower(getline(stopline))
    let blocktag = matchstr(tagline, '\/\=\%(pre\>\|script\>\|style\>\)', stopcol-1)
    if stopline > 0 && blocktag[0] != "/"
        " opening tag found, assume a:lnum within block
        let state.block = g:html_indent_tags[blocktag]
        if state.block == 3
            let state.scripttype = s:GetScriptType(matchstr(tagline, '\>[^>]*', stopcol))
        endif
        let state.blocklnr = stopline
        " check preceding tags in the line (text before the "<" of the
        " block tag; strpart() copes with a tag at the start of the line):
        let s:altline = strpart(tagline, 0, stopcol-2)
        call s:CountITags(1)
        let state.blocktagind = indent(stopline) + (s:curind + s:nextrel) * &shiftwidth
        return state
    endif

    " else look back for comment; with the "p" flag the third item is 2
    " when the first sub-pattern (the comment opener) matched
    call cursor(a:lnum, 1)
    let [comline, comcol, found] = searchpos('\(<!--\)\|-->', 'bpW', stopline)
    if found == 2
        " comment opener found, assume a:lnum within comment
        let state.block = 5
        let state.blocklnr = comline
        " check preceding tags in the line.  Do not use [: comcol-2] here:
        " for an opener in column 1 that is [: -1], the whole line.
        let s:altline = tolower(strpart(getline(comline), 0, comcol-1))
        call s:CountITags(1)
        let state.blocktagind = indent(comline) + (s:curind + s:nextrel) * &shiftwidth
        return state
    endif

    " else within usual html
    let s:altline = tolower(getline(state.lnum))
    " check a:lnum-1 for closing comment (we need indent from the opening line)
    let comcol = stridx(s:altline, '-->')
    if comcol >= 0
        call cursor(state.lnum, comcol+1)
        let [comline, comcol] = searchpos('<!--', 'bW')
        if comline > 0
            " tags preceding the comment opener in its line
            let s:altline = tolower(strpart(getline(comline), 0, comcol-1))
            call s:CountITags(1)
            let state.baseindent = indent(comline) + (s:nextrel + s:curind) * &shiftwidth
            return state
        endif
        " "-->" without an opener: treat the line like any other line
    endif

    " else no comments
    call s:CountITags(1)
    let state.baseindent = indent(state.lnum) + s:nextrel * &shiftwidth
    " line starts with tag
    let swtag = match(s:altline, '^\s*<') >= 0
    if !swtag
        let state.baseindent += s:curind * &shiftwidth
    endif
    " Problem: what if state.lnum is at the end of a block and the first
    " non-blank is not the block-tag, but block-content?
    return state
endfunc "}}}

" Indent for a line within a block ("alien" content); HtmlIndent() calls
" s:Alien{block-type}() and returns the result as the indent.

function! s:Alien2() "{{{
    " within <pre>
    return -1
endfunction "}}}
function! s:Alien3() "{{{
    " within <script>
    if prevnonblank(s:lnum-1) == b:indent.blocklnr
        " the first line after the opening <script> tag gets zero indent
        return 0
    endif
    " "java" covers javascript: use C indenting, else return -1
    return b:indent.scripttype == "java" ? cindent(s:lnum) : -1
endfunction "}}}
function! s:Alien4() "{{{
    " within <style>
    return -1
endfunction "}}}
function! s:Alien5() "{{{
    " within a comment <!-- -->
    return -1
endfunction "}}}

func! HtmlIndent(lnum) "{{{
    " 'indentexpr' function: return the indent for line a:lnum.
    "
    " Continues from the state in b:indent when that state was computed
    " for the previous non-blank line (and s:usestate is set), otherwise
    " the state is recomputed with s:FreshState().  The state for the next
    " call is collected in s:newstate and merged into b:indent at the end.
    "
    " Note: the mail-wrapped continuation lines of the posted script were
    " syntax errors; each expression is now on a single line.
    let s:lnum = a:lnum
    let s:curline = tolower(getline(s:lnum))

    let s:newstate = {}
    let s:newstate.lnum = s:lnum

    " is the first non-blank in the line the start of a tag?
    let swtag = match(s:curline, '^\s*<') >= 0

    if !s:usestate || !exists("b:indent") || prevnonblank(s:lnum-1) != b:indent.lnum
        " start over (know nothing); also when there is no state at all
        let b:indent = s:FreshState(a:lnum)
    endif
    " else: use state (continue from previous line)

    if b:indent.block != 0
        " within block
        " if not 0 then always >= 2 (esp. not negative)
        let endtag = s:endtags[b:indent.block-2]
        let blockend = stridx(s:curline, endtag)
        if blockend >= 0
            " block ends here
            let s:newstate.block = 0
            " calc indent for REST OF LINE (may start more blocks):
            let s:curline = strpart(s:curline, blockend+strlen(endtag))
            call s:CountITags()
            if swtag && b:indent.block != 5
                " line starts with a tag: align with the opening blocktag
                let ind = b:indent.blocktagind + s:curind * &shiftwidth
                let s:newstate.baseindent = ind + s:nextrel * &shiftwidth
            else
                " block content precedes the endtag (or it is a comment)
                let ind = s:Alien{b:indent.block}()
                let s:newstate.baseindent = b:indent.blocktagind + s:nextrel * &shiftwidth
            endif
        else
            " block continues
            " indent this line with alien method
            let ind = s:Alien{b:indent.block}()
        endif
    else
        " not within a block - within usual html
        let s:newstate.block = b:indent.block
        call s:CountITags()
        if swtag
            let ind = b:indent.baseindent + s:curind * &shiftwidth
            let s:newstate.baseindent = ind + s:nextrel * &shiftwidth
        else
            let ind = b:indent.baseindent
            let s:newstate.baseindent = ind + (s:curind + s:nextrel) * &shiftwidth
        endif
    endif

    " remember the state for the next line
    call extend(b:indent, s:newstate, "force")
    return ind
endfunc "}}}

" Initial state for the buffer: there is no line above line 1, so this is
" the state "outside of any block, base indent 0".
let b:indent = s:FreshState(1)

" Cpo, Modeline, Etc: {{{1

" Execute a command in the context of this script, e.g. to change a
" script-local variable:
command! -nargs=* IHtmlLocal <args>
" :IHtmlLocal let s:usestate = 0

" Restore the 'cpoptions' value saved at the top of the script.
let &cpo = s:cpo_save | unlet s:cpo_save

" vim:set fdm=marker ts=8:

Raspunde prin e-mail lui