I am trying to strip useless HTML from an HTML string returned from a
user control. Basically, extra <p> tags, spaces, etc. need to be
removed.

e.g.
Calling the method with
    vbTab & vbCrLf & " <p><p>this is a test</p> <p>It really is</p> </p> " & vbTab & vbCrLf
should return "<p>this is a test</p> <p>It really is</p>"

Here is the code I have so far but it breaks a few valid HTML strings
and strips too many tags. Has anybody managed to successfully do this?

''' <summary>
''' Click handler for btnGo2: loads a sample HTML string into txtHTMLString,
''' then replaces it with the result of StripExtraneousHTML.
''' </summary>
''' <param name="sender">The control that raised the event (unused).</param>
''' <param name="e">Event data (unused).</param>
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btnGo2.Click
    ' Sample input: surrounding whitespace plus a redundant outer <p>...</p>
    ' wrapper around two real paragraphs. The literal is kept on one line so
    ' the "</p>" tags are not split in the middle.
    txtHTMLString.Text = vbTab & vbCrLf & " <p><p>this is a test</p> <p>It really is</p> </p> " & vbTab & vbCrLf

    txtHTMLString.Text = StripExtraneousHTML(txtHTMLString.Text)
End Sub

''' <summary>
''' Removes redundant markup from the edges of an HTML fragment: surrounding
''' whitespace, trailing "&nbsp;", "&lt;br&gt;" and "&lt;br /&gt;", and a
''' "&lt;p&gt;...&lt;/p&gt;" pair that encloses the whole fragment. Finally
''' removes every tab character from the result.
''' </summary>
''' <param name="s">The HTML fragment to clean; may be Nothing.</param>
''' <returns>
''' The cleaned fragment, or Nothing when <paramref name="s"/> is Nothing.
''' </returns>
''' <remarks>
''' Tag matching is exact and case-sensitive: only the literal forms
''' "&lt;p&gt;", "&lt;/p&gt;", "&lt;br&gt;" and "&lt;br /&gt;" are recognised
''' (no attributes, no upper-case tags).
''' </remarks>
Public Function StripExtraneousHTML(ByVal s As String) As String
    If s Is Nothing Then Return Nothing

    ' Loop until a full pass removes nothing. The flag is set only when the
    ' string actually shrinks, so the loop is guaranteed to terminate.
    Dim changed As Boolean = True

    While changed
        changed = False
        s = s.Trim()

        ' NOTE(review): the posted code compared the 6-character suffix with
        ' " ", which can never match; "&nbsp;" is presumably what was meant
        ' before the entity was decoded by the mail archive - confirm.
        If s.EndsWith("&nbsp;", System.StringComparison.Ordinal) OrElse s.EndsWith("<br />", System.StringComparison.Ordinal) Then
            s = s.Substring(0, s.Length - 6)
            changed = True
        ElseIf s.EndsWith("<br>", System.StringComparison.Ordinal) Then
            s = s.Substring(0, s.Length - 4)
            changed = True
        ElseIf IsWrappedInParagraph(s) Then
            ' Drop the leading "<p>" (3 chars) and the trailing "</p>" (4 chars).
            s = s.Substring(3, s.Length - 7)
            changed = True
        End If
    End While

    ' Tabs are removed everywhere in the result, not only at the edges.
    Return s.Replace(vbTab, "")
End Function

''' <summary>
''' Reports whether the "&lt;p&gt;" at the very start of <paramref name="s"/>
''' is closed by the "&lt;/p&gt;" at the very end, i.e. whether one paragraph
''' element encloses the entire string.
''' </summary>
''' <param name="s">A non-Nothing string, expected to be already trimmed.</param>
''' <returns>
''' True when the first "&lt;p&gt;" and the last "&lt;/p&gt;" form a matching
''' pair; False for sibling paragraphs such as "&lt;p&gt;a&lt;/p&gt;&lt;p&gt;b&lt;/p&gt;"
''' or when the string does not start and end with those tags.
''' </returns>
Private Function IsWrappedInParagraph(ByVal s As String) As Boolean
    ' "<p>" + "</p>" is the shortest possible wrapped string.
    If s.Length < 7 Then Return False

    Dim lastClose As Integer = s.Length - 4

    If String.CompareOrdinal(s, 0, "<p>", 0, 3) <> 0 Then Return False
    If String.CompareOrdinal(s, lastClose, "</p>", 0, 4) <> 0 Then Return False

    ' Walk forward tracking nesting depth. The opening tag at index 0 is the
    ' wrapper only if depth first returns to zero at the final "</p>".
    Dim depth As Integer = 0
    Dim i As Integer = 0

    While i <= lastClose
        If String.CompareOrdinal(s, i, "<p>", 0, 3) = 0 Then
            depth += 1
            i += 3
        ElseIf String.CompareOrdinal(s, i, "</p>", 0, 4) = 0 Then
            depth -= 1
            If depth = 0 Then Return i = lastClose
            i += 4
        Else
            i += 1
        End If
    End While

    Return False
End Function
n

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"DotNetDevelopment, VB.NET, C# .NET, ADO.NET, ASP.NET, XML, XML Web 
Services,.NET Remoting" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at
http://cm.megasolutions.net/forums/default.aspx
 <p><a href="http://feeds.feedburner.com/DotNetDevelopment"><img 
src="http://feeds.feedburner.com/~fc/DotNetDevelopment?bg=99CCFF&amp;fg=444444&amp;anim=1"
 height="26" width="88" style="border:0" alt="" /></a></p>
-~----------~----~----~----~------~----~------~--~---

Reply via email to