> Graphical representation of links or pages that don't get linked to.
I tried to test the links (with 2 algorithms, code below) in a generated
webpage, but the results I get are very weird.
Probably one of you knows a better way?
cheers,
Stef
from BeautifulSoup import BeautifulSoup
from urllib import urlopen
from httplib import HTTP
from urlparse import urlparse
def Check_URL_1 ( URL ) :
    """Return True if *URL* answers a GET with HTTP status 200.

    Any network failure (unreachable host, bad URL, refused
    connection) counts as a dead link and yields False.
    """
    try:
        fh = urlopen ( URL )
        try:
            # urllib's response object exposes the HTTP status as .code
            return fh.code == 200
        finally:
            fh.close()  # original leaked the connection
    except IOError:
        # In Python 2, urllib wraps its network errors in IOError;
        # the original bare `except:` also hid genuine bugs (e.g. NameError).
        return False
def Check_URL_2 ( URL ) :
    """Return True if a HEAD request to *URL* gets HTTP status 200.

    Cheaper than Check_URL_1: only headers are transferred, not the
    body.  Any network error counts as a dead link (False) instead of
    crashing the whole scan -- consistent with Check_URL_1.
    """
    try:
        p = urlparse ( URL )
        h = HTTP ( p[1] )                # p[1] = netloc, e.g. '127.0.0.1:8000'
        h.putrequest ( 'HEAD', p[2] )    # p[2] = path component
        h.endheaders()
        # getreply() returns (status, reason, headers); compare status directly
        return h.getreply()[0] == 200
    except IOError:
        # socket.error is an IOError subclass in Python >= 2.6
        return False
def Verify_Links ( URL ) :
Parts = URL.split('/')
Site = '/'.join ( Parts [:3] )
Current = '/'.join ( Parts [:-1] )
fh = urlopen ( URL )
lines = fh.read ()
fh.close()
Soup = BeautifulSoup ( lines )
hrefs = lines = Soup.findAll ( 'a' )
for href in hrefs :
href = href [ 'href' ] #[:-1] ## <== remove "#" to generate all errors
if href.startswith ( '/' ) :
href = Site + href
elif href.startswith ('#' ) :
href = URL + href
elif href.startswith ( 'http' ) :
pass
else :
href = Current + href
try:
fh = urllib.urlopen ( href )
except :
pass
print Check_URL_1 ( href ), Check_URL_2 ( href ), href
# Entry point: scan all links on the locally served page.
URL = 'http://127.0.0.1:8000/welcome/default/index'
# Verify_Links prints its report and returns None; the original bound the
# result to a variable named `fh`, which was misleading dead weight.
Verify_Links ( URL )