I want to download zip files from a website. My script first lists all the URL links into a text file, then fetches each URL and tries to download the zip files.
but i am getting error as below: Running script.. https://sagamusix.dehttps:// sagamusix.de/other/Saga%20Musix%20-%20Colors%20of%20Synth1%20v1.0.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/bass.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/bass_drums.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/drums.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/fx.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/pads_strings.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/powerchords.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/synths.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/tr-808.zip /n https://sagamusix.dehttps://sagamusix.de/sample_collection/tr-909.zip /n Saga%20Musix%20-%20Colors%20of%20Synth1%20v1.0.zip Trying to reach https://sagamusix.dehttps:// sagamusix.de/other/Saga%20Musix%20-%20Colors%20of%20Synth1%20v1.0.zip We failed to reach a server.https://sagamusix.dehttps:// sagamusix.de/other/Saga%20Musix%20-%20Colors%20of%20Synth1%20v1.0.zip Reason: [Errno 11001] getaddrinfo failed bass.zip please help me to fix so that i acn download all the zip files code: import urllib2 from urllib2 import Request, urlopen, URLError #import urllib import os from bs4 import BeautifulSoup # import socket # socket.getaddrinfo('localhost', 8080) #Create a new directory to put the files into #Get the current working directory and create a new directory in it named test cwd = os.getcwd() newdir = cwd +"\\test" print "The current Working directory is " + cwd os.mkdir( newdir); print "Created new directory " + newdir newfile = open('zipfiles.txt','w') print newfile print "Running script.. " #Set variable for page to be open and url to be concatenated url = "https://sagamusix.de" page = urllib2.urlopen('https://sagamusix.de/en/samples/').read() #File extension to be looked for. 
extension = ".zip" #Use BeautifulSoup to clean up the page soup = BeautifulSoup(page, "html5lib") soup.prettify() #Find all the links on the page that end in .zip for anchor in soup.findAll('a', href=True): links = url + anchor['href'] if links.endswith(extension): newfile.write(links + '\n') newfile.close() #Read what is saved in zipfiles.txt and output it to the user #This is done to create presistent data newfile = open('zipfiles.txt', 'r') for line in newfile: print line + '/n' newfile.close() #Read through the lines in the text file and download the zip files. #Handle exceptions and print exceptions to the console with open('zipfiles.txt', 'r') as url: for line in url: if line.find('/'): print line.rsplit('/', 1)[1] try: ziplink = line #Removes the first 48 characters of the url to get the name of the file zipfile = line[24:] #Removes the last 4 characters to remove the .zip zipfile2 = zipfile[:3] print "Trying to reach " + ziplink response = urllib2.urlopen(ziplink) except URLError as e: print 'We failed to reach a server.'+ziplink if hasattr(e, 'reason'): print 'Reason: ', e.reason continue elif hasattr(e, 'code'): print 'The server couldnt fulfill the request.' print 'Error code: ', e.code continue else: zipcontent = response.read() completeName = os.path.join(newdir, zipfile2+ ".zip") with open (completeName, 'w') as f: print "downloading.. " + zipfile f.write(zipcontent) f.close() print "Script completed" _______________________________________________ Tutor maillist - [email protected] To unsubscribe or change subscription options: https://mail.python.org/mailman/listinfo/tutor
