Removed old crawler [Issue:#126]

Gustavo Bernardo
1 parent 78344a3e
Showing 1 changed file with 0 additions and 53 deletions
links/image-crawler.py
@@ -1,53 +0,0 @@
-from bs4 import BeautifulSoup
-from urllib.request import urlopen
-import urllib.request
-
-
-def make_soup(url):
-    try:
-        html = urlopen(url).read()
-        return BeautifulSoup(html,"lxml")
-    except urllib.error.HTTPError as e:
-        return "Use default image"
-
-def get_images(url):
-    try:
-        soup = make_soup(url)
-    except:
-        return("Use default image")
-    if soup == None or type(soup) == str:
-        return "Use default image"
-    images = [img for img in soup.findAll('img')]
-    image_links = [each.get('src') for each in images]
-    contador = 0
-    for each in image_links:
-        booleano = False
-        if each != "":
-            if each == None:
-                continue
-            if 'jpg' in each:
-                booleano = True
-                pos = each.index("jpg")
-                each = each[0:pos+3]
-            elif 'png' in each:
-                booleano = True
-                pos = each.index("png")
-                each = each[0:pos+3]
-            elif 'jpeg' in each:
-                booleano = True
-                pos = each.index('jpeg')
-                each = each[0:pos+4]
-            if not booleano:
-                continue
-
-            if each[0] + each[1] == '//' or each[0] == '/':
-                each = 'http:'+each
-            if each[0:4] != 'http' and each[0:5] != 'https':
-                each = url[0:url.index('/',8)] + each
-            contador += 1
-            caminho = ""
-            filename=each.split('/')[-1]
-            try:
-                urllib.request.urlretrieve(each,"%s"%(caminho)+str(contador)+filename)
-            except Exception:
-                continue
...	...	@@ -1,53 +0,0 @@
1		-from bs4 import BeautifulSoup
2		-from urllib.request import urlopen
3		-import urllib.request
4		-
5		-
6		-def make_soup(url):
7		- try:
8		- html = urlopen(url).read()
9		- return BeautifulSoup(html,"lxml")
10		- except urllib.error.HTTPError as e:
11		- return "Use default image"
12		-
13		-def get_images(url):
14		- try:
15		- soup = make_soup(url)
16		- except:
17		- return("Use default image")
18		- if soup == None or type(soup) == str:
19		- return "Use default image"
20		- images = [img for img in soup.findAll('img')]
21		- image_links = [each.get('src') for each in images]
22		- contador = 0
23		- for each in image_links:
24		- booleano = False
25		- if each != "":
26		- if each == None:
27		- continue
28		- if 'jpg' in each:
29		- booleano = True
30		- pos = each.index("jpg")
31		- each = each[0:pos+3]
32		- elif 'png' in each:
33		- booleano = True
34		- pos = each.index("png")
35		- each = each[0:pos+3]
36		- elif 'jpeg' in each:
37		- booleano = True
38		- pos = each.index('jpeg')
39		- each = each[0:pos+4]
40		- if not booleano:
41		- continue
42		-
43		- if each[0] + each[1] == '//' or each[0] == '/':
44		- each = 'http:'+each
45		- if each[0:4] != 'http' and each[0:5] != 'https':
46		- each = url[0:url.index('/',8)] + each
47		- contador += 1
48		- caminho = ""
49		- filename=each.split('/')[-1]
50		- try:
51		- urllib.request.urlretrieve(each,"%s"%(caminho)+str(contador)+filename)
52		- except Exception:
53		- continue