Commit 383d190b83d0c2cb71c293bf06d5df3f5cfe9317

Authored by Gustavo Bernardo
1 parent 78344a3e

Removed old crawler [Issue:#126]

Showing 1 changed file with 0 additions and 53 deletions   Show diff stats
links/image-crawler.py
@@ -1,53 +0,0 @@
1 -from bs4 import BeautifulSoup  
2 -from urllib.request import urlopen  
3 -import urllib.request  
4 -  
5 -  
def make_soup(url):
    """Fetch *url* and return it parsed with BeautifulSoup (lxml).

    Returns the sentinel string "Use default image" when the page cannot
    be retrieved; callers test for a str return, so the sentinel is kept
    for backward compatibility.
    """
    try:
        html = urlopen(url).read()
        return BeautifulSoup(html, "lxml")
    except urllib.error.URLError:
        # URLError covers HTTPError (its subclass) plus DNS failures and
        # refused connections, which the old HTTPError-only handler let
        # propagate to the caller.
        return "Use default image"
12 -  
def get_images(url):
    """Download every jpg/png/jpeg image linked from the page at *url*.

    Files are saved in the working directory as "<n><original filename>",
    where n counts accepted images. Failed downloads are skipped
    (best-effort). Returns the sentinel string "Use default image" when
    the page could not be fetched/parsed; otherwise returns None.
    """
    try:
        soup = make_soup(url)
    except Exception:
        # make_soup may raise on malformed input; keep the sentinel
        # contract rather than crashing.
        return "Use default image"
    if soup is None or isinstance(soup, str):
        # make_soup signals fetch failure with a sentinel string.
        return "Use default image"

    count = 0
    for src in (img.get('src') for img in soup.findAll('img')):
        if not src:  # missing or empty src attribute
            continue

        # Keep only supported extensions; truncate anything after the
        # extension (query strings, fragments, cache-busters).
        for ext in ('jpeg', 'jpg', 'png'):
            pos = src.find(ext)
            if pos != -1:
                src = src[:pos + len(ext)]
                break
        else:
            continue  # unsupported image type

        # Resolve non-absolute URLs.
        if src.startswith('//'):
            # Protocol-relative: just add a scheme.
            src = 'http:' + src
        elif src.startswith('/') or not src.startswith('http'):
            # Site-relative: join with the page's scheme+host. The old
            # code prepended "http:" to "/path" too, producing the
            # broken URL "http:/path" and skipping this join.
            src = url[:url.index('/', 8)] + src

        count += 1
        filename = src.split('/')[-1]
        try:
            urllib.request.urlretrieve(src, str(count) + filename)
        except Exception:
            # Best-effort: a single failed download must not abort the
            # crawl (preserves original behavior).
            continue