Commit ffa2c3af1ca9e9af227af389342e3bc8646ccaff

Authored by filipecmedeiros
2 parents 59818de3 a7615474

Merge branch 'dev' of https://github.com/amadeusproject/amadeuslms into dev

Showing 2 changed files with 57 additions and 1 deletions   Show diff stats
links/image-crawler.py 0 → 100644
... ... @@ -0,0 +1,53 @@
  1 +from bs4 import BeautifulSoup
  2 +from urllib.request import urlopen
  3 +import urllib.request
  4 +
  5 +
def make_soup(url):
    """Fetch *url* and parse the response body with BeautifulSoup.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` tree built with the ``lxml`` parser, or the
        string ``"Use default image"`` when the page cannot be fetched.
    """
    # Imported locally so the handler does not rely on ``urllib.request``
    # happening to import ``urllib.error`` as a side effect.
    from urllib.error import URLError

    try:
        # The context manager closes the connection even if read() fails.
        with urlopen(url) as response:
            html = response.read()
    except (URLError, ValueError):
        # URLError is the parent of HTTPError and also covers unreachable
        # hosts; ValueError is what urlopen raises for a malformed URL.
        return "Use default image"
    return BeautifulSoup(html, "lxml")
  12 +
def _trim_to_image_extension(src):
    """Cut *src* right after its first 'jpg'/'png'/'jpeg' marker, or return None."""
    if not src:
        return None
    for marker in ("jpg", "png", "jpeg"):
        if marker in src:
            # Drops anything after the extension, such as a query string.
            return src[:src.index(marker) + len(marker)]
    return None


def get_images(url):
    """Download the JPEG/PNG images referenced by ``<img>`` tags on a page.

    Each image is saved in the current working directory under a name made
    of a running counter followed by the last path segment of its URL.
    Individual download failures are skipped (best effort).

    Args:
        url: Address of the page to scan; also used as the base for
            resolving relative image sources.

    Returns:
        ``"Use default image"`` when the page cannot be fetched or parsed,
        otherwise ``None`` once every candidate image has been attempted.
    """
    from urllib.parse import urljoin, urlsplit

    try:
        soup = make_soup(url)
    except Exception:
        return "Use default image"
    if soup is None or isinstance(soup, str):
        return "Use default image"

    counter = 0
    for img in soup.find_all('img'):
        candidate = _trim_to_image_extension(img.get('src'))
        if candidate is None:
            continue
        counter += 1

        # urljoin handles absolute, protocol-relative ('//host/x'),
        # root-relative ('/x') and path-relative ('x') sources, and does not
        # fail when the page URL has no path component.
        absolute = urljoin(url, candidate)
        # Only fetch over HTTP(S): a page-supplied 'file:' or 'data:' source
        # must not make urlretrieve read local or inline content.
        if urlsplit(absolute).scheme not in ("http", "https"):
            continue

        filename = absolute.split('/')[-1]
        try:
            urllib.request.urlretrieve(absolute, str(counter) + filename)
        except Exception:
            # Best effort: one broken image must not abort the rest.
            continue
... ...
requirements.txt
... ... @@ -21,4 +21,7 @@ pycpfcnpj==1.0.2
21 21 six==1.10.0
22 22 validators==0.11.0
23 23 Werkzeug==0.11.11
24   -whitenoise==3.2.2
25 24 \ No newline at end of file
  25 +whitenoise==3.2.2
  26 +beautifulsoup4==4.5.1
  27 +lxml==3.6.4
  28 +requests==2.11.1
... ...