list2form : gestion des erreurs et des petits fichiers (merci cgi.py…)
[progfou.git] / web / zing-offline.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Licence: public domain
# Original idea from http://projektantos.wordpress.com/\
# 2009/12/20/tải-nhạc-hot-trong-thang-của-zing-bản-viết-bằng-python/
"""Download the MP3 files linked from the Zing "hot albums" page.

The script fetches HOMEPAGE, extracts every link matching DOWNLOAD_PATTERN
and saves the files into DEST_DIR/ThangMM, where MM is the current month.
Files that already exist in that directory are skipped.  When the server
sends a Last-Modified header, it is applied to the downloaded file.

Exit status: 0 on completion, 1 if the destination directory cannot be
created, 2 if the page cannot be read.
"""

import sys
import logging
from datetime import date
from os import makedirs, mkdir, remove, rename, utime
from os.path import expanduser, join, isdir, isfile
from urllib import urlopen, urlretrieve
from re import findall
from calendar import timegm

# destination directory, where folder will be placed
DEST_DIR = "~/Music/Zing"

# download information
HOMEPAGE = "http://mp3.zing.vn/mp3/nghe-album/album-hot/nhac-viet.html"
# Group 1 is the download URL, group 2 the file name.  Both character
# classes exclude '"' so that a match can never run past the closing quote
# of one link into another link on the same line.
DOWNLOAD_PATTERN = r'"(http://dl\.mp3[^"]*\?filename=([^/"]*\.mp3))"'

# suffix given to a file while its download is still in progress
PARTIAL_SUFFIX = ".part"


def _download(link, filepath):
    """Download *link* to *filepath*, keeping the server's modification time.

    The data is first written next to *filepath* with PARTIAL_SUFFIX appended
    and renamed only once the transfer has finished.  A failed or interrupted
    download therefore never leaves a truncated file under the final name,
    which a later run would mistake for an already downloaded file.

    Any exception raised by the transfer, utime() or rename() propagates to
    the caller after the partial file has been removed.
    """
    partial = filepath + PARTIAL_SUFFIX
    completed = False
    try:
        info = urlretrieve(link, partial)[1]
        last_modified = info.getdate('Last-Modified')
        if last_modified:
            # getdate() returns a time tuple; utime() wants epoch seconds
            last_modified = timegm(last_modified)
            utime(partial, (last_modified, last_modified))
        rename(partial, filepath)
        completed = True
    finally:
        # runs on Ctrl-C as well, so no partial file is left behind
        if not completed and isfile(partial):
            remove(partial)


logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(levelname)s %(message)s")
logging.info('Starting.')

# absolute path to destination directory
dest_dir = join(expanduser(DEST_DIR), "Thang%02d" % date.today().month)
if not isdir(dest_dir):
    try:
        # makedirs() also creates DEST_DIR itself when it does not exist yet
        makedirs(dest_dir)
    except OSError:
        logging.error("Can't create directory '%s'.", dest_dir)
        sys.exit(1)

# get the page content
try:
    f = urlopen(HOMEPAGE)
    try:
        page_content = f.read()
    finally:
        f.close()
except Exception:
    # Exception (not a bare except) lets Ctrl-C and SystemExit through
    logging.error("Can't read page '%s'.", HOMEPAGE)
    sys.exit(2)

# find all file URLs and download them
count = 0  # number of files successfully downloaded during this run
for link, filename in findall(DOWNLOAD_PATTERN, page_content):
    filepath = join(dest_dir, filename)
    if isfile(filepath):
        logging.info("'%s' already exists, skipped.", filename)
        continue
    logging.info("Downloading '%s'...", filename)
    try:
        _download(link, filepath)
    except Exception as err:
        # best effort: report the failure and carry on with the next file
        logging.error("Can't download '%s': %s", filename, err)
        continue
    logging.info("Done.")
    count += 1

# parenthesised form prints the same line under Python 2 and Python 3
print("=" * 78)
if count <= 1:
    logging.info("%d file has been downloaded.", count)
else:
    logging.info("%d files have been downloaded to '%s'.", count, dest_dir)
print("=" * 78)

sys.exit(0)