list2form : gestion des erreurs et des petits fichiers (merci cgi.py…)
[progfou.git] / web / zing-offline.py
CommitLineData
ef72b6c7
P
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Licence: public domain
# Original idea from http://projektantos.wordpress.com/\
# 2009/12/20/tải-nhạc-hot-trong-thang-của-zing-bản-viết-bằng-python/
"""Download the MP3 files linked from the Zing hot-album page.

The page at HOMEPAGE is fetched, every link matching DOWNLOAD_PATTERN is
extracted, and each file that is not already present is saved into
``DEST_DIR/ThangMM`` (MM being the current month number).

Exit status: 0 on completion, 1 when the destination directory cannot be
created, 2 when the page cannot be read.
"""

import sys
import logging
from datetime import date
from email.utils import parsedate
from os import makedirs, mkdir, remove, rename, utime
from os.path import expanduser, join, isdir, isfile
from re import findall
from calendar import timegm

try:  # Python 2
    from urllib import urlopen, urlretrieve
except ImportError:  # Python 3
    from urllib.request import urlopen, urlretrieve

# destination directory, where folder will be placed
DEST_DIR = "~/Music/Zing"

# download information
HOMEPAGE = "http://mp3.zing.vn/mp3/nghe-album/album-hot/nhac-viet.html"
# [^"] (rather than a greedy .*) keeps each match inside one quoted URL, so
# several links sitting on the same HTML line are found individually.
DOWNLOAD_PATTERN = r'"(http://dl\.mp3[^"]*\?filename=([^/"]*\.mp3))"'


def fetch_page(url):
    """Return the raw content of *url*, always closing the connection."""
    response = urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def find_downloads(page_content):
    """Return the ``(link, filename)`` pairs of all download links found.

    *page_content* may be text or raw bytes; bytes are decoded as UTF-8
    (undecodable bytes are replaced) before matching.
    """
    if isinstance(page_content, bytes) and not isinstance(page_content, str):
        # Python 3: urlopen().read() yields bytes but the pattern is text.
        # On Python 2 bytes *is* str, so the content is matched unchanged.
        page_content = page_content.decode("utf-8", "replace")
    return findall(DOWNLOAD_PATTERN, page_content)


def download_file(link, filepath):
    """Download *link* to *filepath* and copy the server's Last-Modified time.

    The data is first written to ``filepath + ".part"`` and renamed only once
    the transfer has finished, so an interrupted download never leaves a
    truncated file that a later run would mistake for a complete one.

    Raises whatever ``urlretrieve`` or ``rename`` raise; a failure to set the
    timestamp is only logged as a warning.
    """
    partial = filepath + ".part"
    try:
        headers = urlretrieve(link, partial)[1]
        rename(partial, filepath)
    finally:
        # Still present only if the transfer or the rename failed.
        if isfile(partial):
            remove(partial)

    raw_date = headers.get("Last-Modified")
    last_modified = parsedate(raw_date) if raw_date else None
    if last_modified:
        try:
            # HTTP dates are expressed in GMT, hence timegm().
            stamp = timegm(last_modified)
            utime(filepath, (stamp, stamp))
        except (OSError, ValueError, OverflowError) as err:
            logging.warning("Can't set modification time of '%s': %s",
                            filepath, err)


def main():
    """Run the whole download session and return the process exit status."""
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s")
    logging.info('Starting.')

    # absolute path to destination directory
    dest_dir = join(expanduser(DEST_DIR), "Thang%02d" % date.today().month)
    if not isdir(dest_dir):
        try:
            # makedirs: DEST_DIR itself may not exist yet on a first run
            makedirs(dest_dir)
        except OSError as err:
            logging.error("Can't create directory '%s'.", dest_dir)
            logging.debug("Cause: %s", err)
            return 1

    # get the page content
    try:
        page_content = fetch_page(HOMEPAGE)
    except Exception as err:
        # Exception (not a bare except) lets Ctrl-C / SystemExit through.
        logging.error("Can't read page '%s'.", HOMEPAGE)
        logging.debug("Cause: %s", err)
        return 2

    # find all file URLs and download them
    count = 0
    for link, filename in find_downloads(page_content):
        filepath = join(dest_dir, filename)
        if isfile(filepath):
            logging.info("'%s' already exists, skipped.", filename)
            continue
        logging.info("Downloading '%s'...", filename)
        try:
            download_file(link, filepath)
        except Exception:
            # Best effort: report this file and carry on with the next one.
            logging.exception("ERROR!")
            continue
        logging.info("Done.")
        # Only files that were actually retrieved are counted.
        count += 1

    print("=" * 78)
    if count <= 1:
        logging.info("%d file has been downloaded.", count)
    else:
        logging.info("%d files have been downloaded to '%s'.",
                     count, dest_dir)
    print("=" * 78)
    return 0


if __name__ == "__main__":
    sys.exit(main())