# Licence: public domain
# Original idea from http://projektantos.wordpress.com/\
# 2009/12/20/tải-nhạc-hot-trong-thang-của-zing-bản-viết-bằng-python/
# Destination directory (may start with "~"); the folder that receives the
# downloaded files is placed inside it.
DEST_DIR = "~/Music/Zing"

# Page whose content is scanned for download links.
HOMEPAGE = "http://mp3.zing.vn/mp3/nghe-album/album-hot/nhac-viet.html"

# Regex with two groups: the quoted download URL and the ".mp3" file name
# taken from its "filename=" query parameter.
DOWNLOAD_PATTERN = r'"(http://dl\.mp3.*\?filename=([^/]*\.mp3))"'
import logging
from calendar import timegm
from datetime import date
from os import mkdir, utime
from os.path import expanduser, isdir, isfile, join
from re import findall
from urllib import urlopen, urlretrieve
# Configure the root logger: show every record from DEBUG upwards, each
# prefixed with its timestamp and level name.
logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(levelname)s %(message)s")
logging.info('Starting.')
# Absolute path to the destination directory: DEST_DIR with "~" expanded,
# plus a sub-folder named after the current month number (zero-padded).
dir = join(expanduser(DEST_DIR), "Thang%02d" % date.today().month)

# NOTE(review): the statements guarding the error message below were lost in
# this copy of the script. They are restored as "create the folder when it is
# missing, abort when that fails" -- confirm against the original.
if not isdir(dir):
    try:
        # mkdir() does not create missing parents, so DEST_DIR must exist.
        mkdir(dir)
    except OSError:
        logging.error("Can't create directory '%s'.", dir)
        raise SystemExit(1)
# Get the page content.
# NOTE(review): the urlopen() call and the try/except around it were lost in
# this copy of the script; they are restored here -- confirm against the
# original. Without the page there is nothing to download, so a failed read
# aborts the run.
try:
    f = urlopen(HOMEPAGE)
    try:
        page_content = f.read()
    finally:
        # Always release the connection, even when read() fails.
        f.close()
except IOError:
    logging.error("Can't read page '%s'.", HOMEPAGE)
    raise SystemExit(1)
# Find all file URLs and download them.
# NOTE(review): the counter initialisation and the if/else/try/except lines
# were lost in this copy of the script; the control flow below is restored
# from the surviving statements -- confirm against the original.
count = 0
for link, filename in findall(DOWNLOAD_PATTERN, page_content):
    filepath = join(dir, filename)
    if isfile(filepath):
        logging.info("'%s' already exists, skipped.", filename)
        continue

    logging.info("Downloading '%s'...", filename)
    try:
        # urlretrieve() returns (local_path, headers); only headers are used.
        info = urlretrieve(link, filepath)[1]
        last_modified = info.getdate('Last-Modified')
        # The header may be absent or unparsable; only then keep the
        # download time as the file's timestamp.
        if last_modified:
            # Treat the parsed time tuple as UTC and stamp the local file
            # with it (both access and modification time).
            last_modified = timegm(last_modified)
            utime(filepath, (last_modified, last_modified))
        count += 1
    except (IOError, OSError):
        # NOTE(review): a failed download can leave a partial file behind,
        # which a later run would skip as "already exists".
        logging.info("ERROR!")

# Summary line, singular or plural depending on the number of new files.
if count == 1:
    logging.info("%d file has been downloaded.", count)
else:
    logging.info("%d files have been downloaded to '%s'.", count, dir)