Petit test d'outil de téléchargement web.
author Progfou <jean-christophe.andre@auf.org>
Sun, 10 Jan 2010 13:55:15 +0000 (20:55 +0700)
committer Progfou <jean-christophe.andre@auf.org>
Sun, 10 Jan 2010 13:55:15 +0000 (20:55 +0700)
web/zing-offline.py [new file with mode: 0755]

diff --git a/web/zing-offline.py b/web/zing-offline.py
new file mode 100755 (executable)
index 0000000..464007e
--- /dev/null
+++ b/web/zing-offline.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# Licence: public domain
+# Original idea from http://projektantos.wordpress.com/\
+#   2009/12/20/tải-nhạc-hot-trong-thang-của-zing-bản-viết-bằng-python/
+
+# destination directory, where the monthly folder will be placed
+DEST_DIR = "~/Music/Zing"
+
+# download info: URL (group 1) and file name (group 2); [^"] stops at the quote
+HOMEPAGE = "http://mp3.zing.vn/mp3/nghe-album/album-hot/nhac-viet.html"
+DOWNLOAD_PATTERN = r'"(http://dl\.mp3[^"]*\?filename=([^/"]*\.mp3))"'
+
+import logging
+import sys
+from calendar import timegm
+from datetime import date
+from os import makedirs, mkdir, utime
+from os.path import expanduser, isdir, isfile, join
+from re import findall
+from urllib import urlopen, urlretrieve
+
+logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s",
+    level=logging.DEBUG)  # DEBUG and above, each line with time and level
+logging.info('Starting.')
+
+# absolute path to the destination directory ("ThangNN", NN = current month)
+dir = join(expanduser(DEST_DIR), "Thang%02d" % date.today().month)
+if not isdir(dir):
+    try:
+        makedirs(dir)  # unlike mkdir, also creates DEST_DIR on a first run
+    except OSError:  # a bare "except:" would also swallow Ctrl-C / SystemExit
+        logging.error("Can't create directory '%s'.", dir)
+        sys.exit(1)
+
+# get the page content (the HTML that is searched for download links)
+try:
+    f = urlopen(HOMEPAGE)
+    page_content = f.read()
+    f.close()
+except Exception:  # a bare "except:" would also swallow Ctrl-C / SystemExit
+    logging.error("Can't read page '%s'.", HOMEPAGE)
+    sys.exit(2)
+
+# find all file URLs and download the ones that are not already on disk
+count = 0
+for link, filename in findall(DOWNLOAD_PATTERN, page_content):
+    filepath = join(dir, filename)
+    if isfile(filepath):
+        logging.info("'%s' already exists, skipped.", filename)
+        continue
+    logging.info("Downloading '%s'...", filename)
+    try:
+        info = urlretrieve(link, filepath)[1]
+        count += 1  # counted only once the transfer itself has succeeded
+        last_modified = info.getdate('Last-Modified')
+        if last_modified:  # stamp the local copy with the server's time
+            last_modified = timegm(last_modified)
+            utime(filepath, (last_modified, last_modified))
+        logging.info("Done.")
+    except Exception:  # keep going with the next file, but say what failed
+        logging.exception("ERROR!")
+
+print "=" * 78  # separator rule around the final report
+if count > 1:
+    logging.info("%d files have been downloaded to '%s'.", count, dir)
+else:  # zero or one file: singular wording, destination not mentioned
+    logging.info("%d file has been downloaded.", count)
+print "=" * 78
+
+sys.exit(0)