Download speeds on Linux have long been miserable: ed2k and torrent links get no speed at all, which is infuriating. Since buying Xunlei's VIP service, I can fetch anything I hand off to Xunlei directly over HTTP from Xunlei's own servers; and if someone else has already downloaded the same resource, you don't have to download it again at all, since Xunlei immediately marks the task as complete. (written by #roowe)
As for the rest of its features, anyone who has used it already knows them, so I won't go into detail. If you run Xunlei VIP on Windows with the official client, this script is pointless (the client is friendlier ^_^, though it also eats plenty of resources, heh). So the premise here is that your OS is Linux and you use the Xunlei Lixian (offline download) web interface.
Downloading Lixian files with Firefox has several problems: Chinese filenames come out garbled and have to be renamed by hand (cursing the encoding the whole time), there is no resume support (my downloads died a few times and I had no choice but to start over T_T), and Xunlei responds painfully slowly when you click download, taking ages before the save dialog even appears.
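These problems are exactly what delegating to a command-line downloader solves: wget can be given the properly decoded filename explicitly and can resume a partial download. A minimal sketch of the idea, assuming a cookies.txt already exported from the browser; the filename and URL below are placeholders, not real Lixian values. The full script that follows builds on this:

import subprocess

# -c resumes an interrupted download; -O writes to an explicit filename,
# sidestepping the garbled names Firefox produces. The URL is a placeholder.
subprocess.call(["wget", "--load-cookies", "cookies.txt", "-c",
                 "-O", u"正常的中文文件名.mkv".encode("utf-8"),
                 "http://gdl.lixian.example.com/placeholder"])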
For these reasons I studied PT酱's offline-download script and rewrote it to fit my own needs, as follows (the script can also be downloaded here):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#Time-stamp: <2011-10-25 21:36:28 Tuesday by roowe>
#File Name: thuner_xl_with_wget.py
#Author: bestluoliwe@gmail.com
#My Blog: www.iroowe.com

import re
import time
import os
import logging
import sys
from htmlentitydefs import entitydefs
import subprocess

LOG_FILE = "/tmp/thuner_with_wget.log"
log = None

def log_init(log_file, quiet=False):
    """Log to log_file, and also to the console unless quiet is set."""
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    hdlr = logging.FileHandler(log_file)
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    if not quiet:
        hdlr = logging.StreamHandler()
        hdlr.setFormatter(formatter)
        logger.addHandler(hdlr)
    return logger

def handle_entitydef(matchobj):
    """Replace an HTML entity like &amp; with its literal character."""
    key = matchobj.group(1)
    if key in entitydefs:
        return entitydefs[key]
    else:
        return matchobj.group(0)

def collect_urls(html, only_bturls=False):
    """Collect (name, url) pairs from the saved Lixian task page."""
    urls = []
    # BT task links carry name="bturls" on the anchor tag.
    for name, url in re.findall(r"<a.+?name=['\"]bturls['\"] title=['\"](.+?)['\"].+?href=['\"](http.+?)['\"]>", html):
        name = re.sub("&(.*?);", handle_entitydef, name)
        url = re.sub("&(.*?);", handle_entitydef, url)
        urls.append((name, url))
    if not only_bturls:
        # Ordinary tasks: the title sits on a durl<id> input, the matching
        # download link on a dl_url<id> input.
        for id, name in re.findall(r'<input id=[\'"]durl(\w+?)[\'"].+title=[\'"](.+?)[\'"].+', html):
            result = re.search(r'<input id=[\'"]dl_url%s[\'"].+value=[\'"](http.*?)[\'"]' % id, html)
            if result:
                name = re.sub("&(.*?);", handle_entitydef, name)
                url = result.group(1)
                url = re.sub("&(.*?);", handle_entitydef, url)
                urls.append((name, url))
    log.info("Collected %d links" % len(urls))
    return urls

def choose_download(urls):
    """Ask y/n for each link; return a name -> bool map."""
    download_list = {}
    for name, url in urls:
        while True:
            ans = raw_input("Download %s?[Y/n](default: Y) " % name)
            if len(ans) == 0:
                ans = True
                break
            elif ans.lower() == 'y':
                ans = True
                break
            elif ans.lower() == 'n':
                ans = False
                break
            else:
                sys.stdout.write("please enter y or n!\n")
                continue
        download_list[name] = ans
    return download_list

def thuner_xl_with_wget(urls, output_dir, cookies_file, quiet=False):
    """Download with wget."""
    download_list = choose_download(urls)
    for name, url in urls:
        if len(url) == 0:
            log.debug("Empty Link, Name: " + name)
            continue
        if not download_list[name]:
            continue
        cmd = ["wget", "--load-cookies", cookies_file, "-c", "-t", "5",
               "-O", os.path.join(output_dir, name), url]
        if quiet:
            cmd.insert(1, "-q")
        log.info("wget cmd: '%s'" % ' '.join(cmd))
        ret = subprocess.call(cmd)
        if ret != 0:
            log.debug("wget returned %d." % ret)
            # wget exit status 3 is a file I/O error, 8 a server error response.
            if ret in (3, 8):
                log.error("Give up '%s', may be already finished download, or something wrong with disk." % name)
            else:
                # Re-queue the failed link; appending works because the list
                # we are iterating over is still being consumed.
                urls.append((name, url))
                log.error("will retry for %s later." % name)
            continue
        else:
            log.info("Finished %s" % name)
        time.sleep(2)

def thuner_xl_with_aria2c(urls, output_dir, cookies_file, quiet=False):
    """Download with aria2c."""
    download_list = choose_download(urls)
    for name, url in urls:
        if len(url) == 0:
            log.debug("Empty Link, Name: " + name)
            continue
        if not download_list[name]:
            continue
        cmd = ["aria2c", "--load-cookies", cookies_file, "-d", output_dir,
               "-c", "-m", "5", "-s", "5", "-o", name, url]
        if quiet:
            cmd.insert(1, "-q")
        log.info("aria2c cmd: '%s'" % ' '.join(cmd))
        ret = subprocess.call(cmd)
        if ret != 0:
            log.debug("aria2c returned %d." % ret)
            # aria2c exit status 13 means the output file already existed.
            if ret == 13:
                log.error("Give up '%s', file already existed." % name)
            else:
                urls.append((name, url))
                log.error("the exit status number is %d, and then will retry for %s later."
                          % (ret, name))
            continue
        else:
            log.info("Finished %s" % name)
        time.sleep(2)

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Thuner li xian with wget',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-p', nargs='?', default="~/user_task.htm", help="load page file")
    parser.add_argument('-c', nargs='?', default="~/cookies.txt", help="load cookie file")
    parser.add_argument('-o', nargs='?', default="~/Downloads", help="output dir")
    parser.add_argument('-b', action='store_true', default=False, help="bt files only")
    parser.add_argument('-q', action="store_true", default=False, help="quiet, only log to file.")
    parser.add_argument('-a', action="store_true", default=False, help="download with aria2c")
    args = parser.parse_args()
    only_bturls, cookies_file, output_dir, page_file, quiet = args.b, args.c, args.o, args.p, args.q
    page_file = os.path.expanduser(page_file)
    cookies_file = os.path.realpath(os.path.expanduser(cookies_file))
    output_dir = os.path.expanduser(output_dir)
    log = log_init(LOG_FILE, quiet=quiet)
    if not os.path.exists(cookies_file):
        log.info("please export cookies file")
        sys.exit(0)
    if not os.path.isdir(output_dir):
        log.info("No such %s", output_dir)
        sys.exit(0)
    with open(page_file) as f:
        page_html = f.read()
    urls = collect_urls(page_html, only_bturls)
    if not args.a:
        thuner_xl_with_wget(urls, output_dir, cookies_file, quiet)
    else:
        thuner_xl_with_aria2c(urls, output_dir, cookies_file, quiet)
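To see what collect_urls() actually extracts, here is a hypothetical smoke test. The HTML fragment only imitates the durl/dl_url markup the regexes look for; it is not copied from the real Xunlei page, and the URL is made up:

import thuner_xl_with_wget as t

# collect_urls() uses the module-level logger, so initialize it first.
t.log = t.log_init("/tmp/demo.log")
sample = ('<input id="durl42" type="checkbox" title="example&amp;file.mkv" />\n'
          '<input id="dl_url42" type="hidden" value="http://gdl.example.com/file.mkv" />')
print t.collect_urls(sample)
# -> [('example&file.mkv', 'http://gdl.example.com/file.mkv')]

In normal use, per the argparse defaults above: export your cookies to ~/cookies.txt (e.g. with a browser cookie-export extension), save the Lixian task page as ~/user_task.htm, then run python thuner_xl_with_wget.py, optionally with -o for the output directory and -a to use aria2c instead of wget.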
Date: 2011-10-27 08:44
Source: osmsg
Author: osmsg
Original link