免费下载百度文库收费资料
author:一佰互联 2019-03-26   click:160

简介:免费下载百度文库收费资料。使用方法:FreeForWenku TargetUrl Type。示例:FreeForWenku https://wenku.baidu.com/view/09f3e7c9a1c7aa00b52acb96.html?from=search ppt。PPT、PDF 会保存图片到文件夹;DOC、TXT 会保存 ...

免费下载百度文库收费资料。使用方法:FreeForWenku TargetUrl Type。示例:FreeForWenku https://wenku.baidu.com/view/09f3e7c9a1c7aa00b52acb96.html?from=search ppt。PPT、PDF 会保存图片到文件夹;DOC、TXT 会保存在文本文件中,最大可能地还原了格式。源码:
"""FreeForWenku: save a Baidu Wenku document as text or as page images.

Command line: ``FreeForWenku <url> <type>`` where ``type`` is one of
DOC, PPT, TXT or PDF (case-insensitive). DOC and TXT are written to
``<doc_id>.txt``; PPT and PDF pages are written as JPEG files inside a
folder named ``<doc_id>``.
"""
import argparse
import json
import os
import re
import sys

import requests

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

BANNER = """
###Athor:Lz1y
###Blog:www.Lz1y.cn
###TIPS:PDF|PPT只能下载图片
"""

ERROR_MESSAGE = "获取出错,可能URL错误\n使用格式name.exe url type\n请使用--help查看帮助"


def _fetch(url: str) -> requests.Response:
    """Return the response for a GET of *url*, raising on HTTP errors."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def _extract_doc_id(url: str) -> str:
    """Return the document id found between ``view/`` and ``.html`` in *url*.

    Raises:
        ValueError: if *url* does not contain that pattern.
    """
    match = re.search(r"view/(.+?)\.html", url)
    if match is None:
        raise ValueError("no document id found in URL: " + url)
    return match.group(1)


def _iter_c_values(node):
    """Yield, in document order, every string stored under a ``"c"`` key."""
    if isinstance(node, dict):
        for key, value in node.items():
            if key == "c" and isinstance(value, str):
                yield value
            else:
                yield from _iter_c_values(value)
    elif isinstance(node, list):
        for item in node:
            yield from _iter_c_values(item)


def _save_page_images(url: str, label: str) -> None:
    """Download every page image of the document at *url* into ``<doc_id>/``.

    *label* is the document kind shown in the final status message.

    Raises:
        ValueError: if the image listing contains no image URLs.
    """
    doc_id = _extract_doc_id(url)
    # Both PPT and PDF use the same listing endpoint with type=ppt.
    listing_url = (
        "https://wenku.baidu.com/browse/getbcsurl?doc_id="
        + doc_id
        + "&pn=1&rn=99999&type=ppt"
    )
    listing = _fetch(listing_url).text
    # The URLs come back with escaped slashes; drop the backslashes.
    image_urls = [
        raw.replace("\\", "")
        for raw in re.findall(r'\{"zoom":"(.*?)","page"', listing)
    ]
    if not image_urls:
        raise ValueError("no page images listed for document " + doc_id)

    os.makedirs(doc_id, exist_ok=True)
    for index, image_url in enumerate(image_urls):
        image = _fetch(image_url).content
        target = os.path.join(doc_id, "img" + str(index) + ".jpg")
        with open(target, "wb") as handle:
            handle.write(image)
    print(label + "图片保存在" + doc_id + "文件夹")


def DOC(url: str) -> None:
    """Save the text of a DOC-type document to ``<doc_id>.txt``.

    Text fragments are appended in page order; a line break is inserted
    whenever a fragment's ``y`` value differs from the previous one.

    Raises:
        ValueError: if no page-data URLs are found in the document page.
    """
    doc_id = _extract_doc_id(url)
    html = _fetch(url).text
    # Page-data URLs are embedded in the HTML and end at a closing quote
    # plus brace; the quote may appear literally or as the text \x22.
    page_urls = re.findall(r'(https.*?0.json.*?)(?:\\x22|")}', html)
    # Only the first half of the matches is used, as in the original script.
    # NOTE(review): presumably the page lists every URL twice — confirm.
    page_urls = page_urls[: len(page_urls) // 2]
    if not page_urls:
        raise ValueError("no page data found for document " + doc_id)

    filename = doc_id + ".txt"
    previous_y = None
    with open(filename, "a", encoding="utf-8") as out:
        for page_url in page_urls:
            page_data = _fetch(page_url.replace("\\", "")).text
            fragments = re.findall(r'"c":"(.*?)".*?"y":(.*?),', page_data)
            for raw_text, y_value in fragments:
                # Fragments carry \uXXXX escapes; turn them into characters.
                text = raw_text.encode("utf-8").decode("unicode_escape", "ignore")
                print(text)
                separator = "\n" if y_value != previous_y else ""
                previous_y = y_value
                out.write(separator + text.replace("\\", ""))
    print("文档保存在" + filename)


def PPT(url: str) -> None:
    """Save every slide of a PPT-type document as an image in ``<doc_id>/``."""
    _save_page_images(url, "PPT")


def TXT(url: str) -> None:
    """Save the text of a TXT-type document to ``<doc_id>.txt``.

    Raises:
        IndexError: if the document-info response lacks ``md5sum``,
            ``totalPageNum`` or ``rsign``.
    """
    doc_id = _extract_doc_id(url)
    info_url = (
        "https://wenku.baidu.com/api/doc/getdocinfo?callback=cb&doc_id=" + doc_id
    )
    info = _fetch(info_url).text
    md5 = re.findall(r'"md5sum":"(.*?)"', info)[0]
    page_count = re.findall(r'"totalPageNum":"(.*?)"', info)[0]
    rsign = re.findall(r'"rsign":"(.*?)"', info)[0]
    # md5 is appended directly after "type=txt", exactly as the original did.
    # NOTE(review): presumably the md5sum value carries its own "&..." prefix.
    text_url = (
        "https://wkretype.bdimg.com/retype/text/"
        + doc_id
        + "?rn="
        + page_count
        + "&type=txt"
        + md5
        + "&rsign="
        + rsign
    )
    payload = json.loads(_fetch(text_url).text)
    # Read the parsed JSON directly rather than regex-matching its repr,
    # which fails as soon as a fragment contains a quote character.
    texts = list(_iter_c_values(payload))
    print(texts)

    filename = doc_id + ".txt"
    with open(filename, "a", encoding="utf-8") as out:
        out.writelines(texts)
    print("文档保存在" + filename)


def PDF(url: str) -> None:
    """Save every page of a PDF-type document as an image in ``<doc_id>/``."""
    _save_page_images(url, "PDF")


# Explicit dispatch table: the user-supplied type is looked up, never evaluated.
_HANDLERS = {"DOC": DOC, "PPT": PPT, "TXT": TXT, "PDF": PDF}


def main(argv=None) -> int:
    """Parse the command line, run the matching downloader, return exit code."""
    parser = argparse.ArgumentParser()
    parser.add_argument("url", help="Target Url,你所需要文档的URL", type=str)
    parser.add_argument(
        "type",
        help="Target Type,你所需要文档的的类型(DOC|PPT|TXT|PDF)",
        type=str,
    )
    args = parser.parse_args(argv)

    print(BANNER)
    try:
        handler = _HANDLERS[args.type.upper()]
        handler(args.url)
    except Exception as exc:  # top-level boundary: report, do not traceback
        print(ERROR_MESSAGE)
        print("(" + repr(exc) + ")", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())

软件下载:https://github.com/Lz1y/FreeForWenku/raw/master/FreeForWenku.exe

本文仅代表作者个人观点,不代表巅云官方发声。对观点有疑义请先联系作者本人进行修改;若内容非法请联系平台管理员,邮箱 2522407257@qq.com。更多相关资讯,请到巅云 www.yx10011.com;学习互联网营销技术,请到巅云建站 www.yx10011.com。