[Python] 超星学习通作业批量下载
导读:# -*- coding: utf8 -*- #coder:nianboy #time:2022-7-14 import os import re import requests import pandas as pd cl...
# -*- coding: utf8 -*-
#coder:nianboy
#time:2022-7-14
import os
import re
import requests
import pandas as pd
class ChaoXingWork:
    """Interactive downloader for homework attachments on Chaoxing (超星学习通).

    Intended call order: ``session_token()`` -> ``class_list()`` ->
    ``class_data()`` -> ``download_data()``.

    NOTE(review): the listing this class was recovered from had lost its
    indentation and every ``<`` character (named groups, HTML tags, the
    ``< 1`` comparison).  The HTML patterns below are reconstructions that
    tolerate arbitrary whitespace between attributes; confirm them against
    the live pages before relying on them.
    """

    _USER_AGENT = (
        "Mozilla/4.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    )
    _LOGIN_API = "https://passport2.chaoxing.com/api/login"
    _COURSE_LIST_URL = (
        "http://mooc2-ans.chaoxing.com/visit/courses/list?v=1652629452722"
        "&rss=1&start=0&size=500&catalogId=0&searchname="
    )
    _READ_URL = "https://mooc1.chaoxing.com/ueditorupload/read?objectId="
    _CHUNK_SIZE = 1024 * 1024 * 2

    # One course card on the course-list page: link target plus course title.
    _COURSE_RE = re.compile(
        r'"color1"\s+href="(?P<src>.*?)".*?break-all;\s*"\s+title="(?P<name>.*?)">'
        r'.*?<i\s+class="icon-news">\s*</i>',
        re.S,
    )
    # Fields scraped from a course home page to build the homework-list URL.
    _WORK_TAB_RE = re.compile(r'title="作业"\s+data-url="(.*?)">', re.S)
    _COURSE_ID_RE = re.compile(r'name="courseid"\s+value="(.*?)"\s*/>', re.S)
    _CLASS_ID_RE = re.compile(r'name="clazzid"\s+value="(.*?)"\s*/>', re.S)
    _WORK_ENC_RE = re.compile(r'name="workEnc"\s+value="(.*?)"\s*>', re.S)
    _PAGE_NUM_RE = re.compile(r'pageNum\s*:\s*(\d+)')
    # NOTE(review): the source pattern survived only as  li(.*?)">  which
    # cannot yield a URL; this assumes each homework is an <li> carrying its
    # detail-page URL in a ``data`` attribute -- TODO confirm.
    _WORK_LINK_RE = re.compile(r'<li\b[^>]*?\sdata="(.*?)"', re.S)
    # Homework detail page: title plus the attachment's object id and type.
    _ATTACH_RE = re.compile(
        r'<h2\s+class="mark_title">(?P<name>.*?)</h2>'
        r'.*?data="(?P<enc>.*?)"\s+type="(?P<type>.*?)"',
        re.S,
    )
    _ATTACH_STRICT_RE = re.compile(
        r'<h2\s+class="mark_title">(?P<name>.*?)</h2>'
        r'.*?<p\s+class="attachNew">\s*'
        r'<em\s+style="font-style:normal;\s*display:block;\s*min-height:2px;\s*">\s*</em>\s*'
        r'<span\s+data="(?P<enc>.*?)"\s+type="(?P<type>.*?)"\s+name=".*?"',
        re.S,
    )
    _DOWNLOAD_LINK_RE = re.compile(r'<a\s+class="btnDown"\s+href="(.*?)"\s*>\s*<span>')

    def __init__(self, user, password):
        """Store the credentials and create the HTTP session.

        Args:
            user: Chaoxing account name.
            password: Chaoxing password.
        """
        self.password = password
        self.user = user
        self.work_data_list = []  # one dict per downloadable attachment
        self.class_lists = []     # one {"src", "name"} dict per course
        self.headers = {"User-agent": self._USER_AGENT}
        self.session = requests.session()
        # Initialised here so later methods never hit AttributeError when an
        # earlier step found nothing.
        self.name = ""
        self.work_class_url = ""
        self.work_class_urls = ""

    @staticmethod
    def _safe_component(text):
        """Return *text* made safe for use as a single path component.

        Course and file names come from remote HTML, so path separators,
        characters Windows rejects, control characters and leading/trailing
        dots or spaces are neutralised.
        """
        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", text).strip(" .")
        return cleaned or "_"

    @staticmethod
    def _read_int(prompt):
        """Prompt until the user types an integer and return it."""
        while True:
            try:
                return int(input(prompt))
            except ValueError:
                print("请输入数字!")

    def session_token(self):
        """Log in so that the session holds the authentication cookies."""
        params = {
            "name": self.user,     # account
            "pwd": self.password,  # password
            "verify": "0",
            "schoolid": "",
        }
        self.session.get(self._LOGIN_API, params=params, headers=self.headers)

    def class_list(self):
        """Fetch the course list and append each course to ``self.class_lists``."""
        page = self.session.get(url=self._COURSE_LIST_URL, headers=self.headers).text
        # The source removed every space from the page before matching a
        # pattern that itself contains spaces, so nothing could match.  The
        # pattern is whitespace-tolerant instead and the page is left as is.
        for match in self._COURSE_RE.finditer(page):
            self.class_lists.append({
                "src": match.group("src").strip(),
                "name": match.group("name").strip(),
            })

    def class_data(self):
        """Show the courses, ask which one to scan, then list its attachments.

        Entering ``0`` scans every course; any other number selects the
        course with that 1-based index.
        """
        if not self.class_lists:
            print("未获取到任何课程,请检查账号、密码或网络!")
            return

        total = len(self.class_lists)
        courses = pd.DataFrame(
            {"课程名称": [course["name"] for course in self.class_lists]},
            index=range(1, total + 1),
        )
        print(courses)

        choice = self._read_int("请输入课程序号:")
        while not 0 <= choice <= total:
            print("序号超出范围,请重新输入!")
            choice = self._read_int("请输入课程序号:")

        selected = self.class_lists if choice == 0 else [self.class_lists[choice - 1]]
        for course in selected:
            self.name = course["name"]
            self.work_datas(course["src"])

        if not self.work_data_list:
            print(self.name + ":该课程暂无作业可以下载!")
            return

        works = pd.DataFrame(
            {
                "文件类型": [work["type"] for work in self.work_data_list],
                "文件名称": [work["file_name"] for work in self.work_data_list],
                "课程名称": [work.get("course", self.name) for work in self.work_data_list],
            },
            index=range(1, len(self.work_data_list) + 1),
        )
        print(works)

    def work_datas(self, src):
        """Collect every homework attachment of the course at *src*.

        Creates the course folder, derives the homework-list URL from the
        course home page and calls ``class_work_find()`` once per list page.
        Returns without collecting anything when the course page exposes no
        homework tab.

        Args:
            src: URL of the course home page.
        """
        os.makedirs(self._safe_component(self.name), exist_ok=True)

        course_page = self.session.get(url=src, headers=self.headers).text
        fields = [
            pattern.search(course_page)
            for pattern in (
                self._WORK_TAB_RE,
                self._COURSE_ID_RE,
                self._CLASS_ID_RE,
                self._WORK_ENC_RE,
            )
        ]
        if any(found is None for found in fields):
            # No homework tab: skip this course rather than abort the scan.
            return
        workurl, courseid, classid, workenc = (found.group(1) for found in fields)

        self.work_class_url = (
            workurl + "?courseId=" + courseid + "&classId=" + classid + "&enc=" + workenc
        )
        listing = self.session.get(url=self.work_class_url, headers=self.headers).text

        # The list page states its page count only when it is paginated.
        page_count = self._PAGE_NUM_RE.search(listing)
        last_page = int(page_count.group(1)) if page_count else 1
        for page in range(1, last_page + 1):
            self.work_class_urls = self.work_class_url + "&pageNum=" + str(page)
            self.class_work_find()

    def class_work_find(self):
        """Scan the list page at ``self.work_class_urls`` for attachments.

        Each attachment found is appended to ``self.work_data_list`` as a
        dict with keys ``enc``, ``type``, ``file_name`` and ``course``.
        """
        listing = self.session.get(url=self.work_class_urls, headers=self.headers).text
        for work_url in self._WORK_LINK_RE.findall(listing):
            page_text = self.session.get(url=work_url, headers=self.headers).text
            if '<span data="' in page_text:
                pattern = self._ATTACH_RE
            else:
                pattern = self._ATTACH_STRICT_RE
            for match in pattern.finditer(page_text):
                enc = match.group("enc")
                if not enc:
                    # An empty object id cannot be downloaded.
                    print("该课程没有作业")
                    continue
                kind = match.group("type")
                title = match.group("name").strip()
                self.work_data_list.append({
                    "enc": enc,
                    "type": kind,
                    "file_name": (title + "." + kind).replace("、", "."),
                    # Remember the owning course so that "all courses" mode
                    # saves each file into the right folder.
                    "course": self.name,
                })

    def _download(self, work):
        """Download one entry of ``self.work_data_list`` into its course folder."""
        read_url = self._READ_URL + work["enc"]
        headers = {"Referer": self.work_class_url, "User-agent": self._USER_AGENT}

        if work["type"] in ("zip", "rar"):
            # Archives are served directly by the read endpoint.
            file_url = read_url
        else:
            # Other types get a preview page that links to the real file.
            preview = self.session.get(url=read_url, headers=self.headers).text
            link = self._DOWNLOAD_LINK_RE.search(preview)
            if link is None:
                print(work["file_name"] + ":未找到下载地址!")
                return
            file_url = link.group(1)

        folder = os.path.join(".", self._safe_component(work.get("course", self.name)))
        os.makedirs(folder, exist_ok=True)
        target = os.path.join(folder, self._safe_component(work["file_name"]))

        # ``with`` releases the streamed connection even if writing fails.
        with self.session.get(url=file_url, headers=headers, stream=True) as response:
            if response.status_code >= 400:
                # Do not save an HTTP error page as if it were the file.
                print(work["file_name"] + "下载失败!")
                return
            with open(target, "wb") as handle:
                for chunk in response.iter_content(self._CHUNK_SIZE):
                    handle.write(chunk)
        print(work["file_name"] + "下载成功!")

    def download_data(self):
        """Repeatedly ask for an attachment number and download it.

        Any number below 1 ends the loop.  Numbers beyond the list and
        non-numeric input are rejected and prompted for again.
        """
        prompt = "输入0退出程序\n请输入你想下载文件的序号:"
        while True:
            number = self._read_int(prompt)
            if number < 1:
                break
            if number > len(self.work_data_list):
                print("序号超出范围,请重新输入!")
                continue
            self._download(self.work_data_list[number - 1])
if __name__ == "__main__":
    # Script entry point.  Replace the two placeholder strings with a real
    # account and password before running.
    chaoxing = ChaoXingWork(user="学习通账号", password="学习通密码")
    chaoxing.session_token()   # log in through the shared session
    chaoxing.class_list()      # collect the account's courses
    chaoxing.class_data()      # choose a course and list its attachments
    chaoxing.download_data()   # interactive download loop
声明:本文内容由网友自发贡献,本站不承担相应法律责任。对本内容有异议或投诉,请联系2913721942#qq.com核实处理,我们将尽快回复您,谢谢合作!
若转载请注明出处: [Python] 超星学习通作业批量下载
本文地址: https://pptw.com/jishu/290731.html