代码需要用到 Edge 驱动程序（msedgedriver.exe），点击前往官网下载，
也可以将 Edge 换成 Chrome，只需改动创建 Service('msedgedriver.exe') 的那一行代码（原文所说的“第36行”）即可。
我已将getCourse()和getTeacher()方法中的cookies隐藏,请使用自己的cookies
import json
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
def getTeacher(eid, cookies=None, timeout=30):
    """Fetch the teachers listed for one course class.

    Sends a GET request to the ``ajaxSchoolTeaching.do`` endpoint with the
    given id and splits the response body on ``<br>``.

    Args:
        eid: Value substituted into the ``epid`` query parameter.
        cookies: Optional cookie mapping sent with the request. When omitted,
            the placeholder ``JSESSIONID`` below is used; replace it with a
            valid session id of your own.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the non-empty ``<br>``-separated fragments of the
        response body, in response order.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP 4xx/5xx status.
    """
    u = "https://jwzx.bipt.edu.cn/academic/manager/electcourse/ajaxSchoolTeaching.do?&epid={}".format(eid)
    if cookies is None:
        cookies = {
            "JSESSIONID": "**********"
        }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63"
    }
    # Without a timeout a single stalled connection blocks the whole scrape.
    resp = requests.get(u, cookies=cookies, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an HTTP error page as teacher names.
    resp.raise_for_status()
    fragments = resp.text.strip().split("<br>")
    # Drop the empty pieces produced by leading/trailing/doubled "<br>".
    return [fragment for fragment in fragments if fragment]
def getCourse(eid, cookies=None, timeout=30):
    """Fetch the schedule text for one course class.

    Sends a GET request to the ``ajaxCoursearrangement.do`` endpoint with the
    given id and returns the response body with spaces and ``<br>`` removed.

    Args:
        eid: Value substituted into the ``epid`` query parameter.
        cookies: Optional cookie mapping sent with the request. When omitted,
            the placeholder ``JSESSIONID`` below is used; replace it with a
            valid session id of your own.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body, stripped of surrounding whitespace, with every
        space character and every ``<br>`` removed.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP 4xx/5xx status.
    """
    u = "https://jwzx.bipt.edu.cn/academic/manager/electcourse/ajaxCoursearrangement.do?epid={}".format(eid)
    if cookies is None:
        cookies = {
            "JSESSIONID": "*********"
        }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63"
    }
    # Without a timeout a single stalled connection blocks the whole scrape.
    resp = requests.get(u, cookies=cookies, headers=headers, timeout=timeout)
    # Fail loudly instead of storing an HTTP error page as schedule text.
    resp.raise_for_status()
    page = resp.text
    return page.strip().replace(" ", '').replace("<br>", '')
# ---------------------------------------------------------------------------
# Script body: open the course-list pages in a real browser (manual login),
# turn every table row into a dict enriched via getTeacher()/getCourse(),
# and write all rows to courseInfo.json.
# ---------------------------------------------------------------------------

# Rows of the course table as they appear in the browser-serialized DOM.
_ROW_SELECTOR = "body > center > table.content_tab > tbody > tr > td > form > table.datalist > tbody > tr"


def _nth_td(children, position):
    """Return the 1-based ``position``-th child of a row if it is a <td>, else None."""
    if len(children) < position:
        return None
    cell = children[position - 1]
    return cell if cell.name == "td" else None


def _td_text(children, position, child_tag=None):
    """Return the stripped text of a row's cell (or of its direct ``child_tag`` child).

    Returns an empty string when the cell or the requested child is missing.
    """
    node = _nth_td(children, position)
    if node is not None and child_tag is not None:
        node = node.find(child_tag, recursive=False)
    return "" if node is None else node.get_text().strip()


def _parse_course_row(row):
    """Build the record for one table row, or return None for non-data rows.

    A row counts as data only when its 10th cell holds a link with an href;
    every field is read from that same row, so one malformed row can never
    shift the columns of the rows that follow it.
    """
    children = row.find_all(True, recursive=False)  # direct child elements only
    link_cell = _nth_td(children, 10)
    link = None if link_cell is None else link_cell.find("a", recursive=False)
    href = None if link is None else link.get("href")
    if not href:
        return None
    # NOTE(review): assumes the epid is exactly the last 9 characters of the
    # href, as the original code did -- confirm against the live page.
    eid = href[-9:]
    return {
        "epid": eid,
        "课程号": _td_text(children, 1),
        "课序号": _td_text(children, 2),
        "课程名称": _td_text(children, 3, "a"),
        "学分": _td_text(children, 4),
        "选课属性": _td_text(children, 5),
        "开课院系": _td_text(children, 6),
        "课程班校区": _td_text(children, 7),
        "选课限制": _td_text(children, 8),
        "课程考核方式": _td_text(children, 9),
        "任课教师": getTeacher(eid),  # list of names, one HTTP request per row
        "课程班别名": _td_text(children, 11),
        "课程安排": getCourse(eid),  # schedule text, one HTTP request per row
        "课容量": _td_text(children, 13),
        "选课人数": _td_text(children, 14, "span"),
    }


json_data = []
service = Service('msedgedriver.exe')
service.start()
try:
    browser = webdriver.Remote(service.service_url)
    try:
        url = "https://jwzx.bipt.edu.cn/"
        browser.get(url)
        # NOTE: no cookies are injected into selenium, so pause for 15 seconds
        # to let the user type the account/password and log in by hand.
        time.sleep(15)
        for p in range(1, 23):
            url2 = "https://jwzx.bipt.edu.cn/academic/manager/electcourse/findcc.do?search=%E6%9F%A5%E8%AF%A2&sortColumn=course.pcourseid%2Cep.cseq&sortDirection=1&pagingPage={}&cname=&depid=1&pagingNumberPer=50&".format(
                p)
            browser.get(url2)
            time.sleep(2)  # give the page time to finish rendering
            soup = BeautifulSoup(browser.page_source, 'html.parser')
            for row in soup.select(_ROW_SELECTOR):
                record = _parse_course_row(row)
                if record is not None:
                    json_data.append(record)
    finally:
        # Always close the browser window, even if scraping fails midway.
        browser.quit()
finally:
    # Always shut down the driver process started above.
    service.stop()

# json.dump escapes quotes/backslashes correctly and keeps the teacher list
# as a real JSON array; ensure_ascii=False keeps Chinese text readable.
with open("courseInfo.json", "w", encoding="UTF8") as f:
    json.dump(json_data, f, ensure_ascii=False)