BIPT开课信息(json格式)–python爬虫

代码需要用到edge驱动程序(Microsoft Edge WebDriver,即 msedgedriver.exe),请前往微软官网下载与本机edge版本一致的驱动。

也可以将edge换成chrome,只需改动创建驱动服务的那一行代码(即 Service('msedgedriver.exe') 所在行),把其中的驱动程序换成chrome的驱动即可。

我已将getCourse()和getTeacher()方法中的cookies隐藏,请使用自己的cookies

import json
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service


def getTeacher(eid, timeout=30):
    """Fetch the teacher list of one course class.

    Sends a GET request to the ``ajaxSchoolTeaching.do`` endpoint with *eid*
    as the ``epid`` query parameter and splits the response body on ``<br>``.

    Args:
        eid: Value substituted into the ``epid`` query parameter.
        timeout: Seconds to wait for the server. Without a timeout a single
            stalled connection would block the whole crawl indefinitely;
            with it, ``requests`` raises a timeout exception instead.

    Returns:
        list[str]: The non-empty ``<br>``-separated pieces of the response.
    """
    u = "https://jwzx.bipt.edu.cn/academic/manager/electcourse/ajaxSchoolTeaching.do?&epid={}".format(eid)
    # Placeholder value: replace it with the JSESSIONID of your own
    # logged-in session before running.
    cookies = {
        "JSESSIONID": "**********"
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63"
    }
    resp = requests.get(u, cookies=cookies, headers=headers, timeout=timeout)
    # A trailing "<br>" (or two in a row) yields empty strings after the
    # split; drop those so only real entries remain.
    return [piece for piece in resp.text.strip().split("<br>") if piece]


def getCourse(eid, timeout=30):
    """Fetch the schedule text of one course class.

    Sends a GET request to the ``ajaxCoursearrangement.do`` endpoint with
    *eid* as the ``epid`` query parameter and returns the response body with
    surrounding whitespace, ``&nbsp`` and ``<br>`` removed.

    Args:
        eid: Value substituted into the ``epid`` query parameter.
        timeout: Seconds to wait for the server. Without a timeout a single
            stalled connection would block the whole crawl indefinitely;
            with it, ``requests`` raises a timeout exception instead.

    Returns:
        str: The cleaned response text.
    """
    u = "https://jwzx.bipt.edu.cn/academic/manager/electcourse/ajaxCoursearrangement.do?epid={}".format(eid)
    # Placeholder value: replace it with the JSESSIONID of your own
    # logged-in session before running.
    cookies = {
        "JSESSIONID": "*********"
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63"
    }
    resp = requests.get(u, cookies=cookies, headers=headers, timeout=timeout)
    # NOTE(review): this removes the literal text "&nbsp" without a trailing
    # ";". If the endpoint emits the full "&nbsp;" entity, a ";" is left
    # behind for each occurrence - confirm against a real response.
    return resp.text.strip().replace("&nbsp", '').replace("<br>", '')


# ---------------------------------------------------------------------------
# Crawl script: open the portal in a Selenium-driven browser, wait for a
# manual login, walk the paginated course list and write every course row to
# courseInfo.json.
# ---------------------------------------------------------------------------

# Seconds to wait after opening the portal so the user can log in by hand.
LOGIN_WAIT_SECONDS = 15
# Seconds to wait after requesting each list page before reading its source.
PAGE_LOAD_WAIT_SECONDS = 2
# The list pages 1..LAST_PAGE are requested.
LAST_PAGE = 22

# Course list URL; the placeholder receives the page number.
LIST_URL = "https://jwzx.bipt.edu.cn/academic/manager/electcourse/findcc.do?search=%E6%9F%A5%E8%AF%A2&sortColumn=course.pcourseid%2Cep.cseq&sortDirection=1&pagingPage={}&cname=&depid=1&pagingNumberPer=50&"

# CSS path to the rows of the course table; every column selector extends it.
ROW_SELECTOR = ("body > center > table.content_tab > tbody > tr > td > form"
                " > table.datalist > tbody > tr")

# Output key -> (1-based column number, extra selector inside the cell).
TEXT_COLUMNS = {
    "课程号": (1, ""),
    "课序号": (2, ""),
    "课程名称": (3, " > a"),
    "学分": (4, ""),
    "选课属性": (5, ""),
    "开课院系": (6, ""),
    "课程班校区": (7, ""),
    "选课限制": (8, ""),
    "课程考核方式": (9, ""),
    "课程班别名": (11, ""),
    "课容量": (13, ""),
    "选课人数": (14, " > span"),
}


def _select_column(soup, column, inner=""):
    """Return the elements matched in table column *column* across all rows."""
    return soup.select("{} > td:nth-child({}){}".format(ROW_SELECTOR, column, inner))


def _parse_course_page(soup):
    """Build one dict per course row of a parsed list page.

    Columns are selected independently and combined by position, so an
    IndexError is raised if a column yields fewer elements than column 1.
    """
    epid_links = _select_column(soup, 10, " > a")
    columns = {
        key: _select_column(soup, column, inner)
        for key, (column, inner) in TEXT_COLUMNS.items()
    }

    def text(key, row):
        return columns[key][row].get_text().strip()

    courses = []
    for row in range(len(columns["课程号"])):
        # The epid is taken as the last 9 characters of the link's href.
        # NOTE(review): assumes the href always ends with a 9-character id -
        # confirm against the real page.
        eid = epid_links[row].get("href")[-9:]
        courses.append({
            "epid": eid,
            "课程号": text("课程号", row),
            "课序号": text("课序号", row),
            "课程名称": text("课程名称", row),
            "学分": text("学分", row),
            "选课属性": text("选课属性", row),
            "开课院系": text("开课院系", row),
            "课程班校区": text("课程班校区", row),
            "选课限制": text("选课限制", row),
            "课程考核方式": text("课程考核方式", row),
            # Kept as a real list so it is serialised as a JSON array.
            "任课教师": getTeacher(eid),
            "课程班别名": text("课程班别名", row),
            "课程安排": getCourse(eid),
            "课容量": text("课容量", row),
            "选课人数": text("选课人数", row),
        })
    return courses


service = Service('msedgedriver.exe')
service.start()
try:
    browser = webdriver.Remote(service.service_url)
    try:
        url = "https://jwzx.bipt.edu.cn/"
        browser.get(url)

        # IMPORTANT: no cookies are injected into Selenium, so the script
        # pauses here; type the account and password and log in by hand
        # before the wait runs out.
        time.sleep(LOGIN_WAIT_SECONDS)

        json_data = []
        for p in range(1, LAST_PAGE + 1):
            browser.get(LIST_URL.format(p))
            time.sleep(PAGE_LOAD_WAIT_SECONDS)
            soup = BeautifulSoup(browser.page_source, 'html.parser')
            # Fail with a clear message when the table is missing altogether
            # instead of silently producing an empty result.
            if not soup.select(ROW_SELECTOR):
                raise RuntimeError(
                    "course table not found on page {}; check that the login succeeded".format(p))
            json_data.extend(_parse_course_page(soup))
    finally:
        # Always close the browser session, even when the crawl fails.
        browser.quit()
finally:
    # Always shut down the driver process started above.
    service.stop()

# Serialise with the json module so quotes and special characters inside the
# scraped text are escaped correctly; ensure_ascii=False keeps the Chinese
# text readable in the output file.
JsonStr = json.dumps(json_data, ensure_ascii=False)
with open("courseInfo.json", "w", encoding="UTF8") as f:
    f.write(JsonStr)
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇
X公网安备 xxxxx | 京ICP备2022003555号-2
Copyright © 2022 MySheep |