introduction

使用Python进程池并发查询, 通过multiprocessing.Pool()构造出多个进程
运用Pool().map()结合偏函数传递多个参数查询某个用户关注列表及关注列表对应的房间号
getPartialFollowsInfo(vmid, x) 获取第x+1页的关注。因为API限制每次只能查询50个关注, 且只能查看前5页, 故最多获取250个关注
getFollowsInfo(vmid) 5进程并发, 每个进程查询一页关注并返回
getRoomIDbyUID(uid) 通过uid查询完整的rid(部分uid对应短rid)
f1(x) 并发子函数, 返回[name, rid]; 任务结束后主进程接收到的结果形如[[name1,rid1],[name2,rid2],...]。另外, getFollowsInfo中使用sum(ret,[])将各页结果合并为一维列表
updateList(mid) 主函数体

exp result

使用for或者列表生成式的方法 平均2s每个查询
使用进程池技术 平均0.02s每个查询(阶段总耗时除以查询个数)

related knowledge

code

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
import requests
import time
import json
from multiprocessing import Pool
from functools import partial


def ti():
    """Return the current time as reported by ``time.time()``.

    Used as a short alias for taking the start/end timestamps of the
    timed phases in this script.
    """
    now = time.time()
    return now


# Cookie header value sent with every request. The name suggests it was
# captured from a non-logged-in ("visitor") browser session -- TODO confirm.
# The fragments are joined by implicit string-literal concatenation; the
# trailing space of the last fragment is part of the value.
visitor_cookie = (
    "finger=fd31c715; im_notify_type_86797383=0; CURRENT_QUALITY=116; "
    "UM_distinctid=16325eafd0f3e7-08716f222e246b8-3f616c4d-1fa400-16325eafd1044c; fts=1525349513; "
    "sid=5e2mhc7x; LIVE_BUVID=72408416052e67b1fb249d1b623fce1c; LIVE_BUVID__ckMd5=a8df061f2f42c761; "
    "rpdid=oqmlmkmommdosisikqsqw; buvid3=0D0E5D31-9A01-454D-9262-00896DDA4DE6103083infoc "
)

# HTTP headers shared by all API calls in this file.
header_visitor = {
    "Cookie": visitor_cookie,
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3493.3 Safari/537.36"
    ),
    "Referer":  "https://www.bilibili.com/",
    "Origin": "https://www.bilibili.com",
}


def getPartialFollowsInfo(vmid, x, timeout=10):
    """Fetch one page of the accounts followed by user ``vmid``.

    Args:
        vmid: ID of the user whose following list is queried.
        x: Zero-based page index; it is sent to the API as page ``x + 1``
            with a page size of 50.
        timeout: Seconds ``requests`` waits on the server before raising.
            Without it a stalled connection would block this worker, and
            the parent's ``pool.map`` call, indefinitely.

    Returns:
        The value of ``data.list`` from the JSON reply, or ``None`` when
        the reply carries a non-zero ``code`` or the list is empty.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    response = requests.get(
        url="https://api.bilibili.com/x/relation/followings",
        params={
            'vmid': str(vmid),
            'ps': '50',
            'pn': str(x + 1),
            'order': 'desc',
        },
        headers=header_visitor,
        timeout=timeout,
    )
    payload = response.json()
    if payload['code']:  # non-zero code, e.g. access denied if pn>5
        return None
    follows = payload['data']['list']
    if not follows:
        return None
    return follows


def getFollowsInfo(vmid):
    """Collect up to five pages of the following list of user ``vmid``.

    Five worker processes each request one page (page indices 0-4) through
    ``getPartialFollowsInfo``; the non-empty pages are then merged, in page
    order, into one flat list.

    Args:
        vmid: ID of the user whose following list is queried.

    Returns:
        A flat list with the entries of every page that returned data;
        empty when no page returned anything.
    """
    pool = Pool(5)
    try:
        pages = pool.map(partial(getPartialFollowsInfo, vmid), range(5))
    finally:
        # Always release the workers, even when a page request raised.
        pool.close()
        pool.join()
    # Flatten page by page and skip the None placeholders of failed/empty
    # pages; avoids the repeated list copying of sum(pages, []).
    return [follow for page in pages if page for follow in page]


def getRoomIDbyUID(uid, timeout=10):
    """Look up the live-room ID that belongs to user ``uid``.

    Args:
        uid: User ID to look up.
        timeout: Seconds ``requests`` waits on the server before raising,
            so a stalled connection cannot hang the calling worker forever.

    Returns:
        ``data.roomid`` from the JSON reply converted to ``str`` when the
        reply's ``code`` is zero, otherwise ``None``.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    response = requests.get(
        url="https://api.live.bilibili.com/room_ex/v1/RoomNews/get",
        params={'uid': str(uid)},
        headers=header_visitor,
        timeout=timeout,
    )
    payload = response.json()
    if payload['code']:  # only a zero code marks a normal reply
        return None
    return str(payload['data']['roomid'])


def f1(x):
    """Pool worker: pair a followed user's name with their room ID.

    Args:
        x: One entry of the following list; its ``'uname'`` and ``'mid'``
            keys are read.

    Returns:
        A two-element list ``[uname, room_id]`` where ``room_id`` is
        whatever ``getRoomIDbyUID`` returns for ``x['mid']``.
    """
    name = x['uname']
    room_id = getRoomIDbyUID(x['mid'])
    return [name, room_id]


def updateList(mid):
    """Rebuild ``stars_cid.json`` from the following list of user ``mid``.

    Fetches the following list, resolves each entry to ``[uname, room_id]``
    in a process pool (one process per entry, capped at 100), and writes the
    resulting ``uname -> room_id`` mapping as UTF-8 JSON. Timing for both
    phases is printed. Entries that share a ``uname`` collapse to the last
    one seen, because the name is the dictionary key.

    Args:
        mid: ID of the user whose following list is processed.

    Returns:
        None. When no follows were fetched the file is left untouched.
    """
    st = ti()
    follows = getFollowsInfo(vmid=mid)
    ed = ti()
    print("获取%d个关注花费 %.3fs" % (len(follows), (ed - st)))
    if not follows:
        # Pool(0) raises ValueError and the per-query average below would
        # divide by zero, so stop here and keep any existing file intact.
        print("未获取到关注, 跳过更新stars_cid.json")
        return

    st = ti()
    pool = Pool(min(len(follows), 100))
    try:
        pairs = pool.map(f1, follows)
    finally:
        # Always release the workers, even when a lookup raised.
        pool.close()
        pool.join()

    stars_cid = dict(pairs)
    with open('stars_cid.json', 'w', encoding='utf-8') as f:
        json.dump(stars_cid, f, indent=2, ensure_ascii=False)
    ed = ti()
    print(
        "已更新并写入%d到stars_cid.json中" % len(stars_cid),
        "阶段用时 %.3fs" % (ed - st),
        "平均用时 %.3fs" % ((ed - st) / len(follows)),
        sep='\n'
    )


if __name__ == "__main__":
    # Script entry point: refresh the list for the hard-coded user ID.
    updateList(mid=120187282)