当前位置 博文首页 > shelgi的博客:无聊,做个微博用户关系图

    shelgi的博客:无聊,做个微博用户关系图

    作者:shelgi 时间:2021-07-28 08:49

    最近要学的太多,还有一堆期末大作业等着做,实在是脑阔疼。好不容易闲了一点,用networkx做个关系图玩一下

    思路

    先爬取指定微博用户的关注列表,再逐个遍历列表中的用户,获取每个用户的基本信息以及他们各自的关注列表;这样逐层递归下去,用户关系就会越来越丰富,画出来的关系图也就更复杂。

    代码

    import requests
    import re
    import json
    import networkx as nx
    import matplotlib.pyplot as plt
    # Use the YouYuan typeface for sans-serif text.
    # NOTE(review): presumably chosen so Chinese user names render in the plot;
    # the font has to be installed on the machine — confirm.
    plt.rcParams['font.sans-serif'] = ['YouYuan']
    # Render minus signs with the ASCII hyphen rather than the Unicode minus
    # character, which some CJK fonts lack.
    plt.rcParams['axes.unicode_minus'] = False 
    
    # Fetch a URL and return the response body as text
    def getHtmlText(url, code='UTF-8', trytime=10, timeout=5):
        """Download *url* with a browser-like User-Agent, retrying on failure.

        Args:
            url: Address to request with HTTP GET.
            code: Encoding used to decode the response body.
            trytime: Maximum number of attempts before giving up.
            timeout: Per-request timeout in seconds, passed to ``requests.get``.

        Returns:
            The decoded response text, or ``None`` when every attempt failed
            (connection error, timeout, or a non-2xx status).
        """
        # Built once: the header does not change between attempts.
        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6726.400 QQBrowser/10.2.2265.400',
        }
        while trytime > 0:
            try:
                r = requests.get(url, headers=header, timeout=timeout)
                r.raise_for_status()
            except requests.RequestException:
                # Only network/HTTP failures are retried; KeyboardInterrupt and
                # programming errors propagate instead of being swallowed.
                print("get获取失败,重连中")
                trytime -= 1
            else:
                r.encoding = code
                return r.text
        # All attempts failed; callers receive None.
        return None
    
    
    # Fetch basic profile information for one user
    def getUserInfo(uid):
        """Query the m.weibo.cn container API for *uid* and return a summary.

        The response is read at ``['data']['userInfo']``; the returned dict
        has the keys ``id``, ``name`` (from ``screen_name``), ``gender``
        ('女' when the API value is ``'f'``, otherwise '男') and ``desc``
        (from ``description``).
        """
        api = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'.format(uid)
        profile = json.loads(getHtmlText(api))['data']['userInfo']
        return {
            'id': profile['id'],
            'name': profile['screen_name'],
            'gender': '女' if profile['gender'] == 'f' else '男',
            'desc': profile['description'],
        }
    
    
    # Fetch the list of accounts a user follows
    def getInterestList(uid, num):
        """Return up to *num* followed accounts from the first result page.

        Only cards titled '她的全部关注' or '他的全部关注' are read; each member
        of such a card's ``card_group`` contributes ``{'id': <user id>}``.

        Args:
            uid: Weibo user id whose following list is requested.
            num: Maximum number of entries to return; values <= 0 yield ``[]``.

        Returns:
            A list of ``{'id': ...}`` dicts, at most *num* long.
        """
        url = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_{}&page=1'.format(uid)
        data = json.loads(getHtmlText(url))
        # The original test `title == A or B` was always true because the
        # non-empty string B is truthy; a membership test expresses the intent.
        wanted_titles = ('她的全部关注', '他的全部关注')
        intertestList = []
        for cards in data['data']['cards']:
            if 'title' not in cards or cards['title'] not in wanted_titles:
                continue
            for card in cards['card_group']:
                # Cap the whole result, not each card separately, so callers
                # never receive more than `num` entries.
                if len(intertestList) >= num:
                    return intertestList
                intertestList.append({'id': card['user']['id']})
        return intertestList
    
    
    # Depth-first crawl of user profiles and their following lists
    def deepSearchList(list, uid, floor, num):
        """Recursively collect user info and following lists into *list*.

        Args:
            list: Dict keyed by user id (as ``str``) that accumulates results;
                it is mutated in place. Each value holds ``'userInfo'`` and,
                for expanded users, ``'interestList'``.
            uid: User id to visit; normalised to ``str`` so int and str ids
                address the same entry.
            floor: Remaining depth. At 0 only the profile is fetched; above 0
                the following list is fetched and each followed user is
                visited with ``floor - 1``. A negative value never reaches
                the base case, so recursion then stops only at users that
                were already expanded.
            num: Maximum number of followed users to expand per user.

        Returns:
            The same *list* object that was passed in.
        """
        # Keys are always stored as str; normalising here avoids the KeyError
        # the mixed `list[str(uid)]` / `list[uid]` accesses caused for int ids.
        uid = str(uid)

        if floor == 0:
            if uid in list:
                print('{}有重复'.format(uid))
                return list
            # Fetch first, store second: a failed request leaves no empty entry.
            list[uid] = {'userInfo': getUserInfo(uid)}
            print('{}\t{}\t{}\t{}'.format(uid, list[uid]['userInfo']['name'], list[uid]['userInfo']['gender'],
                                          list[uid]['userInfo']['desc']))
            return list

        if uid in list and 'interestList' in list[uid]:
            # Already expanded; returning here also breaks follow cycles.
            return list

        # Reuse a profile cached by an earlier leaf visit instead of
        # discarding it and requesting it again.
        entry = list.get(uid) or {}
        if 'userInfo' not in entry:
            entry['userInfo'] = getUserInfo(uid)
        entry['interestList'] = getInterestList(uid, num)
        # Register the entry before recursing so a cycle back to this user
        # hits the "already expanded" guard above.
        list[uid] = entry
        print('{}\t{}\t{}\t{}'.format(uid, entry['userInfo']['name'],
                                      entry['userInfo']['gender'],
                                      entry['userInfo']['desc']))
        # max() keeps a negative `num` from slicing from the end of the list.
        for followed in entry['interestList'][:max(num, 0)]:
            deepSearchList(list, str(followed['id']), floor - 1, num)
        return list
    
    
    if __name__ == '__main__':
        uid='******'   #微博的uid
    
        try:
            with open(f'./{uid}.json', 'r', encoding='utf-8') as f:
                data = json.load(f)
        except:
            data = dict()
        data = deepSearchList(data, uid, 10, 20)
        with open(f'./{uid}.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(data, ensure_ascii=False))
    #     print(data)
    #     print(len(data))
    
        plt.figure(figsize=(20,10),dpi=100)
        G = nx.DiGraph()
        node_size_list = dict()
        node_color_list = dict()
        for person in data:
            G.add_node(data[person]['userInfo']['name'])
            node_size_list[data[person]['userInfo']['name']] = 0
            node_color_list[data[person]['userInfo']['name']] = 'lightblue' if data[person]['userInfo']['gender'] == '男' else 'pink'
        for person in data:
            if 'interestList' in data[person].keys():
                for interest in data[person]['interestList']:
                    #print('{} -> {}'.format(person, interest['id']))
                    G.add_edge(data[person]['userInfo']['name'],
                               data[str(interest['id'])]['userInfo']['name'])
                    node_size_list[data[str(interest['id'])]['userInfo']['name']] += 1
        nx.draw(G,
                pos