当前位置 博文首页 > LY的博客:python BS4 收集上港队球员赛季表现

    LY的博客:python BS4 收集上港队球员赛季表现

    作者:LY 时间:2021-08-09 22:09

    from bs4 import BeautifulSoup
    import requests
    import pandas as pd
    
    # Team page and the AJAX endpoint that is actually requested below.
    url='http://zq.win007.com/cn/team/PlayerData/7642.html'
    url2='http://zq.win007.com/cn/team/PlayerDataAjax.aspx?SclassID=60&matchSeason=2018&teamID=7642'

    # Browser-like User-Agent sent with the request.
    header={'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}

    # Without a timeout requests may block forever on an unresponsive server;
    # raise_for_status() stops the script on an HTTP error instead of parsing
    # an error page into empty result lists.
    response = requests.get(url2, headers=header, timeout=10)
    response.raise_for_status()
    html = response.text

    # Parse the fetched HTML with BeautifulSoup (lxml parser).
    soup = BeautifulSoup(html, 'lxml')
    
    
    # Result containers filled in by the rest of the script.
    name = []
    nation = []
    age = []
    goal = []
    yellow = []
    red = []

    # Collect player names: every left-aligned <div> is treated as a name
    # cell; newlines and spaces are stripped from its text.
    for name_cell in soup.find_all('div', align="left"):
        player_name = name_cell.get_text().replace('\n', '').replace(' ', '')
        print(player_name)
        name.append(player_name)
    
    # Debug output: dump all center-aligned <div>s, their count, and the
    # subset from index 9 onward (the part the script goes on to use).
    center_cells = soup.find_all('div', align="center")
    print(center_cells)
    print(len(center_cells))
    print(center_cells[9:])

    # Print the cleaned text of each cell from index 9 onward.
    for center_cell in center_cells[9:]:
        cell_text = center_cell.get_text().replace('\n', '').replace(' ', '')
        print(cell_text)
    
    
    # Collect player attributes: cleaned text of every center-aligned <div>
    # from index 9 onward, flattened into a single list.
    la = [
        attr_cell.get_text().replace('\n', '').replace(' ', '')
        for attr_cell in soup.find_all('div', align="center")[9:]
    ]
    for attr_text in la:
        print(attr_text)
    
    
    print(len(la))

    print('#############')
    # `la` is a flat list of cell texts that is read here as rows of 8 cells;
    # each column is extracted with a stride of 8.
    # A stride slice (la[k::8]) never indexes past the end of the list, unlike
    # range(k, len(la) + k, 8), which yields an out-of-range index (IndexError)
    # whenever len(la) is not a multiple of 8.
    print(la[0::8])

    nation = la[1::8]
    print(nation)

    goal = la[2::8]
    print(goal)

    print(la[4::8])

    # NOTE(review): column 5 is stored as `red` and column 6 as `yellow`;
    # the column order cannot be confirmed from this script alone, so
    # verify against the actual page layout.
    red = la[5::8]
    print(red)
    yellow = la[6::8]
    print(yellow)
    
    
    
    
    print('#############')

    # Assemble the per-player table: one row per player name, one column per
    # scraped attribute list.
    columns = {
        'nation': nation,
        'goal': goal,
        'yellow': yellow,
        'red': red,
    }
    df = pd.DataFrame(columns, index=name)
    print(df)

    ?

    cs