当前位置 博文首页 > python抓取京东商城手机列表url实例代码

    python抓取京东商城手机列表url实例代码

    作者:admin 时间:2021-02-20 06:40

    代码如下:

    #-*- coding: UTF-8 -*-
    '''
    Created on 2013-12-5

    @author: good-temper
    '''

    import urllib2
    import bs4
    import time

    def getPage(urlStr):
        '''
        Fetch the resource at ``urlStr`` and return its body.

        The response object is closed explicitly once the body has been read
        (or if reading fails) instead of being left open until it is garbage
        collected.

        :param urlStr: URL passed straight to ``urllib2.urlopen``.
        :return: whatever ``read()`` on the response returns.

        Nothing is caught here: any error raised while opening or reading the
        URL propagates to the caller.
        '''
        response = urllib2.urlopen(urlStr)
        try:
            return response.read()
        finally:
            # Runs on success and on failure, so the connection never leaks.
            response.close()

    def getNextPageUrl(currPageNum):
        '''
        Build the listing URL for page ``currPageNum + 1`` and probe it.

        The page is downloaded with ``getPage`` and parsed; the URL is
        returned when the page contains no ``<span class="next-disabled">``
        element, otherwise an empty string is returned.

        NOTE(review): a page that carries the "next-disabled" marker is itself
        never returned, so callers will not visit it - confirm this is the
        intended stop condition.
        '''
        # URL pattern:
        # http://list.jd.com/9987-653-655-0-0-0-0-0-0-0-1-1-<page number>-1-1-72-4137-33.html
        prefix = u'http://list.jd.com/9987-653-655-0-0-0-0-0-0-0-1-1-'
        suffix = '-1-1-72-4137-33.html'
        url = prefix + str(currPageNum + 1) + suffix

        # Is there a next page? A "next-disabled" span means there is not.
        markup = getPage(url)
        parsed = bs4.BeautifulSoup(markup)
        disabledMarkers = parsed.findAll('span', {'class': 'next-disabled'})
        if disabledMarkers:
            return ''
        return url

    def analyzeList():
        '''
        Walk the listing pages and collect the product links found on them.

        Beginning with ``getNextPageUrl(0)``, every non-empty URL it returns is
        downloaded and parsed. For each ``<div class="p-name">`` on the page
        the ``href`` of its first ``<a>`` is collected. The page counter is
        printed after each page as a progress indicator. The loop ends when
        ``getNextPageUrl`` returns an empty string.

        :return: list of the collected ``href`` values, in the order found.
        '''
        pageNum = 0
        hrefs = []
        url = getNextPageUrl(pageNum)
        while url != '':
            # NOTE(review): getNextPageUrl has already downloaded this same URL
            # once; it is fetched again here because only the URL is returned.
            soup = bs4.BeautifulSoup(getPage(url))
            for nameDiv in soup.findAll('div', {'class': 'p-name'}):
                # Search the tag directly; serialising it and building a second
                # soup just to locate the anchor is wasted work.
                anchor = nameDiv.find('a')
                href = anchor.get('href') if anchor is not None else None
                if href is None:
                    # Entry without a usable link: skip it rather than abort
                    # the whole crawl with a TypeError/KeyError.
                    continue
                hrefs.append(href)

            pageNum = pageNum + 1
            print(pageNum)
            url = getNextPageUrl(pageNum)
        return hrefs

    def analyzeContent(url):
        '''
        Placeholder: ``url`` is not used and an empty string is always returned.
        '''
        return ""

    def writeToFile(list, path):
        '''
        Append every element of ``list`` to the file at ``path``, one per line.

        The file is opened in append mode, so existing content is kept and
        repeated calls accumulate lines. Each element is written followed by
        a newline character.

        :param list: iterable of strings to write (the name shadows the
            built-in but is kept because it is part of the signature).
        :param path: path of the file to append to.
        '''
        # ``with`` closes the handle even when a write raises part-way
        # through, which the previous open()/close() pair did not guarantee.
        with open(path, 'a') as f:
            for elem in list:
                f.write(elem + '\n')

    if __name__ == '__main__':
        # Script entry point: collect the links, report how many were found,
        # then append them to the output file.
        phoneUrls = analyzeList()
        total = len(phoneUrls)
        print('共抓取'+str(total)+'条\n')

        outputPath = u'E:\\jd_phone_list.dat'
        writeToFile(phoneUrls, outputPath)

    js