当前位置 博文首页 > 蜗牛为梦想而生H:获取淘宝/京东菜单,输出到txt文件

    蜗牛为梦想而生H:获取淘宝/京东菜单,输出到txt文件

    作者:蜗牛为梦想而生H 时间:2021-09-07 19:21

    Maven依赖

    <!--jsoup 是一款 Java 的HTML 解析器-->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>

    Java代码:

    package com.sm.cn.test;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    import java.io.FileOutputStream;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    
    /**
     * Downloads the category menus of several shopping sites and writes each
     * menu to a text file, one entry per line (Taobao, JD.com and Amazon China).
     *
     * <p>Every output file is written as UTF-8 and every entry is terminated
     * with {@code "\r\n"}. When a page does not contain the expected elements,
     * the corresponding file is still created but left empty.
     */
    public class Test01 {
        /** Terminator appended after every menu entry. */
        private static final String LINE_SEPARATOR = "\r\n";

        /**
         * Runs the three downloads in order: Taobao, JD.com, Amazon China.
         *
         * @param args unused
         * @throws IOException if a page cannot be fetched or a file cannot be written
         */
        public static void main(String[] args) throws IOException {
            f1();
            f2();
            f3();
        }

        /**
         * Taobao: writes the link text of every {@code <li>} found inside
         * elements with class {@code service-bd}.
         *
         * @throws IOException if the page cannot be fetched or the file cannot be written
         */
        public static void f1() throws IOException {
            /* Fetch and parse the home page. */
            Document document = Jsoup.connect("https://www.taobao.com/").get();
            String menu = collectListLinkText(document.getElementsByClass("service-bd"));
            writeUtf8("C://Users//Desktop//淘宝菜单目录.txt", menu);
        }

        /**
         * JD.com: writes the link text of every {@code <li>} found inside
         * elements with class {@code cate_menu}.
         *
         * @throws IOException if the page cannot be fetched or the file cannot be written
         */
        public static void f2() throws IOException {
            /* Fetch and parse the home page. */
            Document document = Jsoup.connect("https://www.jd.com/").get();
            String menu = collectListLinkText(document.getElementsByClass("cate_menu"));
            // Written as UTF-8 like the other two files; a plain FileWriter would
            // use the platform default charset and could garble the Chinese text.
            writeUtf8("C://Users//Desktop//京东商城目录.txt", menu);
        }

        /**
         * Amazon China: writes the text of every {@code <option>} inside the
         * element whose id is {@code searchDropdownBox}.
         *
         * @throws IOException if the page cannot be fetched or the file cannot be written
         */
        public static void f3() throws IOException {
            Document document = Jsoup.connect("https://www.amazon.cn/").get();
            StringBuilder menu = new StringBuilder();
            Element searchDropdownBox = document.getElementById("searchDropdownBox");
            // getElementById returns null when the id is absent from the page;
            // treat that as an empty menu instead of failing with a NullPointerException.
            if (searchDropdownBox != null) {
                for (Element option : searchDropdownBox.getElementsByTag("option")) {
                    menu.append(option.text()).append(LINE_SEPARATOR);
                }
            }
            writeUtf8("C://Users//Desktop//亚马逊目录.txt", menu.toString());
        }

        /**
         * Builds one line per {@code <li>} descendant of the given containers,
         * each line holding the combined text of the {@code <a>} elements in
         * that list item.
         *
         * @param containers elements whose {@code <li>} descendants are scanned
         * @return the collected lines, or an empty string when nothing matched
         */
        private static String collectListLinkText(Elements containers) {
            StringBuilder menu = new StringBuilder();
            for (Element container : containers) {
                for (Element item : container.getElementsByTag("li")) {
                    menu.append(item.getElementsByTag("a").text()).append(LINE_SEPARATOR);
                }
            }
            return menu.toString();
        }

        /**
         * Writes {@code content} to {@code path} encoded as UTF-8, replacing any
         * existing file content.
         *
         * @param path    destination file path
         * @param content text to write
         * @throws IOException if the file cannot be opened or written
         */
        private static void writeUtf8(String path, String content) throws IOException {
            // try-with-resources closes the stream even when write() throws.
            try (FileOutputStream out = new FileOutputStream(path)) {
                out.write(content.getBytes(StandardCharsets.UTF_8));
            }
        }
    }
    

    ?

    cs