当前位置 博文首页 > 蜗牛为梦想而生H:获取淘宝/京东菜单,输出到txt文件
Maven依赖
<!-- jsoup 是一款 Java 的 HTML 解析器 -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>
package com.sm.cn.test; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.nio.charset.StandardCharsets; /** * 下载商城菜单,输出到txt,淘宝为例子 */ public class Test01 { public static void main(String[] args) throws IOException { f1(); f2(); f3(); } /** * 淘宝 * @throws IOException */ public static void f1() throws IOException { StringBuffer stringBuffer = new StringBuffer(); StringBuffer append = null; /*获取当前网页的document对象*/ Document document = Jsoup.connect("https://www.taobao.com/").get(); Elements elementsByClass = document.getElementsByClass("service-bd"); for (Element byClass : elementsByClass) { Elements li = byClass.getElementsByTag("li"); for (Element element : li) { Elements a = element.getElementsByTag("a"); /*获取a标签中文件并换行输出*/ //将可变字符串变为固定长度的字符串,方便下面的转码; append = stringBuffer.append(a.text() + "\r\n"); } } String string = append.toString(); /*使用字节流输出*/ //因为中文可能会乱码,这里使用了转码,转成UTF-8; byte[] bytes = string.getBytes("utf-8"); FileOutputStream out = new FileOutputStream("C://Users//Desktop//淘宝菜单目录.txt"); out.write(bytes); out.close(); } /** * 京东 * @throws IOException */ public static void f2() throws IOException { StringBuffer stringBuffer = new StringBuffer(); StringBuffer append = null; /*获取当前网页的document对象*/ Document document = Jsoup.connect("https://www.jd.com/").get(); Elements menu = document.getElementsByClass("cate_menu"); for (int i = 0; i < menu.size(); i++) { Element element = menu.get(i); Elements li = element.getElementsByTag("li"); for (int j = 0; j < li.size(); j++) { Element element1 = li.get(j); Elements a = element1.getElementsByTag("a"); /*获取a标签中文件并换行输出*/ //将可变字符串变为固定长度的字符串,方便下面的转码; append = stringBuffer.append(a.text() + "\r\n"); } } String string = append.toString(); /*使用字符流输出*/ FileWriter fileWriter = new FileWriter("C://Users//Desktop//京东商城目录.txt"); fileWriter.write(string); 
fileWriter.close(); } /** * 亚马逊 * @throws IOException */ public static void f3() throws IOException { StringBuffer stringBuffer = new StringBuffer(); StringBuffer append = null; Document document = Jsoup.connect("https://www.amazon.cn/").get(); Element searchDropdownBox = document.getElementById("searchDropdownBox"); Elements option = searchDropdownBox.getElementsByTag("option"); for (Element element : option) { append = stringBuffer.append(element.text() + "\r\n"); } FileOutputStream stream = new FileOutputStream("C://Users//Desktop//亚马逊目录.txt"); byte[] bytes = append.toString().getBytes(StandardCharsets.UTF_8); stream.write(bytes); stream.close(); } }
下一篇:没有了