当前位置 主页 > 网站技术 > 代码类 >

    Java通过URL获取公众号文章生成HTML的方法

    栏目:代码类 时间:2019-12-31 12:07

    说明:通过公众号URL获取的内容,文字可以正常显示,但图片存在跨域访问的问题(微信不允许跨域访问公众号图片)。因此需要先将公众号图片下载到本地,再上传至OSS,最后把HTML中的图片地址全部替换为自己的OSS地址。

    这里就需要在后台对HTML进行DOM解析,需要用到Jsoup

    <!-- Aliyun OSS Java SDK: supplies OSSClient and ObjectMetadata, which OssUtil2 uses to upload images. -->
    <dependency>
      <groupId>com.aliyun.oss</groupId>
      <artifactId>aliyun-sdk-oss</artifactId>
      <version>2.2.3</version>
     
     </dependency>
     <!-- Jsoup: HTML parser the controller uses to select img elements and rewrite their src attribute. -->
     <dependency>
       <groupId>org.jsoup</groupId>
       <artifactId>jsoup</artifactId>
       <version>1.9.2</version>
     </dependency>
    

    controller

    package com.iueang.controller;
     
    import java.io.File;
    import java.util.HashMap;
    import java.util.Map;
     
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    import org.springframework.stereotype.Controller;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.ResponseBody;
     
    import com.iueang.util.DownLoadImg;
    import com.iueang.util.GetBody;
    import com.iueang.util.OssUtil2;
    import com.iueang.util.UrlUtil;
    /**
     * Demo controller that fetches an article page by URL and returns its HTML with every
     * lazily loaded image ({@code img[data-src]}) re-hosted on the project's own OSS bucket.
     */
    @Controller
    public class TestUrl {

        /** Public base URL under which uploaded images are reachable. */
        private static final String OSS_IMAGE_BASE_URL = "http://yueang2.oss-cn-qingdao.aliyuncs.com/testimg/";

        /** Object-key prefix (folder) used for uploads; mirrors the path part of the base URL. */
        private static final String OSS_IMAGE_KEY_PREFIX = "testimg/";

        /**
         * Local directory the downloaded image is read from.
         * NOTE(review): presumably the directory DownLoadImg writes into - confirm.
         */
        private static final String LOCAL_IMAGE_DIR = "D:\\m2\\";

        /**
         * Pattern used to cut the document out of the fetched text.
         * NOTE(review): '.' does not match line terminators unless DOTALL is enabled, and the
         * opening tag must be exactly "&lt;html&gt;" - confirm that UrlUtil.getAccess returns
         * the page in a form this pattern can match.
         */
        private static final String HTML_CONTENT_REGEX = "<html>(.*?)</html>";

        /**
         * Serves the demo page.
         *
         * @return the view name of the demo page
         */
        @RequestMapping("tohtml")
        public String tohtml() {
            return "html/index.html";
        }

        /**
         * Fetches the page at {@code url}, mirrors each {@code img[data-src]} image to OSS and
         * sets the element's {@code src} to the mirrored address.
         *
         * <p>An image whose download or upload fails keeps its original attributes, so the
         * returned HTML never points at an OSS object that was not actually stored.
         *
         * @param url address of the article to fetch
         * @return a map with the single key {@code resultHtml} holding the rewritten document
         */
        @RequestMapping("getHtml")
        @ResponseBody
        public Map<String, String> getHtml(String url) {
            String html = UrlUtil.getAccess(url);
            // Treat a failed fetch as an empty document instead of passing null downstream.
            String newsBody = html == null ? null : GetBody.getSubUtilSimple(html, HTML_CONTENT_REGEX);
            if (newsBody == null) {
                newsBody = "";
            }

            Document doc = Jsoup.parse(newsBody);
            Elements images = doc.select("img[data-src]");
            for (Element image : images) {
                String ossUrl = mirrorImageToOss(image.attr("data-src"));
                if (ossUrl != null) {
                    image.attr("src", ossUrl);
                }
            }

            Map<String, String> result = new HashMap<String, String>();
            result.put("resultHtml", doc.toString());
            return result;
        }

        /**
         * Downloads one image, uploads it to OSS and removes the local copy after a successful upload.
         *
         * @param imgUrl original image address taken from the {@code data-src} attribute
         * @return the public OSS URL of the mirrored image, or {@code null} when the download
         *         produced no file name or the upload failed
         */
        private String mirrorImageToOss(String imgUrl) {
            String filename = DownLoadImg.downloadPicture(imgUrl);
            if (filename == null || filename.isEmpty()) {
                return null;
            }

            File localFile = new File(LOCAL_IMAGE_DIR + filename);
            boolean uploaded = OssUtil2.uploadFileToOss(localFile, OSS_IMAGE_KEY_PREFIX + filename);
            if (!uploaded) {
                // The local copy is kept on failure, as before.
                return null;
            }

            localFile.delete();
            return OSS_IMAGE_BASE_URL + filename;
        }
    }
    

    util工具类

    GetBody类

    package com.iueang.util;
     
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
     
    /**
     * Small regex helper for pulling a single captured fragment out of a text.
     */
    public class GetBody {

        /**
         * Returns the text captured by group 1 of the first match of {@code reg} in {@code html}.
         *
         * @param html text to search; {@code null} is treated as "nothing to match"
         * @param reg  regular expression that must contain at least one capturing group
         * @return the captured text, or an empty string when {@code html} is {@code null},
         *         nothing matches, or group 1 did not take part in the match
         * @throws java.util.regex.PatternSyntaxException if {@code reg} is not a valid pattern
         * @throws IndexOutOfBoundsException if {@code reg} has no capturing group
         */
        public static String getSubUtilSimple(String html, String reg) {
            if (html == null) {
                return "";
            }

            Matcher matcher = Pattern.compile(reg).matcher(html);
            if (!matcher.find()) {
                return "";
            }

            // group(1) is null when the group is part of an alternative that did not match.
            String captured = matcher.group(1);
            return captured == null ? "" : captured;
        }

    }
    

    OssUtil2类

    package com.iueang.util;
     
    import java.io.File;
    import java.util.HashMap;
    import java.util.Map;
     
    import com.aliyun.oss.OSSClient;
    import com.aliyun.oss.model.ObjectMetadata;
     
    /**
     * Minimal helper for uploading local files to an Aliyun OSS bucket.
     */
    public class OssUtil2 {
        // The following values are required; see the documentation referenced at the end of the article.
        static String endpoint = "http://oss-cn-qingdao.aliyuncs.com";
        static String accessKeyId = "oss获取";
        static String accessKeySecert = "oss获取";
        static String bucketName = "yueang2";

        /**
         * Uploads a single file to OSS.
         *
         * <p>The client is shut down in a {@code finally} block so it is released even when the
         * upload throws.
         *
         * @param file    the local file to upload
         * @param objName object key of the uploaded file, including folders, e.g. game/game/test.txt
         * @return {@code true} when the upload completed; {@code false} when an argument is
         *         missing, {@code file} is not an existing regular file, or the upload failed
         */
        public static boolean uploadFileToOss(File file, String objName) {
            if (file == null || !file.isFile() || objName == null || objName.isEmpty()) {
                return false;
            }

            OSSClient ossClient = null;
            try {
                ossClient = new OSSClient(endpoint, accessKeyId, accessKeySecert);
                ossClient.putObject(bucketName, objName, file, new ObjectMetadata());
                return true;
            } catch (Exception e) {
                e.printStackTrace();
                return false;
            } finally {
                if (ossClient != null) {
                    ossClient.shutdown();
                }
            }
        }
    }