詹子聪 преди 5 години
родител
ревизия
981ab48929
променени са 2 файла, в които са добавени 104 реда и са изтрити 42 реда
  1. 34 25
      src/main/java/com/miekir/shibei/tool/web/WebImageTool.java
  2. 70 17
      src/main/java/com/miekir/shibei/tool/web/Sex8Tool.java

+ 34 - 25
src/main/java/com/miekir/shibei/tool/web/WebImageTool.java

@@ -1,10 +1,7 @@
 package com.miekir.shibei.tool.web;
 
 import com.miekir.shibei.tool.TextUtils;
-import sun.net.NetworkClient;
 
-import javax.imageio.ImageIO;
-import java.awt.image.BufferedImage;
 import java.io.*;
 import java.net.HttpURLConnection;
 import java.net.URL;
@@ -18,39 +15,47 @@ alias /file/images/eden/manual/;
 sudo chmod 777 -R /file/images/
 恢复默认权限
 sudo chmod 755 /file/images/*/
-public class WebImageTool {
-    // todo 客户端使用的时候,记得拼上这个
+public class FileTool {
+    // todo 客户端使用的时候,记得拼上这个(判断不以http开头的才拼)
     public static final String BASIC_SERVER_URL = "http://jianjie.life/";
-    // 不能小于100KB
-    private static final int FILE_SIZE_LIMIT = 100 * 1024;
+
     // 只要授权之后,是可以直接写这个路径的,但是由于jar包等冲突(或缺失),会导致war包发布的时候,没有真正运行,也就没有文件写进去了。
-    private static final String PATH_AUTO_FOLDER = "/file/images/eden/auto/";
-    private static final String CMD_GET_AUTO_FILE_COUNT = "ls -l " + PATH_AUTO_FOLDER + " | grep \"^-\"|wc -l";
-    private static final String IMAGE_FORMAT = ".jpg";
-    private static final String IMAGE_URL_FORMAT = "auto/%s%s";
+    //private static final String PATH_AUTO_FOLDER = "/file/images/eden/auto/";
+    //private static final String CMD_GET_AUTO_FILE_COUNT = "ls -l " + PATH_AUTO_FOLDER + " | grep \"^-\"|wc -l";
+
+    public static final String FORMAT_IMAGE = ".jpg";
+    public static final String FORMATTER_IMAGE_URL = "auto/%s%s";
+    // 图片不能小于100KB
+    public static final int FILE_IMAGE_SIZE_LIMIT = 100 * 1024;
+
+    public static final String FORMAT_TORRENT = ".torrent";
+    public static final String FORMATTER_TORRENT_URL = "torrent/%s%s";
+
+    public static final String TARGET_DIR_AUTO_IMAGES = "F:\\eden\\images\\auto";
+    public static final String TARGET_DIR_AUTO_TORRENT = "F:\\eden\\torrent\\auto";
 
-    private WebImageTool() {
+    private FileTool() {
     }
 
     /**
      * 爬虫自动保存图片
      *
-     * @param imageUrl
+     * @param fileUrl
      * @return
      */
-    public static String autoSaveImage(String imageUrl) {
-        if (TextUtils.isEmpty(imageUrl)) {
+    public static String autoSaveFile(String fileUrl, String defaultFormat, String urlFormatter, String targetDir, int sizeLimit) {
+        if (TextUtils.isEmpty(fileUrl)) {
             return null;
         }
 
-        String format = IMAGE_FORMAT;
-        int formatIndex = imageUrl.lastIndexOf(".");
-        if (formatIndex != -1 && formatIndex < imageUrl.length() - 1) {
-            format = imageUrl.substring(formatIndex);
+        String format = defaultFormat;
+        int formatIndex = fileUrl.lastIndexOf(".");
+        if (formatIndex != -1 && formatIndex < fileUrl.length() - 1) {
+            format = fileUrl.substring(formatIndex);
         }
 
         //File folder = new File(PATH_AUTO_FOLDER);
-        File folder = new File("F:\\auto");
+        File folder = new File(targetDir);
         if (!folder.exists()) {
             folder.mkdirs();
         }
@@ -74,7 +79,11 @@ public class WebImageTool {
         FileOutputStream fileOutputStream = null;
         try {
             // 为了避免403
-            HttpURLConnection connection = ((HttpURLConnection)new URL(imageUrl).openConnection());
+            HttpURLConnection connection = ((HttpURLConnection)new URL(fileUrl).openConnection());
+            // 设置连接主机超时(单位:毫秒)
+            connection.setConnectTimeout(30000);
+            // 设置从主机读取数据超时(单位:毫秒)
+            connection.setReadTimeout(30000);
             connection.addRequestProperty("User-Agent", "Mozilla/4.0");
             InputStream input;
             if (connection.getResponseCode() == 200) {
@@ -92,10 +101,10 @@ public class WebImageTool {
             while ((bytesRead = in.read(dataBuffer, 0, 1024)) != -1) {
                 fileOutputStream.write(dataBuffer, 0, bytesRead);
             }
-            if (file.length()  < FILE_SIZE_LIMIT) {
+            if (file.length()  < sizeLimit) {
                 return null;
             }
-            return String.format(IMAGE_URL_FORMAT, String.valueOf(futureFileCount), format);
+            return String.format(urlFormatter, String.valueOf(futureFileCount), format);
         } catch (IOException e) {
             // handle exception
             e.printStackTrace();
@@ -114,8 +123,8 @@ public class WebImageTool {
                 }
             }
 
-            // 去除太小的文件
-            if (file.length()  < FILE_SIZE_LIMIT) {
+            // 去除太小的文件(必须关闭了流才能删)
+            if (file.length()  < sizeLimit) {
                 file.delete();
             }
         }

+ 70 - 17
src/main/java/com/miekir/shibei/tool/web/Sex8Tool.java

@@ -29,7 +29,7 @@ public class Sex8Tool {
 
     private static boolean mIsLoading;
     // 从第二页开始爬
-    private static int mCurrentPage = 349;
+    private static int mCurrentPage = 320;
 
     /**
      * 为了防止被百度拦截(人机识别),需要按F12查看浏览器的Header,设置相关属性
@@ -202,35 +202,39 @@ public class Sex8Tool {
                 return;
             }
 
-            // 解析详情页
-            List<String> photoList = new ArrayList<String>();
-            Elements photoElementList = document.getElementsByClass("zoom");
-            if (photoElementList != null && photoElementList.size() > 0) {
-                for (Element element : photoElementList) {
-                    String imageUrl = element.attr("file");
-                    // 保存图片
-                    String photoUrl = WebImageTool.autoSaveImage(imageUrl);
-                    if (!TextUtils.isEmpty(photoUrl)) {
-                        photoList.add(photoUrl);
-                    }
-                }
-            }
-
+            // 保存torrent
             GoodsBean goodsBean = new GoodsBean();
             List<MagnetBean> magnetBeanList = new ArrayList<MagnetBean>();
             Elements magnetElementList = document.getElementsByClass("attnm");
             if (magnetElementList != null && magnetElementList.size() > 0) {
                 Elements magnetInfoElementList = magnetElementList.get(0).getElementsByTag("a");
                 if (magnetInfoElementList != null && magnetInfoElementList.size() > 0) {
+                    String url = saveTorrent(BASIC_URL + magnetInfoElementList.get(0).attr("href"));
+                    if (TextUtils.isEmpty(url)) {
+                        return;
+                    }
                     MagnetBean magnetBean = new MagnetBean();
                     magnetBean.text = magnetInfoElementList.get(0).text();
-                    // todo 客户端查找的时候,返回去的要拼上BASIC_URL,做成一个在数据库可配置的字段
-                    magnetBean.url = magnetInfoElementList.get(0).attr("href");
+                    magnetBean.url = url;
                     magnetBean.goodsBean = goodsBean;
                     magnetBeanList.add(magnetBean);
                 }
             }
 
+            // 解析详情页
+            List<String> photoList = new ArrayList<String>();
+            Elements photoElementList = document.getElementsByClass("zoom");
+            if (photoElementList != null && photoElementList.size() > 0) {
+                for (Element element : photoElementList) {
+                    String imageUrl = element.attr("file");
+                    // 保存图片
+                    String photoUrl = FileTool.autoSaveFile(imageUrl, FileTool.FORMAT_IMAGE, FileTool.FORMATTER_IMAGE_URL, FileTool.TARGET_DIR_AUTO_IMAGES, FileTool.FILE_IMAGE_SIZE_LIMIT);
+                    if (!TextUtils.isEmpty(photoUrl)) {
+                        photoList.add(photoUrl);
+                    }
+                }
+            }
+
             // 描述
             String description = "暂无描述";
             Elements descElementList = document.getElementsByClass("b_pr");
@@ -269,6 +273,55 @@ public class Sex8Tool {
         }
     }
 
+    /** Saves the torrent behind rawUrl via FileTool.autoSaveFile after getRealUrl; null when rawUrl is empty. */
+    private static String saveTorrent(String rawUrl) {
+        if (!TextUtils.isEmpty(rawUrl)) {
+            return FileTool.autoSaveFile(getRealUrl(rawUrl), FileTool.FORMAT_TORRENT,
+                    FileTool.FORMATTER_TORRENT_URL, FileTool.TARGET_DIR_AUTO_TORRENT, 0);
+        }
+        return null;
+    }
+
+    /**
+     * Resolves where a request to targetUrl ends up: the Location target on 302, targetUrl on 200.
+     * @param targetUrl absolute URL to probe
+     * @return the resolved URL, or null on any other status, a missing Location header or an error
+     */
+    private static String getRealUrl(String targetUrl) {
+        HttpURLConnection connection = null;
+        try {
+            URL url = new URL(targetUrl);
+            connection = (HttpURLConnection) url.openConnection();
+            connection = setHeader(connection);
+            connection.connect();
+
+            int status = connection.getResponseCode();
+            if (HttpURLConnection.HTTP_MOVED_TEMP != status) {
+                // Not a 302: only a 200 means targetUrl itself can be used.
+                return HttpURLConnection.HTTP_OK == status ? targetUrl : null;
+            }
+
+            String location = connection.getHeaderField("Location");
+            if (location == null || location.isEmpty()) {
+                // Fall back to the lower-case header name in case the lookup is case-sensitive.
+                location = connection.getHeaderField("location");
+            }
+            if (location == null || location.isEmpty()) {
+                return null; // a redirect without a target cannot be followed
+            }
+            // Resolve against the request URL: absolute targets pass through, host-less ones
+            // inherit protocol/host/port (getPort() is -1 when the URL names no port).
+            return new URL(url, location).toString();
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            if (connection != null) {
+                connection.disconnect();
+            }
+        }
+        return null;
+    }
+
     private static void justWait() {
         int waitMinute = new Random().nextInt(4) + 1;
         try {