Browse Source

文件名

詹子聪 5 years ago
parent
commit
eeba3e1de1

+ 35 - 36
src/main/java/com/miekir/shibei/tool/web/Sex8Tool.java

@@ -24,7 +24,8 @@ public class Sex8Tool {
     public static final String BASIC_URL = "http://sohumayun.space/";
     private static final String PAGE_URL_FORMAT = BASIC_URL + "forum-96-%s.html";
 
-    private Sex8Tool(){}
+    private Sex8Tool() {
+    }
 
     private static boolean mIsLoading;
     // 从第二页开始爬
@@ -32,6 +33,7 @@ public class Sex8Tool {
 
     /**
      * 为了防止被百度拦截(人机识别),需要按F12查看浏览器的Header,设置相关属性
+     *
      * @param connection
      * @return
      * @throws ProtocolException
@@ -63,6 +65,7 @@ public class Sex8Tool {
 
     /**
      * 获取网页源码
+     *
      * @return
      */
     private static String getHtmlString(String targetUrl) {
@@ -89,7 +92,7 @@ public class Sex8Tool {
                 }
 
                 return getHtmlString(url302);
-            } else  if (200 == connection.getResponseCode()) {
+            } else if (200 == connection.getResponseCode()) {
                 InputStream inputStream = null;
                 if (connection.getContentEncoding() != null && !connection.getContentEncoding().equals("")) {
                     String encode = connection.getContentEncoding().toLowerCase();
@@ -123,6 +126,7 @@ public class Sex8Tool {
 
     /**
      * 开始爬目录
+     *
      * @return
      */
     public static boolean startGettingData(CrawlerService service) {
@@ -135,36 +139,24 @@ public class Sex8Tool {
             document = Jsoup.parse(content);
             if (document != null) {
                 // 解析当前页,获取每一项的详情页
-                List<String> titleList = new ArrayList<String>();
-                getTitles(document, titleList);
-                if (titleList.size() > 0) {
-                    Elements itemElementList = document.getElementsByClass("icn");
-                    if (itemElementList != null && itemElementList.size() > 0) {
-                        int index = 0;
-                        for (Element element : itemElementList) {
-                            // 解析获取item对应的详情URL
-                            Elements tagAElementList = element.getElementsByTag("a");
-                            if (tagAElementList != null && tagAElementList.size() > 0) {
-                                String detailUrl = BASIC_URL + tagAElementList.get(0).attr("href");
-                                // 开始爬详情
-                                if (index < titleList.size()) {
-                                    getDetailData(service, detailUrl, titleList.get(index));
-                                }
-                            }
-
-                            // 爬完一个详情,歇一歇
-                            justWait();
-                            index++;
+                Elements itemElementList = document.getElementsByClass("icn");
+                if (itemElementList != null && itemElementList.size() > 0) {
+                    int index = 0;
+                    for (Element element : itemElementList) {
+                        // 解析获取item对应的详情URL
+                        Elements tagAElementList = element.getElementsByTag("a");
+                        if (tagAElementList != null && tagAElementList.size() > 0) {
+                            String detailUrl = BASIC_URL + tagAElementList.get(0).attr("href");
+                            // 开始爬详情
+                            getDetailData(service, detailUrl);
                         }
-                    }
-                }
-
-
-
-
-
 
+                        // 爬完一个详情,歇一歇
+                        justWait();
+                        index++;
+                    }
                 }
+            }
 
         } catch (Exception e) {
             e.printStackTrace();
@@ -175,21 +167,28 @@ public class Sex8Tool {
         return mCurrentPage != 0;
     }
 
-    private static void getTitles(Element docElement, List<String> titleList) {
-        Elements itemElementList = docElement.getElementsByClass("s xst");
+    private static String getTitles(Element docElement) {
+        Elements itemElementList = docElement.getElementsByTag("title");
         if (itemElementList != null && itemElementList.size() > 0) {
-            for (Element element : itemElementList) {
-                titleList.add(element.text());
+            String titleExtra = itemElementList.get(0).text();
+            if (!TextUtils.isEmpty(titleExtra) && titleExtra.contains("杏吧")) {
+                int index = titleExtra.indexOf("杏吧");
+                if (index > 1) {
+                    index = index - 1;
+                }
+                return titleExtra.substring(0, index);
             }
-
         }
+
+        return null;
     }
 
     /**
      * 开始爬详情
+     *
      * @return
      */
-    public static void getDetailData(CrawlerService service, String detailUrl, String title) {
+    public static void getDetailData(CrawlerService service, String detailUrl) {
         Document document;
         try {
             // 获取到当前页的内容
@@ -238,7 +237,7 @@ public class Sex8Tool {
 
             // 保存到数据库
             if (photoList.size() > 0 && magnetBeanList.size() > 0) {
-                goodsBean.title = title;
+                goodsBean.title = getTitles(document);
                 goodsBean.description = description;
                 goodsBean.magnetBeanList.addAll(magnetBeanList);
                 goodsBean.contentImageUrlList.addAll(photoList);

+ 1 - 1
src/main/java/com/miekir/shibei/tool/web/WebImageTool.java

@@ -25,7 +25,7 @@ public class WebImageTool {
     private static final String PATH_AUTO_FOLDER = "/file/images/eden/auto/";
     private static final String CMD_GET_AUTO_FILE_COUNT = "ls -l " + PATH_AUTO_FOLDER + " | grep \"^-\"|wc -l";
     private static final String IMAGE_FORMAT = ".jpg";
-    private static final String IMAGE_URL_FORMAT = "http://jianjie.life/auto/%s.%s";
+    private static final String IMAGE_URL_FORMAT = "http://jianjie.life/auto/%s%s";
 
     private WebImageTool() {
     }