Ver código fonte

爬虫兼数据库插入已就绪

詹子聪 5 anos atrás
pai
commit
495debff2d

+ 3 - 2
src/main/java/com/miekir/shibei/bean/db/GoodsBean.java

@@ -110,8 +110,9 @@ public class GoodsBean {
     /**
      * 磁力链接合集
      */
-    @OneToMany(cascade = CascadeType.ALL, fetch = FetchType.EAGER)
-    @Column(name = "magnetBeanList", nullable = true, insertable = true, updatable = true)
+    //@Column(name = "magnetBeanList", nullable = true, insertable = true, updatable = true)
+    //对应的是MagnetBean里的变量名goodsBean
+    @OneToMany(mappedBy="goodsBean", cascade = CascadeType.ALL, fetch = FetchType.EAGER)
     public List<MagnetBean> magnetBeanList = new ArrayList<MagnetBean>();
 
     /**

+ 9 - 2
src/main/java/com/miekir/shibei/bean/db/MagnetBean.java

@@ -11,6 +11,7 @@ import java.io.Serializable;
  * Description: 磁力链接
  */
 @Entity
+@Table(name = "t_magnet", schema = "eden", catalog = "")
 public class MagnetBean implements Serializable {
     @Id
     @GeneratedValue(strategy= GenerationType.AUTO)
@@ -18,10 +19,16 @@ public class MagnetBean implements Serializable {
     public long id;
 
     @Basic
-    @Column(columnDefinition = "MEDIUMTEXT", name = "magnet_text", nullable = true, insertable = false, updatable = false)
+    @Column(columnDefinition = "MEDIUMTEXT", name = "magnet_text", nullable = true, insertable = true, updatable = true)
     public String text;
 
     @Basic
-    @Column(columnDefinition = "MEDIUMTEXT", name = "magnet_url", nullable = true, insertable = false, updatable = false)
+    @Column(columnDefinition = "MEDIUMTEXT", name = "magnet_url", nullable = true, insertable = true, updatable = true)
     public String url;
+
+    // 这里自定义的外键goods_bean_id
+    @ManyToOne
+    @JoinColumn(name = "goods_bean_id")
+    public GoodsBean goodsBean;
+
 }

+ 25 - 14
src/main/java/com/miekir/shibei/tool/web/Sex8Tool.java

@@ -3,14 +3,11 @@ package com.miekir.shibei.tool.web;
 import com.miekir.shibei.bean.db.GoodsBean;
 import com.miekir.shibei.bean.db.MagnetBean;
 import com.miekir.shibei.controller.task.CrawlerService;
-import com.miekir.shibei.repository.GoodsRepository;
 import com.miekir.shibei.tool.TextUtils;
-import com.sun.org.slf4j.internal.Logger;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
-import org.springframework.beans.factory.annotation.Autowired;
 
 import java.io.BufferedReader;
 import java.io.InputStream;
@@ -21,7 +18,6 @@ import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
-import java.util.logging.Level;
 import java.util.zip.GZIPInputStream;
 
 public class Sex8Tool {
@@ -139,24 +135,37 @@ public class Sex8Tool {
             document = Jsoup.parse(content);
             if (document != null) {
                 // 解析当前页,获取每一项的详情页
-                String title = getTitle(document);
-                if (!TextUtils.isEmpty(title)) {
+                List<String> titleList = new ArrayList<String>();
+                getTitles(document, titleList);
+                if (titleList.size() > 0) {
                     Elements itemElementList = document.getElementsByClass("icn");
                     if (itemElementList != null && itemElementList.size() > 0) {
+                        int index = 0;
                         for (Element element : itemElementList) {
                             // 解析获取item对应的详情URL
                             Elements tagAElementList = element.getElementsByTag("a");
                             if (tagAElementList != null && tagAElementList.size() > 0) {
                                 String detailUrl = BASIC_URL + tagAElementList.get(0).attr("href");
                                 // 开始爬详情
-                                getDetailData(service, detailUrl, title);
+                                if (index < titleList.size()) {
+                                    getDetailData(service, detailUrl, titleList.get(index));
+                                }
                             }
+
                             // 爬完一个详情,歇一歇
                             justWait();
+                            index++;
                         }
                     }
                 }
-            }
+
+
+
+
+
+
+                }
+
         } catch (Exception e) {
             e.printStackTrace();
         }
@@ -166,13 +175,14 @@ public class Sex8Tool {
         return mCurrentPage != 0;
     }
 
-    private static String getTitle(Document document) {
-        Elements itemElementList = document.getElementsByClass("s xst");
+    private static void getTitles(Element docElement, List<String> titleList) {
+        Elements itemElementList = docElement.getElementsByClass("s xst");
         if (itemElementList != null && itemElementList.size() > 0) {
-            return itemElementList.get(0).text();
-        }
+            for (Element element : itemElementList) {
+                titleList.add(element.text());
+            }
 
-        return null;
+        }
     }
 
     /**
@@ -203,6 +213,7 @@ public class Sex8Tool {
                 }
             }
 
+            GoodsBean goodsBean = new GoodsBean();
             List<MagnetBean> magnetBeanList = new ArrayList<MagnetBean>();
             Elements magnetElementList = document.getElementsByClass("attnm");
             if (magnetElementList != null && magnetElementList.size() > 0) {
@@ -212,6 +223,7 @@ public class Sex8Tool {
                     magnetBean.text = magnetInfoElementList.get(0).text();
                     // todo 客户端查找的时候,返回去的要拼上BASIC_URL
                     magnetBean.url = magnetInfoElementList.get(0).attr("href");
+                    magnetBean.goodsBean = goodsBean;
                     magnetBeanList.add(magnetBean);
                 }
             }
@@ -226,7 +238,6 @@ public class Sex8Tool {
 
             // 保存到数据库
             if (photoList.size() > 0 && magnetBeanList.size() > 0) {
-                GoodsBean goodsBean = new GoodsBean();
                 goodsBean.title = title;
                 goodsBean.description = description;
                 goodsBean.magnetBeanList.addAll(magnetBeanList);