|
|
@@ -0,0 +1,245 @@
|
|
|
+package com.miekir.shibei.tool;
|
|
|
+
|
|
|
import com.miekir.shibei.bean.WeatherBean;
import com.miekir.shibei.bean.YijiBean;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.helper.HttpConnection;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.ProtocolException;
import java.net.URL;
import java.net.URLEncoder;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Locale;
import java.util.zip.GZIPInputStream;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;
|
|
|
+public class WebTool {
|
|
|
+ private WebTool(){}
|
|
|
+
|
|
|
+
|
|
|
+ private static HttpURLConnection setHeader(HttpURLConnection connection) throws ProtocolException {
|
|
|
+ connection.setRequestMethod("GET");
|
|
|
+
|
|
|
+ connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows 7)");
|
|
|
+ connection.setRequestProperty("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-powerpoint, application/vnd.ms-excel, application/msword, */*");
|
|
|
+ connection.setRequestProperty("Accept-Language", "zh-cn");
|
|
|
+ connection.setRequestProperty("UA-CPU", "x86");
|
|
|
+ //为什么没有deflate呢
|
|
|
+ connection.setRequestProperty("Accept-Encoding", "gzip");
|
|
|
+ connection.setRequestProperty("Content-type", "text/html");
|
|
|
+ //keep-Alive,有什么用呢,你不是在访问网站,你是在采集。嘿嘿。减轻别人的压力,也是减轻自己。
|
|
|
+ connection.setRequestProperty("Connection", "close");
|
|
|
+ //不要用cache,用了也没有什么用,因为我们不会经常对一个链接频繁访问。(针对程序)
|
|
|
+ connection.setUseCaches(false);
|
|
|
+ connection.setConnectTimeout(6 * 1000);
|
|
|
+ connection.setReadTimeout(6 * 1000);
|
|
|
+ connection.setDoOutput(true);
|
|
|
+ connection.setDoInput(true);
|
|
|
+ connection.setRequestProperty("Charset", "utf-8");
|
|
|
+ return connection;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取网页源码
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ private static String getHtmlString(String targetUrl) {
|
|
|
+ String content = null;
|
|
|
+
|
|
|
+ HttpURLConnection connection = null;
|
|
|
+ try {
|
|
|
+ URL url = new URL(targetUrl);
|
|
|
+ connection = (HttpURLConnection) url.openConnection();
|
|
|
+ connection = setHeader(connection);
|
|
|
+ connection.connect();
|
|
|
+
|
|
|
+ // 得到重定向之后的网址,重新请求
|
|
|
+ if (302 == connection.getResponseCode()) {
|
|
|
+ String url302 = connection.getHeaderField("Location");
|
|
|
+ if (url302 == null || url302.equals("")) {
|
|
|
+ // 临时重定向和永久重定向location的大小写有区分
|
|
|
+ url302 = connection.getHeaderField("location");
|
|
|
+ }
|
|
|
+ //某些时候会省略host,只返回后面的path,所以需要补全url
|
|
|
+ if (!(url302.startsWith("http://") || url302.startsWith("https://"))) {
|
|
|
+ URL originalUrl = new URL(targetUrl);
|
|
|
+ url302 = originalUrl.getProtocol() + "://" + originalUrl.getHost() + ":" + originalUrl.getPort() + url302;
|
|
|
+ }
|
|
|
+
|
|
|
+ return getHtmlString(url302);
|
|
|
+ } else if (200 == connection.getResponseCode()) {
|
|
|
+ InputStream inputStream = null;
|
|
|
+ if (connection.getContentEncoding() != null && !connection.getContentEncoding().equals("")) {
|
|
|
+ String encode = connection.getContentEncoding().toLowerCase();
|
|
|
+ if (encode != null && !encode.equals("") && encode.indexOf("gzip") >= 0) {
|
|
|
+ inputStream = new GZIPInputStream(connection.getInputStream());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (null == inputStream) {
|
|
|
+ inputStream = connection.getInputStream();
|
|
|
+ }
|
|
|
+
|
|
|
+ BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "utf-8"));
|
|
|
+ StringBuilder builder = new StringBuilder();
|
|
|
+ String line = null;
|
|
|
+ while ((line = reader.readLine()) != null) {
|
|
|
+ builder.append(line).append("\n");
|
|
|
+ }
|
|
|
+ content = builder.toString();
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ } finally {
|
|
|
+ if (connection != null) {
|
|
|
+ connection.disconnect();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return content;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static WeatherBean getWeatherInfo() {
|
|
|
+ WeatherBean weatherBean = new WeatherBean();
|
|
|
+ Document document;
|
|
|
+ //百度搜索结果每页大小为10,pn参数代表的不是页数,而是返回结果的开始数
|
|
|
+ //如获取第一页则pn=0,第二页则pn=10,第三页则pn=20,以此类推,抽象出模式:(page-1)*pageSize
|
|
|
+ int page = 1;
|
|
|
+ int pageSize = 10;
|
|
|
+ String keyword = "天气";
|
|
|
+ String url = "http://www.baidu.com/s?pn="+(page-1)*pageSize+"&wd="+keyword;
|
|
|
+ try {
|
|
|
+ // 直接使用Jsoup爬会被百度拦截,需要安全验证,所以用普通方法爬
|
|
|
+ String content = getHtmlString(url);
|
|
|
+
|
|
|
+ document = Jsoup.parse(content);
|
|
|
+ if (document == null) {
|
|
|
+ return weatherBean;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 当前温度
|
|
|
+ Elements currentTempElementList = document.getElementsByClass("op_weather4_twoicon_shishi_title");
|
|
|
+ if (currentTempElementList != null && currentTempElementList.size() > 0) {
|
|
|
+ Element element = currentTempElementList.get(0);
|
|
|
+ weatherBean.temperatureNow = element.text();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 温度范围
|
|
|
+ Elements tempRangeElementList = document.getElementsByClass("op_weather4_twoicon_temp");
|
|
|
+ if (tempRangeElementList != null && tempRangeElementList.size() > 0) {
|
|
|
+ Element element = tempRangeElementList.get(0);
|
|
|
+ weatherBean.temperatureRange = element.text();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 风
|
|
|
+ Elements windElementList = document.getElementsByClass("op_weather4_twoicon_wind");
|
|
|
+ if (windElementList != null && windElementList.size() > 0) {
|
|
|
+ Element element = windElementList.get(0);
|
|
|
+ weatherBean.wind = element.text();
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ // 空气质量
|
|
|
+ Elements airElementList = document.getElementsByClass("op_weather4_twoicon_aqi_text_today");
|
|
|
+ if (airElementList != null && airElementList.size() > 0) {
|
|
|
+ Element element = airElementList.get(0);
|
|
|
+ weatherBean.air = element.text();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 描述
|
|
|
+ Elements descElementList = document.getElementsByClass("op_weather4_twoicon_weath");
|
|
|
+ if (descElementList != null && descElementList.size() > 0) {
|
|
|
+ Element element = descElementList.get(0);
|
|
|
+ String text = element.text();
|
|
|
+ if (text != null) {
|
|
|
+ weatherBean.desc = text.replaceAll(" ", "");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ return weatherBean;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static YijiBean getYijiInfo() {
|
|
|
+ YijiBean yijiBean = new YijiBean();
|
|
|
+ Document document;
|
|
|
+ String url = "https://mnongli.911cha.com/";
|
|
|
+ try {
|
|
|
+ // 直接使用Jsoup爬会被百度拦截,需要安全验证,所以用普通方法爬
|
|
|
+ String content = getHtmlString(url);
|
|
|
+ document = Jsoup.parse(content);
|
|
|
+ if (document == null) {
|
|
|
+ return yijiBean;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 当前公历年月日
|
|
|
+ Elements newDateElementList = document.getElementsByTag("h2");
|
|
|
+ if (newDateElementList != null && newDateElementList.size() > 0) {
|
|
|
+ Element element = newDateElementList.get(0);
|
|
|
+ yijiBean.newDate = element.text();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 当前农历年月日
|
|
|
+ Elements oldDateElementList = document.getElementsByTag("p");
|
|
|
+ if (oldDateElementList != null && oldDateElementList.size() > 0) {
|
|
|
+ Element element = oldDateElementList.get(0);
|
|
|
+ yijiBean.oldDate = element.text();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 天干地支
|
|
|
+ Elements contentElementList = document.getElementsByClass("hl_riliTop");
|
|
|
+ if (contentElementList != null && contentElementList.size() > 0) {
|
|
|
+ Elements hsElementList = contentElementList.get(0).getElementsByTag("div");
|
|
|
+ if (hsElementList != null && hsElementList.size() > 1) {
|
|
|
+ yijiBean.hsDate = hsElementList.get(1).text();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Elements yijiElementList = document.getElementsByClass("hl_riliCon_rItem");
|
|
|
+ if (yijiElementList != null && yijiElementList.size() >= 2) {
|
|
|
+ // 宜
|
|
|
+ Element yiElement = yijiElementList.get(0);
|
|
|
+ StringBuilder yiBuilder = new StringBuilder();
|
|
|
+ Elements yiElementItemList = yiElement.getElementsByTag("a");
|
|
|
+ if (yiElementItemList != null && yiElementItemList.size() > 0) {
|
|
|
+ for (Element element : yiElementItemList) {
|
|
|
+ yiBuilder.append(element.text()).append("、");
|
|
|
+ }
|
|
|
+ yiBuilder.deleteCharAt(yiBuilder.length()-1);
|
|
|
+ yijiBean.yi = yiBuilder.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 忌
|
|
|
+ Element jiElement = yijiElementList.get(1);
|
|
|
+ StringBuilder jiBuilder = new StringBuilder();
|
|
|
+ Elements jiElementItemList = jiElement.getElementsByTag("a");
|
|
|
+ if (jiElementItemList != null && jiElementItemList.size() > 0) {
|
|
|
+ for (Element element : jiElementItemList) {
|
|
|
+ jiBuilder.append(element.text()).append("、");
|
|
|
+ }
|
|
|
+ jiBuilder.deleteCharAt(jiBuilder.length()-1);
|
|
|
+ yijiBean.ji = jiBuilder.toString();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ return yijiBean;
|
|
|
+ }
|
|
|
+}
|