本文實例講述了java實現爬取往期所有雙色球開獎結果功能。分享給大家供大家參考,具體如下:
夢想還是要有的,萬一實現了呢?我相信經常買雙色球的朋友和我都會有一個疑問,就是往期雙色球的開獎結果是什麼?我鍾意的這一注雙色球在往期是否開過一等獎,如果開過的話,基本上可以放棄這一注了,因為歷史上應該沒有出現過兩期雙色球開獎完全一致的吧?那麼往期的開獎結果是什麼呢?我自己用Java寫了一個簡易的類,爬取所有雙色球開獎結果,本來想開發安卓版本的,由於UI等需要時間準備,有緣再開發吧。
完整代碼如下:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Command-line scraper that downloads every result page of the "double colour ball"
 * lottery history from kaijiang.zhcw.com and writes the drawn numbers to a text file,
 * one draw per line, numbers separated by single spaces.
 *
 * <p>The program is single-threaded; results are accumulated in a static buffer that is
 * reset at the start of {@link #main(String[])}.
 */
public class AllBalls {

    /** Result pages are addressed as {@code BASE_URL_PREFIX + pageNumber + BASE_URL_SUFFIX}. */
    private static final String BASE_URL_PREFIX = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_";
    private static final String BASE_URL_SUFFIX = ".html";

    /** Name of the output file, created in the working directory. */
    private static final String OUTPUT_FILE_NAME = "雙色球.txt";

    /** Pause between two page requests, to keep the load on the remote site low. */
    private static final long REQUEST_INTERVAL_MILLIS = 1200L;

    /** Connect and read timeout for every request. */
    private static final int TIMEOUT_MILLIS = 6 * 1000;

    /**
     * Matches the page number in front of the "last page" link text. Both the Simplified
     * (末页) and the Traditional (末頁) spelling are accepted.
     * NOTE(review): the live site presumably serves the Simplified form - confirm.
     */
    private static final Pattern LAST_PAGE_PATTERN = Pattern.compile("(\\d+)\">末[页頁]");

    /** Matches the table cell that holds the balls of one draw. */
    private static final Pattern TERM_PATTERN = Pattern.compile(
            "<td align=\"center\" style=\"padding-left:10px;\">[\\s\\S]+?</em></td>");

    /** Matches one ball number sitting between two tags, e.g. {@code >07<}. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile(">(\\d+)<");

    /** Accumulates the text that is finally written to the output file. */
    private static final StringBuilder results = new StringBuilder();

    /**
     * Fetches all result pages, extracts the drawn numbers and writes them to
     * {@value #OUTPUT_FILE_NAME}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("正在獲取...");
        results.setLength(0);

        // The first page doubles as the source of the total page count.
        String firstPage = getHtmlString(pageUrl(1));
        int pageCount = getPageCount(firstPage);

        if (pageCount > 0) {
            for (int page = 1; page <= pageCount; page++) {
                String pageContent;
                if (page == 1) {
                    // Already downloaded above; no need to request it a second time.
                    pageContent = firstPage;
                } else {
                    if (!pauseBetweenRequests()) {
                        System.err.println("Interrupted before page " + page
                                + "; writing the results collected so far.");
                        break;
                    }
                    pageContent = getHtmlString(pageUrl(page));
                }

                if (pageContent != null && !pageContent.isEmpty()) {
                    getOneTermContent(pageContent);
                } else {
                    System.out.println("第" + page + "頁丟失");
                }
            }
            writeResults(new File(OUTPUT_FILE_NAME));
        } else {
            System.out.println("結果頁數為0");
        }
        System.out.println("完成!");
    }

    /** Builds the URL of the given 1-based result page. */
    private static String pageUrl(int page) {
        return BASE_URL_PREFIX + page + BASE_URL_SUFFIX;
    }

    /**
     * Sleeps for {@link #REQUEST_INTERVAL_MILLIS}.
     *
     * @return {@code false} if the thread was interrupted while sleeping (the interrupt
     *         flag is restored), {@code true} otherwise
     */
    private static boolean pauseBetweenRequests() {
        try {
            Thread.sleep(REQUEST_INTERVAL_MILLIS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** Writes the accumulated results to {@code file}, replacing any existing content. */
    private static void writeResults(File file) {
        try (BufferedWriter writer =
                Files.newBufferedWriter(file.toPath(), StandardCharsets.UTF_8)) {
            writer.write(results.toString());
        } catch (IOException e) {
            System.err.println("Failed to write " + file.getAbsolutePath() + ": " + e);
        }
    }

    /**
     * Extracts the total number of result pages from the HTML of a result page.
     *
     * @param result page HTML; may be {@code null} when the download failed
     * @return the page number found in front of the "last page" link, or {@code 0} when
     *         the input is {@code null}, the link is missing or the number is not a
     *         valid {@code int}
     */
    private static int getPageCount(String result) {
        if (result == null) {
            return 0;
        }
        Matcher matcher = LAST_PAGE_PATTERN.matcher(result);
        if (!matcher.find()) {
            return 0;
        }
        try {
            return Integer.parseInt(matcher.group(1));
        } catch (NumberFormatException e) {
            // Digit run too long for an int: treat it like "no page count found".
            return 0;
        }
    }

    /**
     * Downloads a page with an HTTP GET request and returns its body decoded as UTF-8.
     *
     * @param targetUrl absolute URL of the page
     * @return the page source with lines terminated by {@code '\n'}, or {@code null} when
     *         the request fails or the server does not answer with status 200
     */
    private static String getHtmlString(String targetUrl) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(targetUrl).openConnection();
            // Plain page fetch: GET without a request body.
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows 7)");
            connection.setRequestProperty("Accept",
                    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-powerpoint, application/vnd.ms-excel, application/msword, */*");
            connection.setRequestProperty("Accept-Language", "zh-cn");
            connection.setRequestProperty("UA-CPU", "x86");
            // Only gzip is advertised because only gzip is decoded below (no deflate).
            connection.setRequestProperty("Accept-Encoding", "gzip");
            // No keep-alive: this is a scraper, not a browser session. Closing the
            // connection keeps the load low for the server and for us.
            connection.setRequestProperty("Connection", "close");
            connection.setRequestProperty("Charset", "utf-8");
            // Caching is pointless here; the same URL is not requested repeatedly.
            connection.setUseCaches(false);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.connect();

            int status = connection.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                System.err.println("Unexpected HTTP status " + status + " for " + targetUrl);
                return null;
            }

            try (InputStream body = openBody(connection);
                 BufferedReader reader = new BufferedReader(
                         new InputStreamReader(body, StandardCharsets.UTF_8))) {
                StringBuilder builder = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    builder.append(line).append("\n");
                }
                return builder.toString();
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch " + targetUrl + ": " + e);
            return null;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Opens the response body, transparently unwrapping gzip content encoding. */
    private static InputStream openBody(HttpURLConnection connection) throws IOException {
        String encoding = connection.getContentEncoding();
        InputStream raw = connection.getInputStream();
        if (encoding != null && encoding.toLowerCase(Locale.ROOT).contains("gzip")) {
            return new GZIPInputStream(raw);
        }
        return raw;
    }

    /**
     * Finds every draw cell in the page and appends its numbers to the result buffer.
     *
     * @param pageContent HTML of one result page; must not be {@code null}
     */
    private static void getOneTermContent(String pageContent) {
        Matcher matcher = TERM_PATTERN.matcher(pageContent);
        while (matcher.find()) {
            getOneTermNumbers(matcher.group());
        }
    }

    /**
     * Appends the numbers of one draw to the result buffer: each number is followed by a
     * single space and the line is terminated with {@code "\r\n"}.
     *
     * @param oneTermContent HTML of the table cell holding one draw
     */
    private static void getOneTermNumbers(String oneTermContent) {
        Matcher matcher = NUMBER_PATTERN.matcher(oneTermContent);
        while (matcher.find()) {
            results.append(matcher.group(1)).append(" ");
        }
        results.append("\r\n");
    }
}
運行結果:
希望本文所述對大家java程序設計有所幫助。
原文鏈接:https://blog.csdn.net/ithouse/article/details/50908296