在看KMP算法時,想要簡單的統計一下執行時間和性能。
得出的結論是: Java的String的indexOf方法性能最好,其次是KMP算法,其次是傳統的BF算法,當然,對比有點牽強,SUN的算法也使用Java來實現、用的看著不像是KMP,還需要詳細研究一下。
測試代碼如下所示:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
|
package com.test.test.kmp; import java.util.Random; public class KMPTest { public static void main(String[] args) { test(); } // public static void test() { // int allLen = 8000000 ; int subLen = 11 ; int charLen = 4 ; String cl = buildString(charLen); int times = 50 ; // testMatch(allLen, subLen, cl, times); } public static void testMatch( int allLen, int subLen, String cl, int times){ int allBF = 0 ; int allString = 0 ; int allKMP = 0 ; int allGC = 0 ; int allbuild = 0 ; int alltoArray = 0 ; long start = System.currentTimeMillis(); System.out.println( "--------------------------" ); for ( int i = 0 ; i < times; i++) { // 1. 構造字符串的損耗 long buildStart = System.currentTimeMillis(); String sub = buildString(subLen, cl); String all = buildString(allLen, sub) +sub; long buildEnd = System.currentTimeMillis(); allbuild += (buildEnd - buildStart); // 2. toCharArray的損耗 long toArrayStart = System.currentTimeMillis(); char [] allArray = all.toCharArray(); char [] subArray = sub.toCharArray(); long toArrayEnd = System.currentTimeMillis(); alltoArray += (toArrayEnd - toArrayStart); // long startBF = System.currentTimeMillis(); int indexBF = bfIndexOf(all, sub); long endBF = System.currentTimeMillis(); // long timeoutBF = endBF - startBF; allBF += timeoutBF; System.gc(); allGC += (System.currentTimeMillis() - endBF); // long startString = System.currentTimeMillis(); int indexString = stringIndexOf(all, sub); long endString = System.currentTimeMillis(); // long timeoutString = endString - startString; allString += timeoutString; System.gc(); allGC += (System.currentTimeMillis() - endString); // long startKMP = System.currentTimeMillis(); //int indexKMP = kmpIndexOf(all, sub); int indexKMP = KMP_Index(allArray, subArray); long endKMP = System.currentTimeMillis(); // long timeoutKMP = endKMP - startKMP; allKMP += timeoutKMP; System.gc(); allGC += (System.currentTimeMillis() - endKMP); // //System.out.println("all="+all.substring(0,100)+" ......"); //System.out.println("sub="+sub); // //System.out.println("indexBF="+indexBF+";耗時: "+timeoutBF+"ms"); //System.out.println("indexString="+indexString+";耗時: "+timeoutString+"ms"); if (indexBF == indexString && indexKMP == indexString){ //System.out.println("!!!!對比相等。"); } else { throw new RuntimeException( "對比出錯." ); } // if (i % 20 == 10 ){ System.out.println(i+ "次bfIndexOf= " +allBF+ "ms" ); System.out.println(i+ "次stringIndexOf= " +allString+ "ms" ); System.out.println(i+ "次KMPIndexOf= " +allKMP+ "ms" ); System.out.println(i+ "次allbuild= " +allbuild+ "ms" ); System.out.println(i+ "次alltoArray= " +alltoArray+ "ms" ); System.out.println(i+ "*3次,GC= " +allGC+ "ms" ); long end = System.currentTimeMillis(); long allTime = end-start; long lossTime = allTime - (allBF+allString+allKMP+allGC); System.out.println(i+ "次,所有總計耗時: " +(allTime)+ "ms; 損耗:" + lossTime + "ms; 損耗比: " +(( 0.0 +lossTime)/allTime * 100 )+ "%" ); System.out.println( "--------------------------" ); } } // System.out.println(times+ "次bfIndexOf;總計耗時: " +allBF+ "ms" ); System.out.println(times+ "次stringIndexOf;總計耗時: " +allString+ "ms" ); System.out.println(times+ "次KMPIndexOf;總計耗時: " +allKMP+ "ms" ); System.out.println(times+ "次allbuild= " +allbuild+ "ms" ); System.out.println(times+ "次alltoArray= " +alltoArray+ "ms" ); System.out.println(times+ "*3次,GC;總計耗時: " +allGC+ "ms" ); long end = System.currentTimeMillis(); long allTime = end-start; long lossTime = allTime - (allBF+allString+allKMP+allGC); System.out.println(times+ "次,所有總計耗時: " +(allTime)+ "ms; 損耗:" + lossTime + "ms; 損耗比: " +(( 0.0 +lossTime)/allTime * 100 )+ "%" ); System.out.println( "--------------------------" ); } // // 構建字符 public static String buildString( int len){ return buildString(len, null ); } public static String buildString( int len, String sub){ // final int TYPE_NUMBER = 0 ; final int TYPE_LOWERCASE = 1 ; final int TYPE_UPPERCASE = 2 ; // long seed = System.nanoTime(); Random random = new Random(seed); // // StringBuilder builder = new StringBuilder(); for ( int i = 0 ; i < len; i++) { // int type = random.nextInt( 3 ); // 0-2 int cvalue = 0 ; if (TYPE_NUMBER == type){ cvalue = '0' + random.nextInt( 10 ); // 0~(n-1) } else if (TYPE_LOWERCASE == type){ cvalue = 'a' + random.nextInt( 26 ); // 0~(n-1) } else if (TYPE_UPPERCASE == type){ cvalue = 'A' + random.nextInt( 26 ); // 0~(n-1) } else { throw new RuntimeException(Random. class .getName()+ "#newxtInt(int);Wrong!" ); } // //cvalue = 'A' + random.nextInt(26);// 0~(n-1) // char c = ( char )cvalue; if ( null != sub && sub.length() > 1 ){ int index = random.nextInt(sub.length()); c = sub.charAt(index); } //String s = String.valueOf(c); builder.append(c); // } // return builder.toString(); } /** * Java自帶的方法,內部實現了 * @param all * @param sub * @return */ public static int stringIndexOf(String all, String sub){ // 防御式編程 if ( null == all || null == sub){ return - 1 ; } // 調用Java的String方法 return all.indexOf(sub); } /** * BF算法 * @param all * @param sub * @return */ public static int bfIndexOf(String all, String sub){ // 防御式編程 if ( null == all || null == sub){ return - 1 ; } // int lenAll = all.length(); int lenSub = sub.length(); int i = 0 ; while (i < lenAll) { // 從i下標開始對比 int j = 0 ; while (j < lenSub) { // char all_i = all.charAt(i); char sub_j = sub.charAt(j); if (all_i == sub_j){ // 相等則繼續對比下一個字符 i++; j++; // 這個增長很重要 } else { // 否則跳出內層循環 break ; } } // 如果 j 增長到和 lenSub相等,則匹配成功 if (lenSub == j){ return i - lenSub; } else { i = 1 +i - j; // 回溯 i } } // return - 1 ; } /** * KMP 算法 * @param all * @param sub * @return */ public static int kmpIndexOf(String all, String sub){ // char [] allArray = all.toCharArray(); char [] subArray = sub.toCharArray(); // return KMP_Index(allArray, subArray); } /** * 獲得字符串的next函數值 * * @param t * 字符串 * @return next函數值 */ public static int [] next( char [] t) { int [] next = new int [t.length]; next[ 0 ] = - 1 ; int i = 0 ; int j = - 1 ; while (i < t.length - 1 ) { if (j == - 1 || t[i] == t[j]) { i++; j++; if (t[i] != t[j]) { next[i] = j; } else { next[i] = next[j]; } } else { j = next[j]; } } return next; } /** * KMP匹配字符串 * * @param allArray * 主串 * @param subArray * 模式串 * @return 若匹配成功,返回下標,否則返回-1 */ public static int KMP_Index( char [] allArray, char [] subArray) { int [] next = next(subArray); int i = 0 ; int j = 0 ; while (i <= allArray.length - 1 && j <= subArray.length - 1 ) { if (j == - 1 || allArray[i] == subArray[j]) { i++; j++; } else { j = next[j]; } } if (j < subArray.length) { return - 1 ; } else return i - subArray.length; // 返回模式串在主串中的頭下標 } } |
測試的一個結果如下所示:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
-------------------------- 10次bfIndexOf= 94ms 10次stringIndexOf= 56ms 10次KMPIndexOf= 76ms 10次allbuild= 5870ms 10次alltoArray= 137ms 10*3次,GC= 155ms 10次,所有總計耗時: 6388ms; 損耗:6007ms; 損耗比: 94.03569192235442% -------------------------- 30次bfIndexOf= 365ms 30次stringIndexOf= 222ms 30次KMPIndexOf= 295ms 30次allbuild= 16452ms 30次alltoArray= 395ms 30*3次,GC= 452ms 30次,所有總計耗時: 18184ms; 損耗:16850ms; 損耗比: 92.66388033435987% -------------------------- 50次bfIndexOf;總計耗時: 550ms 50次stringIndexOf;總計耗時: 335ms 50次KMPIndexOf;總計耗時: 450ms 50次allbuild= 26501ms 50次alltoArray= 637ms 50*3次,GC;總計耗時: 734ms 50次,所有總計耗時: 29211ms; 損耗:27142ms; 損耗比: 92.91705179555647% -------------------------- |
從中可以看出來,使用StringBuilder構造字符串,800萬*50次append大約消耗了26秒。換算下來每秒鐘是1600萬次。。。
看來是需要寫一個專門計時的函數,本來Junit是有自己的統計的,但是樣本不太好做。
如此看來,數據的準備,也就是測試用例的選取很關鍵,可能需要先生成并寫入到文本文件中。