直接上代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
package com.sand.mpa.sousuo; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.Statement; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; //增量索引 /* * 實現思路:首次查詢數據庫表所有記錄,對每條記錄建立索引,并將最后一條記錄的id存儲到storeId.txt文件中 * 當新插入一條記錄時,再建立索引時不必再對所有數據重新建一遍索引, * 可根據存放在storeId.txt文件中的id查出新插入的數據,只對新增的數據新建索引,并把新增的索引追加到原來的索引文件中 * */ public class IncrementIndex { public static void main(String[] args) { try { IncrementIndex index = new IncrementIndex(); String path = "E:\\Test\\lucene_test\\poiIdext" ; //索引文件的存放路徑 String storeIdPath = "E:\\Test\\lucene_test\\storeId.txt" ; //存儲ID的路徑 String storeId = "" ; Date date1 = new Date(); storeId = index.getStoreId(storeIdPath); ResultSet rs = index.getResult(storeId); System.out.println( "開始建立索引。。。。" ); index.indexBuilding(path, storeIdPath, rs); Date date2 = new Date(); System.out.println( "耗時:" +(date2.getTime()-date1.getTime())+ "ms" ); storeId = index.getStoreId(storeIdPath); System.out.println(storeId); //打印出這次存儲起來的ID } catch (Exception e) { e.printStackTrace(); } } public static void buildIndex(String indexFile, String storeIdFile) { try { String path = indexFile; //索引文件的存放路徑 String storeIdPath = storeIdFile; //存儲ID的路徑 String storeId = "" ; storeId = getStoreId(storeIdPath); ResultSet rs = getResult(storeId); indexBuilding(path, storeIdPath, rs); storeId = getStoreId(storeIdPath); } catch (Exception e) { e.printStackTrace(); } } public static ResultSet getResult(String storeId) throws Exception { Class.forName( "com.mysql.jdbc.Driver" ).newInstance(); String url = "jdbc:mysql://localhost:3306/1pm2_v1" ; String userName = "root" ; String password = "root" ; Connection conn = DriverManager.getConnection(url, userName, password); Statement stmt = conn.createStatement(); String sql = "select * from pd_ugc" ; ResultSet rs = stmt.executeQuery(sql + " where id > '" + storeId + "'order by id" ); return rs; } public static boolean indexBuilding(String path, String storeIdPath, ResultSet rs) { try { Analyzer luceneAnalyzer = new StandardAnalyzer(); // 取得存儲起來的ID,以判定是增量索引還是重新索引 boolean isEmpty = true ; try { File file = new File(storeIdPath); if (!file.exists()) { file.createNewFile(); } FileReader fr = new FileReader(storeIdPath); BufferedReader br = new BufferedReader(fr); if (br.readLine() != null ) { isEmpty = false ; } br.close(); fr.close(); } catch (IOException e) { e.printStackTrace(); } //isEmpty=false表示增量索引 IndexWriter writer = new IndexWriter(path, luceneAnalyzer, isEmpty); String storeId = "" ; boolean indexFlag = false ; String id; String name; String address; String citycode; while (rs.next()) { id = rs.getInt( "id" ) + "" ; name = rs.getString( "name" ); address = rs.getString( "address" ); citycode = rs.getString( "citycode" ); writer.addDocument(Document(id, name, address, citycode)); storeId = id; //將拿到的id給storeId,這種拿法不合理,這里為了方便 indexFlag = true ; } writer.optimize(); writer.close(); if (indexFlag) { // 將最后一個的ID存到磁盤文件中 writeStoreId(storeIdPath, storeId); } return true ; } catch (Exception e) { e.printStackTrace(); System.out.println( "出錯了" + e.getClass() + "\n 錯誤信息為: " + e.getMessage()); return false ; } } public static Document Document(String id, String name, String address, String citycode) { Document doc = new Document(); doc.add( new Field( "id" , id, Field.Store.YES, Field.Index.TOKENIZED)); doc.add( new Field( "name" , name, Field.Store.YES, Field.Index.TOKENIZED)); //查詢字段 doc.add( new Field( "address" , address, Field.Store.YES, Field.Index.TOKENIZED)); doc.add( new Field( "citycode" , citycode, Field.Store.YES, Field.Index.TOKENIZED)); //查詢字段 return doc; } // 取得存儲在磁盤中的ID public static String getStoreId(String path) { String storeId = "" ; try { File file = new File(path); if (!file.exists()) { file.createNewFile(); } FileReader fr = new FileReader(path); BufferedReader br = new BufferedReader(fr); storeId = br.readLine(); if (storeId == null || storeId == "" ) storeId = "0" ; br.close(); fr.close(); } catch (Exception e) { e.printStackTrace(); } return storeId; } // 將ID寫入到磁盤文件中 public static boolean writeStoreId(String path, String storeId) { boolean b = false ; try { File file = new File(path); if (!file.exists()) { file.createNewFile(); } FileWriter fw = new FileWriter(path); PrintWriter out = new PrintWriter(fw); out.write(storeId); out.close(); fw.close(); b = true ; } catch (IOException e) { e.printStackTrace(); } return b; } } |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
package com.sand.mpa.sousuo; public class IndexResult { private String id; private String name; private String address; private String citycode; public String getId() { return id; } public void setId(String id) { this .id = id; } public String getName() { return name; } public void setName(String name) { this .name = name; } public String getAddress() { return address; } public void setAddress(String address) { this .address = address; } public String getCitycode() { return citycode; } public void setCitycode(String citycode) { this .citycode = citycode; } } |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
|
package com.sand.mpa.sousuo; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocCollector; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.mira.lucene.analysis.IK_CAnalyzer; public class IndexUtils { //0. 創建增量索引 public static void buildIndex(String indexFile, String storeIdFile) { IncrementIndex.buildIndex(indexFile, storeIdFile); } //1. 單字段查詢 @SuppressWarnings ( "deprecation" ) public static List<IndexResult> queryByOneKey(IndexSearcher indexSearcher, String field, String key) { try { Date date1 = new Date(); QueryParser queryParser = new QueryParser(field, new StandardAnalyzer()); Query query = queryParser.parse(key); Hits hits = indexSearcher.search(query); Date date2 = new Date(); System.out.println( "耗時:" + (date2.getTime() - date1.getTime()) + "ms" ); List<IndexResult> list = new ArrayList<IndexResult>(); for ( int i = 0 ; i < hits.length(); i++) { list.add(getIndexResult(hits.doc(i))); } return list; } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null ; } //2. 多條件查詢。這里實現的是and操作 //注:要查詢的字段必須是index的 //即doc.add(new Field("pid", rs.getString("pid"), Field.Store.YES,Field.Index.TOKENIZED)); @SuppressWarnings ( "deprecation" ) public static List<IndexResult> queryByMultiKeys(IndexSearcher indexSearcher, String[] fields, String[] keys) { try { BooleanQuery m_BooleanQuery = new BooleanQuery(); if (keys != null && keys.length > 0 ) { for ( int i = 0 ; i < keys.length; i++) { QueryParser queryParser = new QueryParser(fields[i], new StandardAnalyzer()); Query query = queryParser.parse(keys[i]); m_BooleanQuery.add(query, BooleanClause.Occur.MUST); //and操作 } Hits hits = indexSearcher.search(m_BooleanQuery); List<IndexResult> list = new ArrayList<IndexResult>(); for ( int i = 0 ; i < hits.length(); i++) { list.add(getIndexResult(hits.doc(i))); } return list; } } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null ; } //3.高亮顯示 實現了單條件查詢 //可改造為多條件查詢 public static List<IndexResult> highlight(IndexSearcher indexSearcher, String key) throws InvalidTokenOffsetsException { try { QueryParser queryParser = new QueryParser( "name" , new StandardAnalyzer()); Query query = queryParser.parse(key); TopDocCollector collector = new TopDocCollector( 800 ); indexSearcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; Highlighter highlighter = null ; SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter( "<font color='red'>" , "</font>" ); highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighter.setTextFragmenter( new SimpleFragmenter( 200 )); List<IndexResult> list = new ArrayList<IndexResult>(); Document doc; for ( int i = 0 ; i < hits.length; i++) { //System.out.println(hits[i].score); doc = indexSearcher.doc(hits[i].doc); TokenStream tokenStream = new StandardAnalyzer().tokenStream( "name" , new StringReader(doc.get( "name" ))); IndexResult ir = getIndexResult(doc); ir.setName(highlighter.getBestFragment(tokenStream, doc.get( "name" ))); list.add(ir); } return list; } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null ; } //4. 多字段查詢 @SuppressWarnings ( "deprecation" ) public static List<IndexResult> queryByMultiFileds(IndexSearcher indexSearcher, String[] fields, String key) { try { MultiFieldQueryParser mfq = new MultiFieldQueryParser(fields, new StandardAnalyzer()); Query query = mfq.parse(key); Hits hits = indexSearcher.search(query); List<IndexResult> list = new ArrayList<IndexResult>(); for ( int i = 0 ; i < hits.length(); i++) { list.add(getIndexResult(hits.doc(i))); } return list; } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null ; } //5. 刪除索引 public static void deleteIndex(String indexFile, String id) throws CorruptIndexException, IOException { IndexReader indexReader = IndexReader.open(indexFile); indexReader.deleteDocuments( new Term( "id" , id)); indexReader.close(); } //6. 一元分詞 @SuppressWarnings ( "deprecation" ) public static String Standard_Analyzer(String str) { Analyzer analyzer = new StandardAnalyzer(); Reader r = new StringReader(str); StopFilter sf = (StopFilter) analyzer.tokenStream( "" , r); System.out.println( "=====StandardAnalyzer====" ); System.out.println( "分析方法:默認沒有詞只有字(一元分詞)" ); Token t; String results = "" ; try { while ((t = sf.next()) != null ) { System.out.println(t.termText()); results = results + " " + t.termText(); } } catch (IOException e) { e.printStackTrace(); } return results; } //7. 字典分詞 @SuppressWarnings ( "deprecation" ) public static String ik_CAnalyzer(String str) { Analyzer analyzer = new IK_CAnalyzer(); Reader r = new StringReader(str); TokenStream ts = (TokenStream) analyzer.tokenStream( "" , r); System.out.println( "=====IK_CAnalyzer====" ); System.out.println( "分析方法:字典分詞,正反雙向搜索" ); Token t; String results = "" ; try { while ((t = ts.next()) != null ) { System.out.println(t.termText()); results = results + " " + t.termText(); } } catch (IOException e) { e.printStackTrace(); } System.out.println(results); return results; } //在結果中搜索 public static void queryFromResults() { } //組裝對象 public static IndexResult getIndexResult(Document doc) { IndexResult ir = new IndexResult(); ir.setId(doc.get( "id" )); ir.setName(doc.get( "name" )); ir.setAddress(doc.get( "address" )); ir.setCitycode(doc.get( "citycode" )); return ir; } } |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
package com.sand.mpa.sousuo; /** * $Id$ * Copyright 2009-2010 Oak Pacific Interactive. All rights reserved. */ import java.util.Date; import java.util.List; import org.apache.lucene.search.IndexSearcher; import org.apache.velocity.runtime.directive.Break; public class Test { //存放索引文件 private static String indexFile = "E:\\Test\\lucene_test\\poiIdext" ; //存放id private static String storeIdFile = "E:\\Test\\lucene_test\\storeId.txt" ; public static void main(String[] args) throws Exception { //0. 創建增量索引 IndexUtils.buildIndex(indexFile, storeIdFile); IndexSearcher indexSearcher = new IndexSearcher(indexFile); List<IndexResult> list = null ; // String key =""; Date date1 = new Date(); Date date2 = new Date(); //1.單字段查詢 // key = IndexUtils.ik_CAnalyzer("上海"); // list = IndexUtils.queryByOneKey(indexSearcher, "address", key); // date2 = new Date(); // System.out.println("耗時:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size() // + "條=======================================單字段查詢"); // printResults(list); // System.exit(1); //2.多條件查詢 String[] fields = { "name" , "address" }; String[] keys = { IndexUtils.ik_CAnalyzer( "永城市" )}; date1 = new Date(); list = IndexUtils.queryByMultiKeys(indexSearcher, fields, keys); date2 = new Date(); System.out.println( "查詢耗時:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size() + "條\n===============================多條件查詢" ); printResults(list); System.exit( 1 ); //3.高亮顯示 單字段查詢 // System.out.println("\n\n"); // date1 = new Date(); // key="安徽"; // list = IndexUtils.highlight(indexSearcher, key); // date2 = new Date(); // System.out.println("耗時:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size() // + "條\n======================================高亮顯示"); // printResults(list); // System.exit(1); //4. 多字段查詢 // date1 = new Date(); // list = IndexUtils.queryByMultiFileds(indexSearcher, fields, key); // date2 = new Date(); // System.out.println("耗時:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size() // + "條\n=====================================多字段查詢"); // printResults(list); //5. 刪除索引中的字段 根據id進行刪除 // IndexUtils.deleteIndex(indexFile, "1552"); } //打印結果 public static void printResults(List<IndexResult> list) { if (list != null && list.size() > 0 ) { for ( int i = 0 ; i < list.size(); i++) { System.out.println(list.get(i).getId() + "," + list.get(i).getName() + "," + list.get(i).getAddress() + "," + list.get(i).getCitycode()+ "--->" +i); } } } } |
以上就是小編為大家帶來的Java實現lucene搜索功能的方法(推薦)全部內容了,希望大家多多支持服務器之家~