測試代碼1:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
def test(self):
    """Post one hard-coded sample document to the Solr JSON update handler.

    The document is sent to the ``mycore`` core at 127.0.0.1:8983 and the raw
    response body is printed.
    """
    data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一個大好人"}}}
    # Solr request parameter names are case-sensitive; the commit deadline
    # parameter is spelled ``commitWithin``.
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"content-type": "application/json"}
    r = requests.post(url, json=data, params=params, headers=headers)
    print(r.text)


def index_data(self):
    """Index two sample documents through the pysolr client and print the result."""
    solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
    # How you'd index data.
    result = solr.add([
        {
            "id": "doc_1",
            "title": "a test document",
        },
        {
            "id": "doc_2",
            "title": "the banana: tasty or dangerous?",
        },
    ])
    print(result)
測試代碼2:
實際數據:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
|
def index_data_fromcsv(self, csvfile):
    """Read rows from a CSV file and index them into Solr, one HTTP request per row.

    Each data row is posted to the JSON update handler and the response body
    is printed. A row that fails is reported and skipped.

    :param csvfile: CSV file to read, including its full path
    :return: None
    """
    # Guard against readcsv handing back None so the loop never iterates over it.
    rows = csvop.readcsv(csvfile) or []
    # Solr request parameter names are case-sensitive (``commitWithin``).
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"content-type": "application/json"}
    for index, item in enumerate(rows):
        if index == 0:
            # The first row is the header.
            continue
        try:
            # Build a fresh document per row so fields from an earlier row
            # cannot leak into this one when a column fails to decode.
            # NOTE(review): .decode() assumes the cells are GB2312 byte
            # strings (Python 2 csv) -- confirm before running on Python 3.
            doc = {
                'title': item[0].decode('gb2312'),
                'link': item[1],
                # 'date': item[2] is intentionally not indexed.
                'source': item[3].decode('gb2312'),
                'keyword': item[4].decode('gb2312'),
            }
            data = {"add": {"doc": doc}}
            r = requests.post(url, json=data, params=params, headers=headers)
            print(r.text)
        except Exception as e:
            # Best effort: report the error and the failing row index, then go on.
            print(e)
            print(index)


# pysolr client version
def pysolr_index_data_fromcsv(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
    """Read rows from a CSV file and index them into Solr in a single pysolr batch.

    :param csvfile: CSV file to read, including its full path
    :param url: base URL of the Solr core to index into
    :return: None
    """
    # Guard against readcsv handing back None so slicing below cannot fail.
    rows = csvop.readcsv(csvfile) or []
    listdocs = []
    for item in rows[1:]:  # the first row is the header
        try:
            # NOTE(review): .decode() assumes the cells are GB2312 byte
            # strings (Python 2 csv) -- confirm before running on Python 3.
            listdocs.append({
                'title': item[0].decode('gb2312'),
                'link': item[1],
                # 'date': item[2] is intentionally not indexed.
                'source': item[3].decode('gb2312'),
                'keyword': item[4].decode('gb2312'),
            })
        except Exception as e:
            # Best effort: report the bad row and keep collecting the rest.
            print(e)
    solr = pysolr.Solr(url, timeout=10)
    result = solr.add(listdocs)
    print(result)
查詢代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
def search_data(self, message='視頻'):
    """Query Solr over HTTP for documents whose title contains *message*.

    Prints the raw JSON response, then ``<message>:<hit count>``.

    :param message: phrase to look for in the ``title`` field
    """
    # Escape the characters that would otherwise end the quoted phrase early.
    phrase = message.replace('\\', '\\\\').replace('"', '\\"')
    url = 'http://127.0.0.1:8983/solr/mycore/select'
    # Hand the query to requests so it is URL-encoded instead of being
    # formatted straight into the URL.
    query = {'q': 'title:"%s"' % phrase, 'wt': 'json', 'indent': 'true'}
    r = requests.get(url, params=query)
    print(r.text)
    # Solr reports the hit count under the case-sensitive key ``numFound``.
    found = r.json()['response']['numFound']
    print(message + ":" + str(found))


# pysolr client version
# NOTE: this reuses the name ``search_data``; when both definitions live in
# the same scope, this one replaces the one above.
def search_data(self, where='視頻', url='http://127.0.0.1:8983/solr/mycore/'):
    """Search titles through pysolr and print selected fields of every hit.

    :param where: term to look for in the ``title`` field
    :param url: base URL of the Solr core to query
    """
    solr = pysolr.Solr(url, timeout=10)
    # start=10 skips the first ten hits; rows=30 caps how many are returned.
    options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
    # Search for the caller's term rather than a hard-coded one.
    result = solr.search('title:%s' % where, **options)
    # result = solr.search('title:視頻')
    # print(result.raw_response['response']['numFound'])
    for item in result:
        print('keyword: %s' % item['keyword'])
        print('title: %s' % item['title'])
        print('source: %s' % item['source'])
        print('link: %s' % item['link'])
        print('\n')
輸出結果:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
{ "responseheader" :{ "status" : 0 , "qtime" : 0 , "params" :{ "q" : "title:\"\\視頻\"" , "indent" : "true" , "wt" : "json" }}, "response" :{ "numfound" : 123 , "start" : 0 , "docs" :[ { "source" : "中彩網" , "link" : "http://www.zhcw.com/video/kaijiangshipin-3d/11981126.shtml" , "keyword" : "視頻" , "title" : "福彩3d開獎 視頻 -中彩 視頻" , "id" : "2f0a9d21-3771-4efa-a0cc-e0484cc97993" , "_version_" : 1584214368617234432 }, { "source" : "新浪視頻" , "link" : "http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1" , "keyword" : "視頻" , "title" : "今日熱門 視頻 匯總20170707" , "id" : "c8aae0af-01e9-491f-b999-24b97004a4ba" , "_version_" : 1584214367507841024 }, { "source" : "網易新聞" , "link" : "http://news.163.com/17/0707/13/coocnuie00018aor.html" , "keyword" : "視頻" , "title" : "網傳"蘭桂坊附近不雅 視頻 " 警方:傳播 視頻 將追責" , "id" : "353de48d-ede7-481b-89d3-bc20ab4b3884" , "_version_" : 1584214367821365248 }, { "source" : "鳳凰視頻" , "link" : "http://v.ifeng.com/video_7480871.shtml" , "keyword" : "視頻" , "title" : "創想動畫片:花粉過敏癥的痛誰懂-鳳凰 視頻 -最具媒體品質的綜合 視頻 ..." , "id" : "dc5f19c4-180f-4004-a0db-4499d875a60f" , "_version_" : 1584214366819975168 }, { "source" : "鳳凰視頻" , "link" : "http://v.ifeng.com/video_7805858.shtml" , "keyword" : "視頻" , "title" : "節氣說:小暑時節就該這樣養生-鳳凰 視頻 -最具媒體品質的綜合 視頻 門..." , "id" : "5e9eb7a7-48b8-4e41-9514-7712ae619d9a" , "_version_" : 1584214367516229632 }, { "source" : "鳳凰視頻" , "link" : "http://v.ifeng.com/video_7483506.shtml" , "keyword" : "視頻" , "title" : "聽導演講《神奇女俠》的故事 -鳳凰 視頻 -最具媒體品質的綜合 視頻 門戶-..." 
, "id" : "6b1482f1-c0c9-479f-bef7-7de324fb9372" , "_version_" : 1584214367647301632 }, { "source" : "汽車雜志" , "link" : "http://www.jiemian.com/article/1445267.html" , "keyword" : "視頻" , "title" : "【視頻】歐寶最近找了一堆穿睡衣的辣媽拍了一段超牛的視頻" , "id" : "1d327555-a6f3-4513-9a21-43d59418ab82" , "_version_" : 1584214368157958144 }, { "source" : "味覺大師" , "link" : "http://www.jiemian.com/article/1453545.html" , "keyword" : "視頻" , "title" : "【視頻】大董沒有肉的肉味燒茄子" , "id" : "7d777870-93cb-4c18-a32b-734af8f133f1" , "_version_" : 1584213891451191296 }, { "source" : "新浪汽車" , "link" : "http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml" , "keyword" : "視頻" , "title" : "視頻 :兩大神車pk!高爾夫思域怎么選?" , "id" : "3a50b303-6b54-4da3-aee1-a61c678c752d" , "_version_" : 1584213892090822656 }, { "source" : "味覺大師" , "link" : "http://www.jiemian.com/article/1453545.html" , "keyword" : "視頻" , "title" : "【視頻】大董沒有肉的肉味燒茄子" , "id" : "01da8e11-77bc-4c31-ba3a-ba668e846d9d" , "_version_" : 1584214366191878144 }] }} |
完整代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
# -*- coding: utf-8 -*-
# --- CSV helper module; the client code below imports it as ``csvop`` ---
import csv
import os
import codecs


def readcsv(filename):
    """Read every row of a CSV file into a list.

    :param filename: path of the CSV file
    :return: list of rows, each a list of cell values; an empty list when
        the file does not exist
    """
    if not os.path.exists(filename):
        # Return an empty list so callers can always iterate the result.
        return []
    with open(filename, 'r') as f:
        return list(csv.reader(f))


#################################################
# coding=utf-8
import json
import math
import os
import time
from datetime import datetime
from os import walk

import pysolr
import requests

import csvop


class solrclientobj:
    """Demo client that indexes, searches and deletes documents in a Solr core."""

    def test(self):
        """Post one hard-coded sample document to the Solr JSON update handler."""
        data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一個大好人"}}}
        # Solr request parameter names are case-sensitive (``commitWithin``).
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
        headers = {"content-type": "application/json"}
        r = requests.post(url, json=data, params=params, headers=headers)
        print(r.text)

    def pysolr_index_data_fromcsv(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
        """Read rows from a CSV file and index them into Solr in one pysolr batch.

        :param csvfile: CSV file to read, including its full path
        :param url: base URL of the Solr core to index into
        :return: None
        """
        rows = csvop.readcsv(csvfile) or []
        listdocs = []
        for item in rows[1:]:  # the first row is the header
            try:
                # NOTE(review): .decode() assumes the cells are GB2312 byte
                # strings (Python 2 csv) -- confirm before running on Python 3.
                listdocs.append({
                    'title': item[0].decode('gb2312'),
                    'link': item[1],
                    # 'date': item[2] is intentionally not indexed.
                    'source': item[3].decode('gb2312'),
                    'keyword': item[4].decode('gb2312'),
                })
            except Exception as e:
                # Best effort: report the bad row and keep collecting the rest.
                print(e)
        solr = pysolr.Solr(url, timeout=10)
        result = solr.add(listdocs)
        print(result)

    def index_data_fromcsv(self, csvfile):
        """Read rows from a CSV file and index them into Solr, one request per row.

        :param csvfile: CSV file to read, including its full path
        :return: None
        """
        rows = csvop.readcsv(csvfile) or []
        # Solr request parameter names are case-sensitive (``commitWithin``).
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
        headers = {"content-type": "application/json"}
        for index, item in enumerate(rows):
            if index == 0:
                # The first row is the header.
                continue
            try:
                # A fresh document per row keeps fields from an earlier row
                # from leaking into this one when a column fails to decode.
                # NOTE(review): .decode() assumes GB2312 byte strings
                # (Python 2 csv) -- confirm before running on Python 3.
                doc = {
                    'title': item[0].decode('gb2312'),
                    'link': item[1],
                    # 'date': item[2] is intentionally not indexed.
                    'source': item[3].decode('gb2312'),
                    'keyword': item[4].decode('gb2312'),
                }
                data = {"add": {"doc": doc}}
                r = requests.post(url, json=data, params=params, headers=headers)
                print(r.text)
            except Exception as e:
                # Best effort: report the error and the failing row index.
                print(e)
                print(index)

    def index_data(self):
        """Index two sample documents through pysolr and print the result."""
        solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
        # How you'd index data.
        result = solr.add([
            {
                "id": "doc_1",
                "title": "a test document",
            },
            {
                "id": "doc_2",
                "title": "the banana: tasty or dangerous?",
            },
        ])
        print(result)

    def search_data(self, where='視頻', url='http://127.0.0.1:8983/solr/mycore/'):
        """Search titles through pysolr and print selected fields of every hit.

        :param where: term to look for in the ``title`` field
        :param url: base URL of the Solr core to query
        """
        solr = pysolr.Solr(url, timeout=10)
        # start=10 skips the first ten hits; rows=30 caps how many are returned.
        options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
        # Search for the caller's term rather than a hard-coded one.
        result = solr.search('title:%s' % where, **options)
        # result = solr.search('title:視頻')
        # print(result.raw_response['response']['numFound'])
        for item in result:
            print('keyword: %s' % item['keyword'])
            print('title: %s' % item['title'])
            print('source: %s' % item['source'])
            print('link: %s' % item['link'])
            print(' ')

    def delete_index_data(self, where, url='http://127.0.0.1:8983/solr/mycore/'):
        """Delete the documents matching a query from the index.

        :param where: Solr query selecting what to delete ('*:*' removes everything)
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        # solr.delete(id=where) would delete by document id instead (e.g. id='id1').
        result = solr.delete(q=where)
        print(result)


obj = solrclientobj()
# obj.delete_index_data('*:*')  # delete every document in the index
# obj.index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('視頻')
# csvfile = 'd:/work/solr/other/exportexcels/2017-07-07_info.csv'
# obj.pysolr_index_data_fromcsv(csvfile)
以上這篇對 Python 操作 Solr 索引數據的實例詳解就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支持服務器之家。
原文鏈接:https://www.cnblogs.com/shaosks/p/7845576.html