Lucene基础篇

Lucene入门、创建索引库、基本的增删改查


Lucene简介

Lucene官网
Lucene是Apache软件基金会的一个开源项目,它是一个高性能的信息检索库,可用于需要全文索引和搜索功能的任何应用程序。Lucene最初由Doug Cutting开发,后来并入Jakarta,成为Jakarta的一个子项目。关于更多Lucene的信息,请查看wikipedia


Lucene工具Luke

Download地址:https://github.com/DmitryKey/luke/releases
Luke是一款Lucene/Solr的图形化工具,使用Luke可以方便地查看并分析文档中的字段内容,进行索引维护以及检查索引的运行状况等。想了解更多关于Luke的信息可以参考:https://github.com/DmitryKey/luke


创建索引库

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package com.my.lucene;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Introductory Lucene example: builds an index from the files in a directory.
 *
 * @author Mackvord
 * @date 2018-08-30
 * @version 1.0
 */
public class LuceneTest {

    /**
     * Indexes every regular file directly under {@code F:\searchSource} into the
     * index stored at {@code F:/TestLucene}.
     *
     * <p>Each file becomes one document with the fields {@code fileName},
     * {@code fileSize}, {@code filePath} and {@code fileContent}.
     *
     * @throws IOException if the source directory cannot be listed, a file cannot
     *         be read, or the index cannot be written
     */
    @Test
    public void createIndexTest() throws IOException {
        // Resolve the source files first so a bad path fails before the index is touched.
        File sourceDir = new File("F:\\searchSource");
        File[] listFiles = sourceDir.listFiles();
        if (listFiles == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list files in source directory: " + sourceDir);
        }

        // Standard analyzer and writer configuration.
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);

        // try-with-resources closes the writer (committing pending changes) and then the
        // directory, even when reading a file or adding a document throws.
        try (Directory directory = FSDirectory.open(Paths.get("F:/", "TestLucene"));
             IndexWriter indexwriter = new IndexWriter(directory, config)) {
            for (File f : listFiles) {
                if (!f.isFile()) {
                    // Sub-directories have no readable content; skip them instead of aborting.
                    continue;
                }
                Document document = new Document();
                // File name field.
                Field fileNameField = new TextField("fileName", f.getName(), Store.YES);
                // File size field (kept as text, exactly as the article's later queries expect).
                Field fileSizeField = new TextField("fileSize", String.valueOf(FileUtils.sizeOf(f)), Store.YES);
                // File path field: stored only.
                Field filePathField = new StoredField("filePath", f.getPath());
                // File content field.
                Field fileContentField = new TextField("fileContent", FileUtils.readFileToString(f, "UTF-8"), Store.YES);

                document.add(fileNameField);
                document.add(fileSizeField);
                document.add(filePathField);
                document.add(fileContentField);

                // Write the document into the index.
                indexwriter.addDocument(document);
            }
        }
    }

}

Lucene简单查询

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/**
 * Simple search test: looks up documents by field name and exact term value.
 *
 * <p>Opens the index at {@code F:/TestLucene}, runs a {@link TermQuery} for the term
 * {@code readme.txt} in field {@code fileName}, and prints the stored fields of at
 * most two hits.
 *
 * @throws IOException if the index cannot be opened or read
 */
@Test
public void searchTest() throws IOException {
    // try-with-resources releases the reader and the directory even if the search
    // or the document loading throws.
    try (Directory directory = FSDirectory.open(Paths.get("F:/", "TestLucene"));
         IndexReader indexReader = DirectoryReader.open(directory)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // Term query: the field to search and the exact term to match.
        Query query = new TermQuery(new Term("fileName", "readme.txt"));
        // Run the query, asking for at most two hits.
        TopDocs topDocs = indexSearcher.search(query, 2);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Load the stored document by its internal document id.
            Document document = indexSearcher.doc(scoreDoc.doc);
            // Print the stored field values.
            System.out.println(document.get("fileName"));
            System.out.println(document.get("fileSize"));
            System.out.println(document.get("fileContent"));
            System.out.println(document.get("filePath"));
        }
    }
}

基本的增删改查

以下是Lucene的一些基本的增删改查操作,主要是熟悉一下API

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
package com.my.lucene;

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Index management: basic add / delete / update / query operations.
 *
 * @author Mackvord
 * @date 2018-08-26
 * @version 1.0
 */
public class LuceneManager {

    /**
     * Creates an {@link IndexWriter} for the index at {@code F:/TestLucene} using a
     * {@link StandardAnalyzer}.
     *
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the index directory cannot be opened
     */
    public IndexWriter getIndexWriter() throws IOException {
        Directory directory = FSDirectory.open(Paths.get("F:/", "TestLucene"));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, config);
    }

    /**
     * Creates an {@link IndexSearcher} over the index at {@code F:/TestLucene}.
     *
     * @return a new searcher; the caller must close its underlying reader, obtained via
     *         {@link IndexSearcher#getIndexReader()}, when done
     * @throws IOException if the index cannot be opened
     */
    public IndexSearcher getIndexSearcher() throws IOException {
        // 1. Open the directory holding the index.
        Directory directory = FSDirectory.open(Paths.get("F:/", "TestLucene"));
        // 2. Open a reader on that directory.
        IndexReader indexReader = DirectoryReader.open(directory);
        // 3. Wrap the reader in a searcher.
        return new IndexSearcher(indexReader);
    }

    /**
     * Deletes every document in the index.
     *
     * @throws IOException if the index cannot be modified
     */
    @Test
    public void deleteAllTest() throws IOException {
        // try-with-resources closes the writer even if the delete fails.
        try (IndexWriter indexWriter = getIndexWriter()) {
            indexWriter.deleteAll();
        }
    }

    /**
     * Deletes the documents matching a term query.
     *
     * @throws IOException if the index cannot be modified
     */
    @Test
    public void deleteTest() throws IOException {
        try (IndexWriter indexWriter = getIndexWriter()) {
            // NOTE(review): a TermQuery is not analyzed, so "readme" only matches documents
            // whose fileName was indexed with exactly that token - confirm this is intended.
            Query query = new TermQuery(new Term("fileName", "readme"));
            indexWriter.deleteDocuments(query);
        }
    }

    /**
     * Replaces the documents matching a term with a new document.
     *
     * @throws IOException if the index cannot be modified
     */
    @Test
    public void updateTest() throws IOException {
        try (IndexWriter indexWriter = getIndexWriter()) {
            Document doc = new Document();
            doc.add(new TextField("Name", "测试名", Store.YES));
            doc.add(new TextField("Content", "测试内容", Store.YES));
            // updateDocument deletes every document containing the term, then adds doc.
            indexWriter.updateDocument(new Term("fileName", "readme.txt"), doc);
        }
    }

    /**
     * Runs {@code query} and prints the stored {@code fileName}, {@code fileSize},
     * {@code fileContent} and {@code filePath} values of at most two hits.
     *
     * @param indexSearcher searcher used to run the query and load the documents
     * @param query the query to execute
     * @throws IOException if the index cannot be read
     */
    public void printResult(IndexSearcher indexSearcher, Query query) throws IOException {
        // Run the query, asking for at most two hits.
        TopDocs topDocs = indexSearcher.search(query, 2);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Load the stored document by its internal document id.
            Document document = indexSearcher.doc(scoreDoc.doc);
            // Print the stored field values.
            System.out.println(document.get("fileName"));
            System.out.println(document.get("fileSize"));
            System.out.println(document.get("fileContent"));
            System.out.println(document.get("filePath"));
        }
    }

    /**
     * Queries all documents.
     *
     * @throws IOException if the index cannot be read
     */
    @Test
    public void MatchAllDocsQueryTest() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        // Close the reader even if the search throws.
        try (IndexReader reader = indexSearcher.getIndexReader()) {
            Query query = new MatchAllDocsQuery();
            printResult(indexSearcher, query);
        }
    }

    /**
     * Queries by term range.
     *
     * @throws IOException if the index cannot be read
     */
    @Test
    public void TermRangeQueryTest() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try (IndexReader reader = indexSearcher.getIndexReader()) {
            // Both bounds are inclusive. A string range compares terms as text, not as numbers.
            Query query = TermRangeQuery.newStringRange("fileSize", String.valueOf(400), String.valueOf(800), true, true);
            printResult(indexSearcher, query);
        }
    }

    /**
     * Queries with a combination of clauses.
     *
     * @throws IOException if the index cannot be read
     */
    @Test
    public void BooleanQueryTest() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        // The reader is now closed here as in every other query test.
        try (IndexReader reader = indexSearcher.getIndexReader()) {
            Query query1 = new TermQuery(new Term("fileName", "readme.txt"));
            Query query2 = new TermQuery(new Term("fileName", ".txt"));
            // Occur.MUST: the clause must match
            // Occur.SHOULD: the clause is optional
            // Occur.MUST_NOT: the clause must not match
            Builder builder = new Builder();
            builder.add(query1, Occur.MUST);
            builder.add(query2, Occur.SHOULD);
            BooleanQuery booleanQuery = builder.build();
            printResult(indexSearcher, booleanQuery);
        }
    }

    /**
     * Queries using a parsed query expression.
     *
     * @throws IOException if the index cannot be read
     * @throws ParseException if the query expression cannot be parsed
     */
    @Test
    public void QueryParserTest() throws IOException, ParseException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try (IndexReader reader = indexSearcher.getIndexReader()) {
            // First argument: the default field; second argument: the analyzer.
            QueryParser queryParser = new QueryParser("fileName", new StandardAnalyzer());
            // Expression syntax is field:value. A field named in the expression overrides
            // the default field. *:* matches everything, + marks a required clause,
            // - marks a prohibited clause.
            Query query = queryParser.parse("fileName:readme.txt");
            printResult(indexSearcher, query);
            // Print the parsed query.
            System.out.println(query);
        }
    }

    /**
     * Queries using a parsed expression applied to several default fields.
     *
     * @throws IOException if the index cannot be read
     * @throws ParseException if the query expression cannot be parsed
     */
    @Test
    public void MultiFieldQueryParserTest() throws IOException, ParseException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try (IndexReader reader = indexSearcher.getIndexReader()) {
            // Default fields to search.
            String[] fields = {"fileName", "fileSize"};
            // First argument: the array of default fields; second argument: the analyzer.
            MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
            // The expression names no field, so it is applied to each default field.
            Query query = multiFieldQueryParser.parse("readme.txt");
            printResult(indexSearcher, query);
            // Print the parsed query.
            System.out.println(query);
        }
    }
}

结束语

以上仅仅算是Lucene的入门操作,要深入学习Lucene相关的知识还需要日后慢慢积累,有时间再继续补充!


如果您觉得我的文章对您有帮助,请随意赞赏,您的支持将鼓励我继续创作!
0%