Lucene基础篇

Lucene入门、创建索引库、基本的增删改查

Lucene简介

Lucene官网
Lucene是Apache软件基金会的一个开源项目，是一个高性能的信息检索库，可用于需要全文索引和搜索功能的任何应用程序。Lucene最初由Doug Cutting开发，后来并入Jakarta，成为Jakarta的一个子项目。关于更多Lucene的信息，请查看Wikipedia。

Lucene工具Luke

Download地址：https://github.com/DmitryKey/luke/releases
Luke是一款Lucene/Solr的图形化工具，使用Luke可以方便地查看并分析文档中的字段内容，进行索引维护以及检查索引的运行状况等。想了解更多关于Luke的信息可以参考：https://github.com/DmitryKey/luke 。

创建索引库

package com.my.lucene;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Lucene primer: builds an index from the files found in a source directory.
 *
 * @author Mackvord
 * @date   2018-08-30
 * @version 1.0
 */
public class LuceneTest {

    /**
     * Creates the index. Every regular file directly under {@code F:\searchSource} becomes one
     * document carrying four fields: {@code fileName}, {@code fileSize}, {@code filePath} and
     * {@code fileContent}.
     *
     * @throws IOException if the source directory cannot be listed, a source file cannot be read,
     *                     or the index cannot be opened or written
     */
    @Test
    public void createIndexTest() throws IOException {
        // Location of the raw files to index.
        File sourceDir = new File("F:\\searchSource");
        // listFiles() returns null when the path is not a directory or cannot be read;
        // fail with a clear message instead of a NullPointerException in the loop below.
        File[] listFiles = sourceDir.listFiles();
        if (listFiles == null) {
            throw new IOException("Cannot list source directory: " + sourceDir);
        }

        // try-with-resources closes the writer first (releasing the index write lock), then the
        // analyzer, then the directory -- also when indexing fails half-way through.
        try (Directory directory = FSDirectory.open(Paths.get("F:/", "TestLucene"));
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter indexwriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (File f : listFiles) {
                // Only regular files have content to read; reading a sub-directory would throw.
                if (!f.isFile()) {
                    continue;
                }
                // One document per file.
                Document document = new Document();
                // File name: analyzed and stored.
                Field fileNameField = new TextField("fileName", f.getName(), Store.YES);
                // File size in bytes, indexed as text and stored.
                Field fileSizeField =
                        new TextField("fileSize", String.valueOf(FileUtils.sizeOf(f)), Store.YES);
                // File path: stored only (StoredField), so it can be displayed but not searched.
                Field filePathField = new StoredField("filePath", f.getPath());
                // File content: analyzed and stored.
                Field fileContentField =
                        new TextField("fileContent", FileUtils.readFileToString(f, "UTF-8"), Store.YES);
                // Attach the fields to the document.
                document.add(fileNameField);
                document.add(fileSizeField);
                document.add(filePathField);
                document.add(fileContentField);
                // Write the document into the index.
                indexwriter.addDocument(document);
            }
        }
    }

}

Lucene简单查询

/**
 * Simple search: looks documents up by a field name and an exact term value.
 *
 * @throws IOException if the index cannot be opened or read
 */
@Test
public void searchTest() throws IOException {
    // 1. Open the directory that holds the index, and
    // 2. open an IndexReader on it. try-with-resources closes the reader and then the
    //    directory even when the search or the document loading throws.
    try (Directory directory = FSDirectory.open(Paths.get("F:/", "TestLucene"));
         IndexReader indexReader = DirectoryReader.open(directory)) {
        // 3. Create the searcher on top of the reader.
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // 4. Build a TermQuery for the given field and value. The value is not analyzed,
        //    so it has to equal an indexed term exactly.
        Query query = new TermQuery(new Term("fileName", "readme.txt"));
        // 5. Run the query, asking for at most two hits.
        TopDocs topDocs = indexSearcher.search(query, 2);
        // 6. Walk the hits and print the stored field values of each matching document.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Internal document id of the hit.
            int id = scoreDoc.doc;
            // Load the stored document for that id.
            Document document = indexSearcher.doc(id);
            // Print the field values.
            System.out.println(document.get("fileName"));
            System.out.println(document.get("fileSize"));
            System.out.println(document.get("fileContent"));
            System.out.println(document.get("filePath"));
        }
    }
}

基本的增删改查

以下是Lucene的一些基本的增删改查操作，主要是熟悉一下API

package com.my.lucene;

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Index management: basic delete, update and query operations against the index, mainly as a
 * tour of the Lucene API.
 *
 * @author Mackvord
 * @date   2018-08-26
 * @version 1.0
 */
public class LuceneManager {

    /** Maximum number of hits fetched and printed by {@code printResult}. */
    private static final int MAX_HITS = 2;

    /**
     * Opens the directory that holds the index.
     *
     * @return a newly opened {@link Directory}
     * @throws IOException if the directory cannot be opened
     */
    private Directory openIndexDirectory() throws IOException {
        return FSDirectory.open(Paths.get("F:/", "TestLucene"));
    }

    /**
     * Creates an {@link IndexWriter} on the index directory using a {@link StandardAnalyzer}.
     * The caller is responsible for closing the returned writer.
     *
     * @return a new IndexWriter
     * @throws IOException if the directory or the writer cannot be opened
     */
    public IndexWriter getIndexWriter() throws IOException {
        Directory directory = openIndexDirectory();
        boolean opened = false;
        try {
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
            IndexWriter indexWriter = new IndexWriter(directory, config);
            opened = true;
            return indexWriter;
        } finally {
            // Do not leak the directory when the writer could not be created.
            if (!opened) {
                directory.close();
            }
        }
    }

    /**
     * Creates an {@link IndexSearcher} on the index directory. The caller is responsible for
     * closing the underlying reader ({@code getIndexReader().close()}).
     *
     * @return a new IndexSearcher
     * @throws IOException if the directory or the reader cannot be opened
     */
    public IndexSearcher getIndexSearcher() throws IOException {
        // 1. Open the directory that holds the index.
        Directory directory = openIndexDirectory();
        boolean opened = false;
        try {
            // 2. Open a reader on the directory.
            IndexReader indexReader = DirectoryReader.open(directory);
            // 3. Wrap the reader in a searcher.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            opened = true;
            return indexSearcher;
        } finally {
            // Do not leak the directory when the reader could not be opened.
            if (!opened) {
                directory.close();
            }
        }
    }

    /**
     * Deletes every document in the index.
     *
     * @throws IOException if the index cannot be opened or modified
     */
    @Test
    public void deleteAllTest() throws IOException {
        // try-with-resources closes the writer even if the delete fails.
        try (IndexWriter indexWriter = getIndexWriter()) {
            indexWriter.deleteAll();
        }
    }

    /**
     * Deletes the documents matching a query.
     *
     * @throws IOException if the index cannot be opened or modified
     */
    @Test
    public void deleteTest() throws IOException {
        try (IndexWriter indexWriter = getIndexWriter()) {
            // NOTE(review): a TermQuery value is not analyzed, and the other tests in this class
            // look up the term "readme.txt"; "readme" may therefore match nothing -- confirm.
            Query query = new TermQuery(new Term("fileName", "readme"));
            indexWriter.deleteDocuments(query);
        }
    }

    /**
     * Updates the index: the documents matching the term are replaced by a new document.
     *
     * @throws IOException if the index cannot be opened or modified
     */
    @Test
    public void updateTest() throws IOException {
        try (IndexWriter indexWriter = getIndexWriter()) {
            Document doc = new Document();
            doc.add(new TextField("Name", "测试名", Store.YES));
            doc.add(new TextField("Content", "测试内容", Store.YES));
            indexWriter.updateDocument(new Term("fileName", "readme.txt"), doc);
        }
    }

    /**
     * Runs the query and prints the {@code fileName}, {@code fileSize}, {@code fileContent} and
     * {@code filePath} values of at most two hits to standard output.
     *
     * @param indexSearcher searcher to run the query with
     * @param query         query to execute
     * @throws IOException if the index cannot be read
     */
    public void printResult(IndexSearcher indexSearcher, Query query) throws IOException {
        // Run the query, limited to MAX_HITS hits.
        TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
        // Walk the hits and print each matching document.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Internal document id of the hit.
            int id = scoreDoc.doc;
            // Load the stored document for that id.
            Document document = indexSearcher.doc(id);
            // Print the field values (null is printed for a field the document does not have).
            System.out.println(document.get("fileName"));
            System.out.println(document.get("fileSize"));
            System.out.println(document.get("fileContent"));
            System.out.println(document.get("filePath"));
        }
    }

    /**
     * Queries all documents.
     *
     * @throws IOException if the index cannot be opened or read
     */
    @Test
    public void MatchAllDocsQueryTest() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        // Close the reader even if the query fails.
        try (IndexReader indexReader = indexSearcher.getIndexReader()) {
            Query query = new MatchAllDocsQuery();
            printResult(indexSearcher, query);
        }
    }

    /**
     * Queries by a term range.
     *
     * @throws IOException if the index cannot be opened or read
     */
    @Test
    public void TermRangeQueryTest() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try (IndexReader indexReader = indexSearcher.getIndexReader()) {
            // NOTE(review): newStringRange compares terms as strings, not as numbers, so the
            // bounds "400".."800" are lexicographic -- confirm this is the intended behavior.
            Query query = TermRangeQuery.newStringRange(
                    "fileSize", String.valueOf(400), String.valueOf(800), true, true);
            printResult(indexSearcher, query);
        }
    }

    /**
     * Combines several queries with a {@link BooleanQuery}.
     *
     * @throws IOException if the index cannot be opened or read
     */
    @Test
    public void BooleanQueryTest() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        // Close the reader when done, like the other query tests do.
        try (IndexReader indexReader = indexSearcher.getIndexReader()) {
            Query query1 = new TermQuery(new Term("fileName", "readme.txt"));
            // NOTE(review): ".txt" is looked up verbatim as a term; it is an optional clause,
            // so it cannot exclude hits -- confirm it matches anything at all.
            Query query2 = new TermQuery(new Term("fileName", ".txt"));
            // Occur.MUST:     the clause must match
            // Occur.SHOULD:   the clause is optional
            // Occur.MUST_NOT: the clause must not match
            Builder builder = new Builder();
            builder.add(query1, Occur.MUST);
            builder.add(query2, Occur.SHOULD);
            BooleanQuery booleanQuery = builder.build();
            printResult(indexSearcher, booleanQuery);
        }
    }

    /**
     * Queries through a parsed query expression.
     *
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if the query expression cannot be parsed
     */
    @Test
    public void QueryParserTest() throws IOException, ParseException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try (IndexReader indexReader = indexSearcher.getIndexReader();
             Analyzer analyzer = new StandardAnalyzer()) {
            // First argument: the default field to search; second argument: the analyzer.
            QueryParser queryParser = new QueryParser("fileName", analyzer);
            // Expression syntax is field:value. A field named in the expression takes precedence
            // over the default field. *:* matches everything, + marks a required clause and
            // - marks a prohibited clause.
            Query query = queryParser.parse("fileName:readme.txt");
            printResult(indexSearcher, query);
            // Print the parsed query.
            System.out.println(query);
        }
    }

    /**
     * Queries through a parsed query expression with several default fields.
     *
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if the query expression cannot be parsed
     */
    @Test
    public void MultiFieldQueryParserTest() throws IOException, ParseException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try (IndexReader indexReader = indexSearcher.getIndexReader();
             Analyzer analyzer = new StandardAnalyzer()) {
            // Default fields to search.
            String[] fields = {"fileName", "fileSize"};
            // First argument: the default fields; second argument: the analyzer.
            MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(fields, analyzer);
            // Expression syntax is field:value. A field named in the expression takes precedence
            // over the default fields. *:* matches everything, + marks a required clause and
            // - marks a prohibited clause.
            Query query = multiFieldQueryParser.parse("readme.txt");
            printResult(indexSearcher, query);
            // Print the parsed query.
            System.out.println(query);
        }
    }
}

结束语

以上仅仅算是Lucene的入门操作，要深入学习Lucene相关的知识还需要日后慢慢积累，有时间再继续补充！