From eecaad8b8e2c447429c31a01d49260ddd6b4ee03 Mon Sep 17 00:00:00 2001 From: Paul Martin <paul@paulsputer.com> Date: Sat, 16 Apr 2016 17:35:32 -0400 Subject: [PATCH] Proof of concept #1026 --- src/main/java/com/gitblit/service/LuceneService.java | 57 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 43 insertions(+), 14 deletions(-) diff --git a/src/main/java/com/gitblit/service/LuceneService.java b/src/main/java/com/gitblit/service/LuceneService.java index 59b1ff2..62f7df7 100644 --- a/src/main/java/com/gitblit/service/LuceneService.java +++ b/src/main/java/com/gitblit/service/LuceneService.java @@ -19,6 +19,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.text.MessageFormat; @@ -66,6 +67,11 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.pdf.PDFParser; +import org.apache.tika.sax.BodyContentHandler; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; @@ -85,8 +91,11 @@ import org.slf4j.LoggerFactory; import com.gitblit.Constants.SearchObjectType; +import com.gitblit.GitBlit; import com.gitblit.IStoredSettings; import com.gitblit.Keys; +import com.gitblit.manager.FilestoreManager; +import com.gitblit.manager.IFilestoreManager; import com.gitblit.manager.IRepositoryManager; import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; @@ -131,6 +140,8 @@ private final IStoredSettings storedSettings; private final IRepositoryManager repositoryManager; + private final IFilestoreManager filestoreManager; + private final File repositoriesFolder; private final Map<String, IndexSearcher> 
searchers = new ConcurrentHashMap<String, IndexSearcher>(); @@ -141,10 +152,12 @@ public LuceneService( IStoredSettings settings, - IRepositoryManager repositoryManager) { + IRepositoryManager repositoryManager, + IFilestoreManager filestoreManager) { this.storedSettings = settings; this.repositoryManager = repositoryManager; + this.filestoreManager = filestoreManager; this.repositoriesFolder = repositoryManager.getRepositoriesFolder(); String exts = luceneIgnoreExtensions; if (settings != null) { @@ -267,7 +280,7 @@ // close all writers for (String writer : writers.keySet()) { try { - writers.get(writer).close(); + writers.get(writer).close(true); } catch (Throwable t) { logger.error("Failed to close Lucene writer for " + writer, t); } @@ -540,7 +553,8 @@ if (!paths.containsKey(path)) { continue; } - +//TODO: Figure out the filestore oid for the path - a bit more involved than updating the index + // remove path from set ObjectId blobId = paths.remove(path); result.blobCount++; @@ -677,9 +691,24 @@ } if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { + String str = ""; // read the blob content - String str = JGitUtils.getStringContent(repository, commit.getTree(), + if (path.isFilestoreItem()) { + //Get file from filestore + BodyContentHandler handler = new BodyContentHandler(); + Metadata metadata = new Metadata(); + PDFParser parser = new PDFParser(); + + ParseContext parseContext = new ParseContext(); + File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid()); + FileInputStream inputstream = new FileInputStream(lfsFile); + parser.parse(inputstream, handler, metadata, parseContext); + str = handler.toString(); + } else { + str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings); + } + if (str != null) { doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); writer.addDocument(doc); @@ -722,8 +751,8 @@ String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path); BooleanQuery query 
= new BooleanQuery(); - StandardAnalyzer analyzer = new StandardAnalyzer(); - QueryParser qp = new QueryParser(FIELD_SUMMARY, analyzer); + StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); + QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer); query.add(qp.parse(q), Occur.MUST); IndexWriter writer = getIndexWriter(repositoryName); @@ -968,14 +997,14 @@ IndexWriter indexWriter = writers.get(repository); File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED); File indexFolder = new File(repositoryFolder, LUCENE_DIR); - Directory directory = FSDirectory.open(indexFolder.toPath()); + Directory directory = FSDirectory.open(indexFolder); if (indexWriter == null) { if (!indexFolder.exists()) { indexFolder.mkdirs(); } - StandardAnalyzer analyzer = new StandardAnalyzer(); - IndexWriterConfig config = new IndexWriterConfig(analyzer); + StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); + IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); indexWriter = new IndexWriter(directory, config); writers.put(repository, indexWriter); @@ -1028,16 +1057,16 @@ return null; } Set<SearchResult> results = new LinkedHashSet<SearchResult>(); - StandardAnalyzer analyzer = new StandardAnalyzer(); + StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); try { // default search checks summary and content BooleanQuery query = new BooleanQuery(); QueryParser qp; - qp = new QueryParser(FIELD_SUMMARY, analyzer); + qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer); qp.setAllowLeadingWildcard(true); query.add(qp.parse(text), Occur.SHOULD); - qp = new QueryParser(FIELD_CONTENT, analyzer); + qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer); qp.setAllowLeadingWildcard(true); query.add(qp.parse(text), Occur.SHOULD); @@ -1060,7 +1089,7 @@ Query rewrittenQuery = searcher.rewrite(query); 
logger.debug(rewrittenQuery.toString()); - TopScoreDocCollector collector = TopScoreDocCollector.create(5000); + TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true); searcher.search(rewrittenQuery, collector); int offset = Math.max(0, (page - 1) * pageSize); ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs; @@ -1225,7 +1254,7 @@ */ private class MultiSourceReader extends MultiReader { - MultiSourceReader(IndexReader [] readers) throws IOException { + MultiSourceReader(IndexReader [] readers) { super(readers, false); } -- Gitblit v1.9.1