From eecaad8b8e2c447429c31a01d49260ddd6b4ee03 Mon Sep 17 00:00:00 2001 From: Paul Martin <paul@paulsputer.com> Date: Sat, 16 Apr 2016 17:35:32 -0400 Subject: [PATCH] Proof of concept #1026 --- src/main/java/com/gitblit/service/LuceneService.java | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/gitblit/service/LuceneService.java b/src/main/java/com/gitblit/service/LuceneService.java index 482be5c..62f7df7 100644 --- a/src/main/java/com/gitblit/service/LuceneService.java +++ b/src/main/java/com/gitblit/service/LuceneService.java @@ -19,6 +19,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.text.MessageFormat; @@ -66,6 +67,11 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.pdf.PDFParser; +import org.apache.tika.sax.BodyContentHandler; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; @@ -85,8 +91,11 @@ import org.slf4j.LoggerFactory; import com.gitblit.Constants.SearchObjectType; +import com.gitblit.GitBlit; import com.gitblit.IStoredSettings; import com.gitblit.Keys; +import com.gitblit.manager.FilestoreManager; +import com.gitblit.manager.IFilestoreManager; import com.gitblit.manager.IRepositoryManager; import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; @@ -105,7 +114,7 @@ public class LuceneService implements Runnable { - private static final int INDEX_VERSION = 5; + private static final int INDEX_VERSION = 6; private static final String FIELD_OBJECT_TYPE = "type"; private static final String FIELD_PATH = 
"path"; @@ -125,12 +134,14 @@ private static final String CONF_ALIAS = "aliases"; private static final String CONF_BRANCH = "branches"; - private static final Version LUCENE_VERSION = Version.LUCENE_46; + private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0; private final Logger logger = LoggerFactory.getLogger(LuceneService.class); private final IStoredSettings storedSettings; private final IRepositoryManager repositoryManager; + private final IFilestoreManager filestoreManager; + private final File repositoriesFolder; private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>(); @@ -141,10 +152,12 @@ public LuceneService( IStoredSettings settings, - IRepositoryManager repositoryManager) { + IRepositoryManager repositoryManager, + IFilestoreManager filestoreManager) { this.storedSettings = settings; this.repositoryManager = repositoryManager; + this.filestoreManager = filestoreManager; this.repositoriesFolder = repositoryManager.getRepositoriesFolder(); String exts = luceneIgnoreExtensions; if (settings != null) { @@ -437,7 +450,7 @@ // skip non-annotated tags continue; } - if (!tags.containsKey(tag.getObjectId().getName())) { + if (!tags.containsKey(tag.getReferencedObjectId().getName())) { tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>()); } tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName); @@ -540,7 +553,8 @@ if (!paths.containsKey(path)) { continue; } - +//TODO: Figure out the filestore oid for the path - a bit more involved than updating the index + // remove path from set ObjectId blobId = paths.remove(path); result.blobCount++; @@ -615,7 +629,7 @@ } // finished - reader.release(); + reader.close(); // commit all changes and reset the searcher config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); @@ -677,9 +691,24 @@ } if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { + String str = ""; // read the blob content - String str = 
JGitUtils.getStringContent(repository, commit.getTree(), + if (path.isFilestoreItem()) { + //Get file from filestore + BodyContentHandler handler = new BodyContentHandler(); + Metadata metadata = new Metadata(); + PDFParser parser = new PDFParser(); + + ParseContext parseContext = new ParseContext(); + File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid()); + FileInputStream inputstream = new FileInputStream(lfsFile); + parser.parse(inputstream, handler, metadata, parseContext); + str = handler.toString(); + } else { + str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings); + } + if (str != null) { doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); writer.addDocument(doc); @@ -1104,6 +1133,7 @@ content = ""; } + int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4); int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150; QueryScorer scorer = new QueryScorer(query, "content"); @@ -1126,7 +1156,7 @@ if (fragment.length() > fragmentLength) { fragment = fragment.substring(0, fragmentLength) + "..."; } - return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>"; + return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>"; } // make sure we have unique fragments -- Gitblit v1.9.1