From 55acda898cc001cc2db00bab336719364e0798f5 Mon Sep 17 00:00:00 2001
From: James Moger <james.moger@gitblit.com>
Date: Fri, 02 Mar 2012 18:58:20 -0500
Subject: [PATCH] Whoops. x2. LucenePage.html
---
src/com/gitblit/utils/LuceneUtils.java | 351 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 322 insertions(+), 29 deletions(-)
diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java
index 4ca72f0..e824236 100644
--- a/src/com/gitblit/utils/LuceneUtils.java
+++ b/src/com/gitblit/utils/LuceneUtils.java
@@ -7,8 +7,9 @@
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -26,6 +27,7 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -37,6 +39,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
+import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
@@ -45,10 +48,14 @@
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
+import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.util.FS;
+import com.gitblit.GitBlit;
import com.gitblit.models.IssueModel;
import com.gitblit.models.IssueModel.Attachment;
+import com.gitblit.models.PathModel.PathChangeModel;
import com.gitblit.models.RefModel;
import com.gitblit.models.SearchResult;
@@ -78,9 +85,11 @@
}
private static final Version LUCENE_VERSION = Version.LUCENE_35;
+ private static final int INDEX_VERSION = 1;
private static final String FIELD_OBJECT_TYPE = "type";
private static final String FIELD_OBJECT_ID = "id";
+ private static final String FIELD_BRANCH = "branch";
private static final String FIELD_REPOSITORY = "repository";
private static final String FIELD_SUMMARY = "summary";
private static final String FIELD_CONTENT = "content";
@@ -90,12 +99,80 @@
private static final String FIELD_LABEL = "label";
private static final String FIELD_ATTACHMENT = "attachment";
- private static Set<String> excludes = new TreeSet<String>(Arrays.asList("7z", "arc", "arj",
- "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh",
- "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
+ private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
+ "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
+ "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
+
+ // branches which are not indexed as ordinary source branches.
+ // the issues branch is referenced through IssueUtils.GB_ISSUES, the same constant
+ // index() compares against; fully qualified branch names look like
+ // refs/heads/master, i.e. they carry no leading slash
+ private static Set<String> excludedBranches = new TreeSet<String>(
+ Arrays.asList(IssueUtils.GB_ISSUES));
private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();
private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();
+
+ private static final String CONF_FILE = "lucene.conf";
+ private static final String CONF_INDEX = "index";
+ private static final String CONF_VERSION = "version";
+ private static final String CONF_ALIAS = "aliases";
+ private static final String CONF_BRANCH = "branches";
+
+ /**
+ * Determines the name of the repository, as computed by
+ * StringUtils.getRelativePath against the Gitblit repositories folder.
+ *
+ * @param repository
+ * @return the repository name
+ */
+ private static String getName(Repository repository) {
+ String rootPath = GitBlit.getRepositoriesFolder().getAbsolutePath();
+ // a bare repository is located by its git directory, any other repository
+ // by the folder which contains the git directory
+ File location = repository.isBare() ? repository.getDirectory()
+ : repository.getDirectory().getParentFile();
+ return StringUtils.getRelativePath(rootPath, location.getAbsolutePath());
+ }
+
+ /**
+ * Derives the key name under which a branch is recorded in the Lucene
+ * config file.
+ *
+ * @param branchName
+ * @return a keyname appropriate for the Git config file format
+ */
+ private static String getBranchKey(String branchName) {
+ // the key is whatever StringUtils.getSHA1 yields for the branch name
+ final String keyName = StringUtils.getSHA1(branchName);
+ return keyName;
+ }
+
+ /**
+ * Creates the Lucene configuration object for the specified repository. The
+ * config is backed by the CONF_FILE file in the repository directory and is
+ * returned without being loaded.
+ *
+ * @param repository
+ * @return a config object
+ */
+ private static FileBasedConfig getConfig(Repository repository) {
+ File confFile = new File(repository.getDirectory(), CONF_FILE);
+ return new FileBasedConfig(confFile, FS.detect());
+ }
+
+ /**
+ * Reads the Lucene config file for the repository to check the index
+ * version. If the index version is different, then the repository index
+ * should be rebuilt. A missing version entry is read as 0.
+ *
+ * @param repository
+ * @return true if the on-disk index format is different than INDEX_VERSION
+ *         or if the config file could not be read
+ */
+ public static boolean shouldReindex(Repository repository) {
+ try {
+ FileBasedConfig config = getConfig(repository);
+ config.load();
+ int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
+ // reindex if versions do not match
+ return indexVersion != INDEX_VERSION;
+ } catch (Exception ignored) {
+ // deliberately ignored: an unreadable or malformed config is treated as
+ // an out-of-date index. JVM errors are no longer swallowed here.
+ return true;
+ }
+ }
/**
* Deletes the Lucene index for the specified repository.
@@ -110,6 +187,10 @@
org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
org.eclipse.jgit.util.FileUtils.RECURSIVE);
}
+ File luceneConfig = new File(repository.getDirectory(), CONF_FILE);
+ if (luceneConfig.exists()) {
+ luceneConfig.delete();
+ }
return true;
} catch (IOException e) {
throw new RuntimeException(e);
@@ -121,15 +202,25 @@
* index.
*
* @param repository
- * @return true if the indexing has succeeded
+ * @return IndexResult
*/
- public static boolean index(Repository repository) {
+ public static IndexResult reindex(Repository repository) {
+ IndexResult result = new IndexResult();
+ if (!LuceneUtils.deleteIndex(repository)) {
+ return result;
+ }
try {
+ String repositoryName = getName(repository);
+ FileBasedConfig config = getConfig(repository);
Set<String> indexedCommits = new TreeSet<String>();
IndexWriter writer = getIndexWriter(repository, true);
// build a quick lookup of tags
Map<String, List<String>> tags = new HashMap<String, List<String>>();
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
+ if (!tag.isAnnotatedTag()) {
+ // skip non-annotated tags
+ continue;
+ }
if (!tags.containsKey(tag.getObjectId())) {
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
}
@@ -139,8 +230,16 @@
// walk through each branch
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
for (RefModel branch : branches) {
+ if (excludedBranches.contains(branch.getName())) {
+ continue;
+ }
+ String branchName = branch.getName();
RevWalk revWalk = new RevWalk(repository);
RevCommit rev = revWalk.parseCommit(branch.getObjectId());
+
+ String keyName = getBranchKey(branchName);
+ config.setString(CONF_ALIAS, null, keyName, branchName);
+ config.setString(CONF_BRANCH, null, keyName, rev.getName());
// index the blob contents of the tree
ByteArrayOutputStream os = new ByteArrayOutputStream();
@@ -154,6 +253,9 @@
Document doc = new Document();
doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,
Index.NOT_ANALYZED_NO_NORMS));
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,
Index.NOT_ANALYZED));
doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
@@ -171,7 +273,7 @@
ext = name.substring(name.lastIndexOf('.') + 1);
}
- if (StringUtils.isEmpty(ext) || !excludes.contains(ext)) {
+ if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
// read the blob content
ObjectId entid = treeWalk.getObjectId(0);
FileMode entmode = treeWalk.getFileMode(0);
@@ -199,7 +301,11 @@
String head = rev.getId().getName();
if (indexedCommits.add(head)) {
Document doc = createDocument(rev, tags.get(head));
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
writer.addDocument(doc);
+ result.commitCount += 1;
}
// traverse the log and index the previous commit objects
@@ -208,7 +314,11 @@
String hash = rev.getId().getName();
if (indexedCommits.add(hash)) {
Document doc = createDocument(rev, tags.get(hash));
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
writer.addDocument(doc);
+ result.commitCount += 1;
}
}
@@ -221,18 +331,22 @@
List<IssueModel> issues = IssueUtils.getIssues(repository, null);
for (IssueModel issue : issues) {
Document doc = createDocument(issue);
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
writer.addDocument(doc);
}
}
// commit all changes and reset the searcher
+ config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
+ config.save();
resetIndexSearcher(repository);
writer.commit();
- return true;
+ result.success = true;
} catch (Exception e) {
e.printStackTrace();
}
- return false;
+ return result;
}
/**
@@ -240,11 +354,76 @@
* repository.
*
* @param repository
+ * @param branch
+ * the fully qualified branch name (e.g. refs/heads/master)
* @param commit
* @return true, if successful
*/
- public static boolean index(Repository repository, RevCommit commit) {
+ public static boolean index(Repository repository, String branch, RevCommit commit) {
try {
+ if (IssueUtils.GB_ISSUES.equals(branch)) {
+ // index an issue. this is tested before the exclusion list so that the
+ // issues branch is handled even when it is not listed in excludedBranches.
+ // NOTE(review): substring(2) presumably strips a two-character marker in
+ // front of the issue id in the commit message - confirm against IssueUtils
+ String issueId = commit.getShortMessage().substring(2).trim();
+ IssueModel issue = IssueUtils.getIssue(repository, issueId);
+ if (issue == null) {
+ // delete the old issue from the index, if exists.
+ // deleteDocuments(Term...) removes documents matching ANY of the terms,
+ // so the type and id constraints are AND-ed in a single query instead.
+ org.apache.lucene.search.BooleanQuery oldIssue =
+ new org.apache.lucene.search.BooleanQuery();
+ oldIssue.add(new org.apache.lucene.search.TermQuery(new Term(
+ FIELD_OBJECT_TYPE, ObjectType.issue.name())), Occur.MUST);
+ oldIssue.add(new org.apache.lucene.search.TermQuery(new Term(
+ FIELD_OBJECT_ID, issueId)), Occur.MUST);
+ IndexWriter writer = getIndexWriter(repository, false);
+ writer.deleteDocuments(oldIssue);
+ writer.commit();
+ return true;
+ }
+ return index(repository, issue);
+ }
+ if (excludedBranches.contains(branch)) {
+ // any other excluded branch is not indexed
+ return false;
+ }
+ List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
+ String repositoryName = getName(repository);
+ String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
+ Resolution.MINUTE);
+ IndexWriter writer = getIndexWriter(repository, false);
+ for (PathChangeModel path : changedPaths) {
+ // delete the indexed blob of this branch and path only.
+ // all three constraints have to match, so they are AND-ed in one query;
+ // passing them as separate terms would delete every document matching
+ // any single one of them (all blobs, everything on the branch, ...)
+ org.apache.lucene.search.BooleanQuery staleBlob =
+ new org.apache.lucene.search.BooleanQuery();
+ staleBlob.add(new org.apache.lucene.search.TermQuery(new Term(
+ FIELD_OBJECT_TYPE, ObjectType.blob.name())), Occur.MUST);
+ staleBlob.add(new org.apache.lucene.search.TermQuery(new Term(
+ FIELD_BRANCH, branch)), Occur.MUST);
+ staleBlob.add(new org.apache.lucene.search.TermQuery(new Term(
+ FIELD_OBJECT_ID, path.path)), Occur.MUST);
+ writer.deleteDocuments(staleBlob);
+
+ // re-index the blob
+ if (!ChangeType.DELETE.equals(path.changeType)) {
+ Document doc = new Document();
+ doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,
+ Index.NOT_ANALYZED_NO_NORMS));
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
+ doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES,
+ Index.NOT_ANALYZED_NO_NORMS));
+ doc.add(new Field(FIELD_COMMITTER, commit.getCommitterIdent().getName(),
+ Store.YES, Index.NOT_ANALYZED_NO_NORMS));
+ doc.add(new Field(FIELD_LABEL, branch, Store.YES, Index.ANALYZED));
+
+ // determine extension to compare to the extension
+ // blacklist
+ String ext = null;
+ String name = path.name.toLowerCase();
+ if (name.indexOf('.') > -1) {
+ ext = name.substring(name.lastIndexOf('.') + 1);
+ }
+
+ // a blob document is only written when its extension is not blacklisted
+ if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
+ // read the blob content
+ String str = JGitUtils.getStringContent(repository, commit.getTree(),
+ path.path);
+ doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
+ writer.addDocument(doc);
+ }
+ }
+ }
+ writer.commit();
+
Document doc = createDocument(commit, null);
return index(repository, doc);
} catch (Exception e) {
@@ -259,26 +438,107 @@
*
* @param repository
* @param issue
- * @param reindex
- * if true, the old index entry for this issue will be deleted.
- * This is only appropriate for pre-existing/indexed issues.
* @return true, if successful
*/
- public static boolean index(Repository repository, IssueModel issue, boolean reindex) {
+ public static boolean index(Repository repository, IssueModel issue) {
try {
+ // delete the old issue from the index, if exists.
+ // deleteDocuments(Term...) removes documents matching ANY of the terms, which
+ // would wipe every indexed issue; the type and id constraints are therefore
+ // AND-ed in a single query.
+ org.apache.lucene.search.BooleanQuery oldIssue =
+ new org.apache.lucene.search.BooleanQuery();
+ oldIssue.add(new org.apache.lucene.search.TermQuery(new Term(
+ FIELD_OBJECT_TYPE, ObjectType.issue.name())), Occur.MUST);
+ oldIssue.add(new org.apache.lucene.search.TermQuery(new Term(
+ FIELD_OBJECT_ID, String.valueOf(issue.id))), Occur.MUST);
+ IndexWriter writer = getIndexWriter(repository, false);
+ writer.deleteDocuments(oldIssue);
+ writer.commit();
+
Document doc = createDocument(issue);
- if (reindex) {
- // delete the old issue from the index, if exists
- IndexWriter writer = getIndexWriter(repository, false);
- writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()),
- new Term(FIELD_OBJECT_ID, String.valueOf(issue.id)));
- writer.commit();
- }
return index(repository, doc);
} catch (Exception e) {
e.printStackTrace();
}
return false;
+ }
+
+ /**
+ * Updates a repository index incrementally from the last indexed commits.
+ * <p>
+ * The last indexed commit of every branch is read from, and written back to,
+ * the repository's Lucene config file. Branches which are recorded there but
+ * no longer exist have their documents deleted from the index and their
+ * config entries removed.
+ *
+ * @param repository
+ * @return IndexResult; success is true when the update ran to completion
+ *         without throwing, commitCount is the number of commits for which
+ *         indexing reported success
+ */
+ public static IndexResult updateIndex(Repository repository) {
+ IndexResult result = new IndexResult();
+ try {
+ FileBasedConfig config = getConfig(repository);
+ config.load();
+
+ // NOTE: no tag lookup is built here because
+ // index(repository, branch, commit) creates its commit document without tags
+
+ // detect branch deletion
+ // first assume all branches are deleted and then remove each
+ // existing branch from deletedBranches during indexing
+ Set<String> deletedBranches = new TreeSet<String>();
+ for (String alias : config.getNames(CONF_ALIAS)) {
+ String branch = config.getString(CONF_ALIAS, null, alias);
+ if (branch != null) {
+ deletedBranches.add(branch);
+ }
+ }
+
+ // walk through each branch
+ List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+ for (RefModel branch : branches) {
+ String branchName = branch.getName();
+
+ // remove this branch from the deletedBranches set
+ deletedBranches.remove(branchName);
+
+ // determine last commit
+ String keyName = getBranchKey(branchName);
+ String lastCommit = config.getString(CONF_BRANCH, null, keyName);
+
+ List<RevCommit> revs;
+ if (StringUtils.isEmpty(lastCommit)) {
+ // new branch/unindexed branch, get all commits on branch
+ revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
+ } else {
+ // pre-existing branch, get changes since last commit
+ revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
+ }
+
+ // reverse the list of commits so we start with the first commit
+ Collections.reverse(revs);
+ for (RevCommit commit : revs) {
+ // only count the commits which were actually indexed
+ if (index(repository, branchName, commit)) {
+ result.commitCount += 1;
+ }
+ }
+
+ // update the config
+ config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
+ config.setString(CONF_ALIAS, null, keyName, branchName);
+ config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
+ config.save();
+ }
+
+ // the deletedBranches set will normally be empty by this point
+ // unless a branch really was deleted and no longer exists
+ if (!deletedBranches.isEmpty()) {
+ IndexWriter writer = getIndexWriter(repository, false);
+ for (String branch : deletedBranches) {
+ writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
+
+ // forget the branch. otherwise it is reported as deleted on every
+ // update and a recreated branch of the same name would resume from
+ // a stale last commit
+ String keyName = getBranchKey(branch);
+ config.unset(CONF_ALIAS, null, keyName);
+ config.unset(CONF_BRANCH, null, keyName);
+ }
+ writer.commit();
+ config.save();
+ }
+ result.success = true;
+ } catch (Throwable t) {
+ t.printStackTrace();
+ }
+ return result;
}
/**
@@ -292,6 +552,7 @@
doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.NOT_ANALYZED));
doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
Store.YES, Field.Index.NO));
doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
@@ -344,6 +605,8 @@
*/
private static boolean index(Repository repository, Document doc) {
try {
+ String repositoryName = getName(repository);
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
IndexWriter writer = getIndexWriter(repository, false);
writer.addDocument(doc);
resetIndexSearcher(repository);
@@ -363,6 +626,8 @@
result.author = doc.get(FIELD_AUTHOR);
result.committer = doc.get(FIELD_COMMITTER);
result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
+ result.repository = doc.get(FIELD_REPOSITORY);
+ result.branch = doc.get(FIELD_BRANCH);
result.id = doc.get(FIELD_OBJECT_ID);
if (doc.get(FIELD_LABEL) != null) {
result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
@@ -437,17 +702,27 @@
}
/**
- * Search the repository for the given text or query
+ * Searches the specified repositories for the given text or query
*
- * @param repository
* @param text
- * @return a list of SearchResults
+ * if the text is null or empty, null is returned
+ * @param maximumHits
+ * the maximum number of hits to collect
+ * @param repositories
+ * a list of repositories to search. if no repositories are
+ * specified null is returned.
+ * @return a list of SearchResults in order from highest to the lowest score
+ *
*/
- public static List<SearchResult> search(Repository repository, String text) {
+ public static List<SearchResult> search(String text, int maximumHits,
+ Repository... repositories) {
if (StringUtils.isEmpty(text)) {
return null;
}
- Set<SearchResult> results = new HashSet<SearchResult>();
+ if (repositories.length == 0) {
+ return null;
+ }
+ Set<SearchResult> results = new LinkedHashSet<SearchResult>();
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
try {
// default search checks summary and content
@@ -461,10 +736,23 @@
qp.setAllowLeadingWildcard(true);
query.add(qp.parse(text), Occur.SHOULD);
- IndexSearcher searcher = getIndexSearcher(repository);
+ IndexSearcher searcher;
+ if (repositories.length == 1) {
+ // single repository search
+ searcher = getIndexSearcher(repositories[0]);
+ } else {
+ // multiple repository search
+ List<IndexReader> readers = new ArrayList<IndexReader>();
+ for (Repository repository : repositories) {
+ IndexSearcher repositoryIndex = getIndexSearcher(repository);
+ readers.add(repositoryIndex.getIndexReader());
+ }
+ IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
+ MultiReader reader = new MultiReader(rdrs);
+ searcher = new IndexSearcher(reader);
+ }
Query rewrittenQuery = searcher.rewrite(query);
-
- TopScoreDocCollector collector = TopScoreDocCollector.create(200, true);
+ TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
searcher.search(rewrittenQuery, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
for (int i = 0; i < hits.length; i++) {
@@ -503,4 +791,9 @@
}
SEARCHERS.clear();
}
+
+ /**
+ * Outcome of a reindex or an incremental index update.
+ */
+ public static class IndexResult {
+ /** number of commits counted during the indexing run */
+ public int commitCount = 0;
+ /** set to true at the end of an indexing run which did not throw */
+ public boolean success = false;
+ }
}
--
Gitblit v1.9.1