From 55acda898cc001cc2db00bab336719364e0798f5 Mon Sep 17 00:00:00 2001
From: James Moger <james.moger@gitblit.com>
Date: Fri, 02 Mar 2012 18:58:20 -0500
Subject: [PATCH] Whoops. x2. LucenePage.html

---
 src/com/gitblit/utils/LuceneUtils.java |  254 +++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 209 insertions(+), 45 deletions(-)

diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java
index 738382a..e824236 100644
--- a/src/com/gitblit/utils/LuceneUtils.java
+++ b/src/com/gitblit/utils/LuceneUtils.java
@@ -7,6 +7,7 @@
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -47,8 +48,11 @@
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevObject;
 import org.eclipse.jgit.revwalk.RevWalk;
+import org.eclipse.jgit.storage.file.FileBasedConfig;
 import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.util.FS;
 
+import com.gitblit.GitBlit;
 import com.gitblit.models.IssueModel;
 import com.gitblit.models.IssueModel.Attachment;
 import com.gitblit.models.PathModel.PathChangeModel;
@@ -81,6 +85,7 @@
 	}
 
 	private static final Version LUCENE_VERSION = Version.LUCENE_35;
+	private static final int INDEX_VERSION = 1;
 
 	private static final String FIELD_OBJECT_TYPE = "type";
 	private static final String FIELD_OBJECT_ID = "id";
@@ -94,17 +99,21 @@
 	private static final String FIELD_LABEL = "label";
 	private static final String FIELD_ATTACHMENT = "attachment";
 
-	private static Set<String> excludedExtensions = new TreeSet<String>(
-			Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc",
-					"docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", 
-					"odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls",
-					"xlsx", "zip"));
+	private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
+			"arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
+			"lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
 
 	private static Set<String> excludedBranches = new TreeSet<String>(
 			Arrays.asList("/refs/heads/gb-issues"));
 
 	private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();
 	private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();
+
+	private static final String CONF_FILE = "lucene.conf";
+	private static final String CONF_INDEX = "index";
+	private static final String CONF_VERSION = "version";
+	private static final String CONF_ALIAS = "aliases";
+	private static final String CONF_BRANCH = "branches";
 
 	/**
 	 * Returns the name of the repository.
@@ -113,13 +122,58 @@
 	 * @return the repository name
 	 */
 	private static String getName(Repository repository) {
+		String rootPath = GitBlit.getRepositoriesFolder().getAbsolutePath();
 		if (repository.isBare()) {
-			return repository.getDirectory().getName();
+			return StringUtils.getRelativePath(rootPath, repository.getDirectory()
+					.getAbsolutePath());
 		} else {
-			return repository.getDirectory().getParentFile().getName();
+			return StringUtils.getRelativePath(rootPath, repository.getDirectory().getParentFile()
+					.getAbsolutePath());
 		}
 	}
-	
+
+	/**
+	 * Constructs a keyname by passing the branch name to StringUtils.getSHA1.
+	 * 
+	 * @param branchName the branch name the key is derived from
+	 * @return a keyname appropriate for the Git config file format
+	 */
+	private static String getBranchKey(String branchName) {
+		return StringUtils.getSHA1(branchName);
+	}
+
+	/**
+	 * Builds the (not yet loaded) Lucene config object for the repository.
+	 * 
+	 * @param repository the repository whose directory holds the config file
+	 * @return a config object backed by CONF_FILE in the repository directory
+	 */
+	private static FileBasedConfig getConfig(Repository repository) {
+		File gitDir = repository.getDirectory();
+		File confFile = new File(gitDir, CONF_FILE);
+		return new FileBasedConfig(confFile, FS.detect());
+	}
+
+	/**
+	 * Compares the index version recorded in the repository's Lucene config
+	 * file against INDEX_VERSION. Any failure while reading the config is
+	 * treated the same as a version mismatch.
+	 * 
+	 * @param repository
+	 * @return true if the on-disk index format is different than INDEX_VERSION
+	 */
+	public static boolean shouldReindex(Repository repository) {
+		boolean stale = true;
+		try {
+			FileBasedConfig luceneConfig = getConfig(repository);
+			luceneConfig.load();
+			// reindex if versions do not match
+			stale = luceneConfig.getInt(CONF_INDEX, CONF_VERSION, 0) != INDEX_VERSION;
+		} catch (Throwable t) {
+		}
+		return stale;
+	}
+
 	/**
 	 * Deletes the Lucene index for the specified repository.
 	 * 
@@ -133,6 +187,10 @@
 				org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
 						org.eclipse.jgit.util.FileUtils.RECURSIVE);
 			}
+			File luceneConfig = new File(repository.getDirectory(), CONF_FILE);
+			if (luceneConfig.exists()) {
+				luceneConfig.delete();
+			}
 			return true;
 		} catch (IOException e) {
 			throw new RuntimeException(e);
@@ -144,16 +202,25 @@
 	 * index.
 	 * 
 	 * @param repository
-	 * @return true if the indexing has succeeded
+	 * @return IndexResult
 	 */
-	public static boolean index(Repository repository) {
+	public static IndexResult reindex(Repository repository) {
+		IndexResult result = new IndexResult();
+		if (!LuceneUtils.deleteIndex(repository)) {
+			return result;
+		}
 		try {
 			String repositoryName = getName(repository);
+			FileBasedConfig config = getConfig(repository);
 			Set<String> indexedCommits = new TreeSet<String>();
 			IndexWriter writer = getIndexWriter(repository, true);
 			// build a quick lookup of tags
 			Map<String, List<String>> tags = new HashMap<String, List<String>>();
 			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
+				if (!tag.isAnnotatedTag()) {
+					// skip non-annotated tags
+					continue;
+				}
 				if (!tags.containsKey(tag.getObjectId())) {
 					tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
 				}
@@ -170,6 +237,10 @@
 				RevWalk revWalk = new RevWalk(repository);
 				RevCommit rev = revWalk.parseCommit(branch.getObjectId());
 
+				String keyName = getBranchKey(branchName);
+				config.setString(CONF_ALIAS, null, keyName, branchName);
+				config.setString(CONF_BRANCH, null, keyName, rev.getName());
+
 				// index the blob contents of the tree
 				ByteArrayOutputStream os = new ByteArrayOutputStream();
 				byte[] tmp = new byte[32767];
@@ -184,8 +255,7 @@
 							Index.NOT_ANALYZED_NO_NORMS));
 					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
 							Index.NOT_ANALYZED));
-					doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
-							Index.NOT_ANALYZED));
+					doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
 					doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,
 							Index.NOT_ANALYZED));
 					doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
@@ -233,9 +303,9 @@
 					Document doc = createDocument(rev, tags.get(head));
 					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
 							Index.NOT_ANALYZED));
-					doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
-							Index.NOT_ANALYZED));
+					doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
 					writer.addDocument(doc);
+					result.commitCount += 1;
 				}
 
 				// traverse the log and index the previous commit objects
@@ -246,9 +316,9 @@
 						Document doc = createDocument(rev, tags.get(hash));
 						doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
 								Index.NOT_ANALYZED));
-						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
-								Index.NOT_ANALYZED));
+						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
 						writer.addDocument(doc);
+						result.commitCount += 1;
 					}
 				}
 
@@ -268,13 +338,15 @@
 			}
 
 			// commit all changes and reset the searcher
+			config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
+			config.save();
 			resetIndexSearcher(repository);
 			writer.commit();
-			return true;
+			result.success = true;
 		} catch (Exception e) {
 			e.printStackTrace();
 		}
-		return false;
+		return result;
 	}
 
 	/**
@@ -288,13 +360,22 @@
 	 * @return true, if successful
 	 */
 	public static boolean index(Repository repository, String branch, RevCommit commit) {
-		try {			
+		try {
 			if (excludedBranches.contains(branch)) {
 				if (IssueUtils.GB_ISSUES.equals(branch)) {
 					// index an issue
 					String issueId = commit.getShortMessage().substring(2).trim();
 					IssueModel issue = IssueUtils.getIssue(repository, issueId);
-					return index(repository, issue, true);
+					if (issue == null) {
+						// delete the old issue from the index, if exists
+						IndexWriter writer = getIndexWriter(repository, false);
+						writer.deleteDocuments(
+								new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
+										FIELD_OBJECT_ID, issueId));
+						writer.commit();
+						return true;
+					}
+					return index(repository, issue);
 				}
 				return false;
 			}
@@ -306,9 +387,8 @@
 			for (PathChangeModel path : changedPaths) {
 				// delete the indexed blob
 				writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),
-						new Term(FIELD_BRANCH, branch),
-						new Term(FIELD_OBJECT_ID, path.path));
-				
+						new Term(FIELD_BRANCH, branch), new Term(FIELD_OBJECT_ID, path.path));
+
 				// re-index the blob
 				if (!ChangeType.DELETE.equals(path.changeType)) {
 					Document doc = new Document();
@@ -317,8 +397,7 @@
 					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
 							Index.NOT_ANALYZED));
 					doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.NOT_ANALYZED));
-					doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES,
-							Index.NOT_ANALYZED));
+					doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.NOT_ANALYZED));
 					doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
 					doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES,
 							Index.NOT_ANALYZED_NO_NORMS));
@@ -336,15 +415,15 @@
 
 					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
 						// read the blob content
-						String str = JGitUtils.getStringContent(repository, 
-								commit.getTree(), path.path);
+						String str = JGitUtils.getStringContent(repository, commit.getTree(),
+								path.path);
 						doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
 						writer.addDocument(doc);
 					}
 				}
 			}
 			writer.commit();
-			
+
 			Document doc = createDocument(commit, null);
 			return index(repository, doc);
 		} catch (Exception e) {
@@ -359,26 +438,107 @@
 	 * 
 	 * @param repository
 	 * @param issue
-	 * @param reindex
-	 *            if true, the old index entry for this issue will be deleted.
-	 *            This is only appropriate for pre-existing/indexed issues.
 	 * @return true, if successful
 	 */
-	public static boolean index(Repository repository, IssueModel issue, boolean reindex) {
+	public static boolean index(Repository repository, IssueModel issue) {
 		try {
+			// delete the old issue from the index, if exists
+			IndexWriter writer = getIndexWriter(repository, false);
+			writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
+					FIELD_OBJECT_ID, String.valueOf(issue.id)));
+			writer.commit();
+
 			Document doc = createDocument(issue);
-			if (reindex) {
-				// delete the old issue from the index, if exists
-				IndexWriter writer = getIndexWriter(repository, false);
-				writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()),
-						new Term(FIELD_OBJECT_ID, String.valueOf(issue.id)));
-				writer.commit();
-			}
 			return index(repository, doc);
 		} catch (Exception e) {
 			e.printStackTrace();
 		}
 		return false;
+	}
+
+	/**
+	 * Updates a repository index incrementally from the last indexed commits
+	 * and deletes the documents of branches that no longer exist.
+	 * 
+	 * @param repository
+	 * @return IndexResult holding the success flag and indexed commit count
+	 */
+	public static IndexResult updateIndex(Repository repository) {
+		IndexResult result = new IndexResult();
+		try {
+			FileBasedConfig config = getConfig(repository);
+			config.load();
+
+			// build a quick lookup of annotated tags, keyed by referenced object name
+			Map<String, List<String>> tags = new HashMap<String, List<String>>();
+			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
+				if (!tag.isAnnotatedTag()) {
+					// skip non-annotated tags
+					continue;
+				}
+				if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
+					tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
+				}
+				tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
+			}
+
+			// detect branch deletion
+			// first assume all branches are deleted and then remove each
+			// existing branch from deletedBranches during indexing
+			Set<String> deletedBranches = new TreeSet<String>();
+			for (String alias : config.getNames(CONF_ALIAS)) {
+				String branch = config.getString(CONF_ALIAS, null, alias);
+				deletedBranches.add(branch);
+			}
+
+			// walk through each branch
+			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+			for (RefModel branch : branches) {
+				String branchName = branch.getName();
+
+				// remove this branch from the deletedBranches set
+				deletedBranches.remove(branchName);
+
+				// determine last commit
+				String keyName = getBranchKey(branchName);
+				String lastCommit = config.getString(CONF_BRANCH, null, keyName);
+
+				List<RevCommit> revs;
+				if (StringUtils.isEmpty(lastCommit)) {
+					// new branch/unindexed branch, get all commits on branch
+					revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
+				} else {
+					// pre-existing branch, get changes since last commit
+					revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
+				}
+
+				// reverse the list of commits so we start with the first commit
+				Collections.reverse(revs);
+				for (RevCommit commit : revs) {
+					index(repository, branchName, commit);
+					result.commitCount += 1;
+				}
+
+				// update the config
+				config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
+				config.setString(CONF_ALIAS, null, keyName, branchName);
+				config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
+				config.save();
+			}
+
+			// the deletedBranches set will normally be empty by this point
+			// unless a branch really was deleted and no longer exists
+			if (deletedBranches.size() > 0) {
+				for (String branch : deletedBranches) {
+					IndexWriter writer = getIndexWriter(repository, false);
+					writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
+					writer.commit();
+				}
+			}
+			result.success = true;
+		} catch (Throwable t) {
+			t.printStackTrace();
+		}
+		return result;
+	}
 
 	/**
@@ -446,8 +606,7 @@
 	private static boolean index(Repository repository, Document doc) {
 		try {
 			String repositoryName = getName(repository);
-			doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
-					Index.NOT_ANALYZED));
+			doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
 			IndexWriter writer = getIndexWriter(repository, false);
 			writer.addDocument(doc);
 			resetIndexSearcher(repository);
@@ -587,9 +746,9 @@
 				for (Repository repository : repositories) {
 					IndexSearcher repositoryIndex = getIndexSearcher(repository);
 					readers.add(repositoryIndex.getIndexReader());
-				}			
-				IndexReader [] rdrs = readers.toArray(new IndexReader[readers.size()]);
-				MultiReader reader = new MultiReader(rdrs);			
+				}
+				IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
+				MultiReader reader = new MultiReader(rdrs);
 				searcher = new IndexSearcher(reader);
 			}
 			Query rewrittenQuery = searcher.rewrite(query);
@@ -606,7 +765,7 @@
 			e.printStackTrace();
 		}
 		return new ArrayList<SearchResult>(results);
-	}	
+	}
 
 	/**
 	 * Close all the index writers and searchers
@@ -632,4 +791,9 @@
 		}
 		SEARCHERS.clear();
 	}
+
+	public static class IndexResult {
+		public boolean success; // set to true only at the end of an indexing run's try block
+		public int commitCount; // incremented by one for each commit indexed during the run
+	}
 }

--
Gitblit v1.9.1