From 0e44acbb2fec928a1606dc60f427a148fff405c9 Mon Sep 17 00:00:00 2001
From: Mohamed Ragab <moragab@gmail.com>
Date: Wed, 02 May 2012 11:15:01 -0400
Subject: [PATCH] Added a script to facilitate setting the proxy host and port and no proxy hosts, and then it concatenates all the java system properties for setting the java proxy configurations and puts the resulting string in an environment variable JAVA_PROXY_CONFIG, modified the scripts gitblit, gitblit-ubuntu, and gitblit-centos to source the java-proxy-config.sh script and then include the resulting java proxy configuration in the java command

---
 src/com/gitblit/LuceneExecutor.java |  544 +++++++++++++++++++++++++++++++++--------------------
 1 files changed, 339 insertions(+), 205 deletions(-)

diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java
index 527609e..afd1cc5 100644
--- a/src/com/gitblit/LuceneExecutor.java
+++ b/src/com/gitblit/LuceneExecutor.java
@@ -21,10 +21,10 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.lang.reflect.Method;
 import java.text.MessageFormat;
 import java.text.ParseException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -73,6 +73,7 @@
 import org.eclipse.jgit.lib.ObjectLoader;
 import org.eclipse.jgit.lib.ObjectReader;
 import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.lib.RepositoryCache.FileKey;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevTree;
 import org.eclipse.jgit.revwalk.RevWalk;
@@ -88,6 +89,7 @@
 import com.gitblit.models.IssueModel.Attachment;
 import com.gitblit.models.PathModel.PathChangeModel;
 import com.gitblit.models.RefModel;
+import com.gitblit.models.RepositoryModel;
 import com.gitblit.models.SearchResult;
 import com.gitblit.utils.ArrayUtils;
 import com.gitblit.utils.IssueUtils;
@@ -103,14 +105,13 @@
 public class LuceneExecutor implements Runnable {
 	
 		
-	private static final int INDEX_VERSION = 1;
+	private static final int INDEX_VERSION = 2;
 
 	private static final String FIELD_OBJECT_TYPE = "type";
 	private static final String FIELD_ISSUE = "issue";
 	private static final String FIELD_PATH = "path";
 	private static final String FIELD_COMMIT = "commit";
 	private static final String FIELD_BRANCH = "branch";
-	private static final String FIELD_REPOSITORY = "repository";
 	private static final String FIELD_SUMMARY = "summary";
 	private static final String FIELD_CONTENT = "content";
 	private static final String FIELD_AUTHOR = "author";
@@ -137,51 +138,42 @@
 	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
 	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
 	
-	private final Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
-			"arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
-			"lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
-
-	private final Set<String> excludedBranches = new TreeSet<String>(
-			Arrays.asList("/refs/heads/gb-issues"));
+	private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
+	private Set<String> excludedExtensions;
 	
 	public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
 		this.storedSettings = settings;
 		this.repositoriesFolder = repositoriesFolder;
+		String exts = luceneIgnoreExtensions;
+		if (settings != null) {
+			exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
+		}
+		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
 	}
 
 	/**
-	 * Indicates if the Lucene executor can index repositories.
-	 * 
-	 * @return true if the Lucene executor is ready to index repositories
-	 */
-	public boolean isReady() {
-		return storedSettings.getBoolean(Keys.lucene.enable, false);
-	}
-
-	/**
-	 * Run is executed by the gitblit executor service at whatever frequency
-	 * is specified in the settings.  Because this is called by an executor
-	 * service, calls will queue - i.e. there can never be concurrent execution
-	 * of repository index updates.
+	 * Run is executed by the Gitblit executor service.  Because this is called 
+	 * by an executor service, calls will queue - i.e. there can never be
+	 * concurrent execution of repository index updates.
 	 */
 	@Override
 	public void run() {
-		if (!isReady()) {
+		if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
+			// Lucene indexing is disabled
 			return;
 		}
+		// reload the excluded extensions
+		String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
+		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
 
-		for (String repositoryName : GitBlit.self().getRepositoryList()) {
-			Repository repository = GitBlit.self().getRepository(repositoryName);
-			if (repository == null) {
-				logger.warn(MessageFormat.format(
-						"Lucene executor could not find repository {0}. Skipping.",
-						repositoryName));
-				continue;
+		for (String repositoryName: GitBlit.self().getRepositoryList()) {
+			RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
+			if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
+				Repository repository = GitBlit.self().getRepository(model.name);
+				index(model, repository);				
+				repository.close();
+				System.gc();
 			}
-			// TODO allow repository to bypass Lucene indexing				
-			index(repositoryName, repository);
-			repository.close();
-			System.gc();
 		}
 	}
 
@@ -194,51 +186,71 @@
 	 * @param repository
 	 *            the repository object
 	 */
-	protected void index(String name, Repository repository) {
+	private void index(RepositoryModel model, Repository repository) {
 		try {
-			if (JGitUtils.hasCommits(repository)) {
-				if (shouldReindex(repository)) {
-					// (re)build the entire index					
-					IndexResult result = reindex(name, repository);
-					
-					if (result.success) {
-						if (result.commitCount > 0) {
-							String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
-							logger.info(MessageFormat.format(msg, name,
-									result.commitCount, result.blobCount, result.branchCount, result.duration()));
-						}
-					} else {
-						String msg = "Could not build {0} Lucene index!";
-						logger.error(MessageFormat.format(msg, name));
+			if (shouldReindex(repository)) {
+				// (re)build the entire index
+				IndexResult result = reindex(model, repository);
+
+				if (result.success) {
+					if (result.commitCount > 0) {
+						String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
+						logger.info(MessageFormat.format(msg, model.name, result.commitCount,
+								result.blobCount, result.branchCount, result.duration()));
 					}
 				} else {
-					// update the index with latest commits					
-					IndexResult result = updateIndex(name, repository);
-					if (result.success) {
-						if (result.commitCount > 0) {
-							String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
-							logger.info(MessageFormat.format(msg, name,
-									result.commitCount, result.blobCount, result.branchCount, result.duration()));
-						}
-					} else {
-						String msg = "Could not update {0} Lucene index!";
-						logger.error(MessageFormat.format(msg, name));
-					}
+					String msg = "Could not build {0} Lucene index!";
+					logger.error(MessageFormat.format(msg, model.name));
 				}
 			} else {
-				logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}",
-						name));
+				// update the index with latest commits
+				IndexResult result = updateIndex(model, repository);
+				if (result.success) {
+					if (result.commitCount > 0) {
+						String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
+						logger.info(MessageFormat.format(msg, model.name, result.commitCount,
+								result.blobCount, result.branchCount, result.duration()));
+					}
+				} else {
+					String msg = "Could not update {0} Lucene index!";
+					logger.error(MessageFormat.format(msg, model.name));
+				}
 			}
 		} catch (Throwable t) {
-			logger.error(MessageFormat.format("Lucene indexing failure for {0}", name), t);
+			logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
 		}
+	}
+	
+	/**
+	 * Close the writer/searcher objects for a repository.
+	 * A failure to close either object is logged and is not rethrown.
+	 * @param repositoryName key of the cached searcher and writer entries to remove and close
+	 */
+	public synchronized void close(String repositoryName) {
+		try {
+			IndexSearcher searcher = searchers.remove(repositoryName);
+			if (searcher != null) {
+				searcher.getIndexReader().close();	// close the reader obtained from the searcher
+			}
+		} catch (Exception e) {
+			logger.error("Failed to close index searcher for " + repositoryName, e);
+		}
+		// the writer is closed in its own try block, so it runs even if the searcher close failed
+		try {
+			IndexWriter writer = writers.remove(repositoryName);
+			if (writer != null) {
+				writer.close();
+			}
+		} catch (Exception e) {
+			logger.error("Failed to close index writer for " + repositoryName, e);
+		}		
 	}
 
 	/**
 	 * Close all Lucene indexers.
 	 * 
 	 */
-	public void close() {
+	public synchronized void close() {
 		// close all writers
 		for (String writer : writers.keySet()) {
 			try {
@@ -252,7 +264,7 @@
 		// close all searchers
 		for (String searcher : searchers.keySet()) {
 			try {
-				searchers.get(searcher).close();
+				searchers.get(searcher).getIndexReader().close();
 			} catch (Throwable t) {
 				logger.error("Failed to close Lucene searcher for " + searcher, t);
 			}
@@ -269,18 +281,9 @@
 	 */
 	public boolean deleteIndex(String repositoryName) {
 		try {
-			// remove the repository index writer from the cache and close it
-			IndexWriter writer = writers.remove(repositoryName);
-			if (writer != null) {
-				writer.close();
-				writer = null;
-			}
-			// remove the repository index searcher from the cache and close it
-			IndexSearcher searcher = searchers.remove(repositoryName);
-			if (searcher != null) {
-				searcher.close();
-				searcher = null;
-			}
+			// close any open writer/searcher
+			close(repositoryName);
+
 			// delete the index folder
 			File repositoryFolder = new File(repositoriesFolder, repositoryName);
 			File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
@@ -335,6 +338,24 @@
 		}
 		return name;
 	}
+	
+	/**
+	 * Get the tree associated with the given commit.
+	 * If the commit reports no tree, its headers are parsed via the walk and the tree is read again.
+	 * @param walk the rev walk used to parse the commit headers when the tree is null
+	 * @param commit the commit whose tree is requested
+	 * @return the value of commit.getTree(), re-read after parsing headers if it was null
+	 * @throws IOException declared here; presumably raised by walk.parseHeaders - TODO confirm
+	 */
+	private RevTree getTree(final RevWalk walk, final RevCommit commit)
+			throws IOException {
+		final RevTree tree = commit.getTree();
+		if (tree != null) {
+			return tree;
+		}
+		walk.parseHeaders(commit);	// tree was null: parse the headers, then ask again
+		return commit.getTree();
+	}
 
 	/**
 	 * Construct a keyname from the branch.
@@ -366,7 +387,7 @@
 	 * @param repository
 	 * @return true of the on-disk index format is different than INDEX_VERSION
 	 */
-	protected boolean shouldReindex(Repository repository) {
+	private boolean shouldReindex(Repository repository) {
 		try {
 			FileBasedConfig config = getConfig(repository);
 			config.load();
@@ -387,15 +408,15 @@
 	 * @param repository
 	 * @return IndexResult
 	 */
-	public IndexResult reindex(String repositoryName, Repository repository) {
-		IndexResult result = new IndexResult();
-		if (!deleteIndex(repositoryName)) {
+	public IndexResult reindex(RepositoryModel model, Repository repository) {
+		IndexResult result = new IndexResult();		
+		if (!deleteIndex(model.name)) {
 			return result;
 		}
 		try {			
 			FileBasedConfig config = getConfig(repository);
 			Set<String> indexedCommits = new TreeSet<String>();
-			IndexWriter writer = getIndexWriter(repositoryName);
+			IndexWriter writer = getIndexWriter(model.name);
 			// build a quick lookup of tags
 			Map<String, List<String>> tags = new HashMap<String, List<String>>();
 			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
@@ -427,7 +448,7 @@
 			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
 			for (RefModel branch :  branches) {
 				if (branch.getObjectId().equals(defaultBranchId)) {
-					defaultBranch = branch;					
+					defaultBranch = branch;
 					break;
 				}
 			}
@@ -436,7 +457,23 @@
 			
 			// walk through each branch
 			for (RefModel branch : branches) {
-				if (excludedBranches.contains(branch.getName())) {
+
+				boolean indexBranch = false;
+				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
+						&& branch.equals(defaultBranch)) {
+					// indexing "default" branch
+					indexBranch = true;
+				} else if (IssueUtils.GB_ISSUES.equals(branch)) {
+					// skip the GB_ISSUES branch because it is indexed later
+					// note: this is different than updateIndex
+					indexBranch = false;
+				} else {
+					// normal explicit branch check
+					indexBranch = model.indexedBranches.contains(branch.getName());
+				}
+				
+				// if this branch is not specifically indexed then skip
+				if (!indexBranch) {
 					continue;
 				}
 
@@ -501,7 +538,6 @@
 						
 						Document doc = new Document();
 						doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
-						doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
@@ -520,7 +556,7 @@
 						// index the blob content
 						if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {							
 							ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
-							InputStream in = ldr.openStream();							
+							InputStream in = ldr.openStream();						
 							int n;
 							while ((n = in.read(tmp)) > 0) {
 								os.write(tmp, 0, n);
@@ -542,7 +578,6 @@
 				// index the tip commit object
 				if (indexedCommits.add(tipId)) {
 					Document doc = createDocument(tip, tags.get(tipId));
-					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
 					writer.addDocument(doc);
 					result.commitCount += 1;
@@ -557,7 +592,6 @@
 					String hash = rev.getId().getName();
 					if (indexedCommits.add(hash)) {
 						Document doc = createDocument(rev, tags.get(hash));
-						doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
 						writer.addDocument(doc);
 						result.commitCount += 1;
@@ -577,7 +611,6 @@
 				for (IssueModel issue : issues) {
 					result.issueCount++;
 					Document doc = createDocument(issue);
-					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 					writer.addDocument(doc);
 				}
 			}
@@ -585,33 +618,15 @@
 			// commit all changes and reset the searcher
 			config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
 			config.save();
-			resetIndexSearcher(repositoryName);
 			writer.commit();
+			resetIndexSearcher(model.name);
 			result.success();
 		} catch (Exception e) {
-			logger.error("Exception while reindexing " + repositoryName, e);
+			logger.error("Exception while reindexing " + model.name, e);
 		}
 		return result;
 	}
 	
-	/**
-	 * Get the tree associated with the given commit.
-	 *
-	 * @param walk
-	 * @param commit
-	 * @return tree
-	 * @throws IOException
-	 */
-	protected RevTree getTree(final RevWalk walk, final RevCommit commit)
-			throws IOException {
-		final RevTree tree = commit.getTree();
-		if (tree != null) {
-			return tree;
-		}
-		walk.parseHeaders(commit);
-		return commit.getTree();
-	}
-
 	/**
 	 * Incrementally update the index with the specified commit for the
 	 * repository.
@@ -627,31 +642,13 @@
 			String branch, RevCommit commit) {
 		IndexResult result = new IndexResult();
 		try {
-			if (excludedBranches.contains(branch)) {
-				if (IssueUtils.GB_ISSUES.equals(branch)) {
-					// index an issue
-					String issueId = commit.getShortMessage().substring(2).trim();
-					IssueModel issue = IssueUtils.getIssue(repository, issueId);
-					if (issue == null) {
-						// issue was deleted, remove from index
-						deleteIssue(repositoryName, issueId);
-						result.success = true;
-						return result;
-					}
-					result.success = index(repositoryName, issue);
-					result.issueCount++;
-					return result;
-					
-				}
-				return result;
-			}
 			List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
 			String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
 					Resolution.MINUTE);
 			IndexWriter writer = getIndexWriter(repositoryName);
 			for (PathChangeModel path : changedPaths) {
 				// delete the indexed blob
-				deleteBlob(repositoryName, branch, path.path);
+				deleteBlob(repositoryName, branch, path.name);
 
 				// re-index the blob
 				if (!ChangeType.DELETE.equals(path.changeType)) {
@@ -659,7 +656,6 @@
 					Document doc = new Document();
 					doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
 							Index.NOT_ANALYZED));
-					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
@@ -685,8 +681,18 @@
 				}
 			}
 			writer.commit();
-
-			Document doc = createDocument(commit, null);
+			
+			// get any annotated commit tags
+			List<String> commitTags = new ArrayList<String>();
+			for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
+				if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
+					commitTags.add(ref.displayName);
+				}
+			}
+			
+			// create and write the Lucene document
+			Document doc = createDocument(commit, commitTags);
+			doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
 			result.commitCount++;
 			result.success = index(repositoryName, doc);
 		} catch (Exception e) {
@@ -759,11 +765,11 @@
 	/**
 	 * Updates a repository index incrementally from the last indexed commits.
 	 * 
-	 * @param repositoryName
+	 * @param model
 	 * @param repository
 	 * @return IndexResult
 	 */
-	protected IndexResult updateIndex(String repositoryName, Repository repository) {
+	private IndexResult updateIndex(RepositoryModel model, Repository repository) {
 		IndexResult result = new IndexResult();
 		try {
 			FileBasedConfig config = getConfig(repository);
@@ -791,14 +797,55 @@
 				deletedBranches.add(branch);
 			}
 
-			// walk through each branches
+			// get the local branches
 			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+			
+			// sort them by most recently updated
+			Collections.sort(branches, new Comparator<RefModel>() {
+				@Override
+				public int compare(RefModel ref1, RefModel ref2) {
+					return ref2.getDate().compareTo(ref1.getDate());
+				}
+			});
+						
+			// reorder default branch to first position
+			RefModel defaultBranch = null;
+			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
+			for (RefModel branch :  branches) {
+				if (branch.getObjectId().equals(defaultBranchId)) {
+					defaultBranch = branch;
+					break;
+				}
+			}
+			branches.remove(defaultBranch);
+			branches.add(0, defaultBranch);
+			
+			// walk through each branch
 			for (RefModel branch : branches) {
 				String branchName = branch.getName();
 
+				boolean indexBranch = false;
+				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
+						&& branch.equals(defaultBranch)) {
+					// indexing "default" branch
+					indexBranch = true;
+				} else if (IssueUtils.GB_ISSUES.equals(branch)) {
+					// update issues modified on the GB_ISSUES branch
+					// note: this is different than reindex
+					indexBranch = true;
+				} else {
+					// normal explicit branch check
+					indexBranch = model.indexedBranches.contains(branch.getName());
+				}
+				
+				// if this branch is not specifically indexed then skip
+				if (!indexBranch) {
+					continue;
+				}
+				
 				// remove this branch from the deletedBranches set
 				deletedBranches.remove(branchName);
-
+				
 				// determine last commit
 				String keyName = getBranchKey(branchName);
 				String lastCommit = config.getString(CONF_BRANCH, null, keyName);
@@ -816,10 +863,33 @@
 					result.branchCount += 1;
 				}
 				
+				// track the issue ids that we have already indexed
+				Set<String> indexedIssues = new TreeSet<String>();
+				
 				// reverse the list of commits so we start with the first commit				
 				Collections.reverse(revs);
-				for (RevCommit commit : revs) {
-					result.add(index(repositoryName, repository, branchName, commit));					
+				for (RevCommit commit : revs) {					
+					if (IssueUtils.GB_ISSUES.equals(branch)) {
+						// only index an issue once during updateIndex
+						String issueId = commit.getShortMessage().substring(2).trim();
+						if (indexedIssues.contains(issueId)) {
+							continue;
+						}
+						indexedIssues.add(issueId);
+						
+						IssueModel issue = IssueUtils.getIssue(repository, issueId);
+						if (issue == null) {
+							// issue was deleted, remove from index
+							deleteIssue(model.name, issueId);
+						} else {
+							// issue was updated
+							index(model.name, issue);
+							result.issueCount++;
+						}
+					} else {
+						// index a commit
+						result.add(index(model.name, repository, branchName, commit));
+					}
 				}
 
 				// update the config
@@ -833,18 +903,18 @@
 			// unless a branch really was deleted and no longer exists
 			if (deletedBranches.size() > 0) {
 				for (String branch : deletedBranches) {
-					IndexWriter writer = getIndexWriter(repositoryName);
+					IndexWriter writer = getIndexWriter(model.name);
 					writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
 					writer.commit();
 				}
 			}
 			result.success = true;
 		} catch (Throwable t) {
-			logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t);
+			logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
 		}
 		return result;
 	}
-
+	
 	/**
 	 * Creates a Lucene document from an issue.
 	 * 
@@ -906,11 +976,10 @@
 	 */
 	private boolean index(String repositoryName, Document doc) {
 		try {			
-			doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
 			IndexWriter writer = getIndexWriter(repositoryName);
 			writer.addDocument(doc);
-			resetIndexSearcher(repositoryName);
 			writer.commit();
+			resetIndexSearcher(repositoryName);
 			return true;
 		} catch (Exception e) {
 			logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
@@ -918,15 +987,16 @@
 		return false;
 	}
 
-	private SearchResult createSearchResult(Document doc, float score) throws ParseException {
+	private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
 		SearchResult result = new SearchResult();
+		result.hitId = hitId;
+		result.totalHits = totalHits;
 		result.score = score;
 		result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
 		result.summary = doc.get(FIELD_SUMMARY);		
 		result.author = doc.get(FIELD_AUTHOR);
 		result.committer = doc.get(FIELD_COMMITTER);
 		result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
-		result.repository = doc.get(FIELD_REPOSITORY);
 		result.branch = doc.get(FIELD_BRANCH);
 		result.commitId = doc.get(FIELD_COMMIT);
 		result.issueId = doc.get(FIELD_ISSUE);
@@ -943,7 +1013,7 @@
 	private synchronized void resetIndexSearcher(String repository) throws IOException {
 		IndexSearcher searcher = searchers.remove(repository);
 		if (searcher != null) {
-			searcher.close();
+			searcher.getIndexReader().close();
 		}
 	}
 
@@ -973,8 +1043,8 @@
 	 * @throws IOException
 	 */
 	private IndexWriter getIndexWriter(String repository) throws IOException {
-		IndexWriter indexWriter = writers.get(repository);		
-		File repositoryFolder = new File(repositoriesFolder, repository);
+		IndexWriter indexWriter = writers.get(repository);				
+		File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
 		File indexFolder = new File(repositoryFolder, LUCENE_DIR);
 		Directory directory = FSDirectory.open(indexFolder);		
 
@@ -996,19 +1066,21 @@
 	 * 
 	 * @param text
 	 *            if the text is null or empty, null is returned
-	 * @param maximumHits
-	 *            the maximum number of hits to collect
+	 * @param page
+	 *            the page number to retrieve. page is 1-indexed.
+	 * @param pageSize
+	 *            the number of elements to return for this page
 	 * @param repositories
 	 *            a list of repositories to search. if no repositories are
 	 *            specified null is returned.
 	 * @return a list of SearchResults in order from highest to the lowest score
 	 * 
 	 */
-	public List<SearchResult> search(String text, int maximumHits, List<String> repositories) {
+	public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
 		if (ArrayUtils.isEmpty(repositories)) {
 			return null;
 		}
-		return search(text, maximumHits, repositories.toArray(new String[0]));
+		return search(text, page, pageSize, repositories.toArray(new String[0]));
 	}
 	
 	/**
@@ -1016,15 +1088,17 @@
 	 * 
 	 * @param text
 	 *            if the text is null or empty, null is returned
-	 * @param maximumHits
-	 *            the maximum number of hits to collect
+	 * @param page
+	 *            the page number to retrieve. page is 1-indexed.
+	 * @param pageSize
+	 *            the number of elements to return for this page
 	 * @param repositories
 	 *            a list of repositories to search. if no repositories are
 	 *            specified null is returned.
 	 * @return a list of SearchResults in order from highest to the lowest score
 	 * 
-	 */	
-	public List<SearchResult> search(String text, int maximumHits, String... repositories) {
+	 */
+	public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
 		if (StringUtils.isEmpty(text)) {
 			return null;
 		}
@@ -1057,18 +1131,28 @@
 					readers.add(repositoryIndex.getIndexReader());
 				}
 				IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
-				MultiReader reader = new MultiReader(rdrs);
+				MultiSourceReader reader = new MultiSourceReader(rdrs);
 				searcher = new IndexSearcher(reader);
 			}
 			Query rewrittenQuery = searcher.rewrite(query);
-			TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
+			TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
 			searcher.search(rewrittenQuery, collector);
-			ScoreDoc[] hits = collector.topDocs().scoreDocs;
+			int offset = Math.max(0, (page - 1) * pageSize);
+			ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
+			int totalHits = collector.getTotalHits();
 			for (int i = 0; i < hits.length; i++) {
 				int docId = hits[i].doc;
 				Document doc = searcher.doc(docId);
-				// TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY
-				SearchResult result = createSearchResult(doc, hits[i].score);
+				SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
+				if (repositories.length == 1) {
+					// single repository search
+					result.repository = repositories[0];
+				} else {
+					// multi-repository search
+					MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
+					int index = reader.getSourceIndex(docId);
+					result.repository = repositories[index];
+				}
 				String content = doc.get(FIELD_CONTENT);				
 				result.fragment = getHighlightedFragment(analyzer, query, content, result);
 				results.add(result);
@@ -1091,42 +1175,82 @@
 	 */
 	private String getHighlightedFragment(Analyzer analyzer, Query query,
 			String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
-		content = content == null ? "":StringUtils.escapeForHtml(content, false);
-		
+		if (content == null) {
+			content = "";
+		}		
+
+		int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
+
 		QueryScorer scorer = new QueryScorer(query, "content");
-		Fragmenter fragmenter;
-		
-		// TODO improve the fragmenter - hopefully on line breaks
-		if (SearchObjectType.commit == result.type) {
-			fragmenter = new SimpleSpanFragmenter(scorer, 1024); 
-		} else {
-			fragmenter = new SimpleSpanFragmenter(scorer, 150);
-		}
+		Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); 
 
 		// use an artificial delimiter for the token
-		String termTag = "<!--[";
-		String termTagEnd = "]-->";
+		String termTag = "!!--[";
+		String termTagEnd = "]--!!";
 		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
 		Highlighter highlighter = new Highlighter(formatter, scorer);		
 		highlighter.setTextFragmenter(fragmenter);
-		
-		String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 5);
+
+		String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
 		if (ArrayUtils.isEmpty(fragments)) {
 			if (SearchObjectType.blob  == result.type) {
 				return "";
 			}
-			return "<pre class=\"text\">" + content + "</pre>";
+			// clip commit message
+			String fragment = content;
+			if (fragment.length() > fragmentLength) {
+				fragment = fragment.substring(0, fragmentLength) + "...";
+			}
+			return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
 		}
+		
+		int contentPos = 0;
 		StringBuilder sb = new StringBuilder();
 		for (int i = 0, len = fragments.length; i < len; i++) {
 			String fragment = fragments[i];
-			
+			String tag = "<pre class=\"text\">";
+
 			// resurrect the raw fragment from removing the artificial delimiters
-			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");			
-			sb.append(getPreTag(result, raw, content));
+			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
+
+			// determine position of the raw fragment in the content
+			int pos = content.indexOf(raw, contentPos);
+				
+			// restore complete first line of fragment
+			int c = pos;
+			while (c > 0) {
+				c--;
+				if (content.charAt(c) == '\n') {
+					break;
+				}
+			}
+			if (c > 0) {
+				// inject leading chunk of first fragment line
+				fragment = content.substring(c + 1, pos) + fragment;
+			}
+				
+			if (SearchObjectType.blob  == result.type) {
+				// count lines as offset into the content for this fragment
+				int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
+				
+				// create fragment tag with line number and language
+				String lang = "";
+				String ext = StringUtils.getFileExtension(result.path).toLowerCase();
+				if (!StringUtils.isEmpty(ext)) {
+					// maintain leading space!
+					lang = " lang-" + ext;
+				}
+				tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
+								
+				// update offset into content				
+				contentPos = pos + raw.length() + 1;
+			}
 			
+			sb.append(tag);
+
 			// replace the artificial delimiter with html tags
-			String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
+			String html = StringUtils.escapeForHtml(fragment, false);
+			html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
 			sb.append(html);
 			sb.append("</pre>");
 			if (i < len - 1) {
@@ -1134,31 +1258,7 @@
 			}
 		}
 		return sb.toString();
-	}
-	
-	/**
-	 * Returns the appropriate tag for a fragment. Commit messages are visually
-	 * differentiated from blob fragments.
-	 * 
-	 * @param result
-	 * @param fragment
-	 * @param content
-	 * @return an html tag appropriate for the fragment
-	 */
-	private String getPreTag(SearchResult result, String fragment, String content) {
-		String pre = "<pre class=\"text\">";
-		if (SearchObjectType.blob  == result.type) {
-			int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment)));			
-			int lastDot = result.path.lastIndexOf('.');
-			if (lastDot > -1) {
-				String ext = result.path.substring(lastDot + 1).toLowerCase();
-				pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext);	
-			} else {
-				pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line);
-			}
-		}
-		return pre;
-	}
+	}	
 	
 	/**
 	 * Simple class to track the results of an index update. 
@@ -1188,4 +1288,38 @@
 			return (endTime - startTime)/1000f;
 		}
 	}
+	
+	/**
+	 * Custom subclass of MultiReader to identify the source index for a given
+	 * doc id.  This would not be necessary if there were a public method to
+	 * obtain this information.  The lookup reflectively invokes the declared
+	 * MultiReader method readerIndex(int); getSourceIndex returns -1 on failure.
+	 */
+	private class MultiSourceReader extends MultiReader {
+		// reflective handle to MultiReader.readerIndex(int); null if the lookup below failed
+		final Method method;
+		
+		MultiSourceReader(IndexReader[] subReaders) {
+			super(subReaders);
+			Method m = null;
+			try {
+				m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
+				m.setAccessible(true);
+			} catch (Exception e) {
+				logger.error("Error getting readerIndex method", e);
+			}
+			method = m;	// stays null when the reflective lookup threw
+		}
+		// returns readerIndex(docId), or -1 if the call throws (including when method is null)
+		int getSourceIndex(int docId) {
+			int index = -1;
+			try {
+				Object o = method.invoke(this, docId);
+				index = (Integer) o;
+			} catch (Exception e) {
+				logger.error("Error getting source index", e);
+			}
+			return index;
+		}
+	}
 }

--
Gitblit v1.9.1