Page and message indexing is now more robust: pages and messages that cannot be indexed are logged as errors and skipped.

This commit is contained in:
Dario Solera 2010-05-30 06:29:13 +00:00
parent 4a91270ff1
commit 44311ba307
4 changed files with 79 additions and 55 deletions

View file

@ -16,5 +16,5 @@ using System.Reflection;
// //
// You can specify all the values or you can default the Revision and Build Numbers // You can specify all the values or you can default the Revision and Build Numbers
// by using the '*' as shown below: // by using the '*' as shown below:
[assembly: AssemblyVersion("3.0.2.540")] [assembly: AssemblyVersion("3.0.2.541")]
[assembly: AssemblyFileVersion("3.0.2.540")] [assembly: AssemblyFileVersion("3.0.2.541")]

View file

@ -1188,22 +1188,28 @@ namespace ScrewTurn.Wiki {
/// <returns>The number of indexed words, including duplicates.</returns> /// <returns>The number of indexed words, including duplicates.</returns>
private int IndexPage(PageContent content) { private int IndexPage(PageContent content) {
lock(this) { lock(this) {
string documentName = PageDocument.GetDocumentName(content.PageInfo); try {
string documentName = PageDocument.GetDocumentName(content.PageInfo);
DumpedDocument ddoc = new DumpedDocument(0, documentName, host.PrepareTitleForIndexing(content.PageInfo, content.Title), DumpedDocument ddoc = new DumpedDocument(0, documentName, host.PrepareTitleForIndexing(content.PageInfo, content.Title),
PageDocument.StandardTypeTag, content.LastModified); PageDocument.StandardTypeTag, content.LastModified);
// Store the document // Store the document
// The content should always be prepared using IHost.PrepareForSearchEngineIndexing() // The content should always be prepared using IHost.PrepareForSearchEngineIndexing()
int count = index.StoreDocument(new PageDocument(content.PageInfo, ddoc, TokenizeContent), int count = index.StoreDocument(new PageDocument(content.PageInfo, ddoc, TokenizeContent),
content.Keywords, host.PrepareContentForIndexing(content.PageInfo, content.Content), null); content.Keywords, host.PrepareContentForIndexing(content.PageInfo, content.Content), null);
if(count == 0 && content.Content.Length > 0) { if(count == 0 && content.Content.Length > 0) {
host.LogEntry("Indexed 0 words for page " + content.PageInfo.FullName + ": possible index corruption. Please report this error to the developers", host.LogEntry("Indexed 0 words for page " + content.PageInfo.FullName + ": possible index corruption. Please report this error to the developers",
LogEntryType.Warning, null, this); LogEntryType.Warning, null, this);
}
return count;
}
catch(Exception ex) {
host.LogEntry("Page indexing error for " + content.PageInfo.FullName + " (skipping page): " + ex.ToString(), LogEntryType.Error, null, this);
return 0;
} }
return count;
} }
} }
@ -2389,25 +2395,31 @@ namespace ScrewTurn.Wiki {
/// <returns>The number of indexed words, including duplicates.</returns> /// <returns>The number of indexed words, including duplicates.</returns>
private int IndexMessage(PageInfo page, int id, string subject, DateTime dateTime, string body) { private int IndexMessage(PageInfo page, int id, string subject, DateTime dateTime, string body) {
lock(this) { lock(this) {
// Trim "RE:" to avoid polluting the search engine index try {
if(subject.ToLowerInvariant().StartsWith("re:") && subject.Length > 3) subject = subject.Substring(3).Trim(); // Trim "RE:" to avoid polluting the search engine index
if(subject.ToLowerInvariant().StartsWith("re:") && subject.Length > 3) subject = subject.Substring(3).Trim();
string documentName = MessageDocument.GetDocumentName(page, id); string documentName = MessageDocument.GetDocumentName(page, id);
DumpedDocument ddoc = new DumpedDocument(0, documentName, host.PrepareTitleForIndexing(null, subject), DumpedDocument ddoc = new DumpedDocument(0, documentName, host.PrepareTitleForIndexing(null, subject),
MessageDocument.StandardTypeTag, dateTime); MessageDocument.StandardTypeTag, dateTime);
// Store the document // Store the document
// The content should always be prepared using IHost.PrepareForSearchEngineIndexing() // The content should always be prepared using IHost.PrepareForSearchEngineIndexing()
int count = index.StoreDocument(new MessageDocument(page, id, ddoc, TokenizeContent), null, int count = index.StoreDocument(new MessageDocument(page, id, ddoc, TokenizeContent), null,
host.PrepareContentForIndexing(null, body), null); host.PrepareContentForIndexing(null, body), null);
if(count == 0 && body.Length > 0) { if(count == 0 && body.Length > 0) {
host.LogEntry("Indexed 0 words for message " + page.FullName + ":" + id.ToString() + ": possible index corruption. Please report this error to the developers", host.LogEntry("Indexed 0 words for message " + page.FullName + ":" + id.ToString() + ": possible index corruption. Please report this error to the developers",
LogEntryType.Warning, null, this); LogEntryType.Warning, null, this);
}
return count;
}
catch(Exception ex) {
host.LogEntry("Message indexing error for " + page.FullName + ":" + id.ToString() + " (skipping message): " + ex.ToString(), LogEntryType.Error, null, this);
return 0;
} }
return count;
} }
} }

View file

@ -592,22 +592,28 @@ namespace ScrewTurn.Wiki.Plugins.SqlCommon {
/// <param name="transaction">The current transaction.</param> /// <param name="transaction">The current transaction.</param>
/// <returns>The number of indexed words, including duplicates.</returns> /// <returns>The number of indexed words, including duplicates.</returns>
private int IndexPage(PageContent content, DbTransaction transaction) { private int IndexPage(PageContent content, DbTransaction transaction) {
string documentName = PageDocument.GetDocumentName(content.PageInfo); try {
string documentName = PageDocument.GetDocumentName(content.PageInfo);
DumpedDocument ddoc = new DumpedDocument(0, documentName, host.PrepareTitleForIndexing(content.PageInfo, content.Title), DumpedDocument ddoc = new DumpedDocument(0, documentName, host.PrepareTitleForIndexing(content.PageInfo, content.Title),
PageDocument.StandardTypeTag, content.LastModified); PageDocument.StandardTypeTag, content.LastModified);
// Store the document // Store the document
// The content should always be prepared using IHost.PrepareForSearchEngineIndexing() // The content should always be prepared using IHost.PrepareForSearchEngineIndexing()
int count = index.StoreDocument(new PageDocument(content.PageInfo, ddoc, TokenizeContent), int count = index.StoreDocument(new PageDocument(content.PageInfo, ddoc, TokenizeContent),
content.Keywords, host.PrepareContentForIndexing(content.PageInfo, content.Content), transaction); content.Keywords, host.PrepareContentForIndexing(content.PageInfo, content.Content), transaction);
if(count == 0 && content.Content.Length > 0) { if(count == 0 && content.Content.Length > 0) {
host.LogEntry("Indexed 0 words for page " + content.PageInfo.FullName + ": possible index corruption. Please report this error to the developers", host.LogEntry("Indexed 0 words for page " + content.PageInfo.FullName + ": possible index corruption. Please report this error to the developers",
LogEntryType.Warning, null, this); LogEntryType.Warning, null, this);
}
return count;
}
catch(Exception ex) {
host.LogEntry("Page indexing error for " + content.PageInfo.FullName + " (skipping page): " + ex.ToString(), LogEntryType.Error, null, this);
return 0;
} }
return count;
} }
/// <summary> /// <summary>
@ -647,25 +653,31 @@ namespace ScrewTurn.Wiki.Plugins.SqlCommon {
/// <param name="transaction">The current transaction.</param> /// <param name="transaction">The current transaction.</param>
/// <returns>The number of indexed words, including duplicates.</returns> /// <returns>The number of indexed words, including duplicates.</returns>
private int IndexMessage(PageInfo page, int id, string subject, DateTime dateTime, string body, DbTransaction transaction) { private int IndexMessage(PageInfo page, int id, string subject, DateTime dateTime, string body, DbTransaction transaction) {
// Trim "RE:" to avoid polluting the search engine index try {
if(subject.ToLowerInvariant().StartsWith("re:") && subject.Length > 3) subject = subject.Substring(3).Trim(); // Trim "RE:" to avoid polluting the search engine index
if(subject.ToLowerInvariant().StartsWith("re:") && subject.Length > 3) subject = subject.Substring(3).Trim();
string documentName = MessageDocument.GetDocumentName(page, id); string documentName = MessageDocument.GetDocumentName(page, id);
DumpedDocument ddoc = new DumpedDocument(0, documentName, host.PrepareTitleForIndexing(null, subject), DumpedDocument ddoc = new DumpedDocument(0, documentName, host.PrepareTitleForIndexing(null, subject),
MessageDocument.StandardTypeTag, dateTime); MessageDocument.StandardTypeTag, dateTime);
// Store the document // Store the document
// The content should always be prepared using IHost.PrepareForSearchEngineIndexing() // The content should always be prepared using IHost.PrepareForSearchEngineIndexing()
int count = index.StoreDocument(new MessageDocument(page, id, ddoc, TokenizeContent), null, int count = index.StoreDocument(new MessageDocument(page, id, ddoc, TokenizeContent), null,
host.PrepareContentForIndexing(null, body), transaction); host.PrepareContentForIndexing(null, body), transaction);
if(count == 0 && body.Length > 0) { if(count == 0 && body.Length > 0) {
host.LogEntry("Indexed 0 words for message " + page.FullName + ":" + id.ToString() + ": possible index corruption. Please report this error to the developers", host.LogEntry("Indexed 0 words for message " + page.FullName + ":" + id.ToString() + ": possible index corruption. Please report this error to the developers",
LogEntryType.Warning, null, this); LogEntryType.Warning, null, this);
}
return count;
}
catch(Exception ex) {
host.LogEntry("Message indexing error for " + page.FullName + ":" + id.ToString() + " (skipping message): " + ex.ToString(), LogEntryType.Error, null, this);
return 0;
} }
return count;
} }
/// <summary> /// <summary>

View file

@ -13,7 +13,7 @@ namespace ScrewTurn.Wiki.Plugins.SqlServer {
/// </summary> /// </summary>
public class SqlServerPagesStorageProvider : SqlPagesStorageProviderBase, IPagesStorageProviderV30 { public class SqlServerPagesStorageProvider : SqlPagesStorageProviderBase, IPagesStorageProviderV30 {
private readonly ComponentInformation info = new ComponentInformation("SQL Server Pages Storage Provider", "Threeplicate Srl", "3.0.1.471", "http://www.screwturn.eu", "http://www.screwturn.eu/Version/SQLServerProv/Pages.txt"); private readonly ComponentInformation info = new ComponentInformation("SQL Server Pages Storage Provider", "Threeplicate Srl", "3.0.1.541", "http://www.screwturn.eu", "http://www.screwturn.eu/Version/SQLServerProv/Pages.txt");
private readonly SqlServerCommandBuilder commandBuilder = new SqlServerCommandBuilder(); private readonly SqlServerCommandBuilder commandBuilder = new SqlServerCommandBuilder();