|
SAP NetWeaver '04 | |||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
Global service for crawling repositories.
Copyright (c) SAP AG 2004
| Field Summary | |
static int |
MAX_TASK_DISPLAY_NAME_LENGTH
|
static int |
MAX_TASK_ID_LENGTH
|
static int |
MAX_USER_DATA_LENGTH
|
| Method Summary | |
IXCrawlerParameters |
createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean followRedirects,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean respectNoIndex,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
Create crawler parameters. |
IXCrawlerParameters |
createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean followRedirects,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
Create crawler parameters. |
IXCrawlerParameters |
createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
Create crawler parameters. |
IXCrawlerParameters |
createCrawlerParameters(String parameterName)
Create crawler parameters from a configurable in the configuration plugin /cm/services/xcrawlers. |
void |
deleteCrawlerTask(String taskID)
Delete a crawler task. |
String[] |
getCrawlerParameterNames()
Get the names of the available crawler parameters. |
IXCrawlerTaskSummary[] |
getCrawlerTaskSummaries()
Get the state summaries of all crawler tasks. |
IXCrawlerTaskSummary |
getCrawlerTaskSummary(String taskID)
Get the state summary of a crawler task. |
String |
getDefaultCrawlerParameterName()
Get the name of the default crawler parameters. |
boolean |
isFiltered(IResource resource,
IXCrawlerParameters parameters,
RID crawlStartPath)
Check whether a resource would be filtered out during a crawl with specific crawler parameters. |
boolean |
isRunning(String taskID)
Check whether a crawler task is running for the specified taskID. |
boolean |
isScheduled(String taskID)
Check whether a crawler task is scheduled for the specified taskID (and will run once any running or suspended crawler tasks for the same taskID have finished). |
boolean |
isSuspended(String taskID)
Check whether a crawler task is suspended for the specified taskID. |
void |
recrawlErrors(String taskID)
Restart a crawler task by crawling only the documents that failed during the last crawl. |
void |
reloadResourceFilters(String taskID)
Reload the current version of the resource filters for a crawler. |
void |
resumeCrawlerTask(String taskID)
Resume a crawler task. |
void |
runCrawlerTask(String taskID,
String taskDisplayName,
IRidList[] startResources,
IXCrawlerParameters[] parameters,
String resultReceiverFactoryClassName,
String userDataForFactory,
boolean survivesRestart,
boolean delta,
ISystem node,
boolean deleteAfterCompletion)
Run a crawler task. |
void |
stopCrawlerTask(String taskID)
Stop a crawler task. |
void |
stopCrawlerTaskAsync(String taskID)
Stop a crawler task. |
void |
suspendCrawlerTask(String taskID)
Suspend a crawler task. |
| Field Detail |
public static final int MAX_TASK_ID_LENGTH
public static final int MAX_TASK_DISPLAY_NAME_LENGTH
public static final int MAX_USER_DATA_LENGTH
| Method Detail |
public String[] getCrawlerParameterNames()
throws XCrawlerException
XCrawlerException
public String getDefaultCrawlerParameterName()
throws XCrawlerException
XCrawlerException
public IXCrawlerParameters createCrawlerParameters(String parameterName)
throws XCrawlerException
parameterName - name of the configurable
XCrawlerException
public IXCrawlerParameters createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
throws XCrawlerException
followRedirects - this parameter is internally set to the value of followLinks
maxDepth - maximum depth of the crawl (0 is unlimited)
retrieverCount - number of threads which retrieve the resources from the repositories
providerCount - number of threads which provide the found resources to the result receivers
useETag - true if the ETag of a resource should be used to detect modification
useChecksum - true if the checksum of the resource content should be used to detect modification
followLinks - true if links should be followed during the crawl
crawlVersions - true if versions of resources should be included in the crawl
crawlHidden - true if hidden resources should be included in the crawl
crawlSystem - true if system resources should be included in the crawl
requestDelayInMilliseconds - number of milliseconds between two consecutive resource retrievals (to limit repository load)
modificationCheckMode - mode of resource modification detection (ETag AND checksum, ETag OR checksum)
findAllDocsInDepth - true if resources should be found on the shortest possible path
respectRobots - true if robot-rules of web-servers should be respected
test - true if no resources should be provided to the result receiver
scopeFilters - resource filters narrowing the scope of the crawl
resultFilters - resource filters which are applied to the result of the crawl but do not narrow the scope
maxLogFileSizeInBytes - maximum size of the crawler log file in bytes (0 is unlimited)
maxBacklogFiles - maximum number of old crawler log files
logFilePath - path to the crawler log file (if null the current system path is used)
maxLogLevel - maximum log level
documentTimeoutInSeconds - the document retrieval timeout in seconds
XCrawlerException
public IXCrawlerParameters createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean followRedirects,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
throws XCrawlerException
followRedirects
maxDepth - maximum depth of the crawl (0 is unlimited)
retrieverCount - number of threads which retrieve the resources from the repositories
providerCount - number of threads which provide the found resources to the result receivers
useETag - true if the ETag of a resource should be used to detect modification
useChecksum - true if the checksum of the resource content should be used to detect modification
followLinks - true if links should be followed during the crawl
followRedirects - true if redirects in Web-RMs should be followed during the crawl
crawlVersions - true if versions of resources should be included in the crawl
crawlHidden - true if hidden resources should be included in the crawl
crawlSystem - true if system resources should be included in the crawl
requestDelayInMilliseconds - number of milliseconds between two consecutive resource retrievals (to limit repository load)
modificationCheckMode - mode of resource modification detection (ETag AND checksum, ETag OR checksum)
findAllDocsInDepth - true if resources should be found on the shortest possible path
respectRobots - true if robot-rules of web-servers should be respected
test - true if no resources should be provided to the result receiver
scopeFilters - resource filters narrowing the scope of the crawl
resultFilters - resource filters which are applied to the result of the crawl but do not narrow the scope
maxLogFileSizeInBytes - maximum size of the crawler log file in bytes (0 is unlimited)
maxBacklogFiles - maximum number of old crawler log files
logFilePath - path to the crawler log file (if null the current system path is used)
maxLogLevel - maximum log level
documentTimeoutInSeconds - the document retrieval timeout in seconds
XCrawlerException
public IXCrawlerParameters createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean followRedirects,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean respectNoIndex,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
throws XCrawlerException
followRedirects
maxDepth - maximum depth of the crawl (0 is unlimited)
retrieverCount - number of threads which retrieve the resources from the repositories
providerCount - number of threads which provide the found resources to the result receivers
useETag - true if the ETag of a resource should be used to detect modification
useChecksum - true if the checksum of the resource content should be used to detect modification
followLinks - true if links should be followed during the crawl
followRedirects - true if redirects in Web-RMs should be followed during the crawl
crawlVersions - true if versions of resources should be included in the crawl
crawlHidden - true if hidden resources should be included in the crawl
crawlSystem - true if system resources should be included in the crawl
requestDelayInMilliseconds - number of milliseconds between two consecutive resource retrievals (to limit repository load)
modificationCheckMode - mode of resource modification detection (ETag AND checksum, ETag OR checksum)
findAllDocsInDepth - true if resources should be found on the shortest possible path
respectRobots - true if robot-rules of web-servers should be respected
respectNoIndex - true if the index-content property should be respected
test - true if no resources should be provided to the result receiver
scopeFilters - resource filters narrowing the scope of the crawl
resultFilters - resource filters which are applied to the result of the crawl but do not narrow the scope
maxLogFileSizeInBytes - maximum size of the crawler log file in bytes (0 is unlimited)
maxBacklogFiles - maximum number of old crawler log files
logFilePath - path to the crawler log file (if null the current system path is used)
maxLogLevel - maximum log level
documentTimeoutInSeconds - the document retrieval timeout in seconds
XCrawlerException
public void runCrawlerTask(String taskID,
String taskDisplayName,
IRidList[] startResources,
IXCrawlerParameters[] parameters,
String resultReceiverFactoryClassName,
String userDataForFactory,
boolean survivesRestart,
boolean delta,
ISystem node,
boolean deleteAfterCompletion)
throws XCrawlerException
taskID - ID of the new task (maximum length is MAX_TASK_ID_LENGTH)
taskDisplayName - display name of the new task (maximum length is MAX_TASK_DISPLAY_NAME_LENGTH, may be null)
startResources - lists of start resources
parameters - crawler parameters for the lists of start resources
resultReceiverFactoryClassName - class which creates result receivers; the name of the class is persisted in the database and reused via reflection when the crawler task is resumed; the class must implement IXCrawlerResultReceiverFactory
userDataForFactory - this string is passed to the createResultReceiver() method of the resultReceiverFactory; here the result receiving application can store any data up to MAX_USER_DATA_LENGTH characters in length (may be null)
survivesRestart - if true the crawler can be resumed even after a restart of CM
delta - true if an incremental update should be performed
node - cluster node on which the task should be executed
deleteAfterCompletion - true if the crawler should be deleted after it is complete
XCrawlerException
public void suspendCrawlerTask(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public void resumeCrawlerTask(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public void stopCrawlerTask(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public void stopCrawlerTaskAsync(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public void recrawlErrors(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public void deleteCrawlerTask(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public IXCrawlerTaskSummary[] getCrawlerTaskSummaries()
throws XCrawlerException
XCrawlerException
public IXCrawlerTaskSummary getCrawlerTaskSummary(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public boolean isRunning(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public boolean isSuspended(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public boolean isScheduled(String taskID)
throws XCrawlerException
taskID - ID of the task
XCrawlerException
public boolean isFiltered(IResource resource,
IXCrawlerParameters parameters,
RID crawlStartPath)
throws XCrawlerException
resource - the resource
parameters - the crawler parameters
crawlStartPath - path of the related datasource that is attached to the index (for depth calculation)
XCrawlerException
public void reloadResourceFilters(String taskID)
throws XCrawlerException
XCrawlerException
|
SAP NetWeaver '04 | |||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||