public interface IXCrawlerService
Modifier and Type | Field and Description |
---|---|
static int |
MAX_TASK_DISPLAY_NAME_LENGTH |
static int |
MAX_TASK_ID_LENGTH |
static int |
MAX_USER_DATA_LENGTH |
Modifier and Type | Method and Description |
---|---|
void |
clearSurvivesRestart(String taskID)
Set the survive restart flag of a crawler task to 0, i.e. the task will no longer be resumed after a restart of CM.
|
IXCrawlerParameters |
createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean useACL,
boolean followLinks,
boolean followRedirects,
boolean crawlVersions,
boolean crawlVariants,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean respectNoIndex,
boolean respectNoFollow,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
Create crawler parameters.
|
IXCrawlerParameters |
createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean followRedirects,
boolean crawlVersions,
boolean crawlVariants,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean respectNoIndex,
boolean respectNoFollow,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
Create crawler parameters.
|
IXCrawlerParameters |
createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean followRedirects,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean respectNoIndex,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
Create crawler parameters.
|
IXCrawlerParameters |
createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean followRedirects,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
Create crawler parameters.
|
IXCrawlerParameters |
createCrawlerParameters(int maxDepth,
int retrieverCount,
int providerCount,
boolean useETag,
boolean useChecksum,
boolean followLinks,
boolean crawlVersions,
boolean crawlHidden,
boolean crawlSystem,
long requestDelayInMilliseconds,
IXCrawlerParameters.ModificationCheckMode modificationCheckMode,
boolean findAllDocsInDepth,
boolean respectRobots,
boolean test,
IResourceFilter[] scopeFilters,
IResourceFilter[] resultFilters,
long maxLogFileSizeInBytes,
int maxBacklogFiles,
String logFilePath,
IXCrawlerParameters.LogLevel maxLogLevel,
long documentTimeoutInSeconds)
Create crawler parameters.
|
IXCrawlerParameters |
createCrawlerParameters(String parameterName)
Create crawler parameters from a configurable in the configuration plugin /cm/services/xcrawlers.
|
void |
deleteCrawlerTask(String taskID)
Delete a crawler task.
|
String[] |
getCrawlerParameterNames()
Get the names of the available crawler parameters.
|
IXCrawlerTaskSummary[] |
getCrawlerTaskSummaries()
Get the state summaries of all crawler tasks.
|
IXCrawlerTaskSummary |
getCrawlerTaskSummary(String taskID)
Get the state summary of a crawler task.
|
String |
getDefaultCrawlerParameterName()
Get the name of the default crawler parameters.
|
RID[] |
getFailedResourcesOfCrawler(String taskID,
int max)
Get resources that caused errors in the last (or current) run of a crawler task
Added in 7.X |
RID[] |
getFailedResourcesOfEvents(String taskID,
int max)
Get resources that have been reported as failed by calls of
reportFailedResource() Added in 7.X |
int |
getNumberOfFailedResourcesOfEvents(String taskID)
Get the number of resources that have been reported as failed by calls of
reportFailedResource() Added in 7.X |
boolean |
getSurvivesRestart(String taskID)
Get the survive restart flag of a crawler task.
|
boolean |
isFiltered(IResource resource,
IXCrawlerParameters parameters,
RID crawlStartPath)
Check whether a resource would be filtered out during a crawl with specific crawler parameters
|
boolean |
isRunning(String taskID)
Check whether a crawler task is running for the specified taskID.
|
boolean |
isScheduled(String taskID)
Check whether a crawler task is scheduled for the specified taskID
(and will run once any running or suspended crawler tasks for the same taskID have finished). |
boolean |
isSuspended(String taskID)
Check whether a crawler task is suspended for the specified taskID.
|
void |
recrawlErrors(String taskID)
Restart a crawler task by crawling only the documents that failed during the last crawl.
|
void |
reloadResourceFilters(String taskID)
Reload the current version of the resource filters for a crawler.
|
void |
reportDeletedResource(String taskID,
RID rid,
int startResourceListIndex)
Report the deletion of a resource in the scope of a crawler
The application that uses a crawler may use this method to report the deletion of a resource that was reported to it via RF event. |
void |
reportFailedResource(String taskID,
RID crawlStartPath,
RID rid,
int startResourceListIndex)
Report a problem with the processing of a resource in the scope of a crawler
The application that uses a crawler may use this method to report a problem with the processing of a resource that was reported to it via RF event. |
void |
resumeCrawlerTask(String taskID)
Resume a crawler task.
|
void |
runCrawlerTask(String taskID,
String taskDisplayName,
IRidList[] startResources,
IXCrawlerParameters[] parameters,
String resultReceiverFactoryClassName,
String userDataForFactory,
boolean survivesRestart,
boolean delta,
ISystem node,
boolean deleteAfterCompletion)
Run a crawler task.
|
void |
stopCrawlerTask(String taskID)
Stop a crawler task.
|
void |
stopCrawlerTaskAsync(String taskID)
Stop a crawler task.
|
void |
suspendCrawlerTask(String taskID)
Suspend a crawler task.
|
static final int MAX_TASK_ID_LENGTH
static final int MAX_TASK_DISPLAY_NAME_LENGTH
static final int MAX_USER_DATA_LENGTH
String[] getCrawlerParameterNames() throws XCrawlerException
XCrawlerException
String getDefaultCrawlerParameterName() throws XCrawlerException
XCrawlerException
IXCrawlerParameters createCrawlerParameters(String parameterName) throws XCrawlerException
parameterName
- name of the configurable
XCrawlerException
IXCrawlerParameters createCrawlerParameters(int maxDepth, int retrieverCount, int providerCount, boolean useETag, boolean useChecksum, boolean followLinks, boolean crawlVersions, boolean crawlHidden, boolean crawlSystem, long requestDelayInMilliseconds, IXCrawlerParameters.ModificationCheckMode modificationCheckMode, boolean findAllDocsInDepth, boolean respectRobots, boolean test, IResourceFilter[] scopeFilters, IResourceFilter[] resultFilters, long maxLogFileSizeInBytes, int maxBacklogFiles, String logFilePath, IXCrawlerParameters.LogLevel maxLogLevel, long documentTimeoutInSeconds) throws XCrawlerException
maxDepth
- maximum depth of the crawl (0 is unlimited)retrieverCount
- number of threads which retrieve the resources from the repositoriesproviderCount
- number of threads which provide the found resources to the result receiversuseETag
- true if the ETag of a resource should be used to detect modificationuseChecksum
- true if the checksum of the resource content should be used to detect modificationfollowLinks
- true if links should be followed during the crawlcrawlVersions
- true if versions of resources should be included in the crawlcrawlHidden
- true if hidden resources should be included in the crawlcrawlSystem
- true if system resources should be included in the crawlrequestDelayInMilliseconds
- number of milliseconds between two consecutive resource retrievals
(to limit repository load)modificationCheckMode
- mode of resource modification detection (ETag AND checksum, ETag OR checksum)findAllDocsInDepth
- true if resources should be found on the shortest possible pathrespectRobots
- true if robot-rules of web-servers should be respectedtest
- true if no resources should be provided to the result receiverscopeFilters
- resource filters narrowing the scope of the crawlresultFilters
- resource filters which are applied to the result of the crawl but do not narrow the scopemaxLogFileSizeInBytes
- maximum size of the crawler log file in bytes (0 is unlimited)maxBacklogFiles
- maximum number of old crawler log fileslogFilePath
- path to the crawler log file (if null the current system path is used)maxLogLevel
- maximum log leveldocumentTimeoutInSeconds
- the document retrieval timeout in secondsXCrawlerException
IXCrawlerParameters createCrawlerParameters(int maxDepth, int retrieverCount, int providerCount, boolean useETag, boolean useChecksum, boolean followLinks, boolean followRedirects, boolean crawlVersions, boolean crawlHidden, boolean crawlSystem, long requestDelayInMilliseconds, IXCrawlerParameters.ModificationCheckMode modificationCheckMode, boolean findAllDocsInDepth, boolean respectRobots, boolean test, IResourceFilter[] scopeFilters, IResourceFilter[] resultFilters, long maxLogFileSizeInBytes, int maxBacklogFiles, String logFilePath, IXCrawlerParameters.LogLevel maxLogLevel, long documentTimeoutInSeconds) throws XCrawlerException
maxDepth
- maximum depth of the crawl (0 is unlimited)retrieverCount
- number of threads which retrieve the resources from the repositoriesproviderCount
- number of threads which provide the found resources to the result receiversuseETag
- true if the ETag of a resource should be used to detect modificationuseChecksum
- true if the checksum of the resource content should be used to detect modificationfollowLinks
- true if links should be followed during the crawlfollowRedirects
- true if redirects in Web-RMs should be followed during the crawlcrawlVersions
- true if versions of resources should be included in the crawlcrawlHidden
- true if hidden resources should be included in the crawlcrawlSystem
- true if system resources should be included in the crawlrequestDelayInMilliseconds
- number of milliseconds between two consecutive resource retrievals
(to limit repository load)modificationCheckMode
- mode of resource modification detection (ETag AND checksum, ETag OR checksum)findAllDocsInDepth
- true if resources should be found on the shortest possible pathrespectRobots
- true if robot-rules of web-servers should be respectedtest
- true if no resources should be provided to the result receiverscopeFilters
- resource filters narrowing the scope of the crawlresultFilters
- resource filters which are applied to the result of the crawl but do not narrow the scopemaxLogFileSizeInBytes
- maximum size of the crawler log file in bytes (0 is unlimited)maxBacklogFiles
- maximum number of old crawler log fileslogFilePath
- path to the crawler log file (if null the current system path is used)maxLogLevel
- maximum log leveldocumentTimeoutInSeconds
- the document retrieval timeout in secondsXCrawlerException
IXCrawlerParameters createCrawlerParameters(int maxDepth, int retrieverCount, int providerCount, boolean useETag, boolean useChecksum, boolean followLinks, boolean followRedirects, boolean crawlVersions, boolean crawlHidden, boolean crawlSystem, long requestDelayInMilliseconds, IXCrawlerParameters.ModificationCheckMode modificationCheckMode, boolean findAllDocsInDepth, boolean respectRobots, boolean respectNoIndex, boolean test, IResourceFilter[] scopeFilters, IResourceFilter[] resultFilters, long maxLogFileSizeInBytes, int maxBacklogFiles, String logFilePath, IXCrawlerParameters.LogLevel maxLogLevel, long documentTimeoutInSeconds) throws XCrawlerException
maxDepth
- maximum depth of the crawl (0 is unlimited)retrieverCount
- number of threads which retrieve the resources from the repositoriesproviderCount
- number of threads which provide the found resources to the result receiversuseETag
- true if the ETag of a resource should be used to detect modificationuseChecksum
- true if the checksum of the resource content should be used to detect modificationfollowLinks
- true if links should be followed during the crawlfollowRedirects
- true if redirects in Web-RMs should be followed during the crawlcrawlVersions
- true if versions of resources should be included in the crawlcrawlHidden
- true if hidden resources should be included in the crawlcrawlSystem
- true if system resources should be included in the crawlrequestDelayInMilliseconds
- number of milliseconds between two consecutive resource retrievals
(to limit repository load)modificationCheckMode
- mode of resource modification detection (ETag AND checksum, ETag OR checksum)findAllDocsInDepth
- true if resources should be found on the shortest possible pathrespectRobots
- true if robot-rules of web-servers should be respectedrespectNoIndex
- true if the index-content property should be respectedtest
- true if no resources should be provided to the result receiverscopeFilters
- resource filters narrowing the scope of the crawlresultFilters
- resource filters which are applied to the result of the crawl but do not narrow the scopemaxLogFileSizeInBytes
- maximum size of the crawler log file in bytes (0 is unlimited)maxBacklogFiles
- maximum number of old crawler log fileslogFilePath
- path to the crawler log file (if null the current system path is used)maxLogLevel
- maximum log leveldocumentTimeoutInSeconds
- the document retrieval timeout in secondsXCrawlerException
IXCrawlerParameters createCrawlerParameters(int maxDepth, int retrieverCount, int providerCount, boolean useETag, boolean useChecksum, boolean followLinks, boolean followRedirects, boolean crawlVersions, boolean crawlVariants, boolean crawlHidden, boolean crawlSystem, long requestDelayInMilliseconds, IXCrawlerParameters.ModificationCheckMode modificationCheckMode, boolean findAllDocsInDepth, boolean respectRobots, boolean respectNoIndex, boolean respectNoFollow, boolean test, IResourceFilter[] scopeFilters, IResourceFilter[] resultFilters, long maxLogFileSizeInBytes, int maxBacklogFiles, String logFilePath, IXCrawlerParameters.LogLevel maxLogLevel, long documentTimeoutInSeconds) throws XCrawlerException
maxDepth
- maximum depth of the crawl (0 is unlimited)retrieverCount
- number of threads which retrieve the resources from the repositoriesproviderCount
- number of threads which provide the found resources to the result receiversuseETag
- true if the ETag of a resource should be used to detect modificationuseChecksum
- true if the checksum of the resource content should be used to detect modificationfollowLinks
- true if links should be followed during the crawlfollowRedirects
- true if redirects in Web-RMs should be followed during the crawlcrawlVersions
- true if versions of resources should be included in the crawlcrawlVariants
- true if variants of resources should be included in the crawlcrawlHidden
- true if hidden resources should be included in the crawlcrawlSystem
- true if system resources should be included in the crawlrequestDelayInMilliseconds
- number of milliseconds between two consecutive resource retrievals
(to limit repository load)modificationCheckMode
- mode of resource modification detection (ETag AND checksum, ETag OR checksum)findAllDocsInDepth
- true if resources should be found on the shortest possible pathrespectRobots
- true if robot-rules of web-servers should be respectedrespectNoIndex
- true if the index-content property should be respectedrespectNoFollow
- true if the follow-links property should be respectedtest
- true if no resources should be provided to the result receiverscopeFilters
- resource filters narrowing the scope of the crawlresultFilters
- resource filters which are applied to the result of the crawl but do not narrow the scopemaxLogFileSizeInBytes
- maximum size of the crawler log file in bytes (0 is unlimited)maxBacklogFiles
- maximum number of old crawler log fileslogFilePath
- path to the crawler log file (if null the current system path is used)maxLogLevel
- maximum log leveldocumentTimeoutInSeconds
- the document retrieval timeout in secondsXCrawlerException
IXCrawlerParameters createCrawlerParameters(int maxDepth, int retrieverCount, int providerCount, boolean useETag, boolean useChecksum, boolean useACL, boolean followLinks, boolean followRedirects, boolean crawlVersions, boolean crawlVariants, boolean crawlHidden, boolean crawlSystem, long requestDelayInMilliseconds, IXCrawlerParameters.ModificationCheckMode modificationCheckMode, boolean findAllDocsInDepth, boolean respectRobots, boolean respectNoIndex, boolean respectNoFollow, boolean test, IResourceFilter[] scopeFilters, IResourceFilter[] resultFilters, long maxLogFileSizeInBytes, int maxBacklogFiles, String logFilePath, IXCrawlerParameters.LogLevel maxLogLevel, long documentTimeoutInSeconds) throws XCrawlerException
maxDepth
- maximum depth of the crawl (0 is unlimited)retrieverCount
- number of threads which retrieve the resources from the repositoriesproviderCount
- number of threads which provide the found resources to the result receiversuseETag
- true if the ETag of a resource should be used to detect modificationuseChecksum
- true if the checksum of the resource content should be used to detect modificationuseACL
- true if the ACL version number of the resource should be used to detect modificationfollowLinks
- true if links should be followed during the crawlfollowRedirects
- true if redirects in Web-RMs should be followed during the crawlcrawlVersions
- true if versions of resources should be included in the crawlcrawlVariants
- true if variants of resources should be included in the crawlcrawlHidden
- true if hidden resources should be included in the crawlcrawlSystem
- true if system resources should be included in the crawlrequestDelayInMilliseconds
- number of milliseconds between two consecutive resource retrievals
(to limit repository load)modificationCheckMode
- mode of resource modification detection (ETag AND checksum, ETag OR checksum)findAllDocsInDepth
- true if resources should be found on the shortest possible pathrespectRobots
- true if robot-rules of web-servers should be respectedrespectNoIndex
- true if the index-content property should be respectedrespectNoFollow
- true if the follow-links property should be respectedtest
- true if no resources should be provided to the result receiverscopeFilters
- resource filters narrowing the scope of the crawlresultFilters
- resource filters which are applied to the result of the crawl but do not narrow the scopemaxLogFileSizeInBytes
- maximum size of the crawler log file in bytes (0 is unlimited)maxBacklogFiles
- maximum number of old crawler log fileslogFilePath
- path to the crawler log file (if null the current system path is used)maxLogLevel
- maximum log leveldocumentTimeoutInSeconds
- the document retrieval timeout in secondsXCrawlerException
void runCrawlerTask(String taskID, String taskDisplayName, IRidList[] startResources, IXCrawlerParameters[] parameters, String resultReceiverFactoryClassName, String userDataForFactory, boolean survivesRestart, boolean delta, ISystem node, boolean deleteAfterCompletion) throws XCrawlerException
taskID
- ID of the new task (maximum length is MAX_TASK_ID_LENGTH
)taskDisplayName
- display name of the new task (maximum length is MAX_TASK_DISPLAY_NAME_LENGTH
,
may be null)startResources
- lists of start resourcesparameters
- crawler parameters for the lists of start resourcesresultReceiverFactoryClassName
- class which creates result receivers; the name of the class is persisted in
the database and reused via reflection when the crawler task is resumed; the class must implement
IXCrawlerResultReceiverFactoryuserDataForFactory
- this string is passed to the createResultReceiver()
method of the
resultReceiverFactory
; here the result receiving application can store any data up to
MAX_USER_DATA_LENGTH
characters in length (may be null)survivesRestart
- if true the crawler can be resumed even after a restart of CMdelta
- true if an incremental update should be performednode
- cluster node on which the task should be executeddeleteAfterCompletion
- true if the crawler should be deleted after it is complete
XCrawlerException
void suspendCrawlerTask(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
void resumeCrawlerTask(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
void stopCrawlerTask(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
void stopCrawlerTaskAsync(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
void recrawlErrors(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
void deleteCrawlerTask(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
IXCrawlerTaskSummary[] getCrawlerTaskSummaries() throws XCrawlerException
XCrawlerException
IXCrawlerTaskSummary getCrawlerTaskSummary(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
boolean isRunning(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
boolean isSuspended(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
boolean isScheduled(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
boolean isFiltered(IResource resource, IXCrawlerParameters parameters, RID crawlStartPath) throws XCrawlerException
resource
- the resourceparameters
- the crawler parameterscrawlStartPath
- path of the related datasource that is attached to the index (for depth calculation)XCrawlerException
void reloadResourceFilters(String taskID) throws XCrawlerException
XCrawlerException
boolean getSurvivesRestart(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
void clearSurvivesRestart(String taskID) throws XCrawlerException
taskID
- ID of the taskXCrawlerException
- Added in 7.X
void reportDeletedResource(String taskID, RID rid, int startResourceListIndex) throws XCrawlerException
taskID
- ID of the crawler taskrid
- RID of the deleted resourcestartResourceListIndex
- index (starting at 0) of the start resource list (passed in
run()) the reported RID belongs to
XCrawlerException
void reportFailedResource(String taskID, RID crawlStartPath, RID rid, int startResourceListIndex) throws XCrawlerException
taskID
- ID of the crawler taskcrawlStartPath
- path of the related datasource that is attached to the index (for depth calculation)rid
- RID of the failed resourcestartResourceListIndex
- index (starting at 0) of the start resource list (passed in
run()) the reported RID belongs to
XCrawlerException
RID[] getFailedResourcesOfCrawler(String taskID, int max) throws XCrawlerException
taskID
- ID of the crawler taskmax
- the maximum number of results (will be truncated to 100)XCrawlerException
int getNumberOfFailedResourcesOfEvents(String taskID) throws XCrawlerException
reportFailedResource()
taskID
- ID of the crawler taskXCrawlerException
RID[] getFailedResourcesOfEvents(String taskID, int max) throws XCrawlerException
reportFailedResource()
taskID
- ID of the crawler taskmax
- the maximum number of results (will be truncated to 100)XCrawlerException
Access Rights |
---|
SC | DC | Public Part | ACH |
---|---|---|---|
[sap.com] KMC-CM
|
[sap.com] tc/km/frwk
|
api
|
EP-KM-CM
|
[sap.com] KMC-WPC
|
[sap.com] tc/kmc/wpc/wpcfacade
|
api
|
EP-PIN-WPC-WCM
|
Copyright 2018 SAP AG Complete Copyright Notice