Modifier and Type | Method and Description |
---|---|
void |
FileCommandDetector.setFilePath(String fileCommandPath) |
void |
FileCommandDetector.setMaxBytes(int maxBytes)
If this is not called on a TikaInputStream, this detector
will spool up to this many bytes to a file to be detected
by the 'file' command.
|
void |
FileCommandDetector.setTimeoutMs(long timeoutMs) |
Modifier and Type | Method and Description |
---|---|
void |
DefaultZipContainerDetector.setMarkLimit(int markLimit)
If this is less than 0, the file will be spooled to disk,
and detection will run on the full file.
|
Modifier and Type | Method and Description |
---|---|
void |
ParsingEmbeddedDocumentExtractorFactory.setWriteFileNameToContent(boolean writeFileNameToContent) |
Modifier and Type | Method and Description |
---|---|
void |
OpenNLPMetadataFilter.setMaxCharsForDetection(int maxCharsForDetection) |
Modifier and Type | Method and Description |
---|---|
void |
OptimaizeMetadataFilter.setMaxCharsForDetection(int maxCharsForDetection) |
Modifier and Type | Method and Description |
---|---|
void |
DateNormalizingMetadataFilter.setDefaultTimeZone(String timeZoneId) |
void |
ExcludeFieldMetadataFilter.setExclude(List<String> exclude) |
void |
FieldNameMappingFilter.setExcludeUnmapped(boolean excludeUnmapped)
If this is `true` (default), this means that only the fields that
have a "from" value in the mapper will be passed through. |
void |
IncludeFieldMetadataFilter.setInclude(List<String> include) |
void |
FieldNameMappingFilter.setMappings(Map<String,String> mappings) |
void |
ClearByMimeMetadataFilter.setMimes(List<String> mimes) |
Modifier and Type | Method and Description |
---|---|
void |
RegexCaptureParser.setRegexMap(Map<String,String> map) |
Modifier and Type | Method and Description |
---|---|
void |
ExternalParser.setCommandLine(List<String> commandLine)
Use this to specify the full commandLine.
|
void |
ExternalParser.setMaxStdErr(int maxStdErr) |
void |
ExternalParser.setMaxStdOut(int maxStdOut) |
void |
ExternalParser.setOutputParser(Parser parser)
This parser is called on the output of the process.
|
void |
ExternalParser.setReturnStderr(boolean returnStderr)
If set to true, this will return the stderr in the metadata
via `ExternalProcess.STD_ERR`. |
void |
ExternalParser.setReturnStdout(boolean returnStdout)
If set to true, this will return the stdout in the metadata
via `ExternalProcess.STD_OUT`. |
void |
ExternalParser.setSupportedTypes(List<String> supportedTypes)
This is set during initialization from a tika-config.
|
void |
ExternalParser.setTimeoutMs(long timeoutMs) |
Modifier and Type | Method and Description |
---|---|
void |
GeoParser.setGazetteerRestEndpoint(String gazetteerRestEndpoint) |
void |
GeoParser.setNerModelUrl(String nerModelUrl) |
Modifier and Type | Method and Description |
---|---|
void |
HtmlParser.setExtractScripts(boolean extractScripts)
Whether or not to extract contents in script entities.
|
void |
HtmlEncodingDetector.setMarkLimit(int markLimit)
How far into the stream to read for charset detection.
|
Modifier and Type | Method and Description |
---|---|
void |
StandardHtmlEncodingDetector.setMarkLimit(int markLimit)
How far into the stream to read for charset detection.
|
Modifier and Type | Method and Description |
---|---|
void |
PSDParser.setMaxDataLengthBytes(int maxDataLengthBytes) |
void |
BPGParser.setMaxRecordLength(int maxRecordLength) |
Modifier and Type | Method and Description |
---|---|
void |
AbstractOfficeParser.setByteArrayMaxOverride(int maxOverride)
WARNING: this sets a static variable in POI.
|
void |
AbstractOfficeParser.setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) |
void |
AbstractOfficeParser.setDateFormatOverride(String format) |
void |
AbstractOfficeParser.setExtractAllAlternativesFromMSG(boolean extractAllAlternativesFromMSG)
Some .msg files can contain body content in html, rtf and/or text.
|
void |
AbstractOfficeParser.setExtractMacros(boolean extractMacros) |
void |
AbstractOfficeParser.setIncludeDeletedContent(boolean includeDeletedConent) |
void |
AbstractOfficeParser.setIncludeMoveFromContent(boolean includeMoveFromContent) |
void |
AbstractOfficeParser.setIncludeShapeBasedContent(boolean includeShapeBasedContent) |
void |
AbstractOfficeParser.setUseSAXDocxExtractor(boolean useSAXDocxExtractor) |
void |
AbstractOfficeParser.setUseSAXPptxExtractor(boolean useSAXPptxExtractor) |
Modifier and Type | Method and Description |
---|---|
void |
RTFParser.setMemoryLimitInKb(int memoryLimitInKb) |
Modifier and Type | Method and Description |
---|---|
void |
Mp3Parser.setMaxRecordSize(int maxRecordSize)
This statically sets the max record size in `ID3v2Frame`. |
Modifier and Type | Method and Description |
---|---|
void |
TesseractOCRParser.setApplyRotation(boolean applyRotation) |
void |
TesseractOCRParser.setColorspace(String colorspace) |
void |
TesseractOCRParser.setDensity(int density) |
void |
TesseractOCRParser.setDepth(int depth) |
void |
TesseractOCRParser.setEnableImagePreprocessing(boolean enableImagePreprocessing) |
void |
TesseractOCRParser.setFilter(String filter) |
void |
TesseractOCRParser.setImageMagickPath(String imageMagickPath)
Set the path to the ImageMagick executable directory, needed if it is not on system path.
|
void |
TesseractOCRParser.setLanguage(String language) |
void |
TesseractOCRParser.setMaxFileSizeToOcr(long maxFileSizeToOcr) |
void |
TesseractOCRParser.setMinFileSizeToOcr(long minFileSizeToOcr) |
void |
TesseractOCRParser.setOtherTesseractSettings(List<String> settings) |
void |
TesseractOCRParser.setOutputType(String outputType) |
void |
TesseractOCRParser.setPageSegMode(String pageSegMode) |
void |
TesseractOCRParser.setPreloadLangs(boolean preloadLangs)
If set to `true` and if tesseract is found, this will load the
langs that result from `--list-langs`. |
void |
TesseractOCRParser.setPreserveInterwordSpacing(boolean preserveInterwordSpacing) |
void |
TesseractOCRParser.setResize(int resize) |
void |
TesseractOCRParser.setSkipOCR(boolean skipOCR) |
void |
TesseractOCRParser.setTessdataPath(String tessdataPath)
Set the path to the 'tessdata' folder, which contains language files and config files.
|
void |
TesseractOCRParser.setTesseractPath(String tesseractPath)
Set the path to the Tesseract executable's directory, needed if it is not on system path.
|
void |
TesseractOCRParser.setTimeout(int timeout)
Set default timeout in seconds.
|
Modifier and Type | Method and Description |
---|---|
void |
FlatOpenDocumentParser.setExtractMacros(boolean extractMacros) |
void |
OpenDocumentParser.setExtractMacros(boolean extractMacros) |
Modifier and Type | Method and Description |
---|---|
void |
PDFParser.setDropThreshold(float dropThreshold) |
void |
PDFParser.setEnableAutoSpace(boolean v)
If true (the default), the parser should estimate
where spaces should be inserted between words.
|
void |
PDFParser.setExtractAnnotationText(boolean v)
If true (the default), text in annotations will be
extracted.
|
void |
PDFParser.setImageGraphicsEngineFactory(ImageGraphicsEngineFactory imageGraphicsEngineFactory) |
void |
PDFParser.setImageStrategy(String imageStrategy) |
void |
PDFParser.setMaxMainMemoryBytes(long maxMainMemoryBytes) |
void |
PDFParser.setOcrImageType(String imageType) |
void |
PDFParser.setOcrRenderingStrategy(String ocrRenderingStrategy) |
void |
PDFParser.setOcrStrategy(String ocrStrategyString) |
void |
PDFParser.setOcrStrategyAuto(String ocrStrategyAuto) |
void |
PDFParser.setSortByPosition(boolean v)
If true, sort text tokens by their x/y position
before extracting text.
|
void |
PDFParser.setSuppressDuplicateOverlappingText(boolean v)
If true, the parser should try to remove duplicated
text over the same region.
|
Modifier and Type | Method and Description |
---|---|
void |
PackageParser.setDetectCharsetsInEntryNames(boolean detectCharsetsInEntryNames)
Whether or not to run the default charset detector against entry
names in ZipFiles.
|
void |
CompressorParser.setMemoryLimitInKb(int memoryLimitInKb) |
Modifier and Type | Method and Description |
---|---|
void |
ObjectRecognitionParser.setRecogniser(String recogniserClass) |
Modifier and Type | Field and Description |
---|---|
protected URI |
TensorflowRESTRecogniser.apiBaseUri |
protected double |
TensorflowRESTRecogniser.minConfidence |
protected int |
TensorflowRESTRecogniser.topN |
Modifier and Type | Method and Description |
---|---|
void |
StringsParser.setEncoding(String encoding) |
void |
StringsParser.setMinLength(int minLength) |
void |
StringsParser.setStringsPath(String path)
Sets the "strings" installation folder.
|
void |
StringsParser.setTimeoutSeconds(int timeoutSeconds) |
Modifier and Type | Method and Description |
---|---|
void |
AmazonTranscribe.setBucket(String bucket)
Sets the bucket for the transcriber API.
|
void |
AmazonTranscribe.setClientId(String id)
Sets the client Id for the transcriber API.
|
void |
AmazonTranscribe.setClientSecret(String secret)
Sets the client secret for the transcriber API.
|
void |
AmazonTranscribe.setRegion(String region) |
Modifier and Type | Method and Description |
---|---|
void |
Icu4jEncodingDetector.setIgnoreCharsets(List<String> charsetsToIgnore) |
void |
UniversalEncodingDetector.setMarkLimit(int markLimit)
How far into the stream to read for charset detection.
|
void |
Icu4jEncodingDetector.setMarkLimit(int markLimit)
How far into the stream to read for charset detection.
|
void |
Icu4jEncodingDetector.setStripMarkup(boolean stripMarkup)
Whether or not to attempt to strip html-ish markup
from the stream before sending it to the underlying
detector.
|
Modifier and Type | Method and Description |
---|---|
void |
WordPerfectParser.setIncludeDeletedContent(boolean includeDeletedContent)
Whether or not to include deleted content.
|
Modifier and Type | Method and Description |
---|---|
void |
AZBlobEmitter.setContainer(String container) |
void |
AZBlobEmitter.setEndpoint(String endpoint) |
void |
AZBlobEmitter.setFileExtension(String fileExtension)
If you want to customize the output file's file extension.
|
void |
AZBlobEmitter.setOverwriteExisting(boolean overwriteExisting) |
void |
AZBlobEmitter.setPrefix(String prefix) |
void |
AZBlobEmitter.setSasToken(String sasToken) |
Modifier and Type | Method and Description |
---|---|
void |
FileSystemEmitter.setBasePath(String basePath) |
void |
FileSystemEmitter.setFileExtension(String fileExtension)
If you want to customize the output file's file extension.
|
void |
FileSystemEmitter.setOnExists(String onExists)
What to do if the target file already exists.
|
Modifier and Type | Method and Description |
---|---|
void |
GCSEmitter.setBucket(String bucket) |
void |
GCSEmitter.setFileExtension(String fileExtension)
If you want to customize the output file's file extension.
|
void |
GCSEmitter.setPrefix(String prefix) |
void |
GCSEmitter.setProjectId(String projectId) |
Modifier and Type | Method and Description |
---|---|
void |
OpenSearchEmitter.setAttachmentStrategy(String attachmentStrategy)
Options: SEPARATE_DOCUMENTS, PARENT_CHILD.
|
void |
OpenSearchEmitter.setAuthScheme(String authScheme) |
void |
OpenSearchEmitter.setCommitWithin(int commitWithin) |
void |
OpenSearchEmitter.setConnectionTimeout(int connectionTimeout) |
void |
OpenSearchEmitter.setEmbeddedFileFieldName(String embeddedFileFieldName)
If using the `OpenSearchEmitter.AttachmentStrategy.PARENT_CHILD`, this is the field name
used to store the child documents. |
void |
OpenSearchEmitter.setIdField(String idField)
Specify the field in the first Metadata that should be
used as the id field for the document.
|
void |
OpenSearchEmitter.setOpenSearchUrl(String openSearchUrl) |
void |
OpenSearchEmitter.setPassword(String password) |
void |
OpenSearchEmitter.setProxyHost(String proxyHost) |
void |
OpenSearchEmitter.setProxyPort(int proxyPort) |
void |
OpenSearchEmitter.setSocketTimeout(int socketTimeout) |
void |
OpenSearchEmitter.setUserName(String userName) |
Modifier and Type | Method and Description |
---|---|
void |
S3Emitter.setBucket(String bucket) |
void |
S3Emitter.setCredentialsProvider(String credentialsProvider) |
void |
S3Emitter.setFileExtension(String fileExtension)
If you want to customize the output file's file extension.
|
void |
S3Emitter.setMaxConnections(int maxConnections)
Maximum number of HTTP connections allowed.
|
void |
S3Emitter.setPrefix(String prefix) |
void |
S3Emitter.setProfile(String profile) |
void |
S3Emitter.setRegion(String region) |
void |
S3Emitter.setSpoolToTemp(boolean spoolToTemp)
Whether or not to spool the metadata list to a tmp file before putting the object.
|
Modifier and Type | Method and Description |
---|---|
void |
SolrEmitter.setAttachmentStrategy(String attachmentStrategy)
Options: SKIP, CONCATENATE_CONTENT, PARENT_CHILD.
|
void |
SolrEmitter.setAuthScheme(String authScheme) |
void |
SolrEmitter.setCommitWithin(int commitWithin) |
void |
SolrEmitter.setConnectionTimeout(int connectionTimeout) |
void |
SolrEmitter.setEmbeddedFileFieldName(String embeddedFileFieldName)
If using the `SolrEmitter.AttachmentStrategy.PARENT_CHILD`, this is the field name
used to store the child documents. |
void |
SolrEmitter.setIdField(String idField)
Specify the field in the first Metadata that should be
used as the id field for the document.
|
void |
SolrEmitter.setPassword(String password) |
void |
SolrEmitter.setProxyHost(String proxyHost) |
void |
SolrEmitter.setProxyPort(int proxyPort) |
void |
SolrEmitter.setSocketTimeout(int socketTimeout) |
void |
SolrEmitter.setSolrCollection(String solrCollection) |
void |
SolrEmitter.setSolrUrls(List<String> solrUrls) |
void |
SolrEmitter.setSolrZkChroot(String solrZkChroot) |
void |
SolrEmitter.setSolrZkHosts(List<String> solrZkHosts) |
void |
SolrEmitter.setUpdateStrategy(String updateStrategy) |
void |
SolrEmitter.setUserName(String userName) |
Modifier and Type | Method and Description |
---|---|
void |
AbstractFetcher.setName(String name) |
Modifier and Type | Method and Description |
---|---|
void |
AZBlobFetcher.setContainer(String container) |
void |
AZBlobFetcher.setEndpoint(String endpoint) |
void |
AZBlobFetcher.setExtractUserMetadata(boolean extractUserMetadata)
Whether or not to extract user metadata from the blob object
|
void |
AZBlobFetcher.setSasToken(String sasToken) |
void |
AZBlobFetcher.setSpoolToTemp(boolean spoolToTemp) |
Modifier and Type | Method and Description |
---|---|
void |
FileSystemFetcher.setBasePath(String basePath)
Default behavior is that clients will send in relative paths; this
must be set to allow this fetcher to fetch the
full path.
|
Modifier and Type | Method and Description |
---|---|
void |
GCSFetcher.setBucket(String bucket) |
void |
GCSFetcher.setExtractUserMetadata(boolean extractUserMetadata)
Whether or not to extract user metadata from the GCS object
|
void |
GCSFetcher.setProjectId(String projectId) |
void |
GCSFetcher.setSpoolToTemp(boolean spoolToTemp) |
Modifier and Type | Method and Description |
---|---|
void |
HttpFetcher.setAuthScheme(String authScheme) |
void |
HttpFetcher.setConnectTimeout(int connectTimeout) |
void |
HttpFetcher.setHttpHeaders(List<String> headers)
Which http headers should we capture in the metadata.
|
void |
HttpFetcher.setMaxConnections(int maxConnections) |
void |
HttpFetcher.setMaxConnectionsPerRoute(int maxConnectionsPerRoute) |
void |
HttpFetcher.setMaxErrMsgSize(int maxErrMsgSize) |
void |
HttpFetcher.setMaxRedirects(int maxRedirects) |
void |
HttpFetcher.setMaxSpoolSize(long maxSpoolSize)
Set the maximum number of bytes to spool to a temp file.
|
void |
HttpFetcher.setNtDomain(String domain) |
void |
HttpFetcher.setOverallTimeout(long overallTimeout)
This sets an overall timeout on the request.
|
void |
HttpFetcher.setPassword(String password) |
void |
HttpFetcher.setProxyHost(String proxyHost) |
void |
HttpFetcher.setProxyPort(int proxyPort) |
void |
HttpFetcher.setRequestTimeout(int requestTimeout) |
void |
HttpFetcher.setSocketTimeout(int socketTimeout) |
void |
HttpFetcher.setUserAgent(String userAgent)
When making the request, what User-Agent is sent in the request.
|
void |
HttpFetcher.setUserName(String userName) |
Modifier and Type | Method and Description |
---|---|
void |
S3Fetcher.setBucket(String bucket) |
void |
S3Fetcher.setCredentialsProvider(String credentialsProvider) |
void |
S3Fetcher.setExtractUserMetadata(boolean extractUserMetadata)
Whether or not to extract user metadata from the S3Object
|
void |
S3Fetcher.setMaxConnections(int maxConnections) |
void |
S3Fetcher.setPrefix(String prefix)
Prefix to prepend to the fetch key before fetching.
|
void |
S3Fetcher.setProfile(String profile) |
void |
S3Fetcher.setRegion(String region) |
void |
S3Fetcher.setSpoolToTemp(boolean spoolToTemp) |
Modifier and Type | Method and Description |
---|---|
void |
PipesIterator.setEmitterName(String emitterName) |
void |
PipesIterator.setFetcherName(String fetcherName) |
void |
PipesIterator.setHandlerType(String handlerType) |
void |
PipesIterator.setMaxEmbeddedResources(int maxEmbeddedResources) |
void |
PipesIterator.setMaxWaitMs(long maxWaitMs) |
void |
PipesIterator.setOnParseException(String onParseException) |
void |
PipesIterator.setParseMode(String parseModeString) |
void |
PipesIterator.setQueueSize(int queueSize) |
void |
PipesIterator.setWriteLimit(int writeLimit) |
Modifier and Type | Method and Description |
---|---|
void |
AZBlobPipesIterator.setContainer(String container) |
void |
AZBlobPipesIterator.setEndpoint(String endpoint) |
void |
AZBlobPipesIterator.setPrefix(String prefix) |
void |
AZBlobPipesIterator.setSasToken(String sasToken) |
Modifier and Type | Method and Description |
---|---|
void |
CSVPipesIterator.setCsvPath(Path csvPath) |
void |
CSVPipesIterator.setCsvPath(String csvPath) |
void |
CSVPipesIterator.setEmitKeyColumn(String emitKeyColumn) |
void |
CSVPipesIterator.setFetchKeyColumn(String fetchKeyColumn) |
void |
CSVPipesIterator.setIdColumn(String idColumn) |
Modifier and Type | Method and Description |
---|---|
void |
FileListPipesIterator.setFileList(String path) |
void |
FileListPipesIterator.setHasHeader(boolean hasHeader) |
Modifier and Type | Method and Description |
---|---|
void |
FileSystemPipesIterator.setBasePath(String basePath) |
Modifier and Type | Method and Description |
---|---|
void |
GCSPipesIterator.setBucket(String bucket) |
void |
GCSPipesIterator.setPrefix(String prefix) |
void |
GCSPipesIterator.setProjectId(String projectId) |
Modifier and Type | Method and Description |
---|---|
void |
JDBCPipesIterator.setConnection(String connection) |
void |
JDBCPipesIterator.setEmitKeyColumn(String fetchKeyColumn) |
void |
JDBCPipesIterator.setFetchKeyColumn(String fetchKeyColumn) |
void |
JDBCPipesIterator.setFetchKeyRangeEndColumn(String fetchKeyRangeEndColumn) |
void |
JDBCPipesIterator.setFetchKeyRangeStartColumn(String fetchKeyRangeStartColumn) |
void |
JDBCPipesIterator.setFetchSize(int fetchSize) |
void |
JDBCPipesIterator.setIdColumn(String idColumn) |
void |
JDBCPipesIterator.setSelect(String select) |
Modifier and Type | Method and Description |
---|---|
void |
S3PipesIterator.setBucket(String bucket) |
void |
S3PipesIterator.setCredentialsProvider(String credentialsProvider) |
void |
S3PipesIterator.setFileNamePattern(String fileNamePattern) |
void |
S3PipesIterator.setPrefix(String prefix) |
void |
S3PipesIterator.setProfile(String profile) |
void |
S3PipesIterator.setRegion(String region) |
Modifier and Type | Method and Description |
---|---|
void |
SolrPipesIterator.setAdditionalFields(List<String> additionalFields) |
void |
SolrPipesIterator.setAuthScheme(String authScheme) |
void |
SolrPipesIterator.setConnectionTimeout(int connectionTimeout) |
void |
SolrPipesIterator.setFailCountField(String failCountField) |
void |
SolrPipesIterator.setFilters(List<String> filters) |
void |
SolrPipesIterator.setIdField(String idField) |
void |
SolrPipesIterator.setParsingIdField(String parsingIdField) |
void |
SolrPipesIterator.setPassword(String password) |
void |
SolrPipesIterator.setProxyHost(String proxyHost) |
void |
SolrPipesIterator.setProxyPort(int proxyPort) |
void |
SolrPipesIterator.setRows(int rows) |
void |
SolrPipesIterator.setSizeFieldName(String sizeFieldName) |
void |
SolrPipesIterator.setSocketTimeout(int socketTimeout) |
void |
SolrPipesIterator.setSolrCollection(String solrCollection) |
void |
SolrPipesIterator.setSolrUrls(List<String> solrUrls) |
void |
SolrPipesIterator.setSolrZkChroot(String solrZkChroot) |
void |
SolrPipesIterator.setSolrZkHosts(List<String> solrZkHosts) |
void |
SolrPipesIterator.setUserName(String userName) |
Copyright © 2007–2022 The Apache Software Foundation. All rights reserved.