public abstract class AbstractProfiler extends FileResourceConsumer
Modifier and Type | Class and Description |
---|---|
static class |
AbstractProfiler.EXCEPTION_TYPE |
static class |
AbstractProfiler.PARSE_ERROR_TYPE
If information was gathered from the log file about
a parse error
|
Modifier and Type | Field and Description |
---|---|
static String |
FALSE |
protected static AtomicInteger |
ID |
static TableInfo |
MIME_TABLE |
static TableInfo |
REF_EXTRACT_EXCEPTION_TYPES |
static TableInfo |
REF_PARSE_ERROR_TYPES |
static TableInfo |
REF_PARSE_EXCEPTION_TYPES |
static String |
TRUE |
protected IDBWriter |
writer |
ELAPSED_MILLIS, IO_IS, IO_OS, OOM, PARSE_ERR, PARSE_EX, TIMED_OUT
Constructor and Description |
---|
AbstractProfiler(ArrayBlockingQueue<FileResource> fileQueue,
IDBWriter writer) |
Modifier and Type | Method and Description |
---|---|
void |
closeWriter() |
protected static String |
getContent(Metadata metadata) |
protected static String |
getContent(Metadata metadata,
int maxLength,
Map<Cols,String> data)
Get the content and record in the data map, under
Cols.CONTENT_TRUNCATED_AT_MAX_LEN, whether the string was truncated |
protected long |
getFileLength(Path p) |
protected org.apache.tika.eval.EvalFilePaths |
getPathsFromExtractCrawl(Metadata metadata,
Path extracts) |
protected org.apache.tika.eval.EvalFilePaths |
getPathsFromSrcCrawl(Metadata metadata,
Path srcDir,
Path extracts) |
protected long |
getSourceFileLength(org.apache.tika.eval.EvalFilePaths fps,
List<Metadata> metadataList) |
static void |
loadCommonTokens(Path p,
String defaultLangCode) |
void |
setMaxContentLength(int maxContentLength)
Truncate the content string to this length if it is longer than this length
|
void |
setMaxContentLengthForLangId(int maxContentLengthForLangId)
Truncate the content string to this length for lang id if it is longer than this length
|
void |
setMaxTokens(int maxTokens)
Add a LimitTokenCountFilterFactory if maxTokens > -1
|
protected void |
writeContentData(String fileId,
Metadata m,
String fieldName,
TableInfo contentsTable)
Checks to see if metadata is null or content is empty (null or only whitespace).
|
protected void |
writeExceptionData(String fileId,
Metadata m,
TableInfo exceptionTable) |
protected void |
writeExtractException(TableInfo extractExceptionTable,
String containerId,
String filePath,
ExtractReaderException.TYPE type) |
protected void |
writeProfileData(org.apache.tika.eval.EvalFilePaths fps,
int i,
Metadata m,
String fileId,
String containerId,
List<Integer> numAttachments,
TableInfo profileTable) |
call, checkForTimedOutMillis, close, flushAndClose, getCurrentFile, getNumHandledExceptions, getNumResourcesConsumed, getXMLifiedLogMsg, getXMLifiedLogMsg, incrementHandledExceptions, isStillActive, parse, pleaseShutdown, processFileResource
public static TableInfo REF_EXTRACT_EXCEPTION_TYPES
public static TableInfo REF_PARSE_ERROR_TYPES
public static TableInfo REF_PARSE_EXCEPTION_TYPES
public static final String TRUE
public static final String FALSE
protected static final AtomicInteger ID
public static TableInfo MIME_TABLE
protected IDBWriter writer
public AbstractProfiler(ArrayBlockingQueue<FileResource> fileQueue, IDBWriter writer)
public static void loadCommonTokens(Path p, String defaultLangCode) throws IOException
p - path to the common_tokens directory. If this is null, try to load from the classpath.
IOException
public void setMaxContentLength(int maxContentLength)
maxContentLength -
public void setMaxContentLengthForLangId(int maxContentLengthForLangId)
maxContentLengthForLangId -
public void setMaxTokens(int maxTokens)
maxTokens -
protected void writeExtractException(TableInfo extractExceptionTable, String containerId, String filePath, ExtractReaderException.TYPE type) throws IOException
IOException
protected void writeProfileData(org.apache.tika.eval.EvalFilePaths fps, int i, Metadata m, String fileId, String containerId, List<Integer> numAttachments, TableInfo profileTable)
protected void writeExceptionData(String fileId, Metadata m, TableInfo exceptionTable)
protected void writeContentData(String fileId, Metadata m, String fieldName, TableInfo contentsTable) throws IOException
fileId -
m -
fieldName -
contentsTable -
IOException
protected static String getContent(Metadata metadata, int maxLength, Map<Cols,String> data)
Cols.CONTENT_TRUNCATED_AT_MAX_LEN
whether the string was truncated
metadata -
maxLength -
data -
public void closeWriter() throws IOException
IOException
protected org.apache.tika.eval.EvalFilePaths getPathsFromExtractCrawl(Metadata metadata, Path extracts)
metadata -
extracts -
protected org.apache.tika.eval.EvalFilePaths getPathsFromSrcCrawl(Metadata metadata, Path srcDir, Path extracts)
protected long getSourceFileLength(org.apache.tika.eval.EvalFilePaths fps, List<Metadata> metadataList)
protected long getFileLength(Path p)
Copyright © 2007–2018 The Apache Software Foundation. All rights reserved.