protected static class AbstractTextExtractor.DefaultTextStripper
extends org.apache.pdfbox.text.PDFTextStripper
| 限定符和类型 | 字段和说明 |
|---|---|
| protected Map<String,Rectangle> | regionArea<br>区域 |
| protected Map<String,ArrayList<List<org.apache.pdfbox.text.TextPosition>>> | regionCharacterList<br>区域字符列表 |
| protected Map<String,StringWriter> | regionText<br>区域文本字典 |
| 构造器和说明 |
|---|
| DefaultTextStripper(Map<String,Rectangle> regionArea)<br>有参构造 |
| 限定符和类型 | 方法和说明 |
|---|---|
| protected float | computeFontHeight(org.apache.pdfbox.pdmodel.font.PDFont arg0) |
| protected int | getCurrentPageNo()<br>获取当前页码 |
| protected void | processTextPosition(org.apache.pdfbox.text.TextPosition text)<br>处理文本定位 |
| protected void | showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, org.apache.pdfbox.util.Vector arg3) |
| protected void | writePage()<br>写入页面 |
endArticle, endDocument, endPage, getAddMoreFormatting, getArticleEnd, getArticleStart, getAverageCharTolerance, getCharactersByArticle, getDropThreshold, getEndBookmark, getEndPage, getIndentThreshold, getLineSeparator, getListItemPatterns, getOutput, getPageEnd, getPageStart, getParagraphEnd, getParagraphStart, getSeparateByBeads, getSortByPosition, getSpacingTolerance, getStartBookmark, getStartPage, getSuppressDuplicateOverlappingText, getText, getWordSeparator, matchPattern, processPage, processPages, setAddMoreFormatting, setArticleEnd, setArticleStart, setAverageCharTolerance, setDropThreshold, setEndBookmark, setEndPage, setIndentThreshold, setLineSeparator, setListItemPatterns, setPageEnd, setPageStart, setParagraphEnd, setParagraphStart, setShouldSeparateByBeads, setSortByPosition, setSpacingTolerance, setStartBookmark, setStartPage, setSuppressDuplicateOverlappingText, setWordSeparator, startArticle, startArticle, startDocument, startPage, writeCharacters, writeLineSeparator, writePageEnd, writePageStart, writeParagraphEnd, writeParagraphSeparator, writeParagraphStart, writeString, writeString, writeText, writeWordSeparator

addOperator, applyTextAdjustment, beginMarkedContentSequence, beginText, decreaseLevel, endMarkedContentSequence, endText, getAppearance, getCurrentPage, getGraphicsStackSize, getGraphicsState, getInitialMatrix, getLevel, getResources, getTextLineMatrix, getTextMatrix, increaseLevel, operatorException, processAnnotation, processChildStream, processOperator, processOperator, processSoftMask, processTilingPattern, processTilingPattern, processTransparencyGroup, processType3Stream, restoreGraphicsStack, restoreGraphicsState, saveGraphicsStack, saveGraphicsState, setLineDashPattern, setTextLineMatrix, setTextMatrix, showAnnotation, showFontGlyph, showForm, showText, showTextString, showTextStrings, showTransparencyGroup, showType3Glyph, transformedPoint, transformWidth, unsupportedOperator

protected final Map<String,ArrayList<List<org.apache.pdfbox.text.TextPosition>>> regionCharacterList
protected final Map<String,StringWriter> regionText
protected int getCurrentPageNo()
getCurrentPageNo 在类中 org.apache.pdfbox.text.PDFTextStripper

protected void processTextPosition(org.apache.pdfbox.text.TextPosition text)
processTextPosition 在类中 org.apache.pdfbox.text.PDFTextStripper
text - 文本

protected void writePage()
throws IOException
writePage 在类中 org.apache.pdfbox.text.PDFTextStripper
IOException - IO异常

protected void showGlyph(org.apache.pdfbox.util.Matrix arg0,
org.apache.pdfbox.pdmodel.font.PDFont arg1,
int arg2,
org.apache.pdfbox.util.Vector arg3)
throws IOException
showGlyph 在类中 org.apache.pdfbox.contentstream.PDFStreamEngine
IOException

protected float computeFontHeight(org.apache.pdfbox.pdmodel.font.PDFont arg0)
throws IOException
IOException

Copyright © 2024. All rights reserved.