public class ShiftReduceParser extends Object
| Modifier and Type | Field and Description |
|---|---|
static String |
COMPLETE
Outcome used when a constituent is complete.
|
protected boolean |
debugOn
Turns debug print on or off.
|
static int |
DEFAULT_BEAMSIZE
The default beam size used if no beam size is given.
|
static double |
defaultAdvancePercentage
The default amount of probability mass required of advanced outcomes.
|
static String |
INC_NODE
The label for the top of an incomplete node.
|
static String |
INCOMPLETE
Outcome used when a constituent is incomplete.
|
static String |
TOK_NODE
The label for a token node.
|
static String |
TOP_NODE
The label for the top node.
|
static Integer |
ZERO
The integer 0.
|
| Constructor and Description |
|---|
ShiftReduceParser(opennlp.tools.ml.model.MaxentModel buildModel,
opennlp.tools.ml.model.MaxentModel checkModel,
SequenceLabelerME tagger,
SequenceLabelerME chunker,
HeadRules headRules,
int beamSize,
double advancePercentage)
Creates a new parser using the specified models and head rules using the
specified beam size and advance percentage.
|
ShiftReduceParser(ParserModel model) |
ShiftReduceParser(ParserModel model,
int beamSize,
double advancePercentage) |
| Modifier and Type | Method and Description |
|---|---|
protected Parse[] |
advanceChunks(Parse p,
double minChunkScore)
Returns the top chunk sequences for the specified parse.
|
protected Parse[] |
advanceTags(Parse p)
Advances the parse by assigning it POS tags and returns multiple tag
sequences.
|
static opennlp.tools.dictionary.Dictionary |
buildDictionary(opennlp.tools.util.ObjectStream<Parse> data,
HeadRules rules,
int cutoff)
Creates an n-gram dictionary from the specified data stream using the
specified head rule and specified cut-off.
|
static opennlp.tools.dictionary.Dictionary |
buildDictionary(opennlp.tools.util.ObjectStream<Parse> data,
HeadRules rules,
opennlp.tools.util.TrainingParameters params)
Creates an n-gram dictionary from the specified data stream using the
specified head rule and the cut-off taken from the specified training parameters.
|
static Parse[] |
collapsePunctuation(Parse[] chunks,
Set<String> punctSet)
Removes the punctuation from the specified set of chunks, adds it to the
parses adjacent to the punctuation, and returns a new array of
parses with the punctuation removed.
|
static void |
mergeReportIntoManifest(Map<String,String> manifest,
Map<String,String> report,
String namespace) |
Parse |
parse(Parse tokens) |
Parse[] |
parse(Parse tokens,
int numParses) |
static Parse[] |
parseLine(String line,
ShiftReduceParser parser,
int numParses) |
static void |
setParents(Parse p)
Assigns parent references for the specified parse so that they are
consistent with the children references.
|
static ParserModel |
train(String languageCode,
opennlp.tools.util.ObjectStream<Parse> parseSamples,
HeadRules rules,
opennlp.tools.util.TrainingParameters trainParams,
ParserFactory parserFactory,
SequenceLabelerModel posModel,
opennlp.tools.util.TrainingParameters chunkerParams,
SequenceLabelerFactory chunkerFactory) |
static ParserModel |
train(String languageCode,
opennlp.tools.util.ObjectStream<Parse> parseSamples,
HeadRules rules,
opennlp.tools.util.TrainingParameters trainParams,
ParserFactory parserFactory,
opennlp.tools.util.TrainingParameters taggerParams,
SequenceLabelerFactory taggerFactory,
opennlp.tools.util.TrainingParameters chunkerParams,
SequenceLabelerFactory chunkerFactory) |
public static final int DEFAULT_BEAMSIZE
public static final double defaultAdvancePercentage
public static final String TOP_NODE
public static final String INC_NODE
public static final String TOK_NODE
public static final Integer ZERO
public static final String COMPLETE
public static final String INCOMPLETE
protected boolean debugOn
public ShiftReduceParser(ParserModel model)
public ShiftReduceParser(ParserModel model, int beamSize, double advancePercentage)
public ShiftReduceParser(opennlp.tools.ml.model.MaxentModel buildModel,
opennlp.tools.ml.model.MaxentModel checkModel,
SequenceLabelerME tagger,
SequenceLabelerME chunker,
HeadRules headRules,
int beamSize,
double advancePercentage)
buildModel - The model to assign constituent labels.
checkModel - The model to determine whether a constituent is complete.
tagger - The model to assign pos-tags.
chunker - The model to assign flat constituent labels.
headRules - The head rules for head word percolation.
beamSize - The number of different parses kept during parsing.
advancePercentage - The minimal amount of probability mass which advanced outcomes
must represent. Only outcomes which contribute to the top
"advancePercentage" will be explored.
public static Parse[] parseLine(String line, ShiftReduceParser parser, int numParses)
public static void setParents(Parse p)
p - The parse whose parent references need to be assigned.
protected Parse[] advanceTags(Parse p)
p - The parse to be tagged.
protected Parse[] advanceChunks(Parse p, double minChunkScore)
p - A pos-tag assigned parse.
minChunkScore - A minimum score below which chunks should not be advanced.
public static ParserModel train(String languageCode, opennlp.tools.util.ObjectStream<Parse> parseSamples, HeadRules rules, opennlp.tools.util.TrainingParameters trainParams, ParserFactory parserFactory, opennlp.tools.util.TrainingParameters taggerParams, SequenceLabelerFactory taggerFactory, opennlp.tools.util.TrainingParameters chunkerParams, SequenceLabelerFactory chunkerFactory) throws IOException
IOException
public static ParserModel train(String languageCode, opennlp.tools.util.ObjectStream<Parse> parseSamples, HeadRules rules, opennlp.tools.util.TrainingParameters trainParams, ParserFactory parserFactory, SequenceLabelerModel posModel, opennlp.tools.util.TrainingParameters chunkerParams, SequenceLabelerFactory chunkerFactory) throws IOException
IOException
public static void mergeReportIntoManifest(Map<String,String> manifest, Map<String,String> report, String namespace)
public static Parse[] collapsePunctuation(Parse[] chunks, Set<String> punctSet)
chunks - A set of parses.
punctSet - The set of punctuation which is to be removed.
public static opennlp.tools.dictionary.Dictionary buildDictionary(opennlp.tools.util.ObjectStream<Parse> data, HeadRules rules, int cutoff) throws IOException
data - The data stream of parses.
rules - The head rules for the parses.
cutoff - The minimum number of entries required for the n-gram to be saved
as part of the dictionary.
IOException - if I/O problems occur
public static opennlp.tools.dictionary.Dictionary buildDictionary(opennlp.tools.util.ObjectStream<Parse> data, HeadRules rules, opennlp.tools.util.TrainingParameters params) throws IOException
data - The data stream of parses.
rules - The head rules for the parses.
params - can contain a cutoff, the minimum number of entries required for
the n-gram to be saved as part of the dictionary.
IOException - if I/O problems occur
Copyright © 2017 IXA pipes. All rights reserved.