Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
|
Variables | |
tuple | optParse = OptionParser() |
The following arguments are available to hadoop-run.py. More... | |
string | help = "Location of hadoop installation. If not set, " |
string | default = "" |
string | action = "append" |
hadooproot = options.hadooproot | |
streamingloc = options.streamingloc | |
string | tmppath = hadooproot+"/contrib/streaming" |
tuple | streamingjar = glob.glob(tmppath + "/hadoop-streaming*.jar") |
list | filenames = [] |
Collect input filenames. More... | |
tuple | hdproc |
Create output directory if it does not exist. More... | |
string | train_map_options = "" |
Configuration for training options. Options passed to the mapper binary. More... | |
string | train_files = "" |
tuple | train_map |
string | extractsym_map = "'" |
Shortcuts to command-line programs. More... | |
string | compiledata_map = "'" |
string | train_reduce = options.refrbin+"/model-merge-reducer" |
string | train_recomb = options.refrbin+"/model-combine-shards" |
string | symbol_recomb = options.refrbin+"/model-combine-symbols" |
string | pipeeval_options = "" |
string | pipeeval = options.refrbin+"/piped-model-evaluator" |
string | hadoop_inputfiles = "" |
precompdevfile = options.develdata | |
Precompilation of string features. More... | |
string | symbol_dir = options.hdfsinputdir+"/Symbols/" |
string | precomp_dir = options.hdfsinputdir+"/Precompiled/" |
string | precompdev_dir = options.hdfsinputdir+"/PrecompiledDev/" |
string | addl_data = "" |
string | symfile_name = options.outputdir+"/" |
cur_model = options.inputmodel | |
converged = False | |
tuple | iteration = int(options.startiter) |
int | prev_loss = -9999 |
list | loss_history = [] |
int | num_in_decline = 0 |
int | best_loss_index = 0 |
string | eval_cmd = pipeeval+" -d " |
tuple | evalio = pyutil.CommandIO(eval_cmd) |
string | iter_str = "'" |
string | model_output = options.outputdir+"/" |
string | proc_cmd = train_recomb+" -o " |
int | devtest_score = 0 |
float | loss = 0.0 |
list | diff = loss_history[-1] |
string hadoop-run.action = "append" |
Definition at line 111 of file hadoop-run.py.
tuple hadoop-run.addl_data = "" |
Definition at line 319 of file hadoop-run.py.
tuple hadoop-run.best_loss_index = 0 |
Definition at line 358 of file hadoop-run.py.
string hadoop-run.compiledata_map = "'" |
Definition at line 285 of file hadoop-run.py.
hadoop-run.converged = False |
Definition at line 352 of file hadoop-run.py.
hadoop-run.cur_model = options.inputmodel |
Definition at line 351 of file hadoop-run.py.
int hadoop-run.default = "" |
Definition at line 103 of file hadoop-run.py.
tuple hadoop-run.devtest_score = 0 |
Definition at line 385 of file hadoop-run.py.
list hadoop-run.diff = loss_history[-1] |
Definition at line 397 of file hadoop-run.py.
string hadoop-run.eval_cmd = pipeeval+" -d " |
Definition at line 360 of file hadoop-run.py.
tuple hadoop-run.evalio = pyutil.CommandIO(eval_cmd) |
Definition at line 363 of file hadoop-run.py.
string hadoop-run.extractsym_map = "'" |
Shortcuts to command-line programs.
Definition at line 284 of file hadoop-run.py.
list hadoop-run.filenames = [] |
Collect input filenames.
Definition at line 237 of file hadoop-run.py.
string hadoop-run.hadoop_inputfiles = "" |
Definition at line 294 of file hadoop-run.py.
string hadoop-run.hadooproot = options.hadooproot |
Definition at line 193 of file hadoop-run.py.
hadoop-run.hdproc |
Create output directory if it does not exist.
HadoopInterface object used to process all Hadoop MR utils.
Definition at line 256 of file hadoop-run.py.
string hadoop-run.help = "Location of hadoop installation. If not set, " |
Definition at line 101 of file hadoop-run.py.
string hadoop-run.iter_str = "'" |
Definition at line 373 of file hadoop-run.py.
tuple hadoop-run.iteration = int(options.startiter) |
Definition at line 354 of file hadoop-run.py.
tuple hadoop-run.loss = 0.0 |
Definition at line 389 of file hadoop-run.py.
list hadoop-run.loss_history = [] |
Definition at line 356 of file hadoop-run.py.
string hadoop-run.model_output = options.outputdir+"/" |
Definition at line 381 of file hadoop-run.py.
int hadoop-run.num_in_decline = 0 |
Definition at line 357 of file hadoop-run.py.
tuple hadoop-run.optParse = OptionParser() |
The following arguments are available to hadoop-run.py.
[in] | hadooproot | Location of hadoop installation. |
[in] | refrbin | Location of the Reranker Framework bin directory. |
[in] | develdata | Location of development data. |
[in] | input | Location of input data on local FS. |
[in] | hdfsinputdir | Location of input data on HDFS. |
[in] | hdfsoutputdir | Output directory (on HDFS) - will be removed before each iteration. |
[in] | outputdir | Output directory. |
[in] | inputmodel | Name of model to start with. |
[in] | inputmodeliter | Iteration number of input model (will start with next iteration). |
[in] | modelname | Name of model file (new models written to --outputdir). |
[in] | maxiter | Maximum number of iterations to run. |
[in] | numreducer | Number of reducers. |
[in] | streamingloc | Location under hadooproot for streaming jar file. |
[in] | libpath | Specify the LD_LIBRARY_PATH for jobs run on Hadoop. |
[in] | splitsize | Min size of each data split. |
[in] | tasktimeout | Amount of time (seconds) for task to run (e.g., loading model) before processing the next input record. |
[in] | force | Force all data processing even if files exist. |
[in] | forcecompile | Force precompilation if applicable. |
[in] | compilefeatures | Compile features before processing. |
[in] | maxdecline | Number of iterations in decline before stopping |
[in] | model-config | Model configuration file |
[in] | train-config | Feature extractor configuration file for training |
[in] | dev-config | Feature extractor configuration file for dev |
Definition at line 99 of file hadoop-run.py.
string hadoop-run.pipeeval = options.refrbin+"/piped-model-evaluator" |
Definition at line 292 of file hadoop-run.py.
string hadoop-run.pipeeval_options = "" |
Definition at line 289 of file hadoop-run.py.
string hadoop-run.precomp_dir = options.hdfsinputdir+"/Precompiled/" |
Definition at line 313 of file hadoop-run.py.
string hadoop-run.precompdev_dir = options.hdfsinputdir+"/PrecompiledDev/" |
Definition at line 314 of file hadoop-run.py.
string hadoop-run.precompdevfile = options.develdata |
Precompilation of string features.
Optional - reduces the size of the models, but takes time to create initial precompiled data.
Definition at line 301 of file hadoop-run.py.
int hadoop-run.prev_loss = -9999 |
Definition at line 355 of file hadoop-run.py.
string hadoop-run.proc_cmd = train_recomb+" -o " |
Definition at line 382 of file hadoop-run.py.
tuple hadoop-run.streamingjar = glob.glob(tmppath + "/hadoop-streaming*.jar") |
Definition at line 215 of file hadoop-run.py.
list hadoop-run.streamingloc = options.streamingloc |
Definition at line 205 of file hadoop-run.py.
string hadoop-run.symbol_dir = options.hdfsinputdir+"/Symbols/" |
Definition at line 312 of file hadoop-run.py.
string hadoop-run.symbol_recomb = options.refrbin+"/model-combine-symbols" |
Definition at line 288 of file hadoop-run.py.
string hadoop-run.symfile_name = options.outputdir+"/" |
Definition at line 326 of file hadoop-run.py.
string hadoop-run.tmppath = hadooproot+"/contrib/streaming" |
Definition at line 210 of file hadoop-run.py.
string hadoop-run.train_files = "" |
Definition at line 268 of file hadoop-run.py.
tuple hadoop-run.train_map |
Definition at line 275 of file hadoop-run.py.
hadoop-run.train_map_options = "" |
Configuration for training options. Options passed to the mapper binary.
Definition at line 265 of file hadoop-run.py.
string hadoop-run.train_recomb = options.refrbin+"/model-combine-shards" |
Definition at line 287 of file hadoop-run.py.
string hadoop-run.train_reduce = options.refrbin+"/model-merge-reducer" |
Definition at line 286 of file hadoop-run.py.