Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
symbolize-model.C
Go to the documentation of this file.
1 // Copyright 2012, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 // -----------------------------------------------------------------------------
30 //
31 //
38 
39 #include <string>
40 #include <cstdlib>
41 #include <memory>
42 #include <vector>
43 
44 #include "../proto/dataio.h"
45 #include "model-proto-writer.H"
46 #include "model-reader.H"
47 #include "symbol-table.H"
48 
// Name of this executable, as shown in the usage message.
#define PROG_NAME "symbolize-model"

// Stringification helpers. STR turns its argument into a string literal
// verbatim; XSTR adds one level of indirection so that a macro argument is
// expanded first and its *value* is what gets stringified.
#define STR(arg) #arg
#define XSTR(arg) STR(arg)
54 
55 using namespace std;
56 using namespace reranker;
57 using confusion_learning::SymbolMessage;
58 
59 const char *usage_msg[] = {
60  "Usage:\n",
61  PROG_NAME " <model file> <symbol file> <output model file>\n",
62 };
63 
66 void usage() {
67  int usage_msg_len = sizeof(usage_msg)/sizeof(const char *);
68  for (int i = 0; i < usage_msg_len; ++i) {
69  cout << usage_msg[i];
70  }
71  cout.flush();
72 }
73 
74 int
75 main(int argc, char **argv) {
76  // Required parameters.
77  string model_file;
78  string symbol_file;
79  string model_output_file;
80 
81  if (argc != 4) {
82  usage();
83  return -1;
84  }
85 
86  model_file = argv[1];
87  symbol_file = argv[2];
88  model_output_file = argv[3];
89 
90  bool compressed = true;
91  bool use_base64 = true;
92 
93  // Now, we finally get to the meat of the code for this executable.
94  shared_ptr<Symbols> symbols(new LocalSymbolTable());
95  if (symbol_file != "") {
96  ConfusionProtoIO proto_reader(symbol_file,
97  ConfusionProtoIO::READ,
98  compressed, use_base64);
99  SymbolMessage symbol_message;
100  while (proto_reader.Read(&symbol_message)) {
101  symbols->SetIndex(symbol_message.symbol(), symbol_message.index());
102  }
103  proto_reader.Close();
104  }
105 
106  ModelReader model_reader(1);
107  shared_ptr<Model> model =
108  model_reader.Read(model_file, compressed, use_base64);
109  model->set_symbols(symbols.get());
110 
111  // Serialize model.
112  Factory<ModelProtoWriter> proto_writer_factory;
113  shared_ptr<ModelProtoWriter> model_writer =
114  proto_writer_factory.CreateOrDie(model->proto_writer_spec(),
115  "model proto writer");
116  if (model_writer.get() == NULL) {
117  return -1;
118  }
119 
120  cerr << "Writing out model to file \"" << model_output_file << "\"...";
121  cerr.flush();
122  confusion_learning::ModelMessage model_message;
123  model_writer->Write(model.get(), &model_message, false);
124 
125  ConfusionProtoIO *proto_writer;
126  proto_writer = new ConfusionProtoIO(model_output_file,
127  ConfusionProtoIO::WRITE,
128  compressed, use_base64);
129  proto_writer->Write(model_message);
130  // Write out features.
131  bool output_best_epoch = true;
132  bool output_key = false;
133  model_writer->WriteFeatures(model.get(),
134  *(proto_writer->outputstream()),
135  output_best_epoch,
136  model->num_training_errors(),
137  output_key);
138  delete proto_writer;
139  cerr << "done." << endl;
140 
141  TearDown();
142  google::protobuf::ShutdownProtobufLibrary();
143 }
int main(int argc, char **argv)
Provides the ModelReader class, which can create Model instances from a file.
shared_ptr< Model > Read(const string &filename, bool compressed, bool use_base64)
Definition: model-reader.H:59
Provides the reranker::Symbols interface as well as the reranker::StaticSymbolTable implementation...
A symbol table that stores the mapping from symbols to ints and vice versa in local (non-static) dat...
Definition: symbol-table.H:185
#define PROG_NAME
Factory for dynamically created instance of the specified type.
Definition: factory.H:396
void usage()
void TearDown()
A free-floating function (within the reranker namespace) that frees statically allocated objects...
shared_ptr< T > CreateOrDie(StreamTokenizer &st, Environment *env=NULL)
Dynamically creates an object, whose type and initialization are contained in a specification string...
Definition: factory.H:562
const char * usage_msg[]
Interface for serializer for reranker::Model instances to ModelMessage instances. ...
Knows how to create Model instances that have been serialized to a file.
Definition: model-reader.H:55