DetectorGraph  2.0
portuguesetranslator.cpp
Go to the documentation of this file.
1 // Copyright 2017 Nest Labs, Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include <iostream>
#include <map>
#include <memory>
#include <regex>
#include <string>
#include <utility>
#include <vector>
19 
20 #include "graph.hpp"
21 #include "detector.hpp"
22 #include "sharedptr.hpp"
23 #include "processorcontainer.hpp"
24 
25 using namespace DetectorGraph;
26 using std::cout;
27 using std::endl;
28 
29 /**
30  * @file portuguesetranslator.cpp
31  * @brief A dictionary-based translator that uses shared memory in TopicStates.
32  *
33  * @section ex-pt-intro Introduction
34  * This example implements a very basic translator based only on word
35  * replacements.
36  *
37  * @section ex-pt-sharing-mem Using Shared Memory in TopicStates
38  * DetectorGraph uses C++ copies to propagate information across Topics &
39  * Detectors. This is fine & desirable for small amounts of data but can become
40  * prohibitive for large objects.
41  * In such cases the recommended pattern is to wrap the large resource using
42  * an appropriate smart pointer (e.g. std::shared_ptr).
43  *
44  * The one caveat is that, as Detectors start accessing shared memory, the
45  * DetectorGraph framework alone cannot guarantee the hermetic nature of
46  * detectors - a detector's data (i.e. the data a member pointer points to) may
47  * change even though no Evaluate('new value') for that member was called.
48  *
49  * In cases where the large data piece has only read-only use, a
50  * const_shared_ptr can be used to partially address the concern mentioned
51  * above.
52  *
53  * In this example, the `TranslationDictionary` TopicState carries a large,
54  * immutable object and does so using a shared_ptr<const T>:
55  @snippetlineno portuguesetranslator.cpp Immutable Shared Memory TopicState
56  *
57  * A different example that also uses this pattern is
58  * [Fancy Vending Machine](@ref fancyvendingmachine.cpp):
59  @snippetlineno fancyvendingmachine.cpp Immutable Shared Memory TopicState
60  *
61  * @section ex-pt-arch Architecture
62  * This sample implements the following graph:
63  *
64  * @dot "TextTranslatorGraph"
65 digraph GraphAnalyzer {
66  rankdir = "LR";
67  node[fontname=Helvetica];
68  size="12,5";
69  "TranslationDictionary" [label="0:TranslationDictionary",style=filled, shape=box, color=lightblue];
70  "EnglishText" [label="1:EnglishText",style=filled, shape=box, color=lightblue];
71  "TranslationDictionary" -> "EnglishToPortugueseTranslator";
72  "EnglishText" -> "EnglishToPortugueseTranslator";
73  "EnglishToPortugueseTranslator" -> "PortugueseText";
74  "PortugueseText" [label="3:PortugueseText",style=filled, shape=box, color=limegreen];
75  "EnglishToPortugueseTranslator" [label="2:EnglishToPortugueseTranslator", color=blue];
76 }
77  * @enddot
78  *
79  */
80 /// @cond DO_NOT_DOCUMENT
81 /**
82  * @brief A TopicState implementation that wraps a shared resource.
83  *
84  * This TopicState wraps a - potentially massive - std::map. Its Topic
85  * can then be subscribed by any number of Detectors - and those detectors
86  * can keep a copy of this TopicState around - with no impact on memory/ram
87  * usage; the copies are only shallow copies.
88  */
89 
90 using Text2TextMap = std::map<std::string, std::string>;
91 
92 //! [Immutable Shared Memory TopicState]
93 struct TranslationDictionary : public TopicState
94 {
95  TranslationDictionary()
96  : map(std::make_shared<Text2TextMap>())
97  {
98  }
99 
100  TranslationDictionary(const std::shared_ptr<const Text2TextMap>& aMapPtr)
101  : map(aMapPtr)
102  {
103  }
104 
105  bool Lookup(std::string inStr, std::string& outStr) const
106  {
107  Text2TextMap::const_iterator lookupIterator = map->find(inStr);
108  if (lookupIterator != map->end())
109  {
110  outStr = lookupIterator->second;
111  return true;
112  }
113  else
114  {
115  return false;
116  }
117  };
118 
119  std::shared_ptr<const Text2TextMap> map;
120 };
121 //! [Immutable Shared Memory TopicState]
122 
123 struct EnglishText : public TopicState
124 {
125  EnglishText(std::string aText = "") : text(aText) {}
126  std::string text;
127 };
128 
129 struct PortugueseText : public TopicState
130 {
131  PortugueseText(std::string aText = "") : text(aText) {}
132  std::string text;
133 };
134 
135 /**
136  * This Detector keeps a shallow local copy of a potentially large object -
137  * TranslationDictionary - and uses it whenever EnglishText changes to produce
138  * a PortugueseText.
139  */
140 class EnglishToPortugueseTranslator :
141 public Detector,
142 public SubscriberInterface<TranslationDictionary>,
143 public SubscriberInterface<EnglishText>,
144 public Publisher<PortugueseText>
145 {
146 public:
147  EnglishToPortugueseTranslator(Graph* graph) : Detector(graph)
148  {
149  Subscribe<TranslationDictionary>(this);
150  Subscribe<EnglishText>(this);
151  SetupPublishing<PortugueseText>(this);
152  }
153 
154  // Caches
155  TranslationDictionary mDatabase;
156 
157  virtual void Evaluate(const TranslationDictionary& aDatabase)
158  {
159  mDatabase = aDatabase;
160  }
161 
162  virtual void Evaluate(const EnglishText& aEnglishText)
163  {
164  std::vector<std::string> outputWords;
165  auto englishWords = split(aEnglishText.text, "\\s+");
166  for (auto inWord : englishWords)
167  {
168  std::string outWord;
169  bool hasTranslation = mDatabase.Lookup(inWord, outWord);
170  outputWords.push_back((hasTranslation) ? outWord : inWord);
171  }
172 
173  Publish(PortugueseText(join(outputWords, " ")));
174  }
175 
176 private:
177  std::vector<std::string> split(const std::string& input, const std::string& regex) {
178  std::regex re(regex);
179  // passing -1 as the submatch index parameter performs splitting
180  std::sregex_token_iterator first{input.begin(), input.end(), re, -1 /* ha-ha-ha */};
181  std::sregex_token_iterator last; // ha-ha
182  return {first, last};
183  }
184  std::string join(const std::vector<std::string>& lst, const std::string& delim)
185  {
186  std::string ret;
187  for(const auto &s : lst) {
188  if(!ret.empty())
189  ret += delim;
190  ret += s;
191  }
192  return ret;
193  }
194 };
195 
196 /**
197  * This class is a container for Graph+Detectors+Topics and it provides a joint
198  * method for performing graph evaluations and *using* its output (printing in
199  * this case).
200  */
201 class TextTranslatorGraph : public ProcessorContainer
202 {
203 public:
204  TextTranslatorGraph() : mEnglishToPortugueseTranslator(&mGraph)
205  {
206  }
207 
208  virtual void ProcessOutput()
209  {
210  auto actionRequestTopic = mGraph.ResolveTopic<PortugueseText>();
211  if (actionRequestTopic->HasNewValue())
212  {
213  const PortugueseText& actionRequest = actionRequestTopic->GetNewValue();
214  cout << actionRequest.text << endl;
215  }
216  }
217 
218 private:
219  EnglishToPortugueseTranslator mEnglishToPortugueseTranslator;
220 };
221 
222 int main()
223 {
224  TextTranslatorGraph graph;
225 
226  std::shared_ptr<Text2TextMap> database = std::make_shared<Text2TextMap>();
227  database->insert({"brown", "marrom"});
228  database->insert({"dog", "cao"});
229  database->insert({"fox", "raposao"});
230  database->insert({"jumped", "pulou"});
231  database->insert({"lazy", "preguicoso"});
232  database->insert({"over", "sobre"});
233  database->insert({"quick", "rapido"});
234  database->insert({"the", "o"});
235  // Let's imagine this is a massive list.
236 
237  graph.ProcessData(TranslationDictionary(database));
238 
239  graph.ProcessData(EnglishText("the quick brown fox jumped over the lazy dog"));
240 }
241 
242 /// @endcond DO_NOT_DOCUMENT
Implements a graph of Topics & Detectors with Input/Output APIs.
Definition: graph.hpp:127
A Base class for a basic Graph container.
Base struct for topic data types.
Definition: topicstate.hpp:52
Base class that implements a Publisher behavior.
Definition: publisher.hpp:66
A unit of logic in a DetectorGraph.
Definition: detector.hpp:68
A Pure interface that declares the Subscriber behavior.