Longfellow ZK 0290cb32
Loading...
Searching...
No Matches
host_decoder.h
1// Copyright 2025 Google LLC.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef PRIVACY_PROOFS_ZK_LIB_CBOR_HOST_DECODER_H_
16#define PRIVACY_PROOFS_ZK_LIB_CBOR_HOST_DECODER_H_
17
18#include <stddef.h>
19#include <string.h>
20
21#include <cstdint>
22#include <vector>
23
24#include "util/panic.h"
25
26namespace proofs {
27
// Kind of a decoded CBOR node. The first seven enumerators follow the order of
// CBOR major types 0..6; PRIMITIVE covers the major-type-7 simple values that
// the decoder accepts.
enum CborTag {
  UNSIGNED = 0,
  NEGATIVE = 1,
  BYTES = 2,
  TEXT = 3,
  ARRAY = 4,
  MAP = 5,
  TAG = 6,
  PRIMITIVE = 7
};

// Simple values stored in a PRIMITIVE node.
enum CborPrimitive {
  FALSE = 0,
  TRUE = 1,
  CNULL = 2
};
30
31// CBOR decoder for a subset of CBOR used in MDOC.
32//
33// The main advantage of this decoder is that it keeps
34// offsets into the input, which is useful because we need to
35// generate circuits that depend on input offsets.
36//
37// The other security advantage is the smaller codebase, versus
38// relying on an imported CBOR parser that handles a larger subset of CBOR
39// that may introduce issues.
40//
41// The decode function is used to process an untrusted array of bytes.
42// The method returns false if the input is not processed exactly per the
43// MDOC spec with only attributes in the org.iso.18013.5.1 namespace.
44// The resulting CborDoc object is static, and it is assumed that neither the
45// input doc, nor the tree structure changes. All of the lookup and index
46// methods return const pointers to attempt to maintain this property.
47class CborDoc {
48 public:
49 size_t header_pos_;
50 enum CborTag t_;
51
52 // A union is used to store the attributes for either singleton objects (i.e.,
53 // UNSIGNED, NEGATIVE, PRIMITIVE), the start position and len of TEXT and
54 // BYTES array, and the children information for ARRAY or MAP objects.
55 // len of strings and byte arrays
56 union U {
57 uint64_t u64; /* UNSIGNED */
58 int64_t i64; /* NEGATIVE */
59 enum CborPrimitive p; /* PRIMITIVE */
60
61 // BYTES + TEXT, represented as offset in input + length
62 struct {
63 size_t pos;
64 size_t len;
65 } string;
66
67 // arrays, maps, and tags: an array of children nodes.
68 struct {
69 // The original count in the source document. For tags,
70 // the tag itself.
71 size_t n;
72
73 // The actual number of children (e.g. 2*n for maps).
74 size_t nchildren;
75 } items;
76 } u_;
77
78 // This field only applies to ARRAY, MAP nodes, but it has been moved
79 // out of the union to avoid including components with non-default
80 // constructors. It holds the children objects of an array or map. For a map,
81 // even positions are the keys, and the odd positions are the values.
82 std::vector<CborDoc> children_;
83
84 // Parse a byte sequence into a CborDoc structure.
85 //
86 // Caller passes in the input sequence, the length of the
87 // input, and pos and offset values. The offset value handles the case when
88 // the input sequence is a sub-sequence of another string, as it is in
89 // the MDOC and MSO parsing.
90 //
91 // This function can handle adversarial inputs, and returns false when the
92 // input cannot be parsed.
93 bool decode(const uint8_t in[], size_t len, size_t &pos, size_t offset) {
94 /* invariant: pos is always compared with len before it is referenced. */
95 header_pos_ = pos + offset;
96
97 if (pos >= len) {
98 return false;
99 }
100 uint8_t b = in[pos++];
101
102 size_t type = (b >> 5) & 0x7u;
103 size_t count0 = b & 0x1Fu;
104
105 // variable-length count
106 size_t count = 0;
107 if (count0 < 24) {
108 count = count0;
109 } else if (count0 == 24) {
110 if (pos >= len) {
111 return false;
112 }
113 count = in[pos++];
114 } else if (count0 == 25) {
115 if (pos + 1 >= len) {
116 return false;
117 }
118 count = in[pos] * 256 + in[pos + 1];
119 pos += 2;
120 } else if (count0 == 26) {
121 if (pos + 3 >= len) {
122 return false;
123 }
124 for (size_t i = 0; i < 4; ++i) {
125 count *= 256;
126 count += in[pos++];
127 }
128 } else {
129 return false;
130 }
131
132 switch (type) { /* type \in [0,7] by construction */
133 case 0:
134 t_ = UNSIGNED;
135 u_.u64 = count;
136 break;
137 case 1:
138 t_ = NEGATIVE;
139 u_.i64 = -(int64_t)count;
140 break;
141
142 case 2: /* BYTES */
143 case 3: /* TEXT */
144 if (pos + count > len) {
145 return false;
146 }
147 t_ = (type == 2) ? BYTES : TEXT;
148 u_.string.pos = pos;
149 u_.string.len = count;
150 pos += count;
151 break;
152
153 case 4: /* ARRAY */
154 if (pos + count > len) {
155 return false;
156 }
157 return decode_items(ARRAY, count, count, in, len, pos, offset);
158
159 case 5: /* MAP, (key,val) pairs are stored as 2*children */
160 if (pos + 2 * count > len) {
161 return false;
162 }
163 return decode_items(MAP, 2 * count, count, in, len, pos, offset);
164
165 case 6: /* TAG */
166 // Special cases for TAG
167 if (count == 1004) { // date in the form YYYY-MM-DD
168 if (pos + 1 + 10 > len) { // 0xDA for str length + 10 characters
169 return false;
170 }
171 }
172 return decode_items(TAG, 1, count, in, len, pos, offset);
173
174 case 7: /* PRIMITIVE */
175 t_ = PRIMITIVE;
176 switch (count) {
177 case 20:
178 u_.p = FALSE;
179 break;
180 case 21:
181 u_.p = TRUE;
182 break;
183 case 22:
184 u_.p = CNULL;
185 break;
186 default:
187 return false;
188 }
189 break;
190 }
191
192 return true;
193 }
194
195 // Lookup a child node in an array. Returns null if the query is invalid.
196 const CborDoc *index(size_t index) const {
197 if (t_ == ARRAY && index < u_.items.nchildren) {
198 return &children_[index];
199 }
200 return nullptr;
201 }
202
203 // Lookup a key in a map of type {bytes->elements}.
204 // Returns null if the query is invalid.
205 // The key is given as bytes with a length.
206 // ndx is set to the child index of the located key.
207 // The return pointer references the key, and the next object refers to
208 // the value and is guaranteed to exist.
209 const CborDoc *lookup(const uint8_t *const in, size_t len,
210 const uint8_t bytes[/*len*/], size_t &ndx) const {
211 if (t_ == MAP) {
212 for (size_t i = 0; i < u_.items.n; ++i) {
213 if (children_[2 * i].eq(in, len, bytes)) {
214 ndx = i;
215 return &children_[2 * i];
216 }
217 }
218 }
219 return nullptr;
220 }
221
222 // Lookup a key in a map of type {unsigned->object}.
223 // Returns null if the query is invalid.
224 const CborDoc *lookup_unsigned(uint64_t k, size_t &ndx) const {
225 if (t_ == MAP) {
226 for (size_t i = 0; i < u_.items.n; ++i) {
227 const CborDoc *key = &children_[2 * i];
228 if (key->t_ == UNSIGNED && key->u_.u64 == k) {
229 ndx = i;
230 return key;
231 }
232 }
233 }
234 return nullptr;
235 }
236
237 // Lookup a key in a map of type {negative->object}.
238 // Returns null if the query is invalid.
239 const CborDoc *lookup_negative(int64_t k, size_t &ndx) const {
240 if (t_ == MAP) {
241 for (size_t i = 0; i < u_.items.n; ++i) {
242 const CborDoc *key = &children_[2 * i];
243 if (key->t_ == NEGATIVE && key->u_.i64 == k) {
244 ndx = i;
245 return key;
246 }
247 }
248 }
249 return nullptr;
250 }
251
252 // Returns the index of the item with respect to the document bytes.
253 size_t position() const {
254 switch (t_) {
255 case UNSIGNED:
256 return header_pos_;
257 case BYTES:
258 case TEXT:
259 return u_.string.pos;
260 case TAG:
261 return children_[0].u_.string.pos;
262 case PRIMITIVE:
263 return header_pos_;
264 default:
265 check(false, "valueIndex called on non-value type");
266 }
267 return 0;
268 }
269
270 // Returns the length of the item's value in bytes.
271 // According to ISO 18013-5 7.2.1, the mDL data elements shall be encoded
272 // as tstr, uint, bstr, bool, or tdate, so this function only handles those
273 // cases.
274 size_t length() const {
275 switch (t_) {
276 case UNSIGNED:
277 if (u_.u64 < 24) {
278 return 1;
279 } else if (u_.u64 < 256) {
280 return 2;
281 } else if (u_.u64 < 65536) {
282 return 3;
283 }
284 return 5;
285 case BYTES:
286 case TEXT:
287 return u_.string.len;
288 case TAG:
289 return children_[0].u_.string.len; // full-date #6.1004(tstr) format
290 case PRIMITIVE:
291 return 1;
292 default:
293 check(false, "valueLength called on non-value type");
294 }
295 return 0;
296 }
297
298 private:
299 // Decodes a sequence of children nodes.
300 bool decode_items(CborTag t, size_t nchildren, size_t items_n,
301 const uint8_t in[], size_t len, size_t &pos,
302 size_t offset) {
303 t_ = t;
304 u_.items.n = items_n;
305 u_.items.nchildren = nchildren;
306 children_.resize(nchildren);
307 for (size_t i = 0; i < nchildren; ++i) {
308 if (!children_[i].decode(in, len, pos, offset)) return false;
309 }
310 return true;
311 }
312
313 // Compares a text node to a given string of bytes.
314 bool eq(const uint8_t *const in, size_t len,
315 const uint8_t bytes[/*len*/]) const {
316 return t_ == TEXT && u_.string.len == len &&
317 memcmp(bytes, &in[u_.string.pos], len) == 0;
318 }
319};
320
321} // namespace proofs
322
323#endif // PRIVACY_PROOFS_ZK_LIB_CBOR_HOST_DECODER_H_
Definition host_decoder.h:47
Definition host_decoder.h:56