aboutsummaryrefslogtreecommitdiff
path: root/src/headerparser.h
blob: eb0ea8e171ff00b5256d1ffeabe621ae08529d15 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPEN_VCDIFF_HEADERPARSER_H_
#define OPEN_VCDIFF_HEADERPARSER_H_

#include <config.h>
#include <stddef.h>  // NULL
#include <stdint.h>  // int32_t, uint32_t
#include "checksum.h"  // VCDChecksum
#include "vcdiff_defs.h"  // VCDiffResult

namespace open_vcdiff {

// A cursor over one contiguous block of bytes.  The chunk remembers where the
// block begins and ends, plus a "position" marker separating the bytes that
// have already been parsed from the bytes that are still waiting.  The class
// only stores pointers into the caller's buffer; it never allocates, copies
// or frees that memory.
//
// ParseableChunk deliberately has no virtual destructor.  An object of a
// class derived from ParseableChunk must therefore be destroyed through its
// own concrete type, never through a ParseableChunk*.
class ParseableChunk {
 public:
  // Starts parsing at the first of the data_size bytes found at data_start.
  ParseableChunk(const char* data_start, size_t data_size)
      : start_(data_start),
        end_(data_start + data_size),
        position_(data_start) { }

  // Replaces the buffer being parsed and rewinds the position to its start.
  void SetDataBuffer(const char* data_start, size_t data_size) {
    start_ = data_start;
    position_ = data_start;
    end_ = data_start + data_size;
  }

  // One past the last byte of the buffer.
  const char* End() const { return end_; }

  // Pointer to the first byte that has not been parsed yet.
  const char* UnparsedData() const { return position_; }

  // Address of the position pointer itself, so that code which advances a
  // "const char**" cursor can move this chunk's position directly.
  const char** UnparsedDataAddr() { return &position_; }

  // How many bytes have been consumed so far.
  size_t ParsedSize() const {
    return static_cast<size_t>(position_ - start_);
  }

  // How many bytes are still waiting to be parsed.  This shrinks as the
  // position moves forward, so it generally differs from the buffer size.
  size_t UnparsedSize() const {
    return static_cast<size_t>(end_ - position_);
  }

  // True once every byte of the buffer has been consumed.
  bool Empty() const { return end_ == position_; }

  // Moves the parsing position forward by number_of_bytes.
  void Advance(size_t number_of_bytes);

  // Jumps the parsing position to a new location.
  void SetPosition(const char* position);

  // Skips everything that is left: the position becomes End().
  void Finish() { position_ = end_; }

  // Jumps the parsing position so that exactly number_of_bytes bytes remain
  // to be parsed.  number_of_bytes should be smaller than the amount of
  // unparsed data present before the call.
  void FinishExcept(size_t number_of_bytes);

 private:
  // Bounds of the buffer: [start_, end_).
  const char* start_;
  const char* end_;

  // Current parsing position.  Invariant: start_ <= position_ <= end_.
  const char* position_;

  // Declared private and left undefined so that chunks cannot be copied or
  // assigned.
  ParseableChunk(const ParseableChunk&);
  void operator=(const ParseableChunk&);
};

// Represents one of the three sections in the delta window, as described in
// RFC section 4.3:
//     * Data section for ADDs and RUNs
//     * Instructions and sizes section
//     * Addresses section for COPYs
// When using the interleaved format, data and addresses are pulled from the
// instructions and sizes section rather than being stored in separate sections.
// For that reason, this class allows one DeltaWindowSection to be based on
// another, such that the same position pointer is shared by both sections;
// i.e., UnparsedDataAddr() returns the same value for both objects.
// To achieve this end, one extra level of indirection (a pointer to a
// ParseableChunk object) is added.
class DeltaWindowSection {
 public:
  // Creates a section with no underlying chunk.  One of the Init() overloads
  // must be called before any of the pass-through accessors below is used.
  DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { }

  // Deletes the underlying chunk if (and only if) this section owns it.
  ~DeltaWindowSection() {
    FreeChunk();
  }

  // Makes this section the owner of a chunk covering the data_size bytes at
  // data_start, with the parsing position reset to data_start.
  void Init(const char* data_start, size_t data_size) {
    if (owned_ && parseable_chunk_) {
      // Reuse the already-allocated ParseableChunk object.
      parseable_chunk_->SetDataBuffer(data_start, data_size);
      return;
    }
    // Either nothing has been allocated yet, or the current pointer is
    // borrowed from another section and must not be modified or deleted
    // here; in both cases start over with a chunk of our own.
    parseable_chunk_ = new ParseableChunk(data_start, data_size);
    owned_ = true;
  }

  // Makes this section share the chunk (and therefore the parsing position)
  // of *original.  Any chunk currently owned by this section is deleted.
  // original must not be NULL, and *original must outlive the period during
  // which this section is used (see the comment on parseable_chunk_).
  void Init(DeltaWindowSection* original) {
    if (original == this) {
      // A section trivially shares its position with itself.  Without this
      // guard, FreeChunk() below would delete our own chunk and leave the
      // section with a NULL pointer and owned_ == false.
      return;
    }
    FreeChunk();
    parseable_chunk_ = original->parseable_chunk_;
    owned_ = false;
  }

  // Releases the underlying chunk (deleting it if owned).  Afterwards the
  // section is unusable until Init() is called again.
  void Invalidate() { FreeChunk(); }

  // True if this section is responsible for deleting its chunk; false if the
  // chunk is borrowed from another section.
  bool IsOwned() const { return owned_; }

  // The following functions just pass their arguments to the underlying
  // ParseableChunk object.  They dereference parseable_chunk_ without
  // checking it, so they must not be called before Init() or after
  // Invalidate().

  const char* End() const {
    return parseable_chunk_->End();
  }

  size_t UnparsedSize() const {
    return parseable_chunk_->UnparsedSize();
  }

  size_t ParsedSize() const {
    return parseable_chunk_->ParsedSize();
  }

  bool Empty() const {
    return parseable_chunk_->Empty();
  }

  const char* UnparsedData() const {
    return parseable_chunk_->UnparsedData();
  }

  const char** UnparsedDataAddr() {
    return parseable_chunk_->UnparsedDataAddr();
  }

  void Advance(size_t number_of_bytes) {
    parseable_chunk_->Advance(number_of_bytes);
  }

 private:
  // Deletes the chunk if this section owns it, then forgets the pointer.
  // owned_ is intentionally left unchanged.
  void FreeChunk() {
    if (owned_) {
      delete parseable_chunk_;
    }
    parseable_chunk_ = NULL;
  }

  // Will be NULL until Init() has been called.  If owned_ is true, this will
  // point to a ParseableChunk object that has been allocated with "new" and
  // must be deleted by this DeltaWindowSection object.  If owned_ is false,
  // this points at the parseable_chunk_ owned by a different DeltaWindowSection
  // object.  In this case, it is important to free the DeltaWindowSection which
  // does not own the ParseableChunk before (or simultaneously to) freeing the
  // DeltaWindowSection that owns it, or else deleted memory may be accessed.
  ParseableChunk* parseable_chunk_;
  bool owned_;

  // Making these private avoids implicit copy constructor & assignment operator
  DeltaWindowSection(const DeltaWindowSection&);
  void operator=(const DeltaWindowSection&);
};

// Used to parse the bytes and Varints that make up the delta file header
// or delta window header.  The parser keeps a sticky result code: after the
// first failed Parse...() call, every later Parse...() call fails too, and
// GetResult() reports why.
class VCDiffHeaderParser {
 public:
  // header_start should be the start of the header to be parsed;
  // data_end is the position just after the last byte of available data
  // (which may extend far past the end of the header.)
  VCDiffHeaderParser(const char* header_start, const char* data_end);

  // One of these functions should be called for each element of the header.
  // variable_description is a description of the value that we are attempting
  // to parse, and will only be used to create descriptive error messages.
  // (ParseByte() is the one function in this group that takes no
  // description.)
  // If the function returns true, then the element was parsed successfully
  // and its value has been placed in *value.  If the function returns false,
  // then *value is unchanged, and GetResult() can be called to return the
  // reason that the element could not be parsed, which will be either
  // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end
  // was reached before the end of the element to be parsed.)  Once one of these
  // functions has returned false, further calls to any of the Parse...
  // functions will also return false without performing any additional actions.
  // Typical usage is as follows:
  //     int32_t segment_length = 0;
  //     if (!header_parser.ParseInt32("segment length", &segment_length)) {
  //       return header_parser.GetResult();
  //     }
  //
  // The following example takes advantage of the fact that calling a Parse...
  // function after an error or end-of-data condition is legal and does nothing.
  // It can thus parse more than one element in a row and check the status
  // afterwards.  If the first call to ParseInt32() fails, the second will have
  // no effect:
  //
  //     int32_t segment_length = 0, segment_position = 0;
  //     header_parser.ParseInt32("segment length", &segment_length);
  //     header_parser.ParseInt32("segment position", &segment_position);
  //     if (RESULT_SUCCESS != header_parser.GetResult()) {
  //       return header_parser.GetResult();
  //     }
  //
  bool ParseByte(unsigned char* value);
  bool ParseInt32(const char* variable_description, int32_t* value);
  bool ParseUInt32(const char* variable_description, uint32_t* value);
  bool ParseChecksum(const char* variable_description, VCDChecksum* value);
  bool ParseSize(const char* variable_description, size_t* value);

  // Parses the first three elements of the delta window header:
  //
  //     Win_Indicator                            - byte
  //     [Source segment size]                    - integer (VarintBE format)
  //     [Source segment position]                - integer (VarintBE format)
  //
  // Returns true if the values were parsed successfully and the values were
  // found to be acceptable.  Returns false otherwise, in which case
  // GetResult() can be called to return the reason that the values
  // could not be validated.  This will be either RESULT_ERROR (an error
  // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was
  // reached before the end of the values to be parsed.)  If return value is
  // true, then *win_indicator, *source_segment_length, and
  // *source_segment_position are populated with the parsed values.  Otherwise,
  // the values of these output arguments are undefined.
  //
  // dictionary_size: The size of the dictionary (source) file.  Used to
  //     validate the limits of source_segment_length and
  //     source_segment_position if the source segment is taken from the
  //     dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE.)
  // decoded_target_size: The size of the target data that has been decoded
  //     so far, including all target windows.  Used to validate the limits of
  //     source_segment_length and source_segment_position if the source segment
  //     is taken from the target (i.e., if the parsed *win_indicator equals
  //     VCD_TARGET.)
  // allow_vcd_target: If this argument is false, and the parsed *win_indicator
  //     is VCD_TARGET, then an error is produced; if true, VCD_TARGET is
  //     allowed.
  // win_indicator (output): Points to a single unsigned char (not an array)
  //     that will receive the parsed value of Win_Indicator.
  // source_segment_length (output): The parsed length of the source segment.
  // source_segment_position (output): The parsed zero-based index in the
  //     source/target file from which the source segment is to be taken.
  bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size,
                                         size_t decoded_target_size,
                                         bool allow_vcd_target,
                                         unsigned char* win_indicator,
                                         size_t* source_segment_length,
                                         size_t* source_segment_position);

  // Parses the following two elements of the delta window header:
  //
  //     Length of the delta encoding             - integer (VarintBE format)
  //     Size of the target window                - integer (VarintBE format)
  //
  // Only the second value is handed back to the caller, through
  // *target_window_length.  According to the comments on the private members
  // delta_encoding_length_ and delta_encoding_start_, the first value is kept
  // inside the parser after a successful call.
  //
  // Return conditions and values are the same as for
  // ParseWinIndicatorAndSourceSegment(), above.
  //
  bool ParseWindowLengths(size_t* target_window_length);

  // May only be called after ParseWindowLengths() has returned true.
  // Returns a pointer to the end of the delta window (which might not point to
  // a valid memory location if there is insufficient input data.)
  //
  const char* EndOfDeltaWindow() const;

  // Parses the following element of the delta window header:
  //
  //     Delta_Indicator                          - byte
  //
  // Because none of the bits in Delta_Indicator are used by this implementation
  // of VCDIFF, this function does not have an output argument to return the
  // value of that field.  It returns true or false as with the other
  // Parse...() functions; after a false return, GetResult() gives
  // RESULT_ERROR or RESULT_END_OF_DATA.
  //
  bool ParseDeltaIndicator();

  // Parses the following 3 elements of the delta window header:
  //
  //     Length of data for ADDs and RUNs - integer (VarintBE format)
  //     Length of instructions and sizes - integer (VarintBE format)
  //     Length of addresses for COPYs    - integer (VarintBE format)
  //
  // If has_checksum is true, it also looks for the following element:
  //
  //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
  //
  // Return conditions and values are the same as for
  // ParseWinIndicatorAndSourceSegment(), above.
  //
  bool ParseSectionLengths(bool has_checksum,
                           size_t* add_and_run_data_length,
                           size_t* instructions_and_sizes_length,
                           size_t* addresses_length,
                           VCDChecksum* checksum);

  // If one of the Parse... functions returned false, this function
  // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA)
  // describing the reason for the most recent parse failure.  If none of the
  // Parse... functions has returned false, returns RESULT_SUCCESS.
  VCDiffResult GetResult() const {
    return return_code_;
  }

  // The following functions just pass their arguments to the underlying
  // ParseableChunk object; see that class for their exact meaning.

  const char* End() const {
    return parseable_chunk_.End();
  }

  size_t UnparsedSize() const {
    return parseable_chunk_.UnparsedSize();
  }

  size_t ParsedSize() const {
    return parseable_chunk_.ParsedSize();
  }

  const char* UnparsedData() const {
    return parseable_chunk_.UnparsedData();
  }

 private:
  // Parses two variable-length integers representing the source segment length
  // and source segment position (== offset.)  Checks whether the source segment
  // length and position would cause it to exceed the size of the source file or
  // target file.  Returns true if the values were parsed successfully and the
  // values were found to be acceptable.  Returns false otherwise, in which case
  // GetResult() can be called to return the reason that the two values could
  // not be validated, which will be either RESULT_ERROR (an error occurred and
  // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before
  // the end of the integers to be parsed.)
  // from_size: The requested size of the source segment.
  //     NOTE(review): given the bounds check described above, this is
  //     presumably the size of the file named by from_name rather than of
  //     the segment itself -- confirm against the implementation.
  // from_boundary_name: A NUL-terminated string naming the end of the
  //     source or target file, used in error messages.
  // from_name: A NUL-terminated string naming the source or target file,
  //     also used in error messages.
  // source_segment_length (output): The parsed length of the source segment.
  // source_segment_position (output): The parsed zero-based index in the
  //     source/target file from which the source segment is to be taken.
  //
  bool ParseSourceSegmentLengthAndPosition(size_t from_size,
                                           const char* from_boundary_name,
                                           const char* from_name,
                                           size_t* source_segment_length,
                                           size_t* source_segment_position);

  // The header bytes being parsed, together with the current parse position.
  ParseableChunk parseable_chunk_;

  // Contains the result code of the last Parse...() operation that failed
  // (RESULT_ERROR or RESULT_END_OF_DATA).  If no Parse...() method has been
  // called, or if all calls to Parse...() were successful, then this contains
  // RESULT_SUCCESS.
  VCDiffResult return_code_;

  // Will be zero until ParseWindowLengths() has been called.  After
  // ParseWindowLengths() has been called successfully, this contains the
  // parsed length of the delta encoding.
  size_t delta_encoding_length_;

  // Will be NULL until ParseWindowLengths() has been called.  After
  // ParseWindowLengths() has been called successfully, this points to the
  // beginning of the section of the current window titled "The delta encoding"
  // in the RFC, i.e., to the position just after the length of the delta
  // encoding.
  const char* delta_encoding_start_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffHeaderParser(const VCDiffHeaderParser&);
  void operator=(const VCDiffHeaderParser&);
};

}  // namespace open_vcdiff

#endif  // OPEN_VCDIFF_HEADERPARSER_H_