aboutsummaryrefslogtreecommitdiff
path: root/applier/src/main/java/com/google/archivepatcher/applier/bsdiff/BsPatch.java
blob: cfa8dedb6cd60d92630af753b1dfe1633abcb0af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.archivepatcher.applier.bsdiff;

import com.google.archivepatcher.applier.PatchFormatException;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * A Java implementation of the "bspatch" algorithm based on the BSD-2 licensed source code
 * available here: https://github.com/mendsley/bsdiff. This implementation supports a maximum file
 * size of 2GB for all binaries involved (old, new and patch binaries).
 */
public class BsPatch {
  /** If true, output verbose debugging information. */
  private static final boolean VERBOSE = false;
  
  /** Standard header found at the start of every patch. */
  private static final String SIGNATURE = "ENDSLEY/BSDIFF43";

  /**
   * Default buffer size is 50 kibibytes, a reasonable tradeoff between size and speed.
   */
  private static final int PATCH_BUFFER_SIZE = 1024 * 50;

  /**
   * Masks the upper bit of a long, used to determine if a long is positive or negative.
   */
  private static final long NEGATIVE_LONG_SIGN_MASK = 1L << 63;

  /**
   * The patch is typically compressed and the input stream is decompressing on-the-fly. A small
   * buffer greatly improves efficiency on complicated patches with lots of short directives.
   */
  private static final int PATCH_STREAM_BUFFER_SIZE = 4 * 1024;

  /**
   * Complicated patches with lots of short directives result in many calls to write small amounts
   * of data. A buffer greatly improves efficiency for these patches.
   */
  private static final int OUTPUT_STREAM_BUFFER_SIZE = 16 * 1024;

  /** An instance of Java logger for use with the {@code VERBOSE} mode. */
  private static final Logger logger = Logger.getLogger(BsPatch.class.getName());

  /**
   * Applies a patch from |patchData| to the data in |oldData|, writing the result to |newData|.
   *
   * @param oldData data to which the patch should be applied
   * @param newData stream to write the new artifact to
   * @param patchData stream to read patch instructions from
   * @throws PatchFormatException if the patch stream is invalid
   * @throws IOException if unable to read or write any of the data
   */
  public static void applyPatch(
      RandomAccessFile oldData, OutputStream newData, InputStream patchData)
      throws PatchFormatException, IOException {
    applyPatch(oldData, newData, patchData, null);
  }

  /**
   * Applies a patch from |patchData| to the data in |oldData|, writing the result to |newData|
   * while verifying that the expectedSize is obtained.
   *
   * @param oldData data to which the patch should be applied
   * @param newData stream to write the new artifact to
   * @param patchData stream to read patch instructions from
   * @param expectedNewSize the expected number of bytes in |newData| when patching completes. Can
   *     be null in which case no expectedNewSize checks will be performed.
   * @throws PatchFormatException if the patch stream is invalid
   * @throws IOException if unable to read or write any of the data
   */
  public static void applyPatch(
      RandomAccessFile oldData, OutputStream newData, InputStream patchData, Long expectedNewSize)
      throws PatchFormatException, IOException {
    patchData = new BufferedInputStream(patchData, PATCH_STREAM_BUFFER_SIZE);
    newData = new BufferedOutputStream(newData, OUTPUT_STREAM_BUFFER_SIZE);
    try {
      applyPatchInternal(oldData, newData, patchData, expectedNewSize);
    } finally {
      newData.flush();
    }
  }

  /** Does the work of the public applyPatch method. */
  private static void applyPatchInternal(
      final RandomAccessFile oldData,
      final OutputStream newData,
      final InputStream patchData,
      final Long expectedNewSize)
      throws PatchFormatException, IOException {
    final byte[] signatureBuffer = new byte[SIGNATURE.length()];
    try {
      readFully(patchData, signatureBuffer, 0, signatureBuffer.length);
    } catch (IOException e) {
      throw new PatchFormatException("truncated signature");
    }

    String signature = new String(signatureBuffer, 0, signatureBuffer.length, "US-ASCII");
    if (!SIGNATURE.equals(signature)) {
      throw new PatchFormatException("bad signature");
    }

    // Sanity-check: ensure a-priori knowledge matches patch expectations
    final long oldSize = oldData.length();
    if (oldSize > Integer.MAX_VALUE) {
      throw new PatchFormatException("bad oldSize");
    }
    final long newSize = readBsdiffLong(patchData);
    if (newSize < 0 || newSize > Integer.MAX_VALUE) {
      throw new PatchFormatException("bad newSize");
    }
    if (expectedNewSize != null && expectedNewSize != newSize) {
      throw new PatchFormatException("expectedNewSize != newSize");
    }

    // These buffers are used for performing transformations and copies. They are not stateful.
    final byte[] buffer1 = new byte[PATCH_BUFFER_SIZE];
    final byte[] buffer2 = new byte[PATCH_BUFFER_SIZE];

    // Offsets into |oldData| and |newData|.
    long oldDataOffset = 0; // strobes |oldData| in order specified by the patch file
    long newDataBytesWritten = 0; // monotonically increases from 0 .. |expectedNewSize|
    int numDirectives = 0; // only used for debugging output

    while (newDataBytesWritten < newSize) {
      // Read "control data" for the operation. There are three values here:
      // 1. |diffSegmentLength| defines a number of "similar" bytes that can be transformed
      //    from |oldData| to |newData| by applying byte-by-byte addends. The addend bytes are
      //    read from |patchData|. If zero, no "similar" bytes are transformed in this
      //    operation.
      final long diffSegmentLength = readBsdiffLong(patchData);

      // 2. |copySegmentLength| defines a number of identical bytes that can be copied from
      //    |oldData| to |newData|. If zero, no identical bytes are copied in this operation.
      final long copySegmentLength = readBsdiffLong(patchData);

      // 3. |offsetToNextInput| defines a relative offset to the next position in |oldData| to
      //    jump do after the current operation completes. Strangely, this compensates for
      //    |diffSegmentLength| but not for |copySegmentLength|, so |diffSegmentLength| must
      //    be accumulated into |oldDataOffset| while |copySegmentLength| must NOT be.
      final long offsetToNextInput = readBsdiffLong(patchData);

      if (VERBOSE) {
        numDirectives++;
        logger.log(
            Level.FINE,
            "Patch directive "
                + numDirectives
                + ": oldDataOffset="
                + oldDataOffset
                + ", newDataBytesWritten="
                + newDataBytesWritten
                + ", diffSegmentLength="
                + diffSegmentLength
                + ", copySegmentLength="
                + copySegmentLength
                + ", offsetToNextInput="
                + offsetToNextInput);
      }

      // Sanity-checks
      if (diffSegmentLength < 0 || diffSegmentLength > Integer.MAX_VALUE) {
        throw new PatchFormatException("bad diffSegmentLength");
      }
      if (copySegmentLength < 0 || copySegmentLength > Integer.MAX_VALUE) {
        throw new PatchFormatException("bad copySegmentLength");
      }
      if (offsetToNextInput < Integer.MIN_VALUE || offsetToNextInput > Integer.MAX_VALUE) {
        throw new PatchFormatException("bad offsetToNextInput");
      }

      final long expectedFinalNewDataBytesWritten =
          newDataBytesWritten + diffSegmentLength + copySegmentLength;
      if (expectedFinalNewDataBytesWritten > newSize) {
        throw new PatchFormatException("expectedFinalNewDataBytesWritten too large");
      }

      final long expectedFinalOldDataOffset = oldDataOffset + diffSegmentLength + offsetToNextInput;
      if (expectedFinalOldDataOffset > oldSize) {
        throw new PatchFormatException("expectedFinalOldDataOffset too large");
      }
      if (expectedFinalOldDataOffset < 0) {
        throw new PatchFormatException("expectedFinalOldDataOffset is negative");
      }

      // At this point everything is known to be sane, and the operations should all succeed.
      oldData.seek(oldDataOffset);
      if (diffSegmentLength > 0) {
        transformBytes((int) diffSegmentLength, patchData, oldData, newData, buffer1, buffer2);
      }
      if (copySegmentLength > 0) {
        pipe(patchData, newData, buffer1, (int) copySegmentLength);
      }
      newDataBytesWritten = expectedFinalNewDataBytesWritten;
      oldDataOffset = expectedFinalOldDataOffset;
    }
  }

  /**
   * Transforms bytes from |oldData| into |newData| by applying byte-for-byte addends from
   * |patchData|. The number of bytes consumed from |oldData| and |patchData|, as well as the
   * number of bytes written to |newData|, is |diffLength|. The contents of the buffers are
   * ignored and overwritten, and no guarantee is made as to their contents when this method
   * returns. This is the core of the bsdiff patching algorithm. |buffer1.length| must equal
   * |buffer2.length|, and |buffer1| and |buffer2| must be distinct objects.
   *
   * @param diffLength the length of the BsDiff entry (how many bytes to read and apply).
   * @param patchData the input stream from the BsDiff patch containing diff bytes. This stream
   *                  must be positioned so that the first byte read is the first addend to be
   *                  applied to the first byte of data to be read from |oldData|.
   * @param oldData the old file, for the diff bytes to be applied to. This input source must be
   *                positioned so that the first byte read is the first byte of data to which the
   *                first byte of addends from |patchData| should be applied.
   * @param newData the stream to write the resulting data to.
   * @param buffer1 temporary buffer to use for data transformation; contents are ignored, may be
   *                overwritten, and are undefined when this method returns.
   * @param buffer2 temporary buffer to use for data transformation; contents are ignored, may be
   *                overwritten, and are undefined when this method returns.
   */
  // Visible for testing only
  static void transformBytes(
      final int diffLength,
      final InputStream patchData,
      final RandomAccessFile oldData,
      final OutputStream newData,
      final byte[] buffer1,
      final byte[] buffer2)
      throws IOException {
    int numBytesLeft = diffLength;
    while (numBytesLeft > 0) {
      final int numBytesThisRound = Math.min(numBytesLeft, buffer1.length);
      oldData.readFully(buffer1, 0, numBytesThisRound);
      readFully(patchData, buffer2, 0, numBytesThisRound);
      for (int i = 0; i < numBytesThisRound; i++) {
        buffer1[i] += buffer2[i];
      }
      newData.write(buffer1, 0, numBytesThisRound);
      numBytesLeft -= numBytesThisRound;
    }
  }

  /**
   * Reads a long value in little-endian, signed-magnitude format (the format used by the C++
   * bsdiff implementation).
   *
   * @param in the stream to read from
   * @return the long value
   * @throws PatchFormatException if the value is negative zero (unsupported)
   * @throws IOException if unable to read all 8 bytes from the stream
   */
  // Visible for testing only
  static final long readBsdiffLong(InputStream in) throws PatchFormatException, IOException {
    long result = 0;
    for (int bitshift = 0; bitshift < 64; bitshift += 8) {
      result |= ((long) in.read()) << bitshift;
    }

    if (result == NEGATIVE_LONG_SIGN_MASK) {
      // "Negative zero", which is valid in signed-magnitude format.
      // NB: No sane patch generator should ever produce such a value.
      throw new PatchFormatException("read negative zero");
    }

    if ((result & NEGATIVE_LONG_SIGN_MASK) != 0) {
      result = -(result & ~NEGATIVE_LONG_SIGN_MASK);
    }

    return result;
  }

  /**
   * Read exactly the specified number of bytes into the specified buffer.
   *
   * @param in the input stream to read from
   * @param destination where to write the bytes to
   * @param startAt the offset at which to start writing bytes in the destination buffer
   * @param numBytes the number of bytes to read
   * @throws IOException if reading from the stream fails
   */
  // Visible for testing only
  static void readFully(
      final InputStream in, final byte[] destination, final int startAt, final int numBytes)
      throws IOException {
    int numRead = 0;
    while (numRead < numBytes) {
      int readNow = in.read(destination, startAt + numRead, numBytes - numRead);
      if (readNow == -1) {
        throw new IOException("truncated input stream");
      }
      numRead += readNow;
    }
  }

  /**
   * Use an intermediate buffer to pipe bytes from an InputStream directly to an OutputStream. The
   * buffer's contents may be destroyed by this operation.
   *
   * @param in the stream to read bytes from.
   * @param out the stream to write bytes to.
   * @param buffer the buffer to use for copying bytes; must have length > 0
   * @param copyLength the number of bytes to copy from the input stream to the output stream
   */
  // Visible for testing only
  static void pipe(
      final InputStream in, final OutputStream out, final byte[] buffer, int copyLength)
      throws IOException {
    while (copyLength > 0) {
      int maxCopy = Math.min(buffer.length, copyLength);
      readFully(in, buffer, 0, maxCopy);
      out.write(buffer, 0, maxCopy);
      copyLength -= maxCopy;
    }
  }
}