author     Vikas Arora <vikasa@google.com>   2012-08-21 10:04:52 -0700
committer  Vikas Arora <vikasa@google.com>   2012-08-21 10:04:52 -0700
commit     5a50414796e9a458925c7a13a15055d02406bf43 (patch)
tree       d49b6691738b7febe752dd9e928b884532bb2e8c
parent     88fe2b83c4b9232cd08729556fd0485d6a6a92cd (diff)
download   webp-tools_r22.tar.gz
Change-Id: Ibbd20e72d28122a9c972621a9752673a28c95d5c
-rw-r--r--  ChangeLog | 1
-rw-r--r--  NEWS | 1
-rw-r--r--  README | 519
-rw-r--r--  README.android | 1
-rw-r--r--  include/webp/decode.h | 283
-rw-r--r--  include/webp/decode_vp8.h | 146
-rw-r--r--  include/webp/encode.h | 366
-rw-r--r--  include/webp/format_constants.h | 90
-rw-r--r--  include/webp/types.h | 17
-rw-r--r--  src/dec/Android.mk | 50
-rw-r--r--  src/dec/alpha.c | 152
-rw-r--r--  src/dec/bits.c | 79
-rw-r--r--  src/dec/bits.h | 108
-rw-r--r--  src/dec/buffer.c | 109
-rw-r--r--  src/dec/decode_vp8.h | 182
-rw-r--r--  src/dec/frame.c | 441
-rw-r--r--  src/dec/idec.c | 510
-rw-r--r--  src/dec/io.c | 709
-rw-r--r--  src/dec/layer.c | 7
-rw-r--r--  src/dec/quant.c | 16
-rw-r--r--  src/dec/tree.c | 10
-rw-r--r--  src/dec/vp8.c | 231
-rw-r--r--  src/dec/vp8i.h | 149
-rw-r--r--  src/dec/vp8l.c | 1200
-rw-r--r--  src/dec/vp8li.h | 121
-rw-r--r--  src/dec/webp.c | 659
-rw-r--r--  src/dec/webpi.h | 83
-rw-r--r--  src/dsp/cpu.c | 86
-rw-r--r--  src/dsp/dec.c (renamed from src/dec/dsp.c) | 288
-rw-r--r--  src/dsp/dec_neon.c | 329
-rw-r--r--  src/dsp/dec_sse2.c (renamed from src/dec/dsp_sse2.c) | 55
-rw-r--r--  src/dsp/dsp.h | 210
-rw-r--r--  src/dsp/enc.c (renamed from src/enc/dsp.c) | 245
-rw-r--r--  src/dsp/enc_sse2.c (renamed from src/enc/dsp_sse2.c) | 25
-rw-r--r--  src/dsp/lossless.c | 1150
-rw-r--r--  src/dsp/lossless.h | 82
-rw-r--r--  src/dsp/upsampling.c | 357
-rw-r--r--  src/dsp/upsampling_sse2.c (renamed from src/dec/io_sse2.c) | 36
-rw-r--r--  src/dsp/yuv.c (renamed from src/dec/yuv.c) | 6
-rw-r--r--  src/dsp/yuv.h (renamed from src/dec/yuv.h) | 89
-rw-r--r--  src/enc/Android.mk | 56
-rw-r--r--  src/enc/alpha.c | 344
-rw-r--r--  src/enc/analysis.c | 40
-rw-r--r--  src/enc/backward_references.c | 874
-rw-r--r--  src/enc/backward_references.h | 212
-rw-r--r--  src/enc/bit_writer.h | 63
-rw-r--r--  src/enc/config.c | 32
-rw-r--r--  src/enc/cost.c | 27
-rw-r--r--  src/enc/cost.h | 20
-rw-r--r--  src/enc/filter.c | 95
-rw-r--r--  src/enc/frame.c | 463
-rw-r--r--  src/enc/histogram.c | 406
-rw-r--r--  src/enc/histogram.h | 115
-rw-r--r--  src/enc/iterator.c | 213
-rw-r--r--  src/enc/layer.c | 14
-rw-r--r--  src/enc/picture.c | 1103
-rw-r--r--  src/enc/quant.c | 83
-rw-r--r--  src/enc/syntax.c | 291
-rw-r--r--  src/enc/tree.c | 13
-rw-r--r--  src/enc/vp8enci.h | 191
-rw-r--r--  src/enc/vp8l.c | 1150
-rw-r--r--  src/enc/vp8li.h | 68
-rw-r--r--  src/enc/webpenc.c | 134
-rw-r--r--  src/utils/bit_reader.c | 229
-rw-r--r--  src/utils/bit_reader.h | 197
-rw-r--r--  src/utils/bit_writer.c (renamed from src/enc/bit_writer.c) | 129
-rw-r--r--  src/utils/bit_writer.h | 123
-rw-r--r--  src/utils/color_cache.c | 44
-rw-r--r--  src/utils/color_cache.h | 68
-rw-r--r--  src/utils/filters.c | 229
-rw-r--r--  src/utils/filters.h | 54
-rw-r--r--  src/utils/huffman.c | 238
-rw-r--r--  src/utils/huffman.h | 78
-rw-r--r--  src/utils/huffman_encode.c | 439
-rw-r--r--  src/utils/huffman_encode.h | 47
-rw-r--r--  src/utils/quant_levels.c | 154
-rw-r--r--  src/utils/quant_levels.h | 39
-rw-r--r--  src/utils/rescaler.c | 152
-rw-r--r--  src/utils/rescaler.h | 76
-rw-r--r--  src/utils/thread.c | 247
-rw-r--r--  src/utils/thread.h | 86
-rw-r--r--  src/utils/utils.c | 44
-rw-r--r--  src/utils/utils.h | 44
83 files changed, 14586 insertions, 3336 deletions
diff --git a/ChangeLog b/ChangeLog
index fe7d3a10..1fcff1a4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,3 +2,4 @@
- 5/11: release version 0.1.2
- 6/11: Added encoder (version 0.1.2) as well
- 7/11: Updated WebP with head change#Ia53f845b
+- 8/12: release version 0.2.0 (head change#I3da2063b)
diff --git a/NEWS b/NEWS
index afa13493..9232f5c6 100644
--- a/NEWS
+++ b/NEWS
@@ -1 +1,2 @@
- 9/10: initial release version 0.1
+- 8/12: release version 0.2.0 (head change#I3da2063b).
diff --git a/README b/README
index 22db0862..d3f40620 100644
--- a/README
+++ b/README
@@ -1,43 +1,103 @@
__ __ ____ ____ ____
/ \\/ \/ _ \/ _ )/ _ \
\ / __/ _ \ __/
- \__\__/\____/\_____/__/ _________ ____ ____
- \ \ / _ \/ _/ / \ \ / _ \ _ \
- / \ \ __/ \_/ / / \ \ __/ /_
- \_____/_____/____/____/\_____/_____/_/\__/v0.1
-
+ \__\__/\____/\_____/__/ ____ ___
+ / _/ / \ \ / _ \/ _/
+ / \_/ / / \ \ __/ \__
+ \____/____/\_____/_____/____/v0.2.0
+
Description:
============
-WEBP decoder: libwebpdecode.so is a simple library for
-decoding WEBP image files.
+WebP codec: library to encode and decode images in WebP format. This package
+contains the library that can be used in other programs to add WebP support,
+as well as the command line tools 'cwebp' and 'dwebp'.
-See http://code.google.com/speed/webp
+See http://developers.google.com/speed/webp
+Latest sources are available from http://www.webmproject.org/code/
It is released under the same license as the WebM project.
See http://www.webmproject.org/license/software/ or the
file "COPYING" file for details. An additional intellectual
property rights grant can be found in the file PATENTS.
+Building:
+=========
-API:
-====
+Windows build (without experimental features):
+----------------------------------------------
-This is mainly just one function to call, so just have a look at
-the file src/webp/decode.h for the details and variants:
+By running:
-#include "webp/decode.h"
-uint8_t* WebPDecodeRGB(const uint8_t* data, uint32_t data_size,
- int *width, int *height);
+ nmake /f Makefile.vc CFG=release-static RTLIBCFG=static OBJDIR=output
-A lower-level API is available from the header file <webp/decode_vp8.h>
+the directory output\release-static\(x64|x86)\bin will contain the tools
+cwebp.exe and dwebp.exe. The directory output\release-static\(x64|x86)\lib will
+contain the libwebp static library.
+The target architecture (x86/x64) is detected by Makefile.vc from the Visual
+Studio compiler (cl.exe) available in the system path.
+Windows build (with experimental features):
+-------------------------------------------
+
+This release requires the zlib library. This library is not common under
+Windows nor can it be replaced with calls to the Windows API, so you will need
+to download it or build it yourself.
+
+You can either:
+(a) Link zlib dynamically (as a DLL).
+(b) Link zlib statically.
+
+Linking it dynamically is easier to do (as you can download a precompiled DLL),
+but a bit more cumbersome to use - you need to keep zlib1.dll in the same
+directory as the created EXEs.
+
+a. With dynamic zlib
+~~~~~~~~~~~~~~~~~~~~
+1. Download and unpack the archive from http://zlib.net/zlib125-dll.zip.
+2. Compile libwebp with the command:
+ nmake /f Makefile.vc CFG=release-static RTLIBCFG=static OBJDIR=output \
+ ZDLLDIR=C:\path\to\the\unpacked\archive EXPERIMENTAL=y
+3. Copy C:\path\to\the\unpacked\archive\zlib1.dll to
+ output\release-static\x86\bin for binaries there to work.
+Note: only a 32-bit DLL is currently available for download, so for a 64-bit
+build, you will need to build the DLL yourself.
+
+b. With static zlib
+~~~~~~~~~~~~~~~~~~~
+
+1. Download and unpack the source code from http://zlib.net/zlib125.zip.
+2. For a 32-bit build of zlib compatible with libwebp:
+ a. Edit zlib's win32\Makefile.msc, adding "-SAFESEH" to ASFLAGS.
+ b. Compile zlib with the command:
+ nmake /f win32/Makefile.msc LOC="-DASMV -DASMINF -MT" \
+ OBJA="inffas32.obj match686.obj"
+3. Compile libwebp with the command
+ nmake /f Makefile.vc CFG=release-static RTLIBCFG=static OBJDIR=output \
+ ZLIBDIR=C:\path\to\the\unpacked\archive EXPERIMENTAL=y
+4. Use the binaries in output\release-static\x86\bin.
+Note: a 64-bit build of zlib requires different modifications of
+Makefile.msc.
-Building:
-=========
-If everything goes right, then:
+Unix build using makefile.unix:
+-------------------------------
+
+On platforms with GNU tools installed (gcc and make), running
+
+ make -f makefile.unix
+
+will build the binaries examples/cwebp and examples/dwebp, along
+with the static library src/libwebp.a. No system-wide installation
+is supplied, as this is a simple alternative to the full installation
+system based on the autoconf tools (see below).
+Please refer to the makefile.unix for additional details and customizations.
+
+Using autoconf tools:
+---------------------
+When building from git sources, you will need to run autogen.sh to generate the
+configure script.
./configure
make
@@ -46,19 +106,423 @@ make install
should be all you need to have the following files
/usr/local/include/webp/decode.h
-/usr/local/include/webp/decode_vp8.h
-/usr/local/lib/libwebpdecode.*
+/usr/local/include/webp/encode.h
+/usr/local/include/webp/types.h
+/usr/local/lib/libwebp.*
+/usr/local/bin/cwebp
+/usr/local/bin/dwebp
installed.
+Note: The encoding and decoding libraries are compiled separately
+(as src/dec/libwebpdecode.* and src/enc/libwebpencode.*). They
+can be installed independently using a minor modification in the
+corresponding Makefile.am configure files (see comments there).
+
+SWIG bindings:
+--------------
+
+To generate language bindings from swig/libwebp.i swig-1.3
+(http://www.swig.org) is required. 2.0 may work, but has not been tested.
+
+Currently the following functions are mapped:
+Decode:
+ WebPGetDecoderVersion
+ WebPGetInfo
+ WebPDecodeRGBA
+ WebPDecodeARGB
+ WebPDecodeBGRA
+ WebPDecodeBGR
+ WebPDecodeRGB
+
+Encode:
+ WebPGetEncoderVersion
+ WebPEncodeRGBA
+ WebPEncodeBGRA
+ WebPEncodeRGB
+ WebPEncodeBGR
+ WebPEncodeLosslessRGBA
+ WebPEncodeLosslessBGRA
+ WebPEncodeLosslessRGB
+ WebPEncodeLosslessBGR
+
+Java bindings:
+
+To build the swig-generated JNI wrapper code at least JDK-1.5 (or equivalent)
+is necessary for enum support. The output is intended to be a shared object /
+DLL that can be loaded via System.loadLibrary("webp_jni").
+
+Encoding tool:
+==============
+
+The examples/ directory contains tools for encoding (cwebp) and
+decoding (dwebp) images.
+
+The easiest use should look like:
+ cwebp input.png -q 80 -o output.webp
+which will convert the input file to a WebP file using a quality factor of 80
+on a 0->100 scale (0 being the lowest quality, 100 being the best. Default
+value is 75).
+You might want to try the -lossless flag too, which will compress the source
+(in RGBA format) without any loss. The -q quality parameter will in this case
+control the amount of processing time spent trying to make the output file as
+small as possible.
+
+A longer list of options is available using the -longhelp command line flag:
+
+> cwebp -longhelp
+Usage:
+ cwebp [-preset <...>] [options] in_file [-o out_file]
+
+If input size (-s) for an image is not specified, it is assumed to be a PNG,
+JPEG or TIFF file.
+options:
+ -h / -help ............ short help
+ -H / -longhelp ........ long help
+ -q <float> ............. quality factor (0:small..100:big)
+ -alpha_q <int> ......... Transparency-compression quality (0..100).
+ -preset <string> ....... Preset setting, one of:
+ default, photo, picture,
+ drawing, icon, text
+ -preset must come first, as it overwrites other parameters.
+ -m <int> ............... compression method (0=fast, 6=slowest)
+ -segments <int> ........ number of segments to use (1..4)
+ -size <int> ............ Target size (in bytes)
+ -psnr <float> .......... Target PSNR (in dB. typically: 42)
+
+ -s <int> <int> ......... Input size (width x height) for YUV
+ -sns <int> ............. Spatial Noise Shaping (0:off, 100:max)
+ -f <int> ............... filter strength (0=off..100)
+ -sharpness <int> ....... filter sharpness (0:most .. 7:least sharp)
+ -strong ................ use strong filter instead of simple.
+ -partition_limit <int> . limit quality to fit the 512k limit on
+ the first partition (0=no degradation ... 100=full)
+ -pass <int> ............ analysis pass number (1..10)
+ -crop <x> <y> <w> <h> .. crop picture with the given rectangle
+ -resize <w> <h> ........ resize picture (after any cropping)
+ -map <int> ............. print map of extra info.
+ -print_psnr ............ prints averaged PSNR distortion.
+ -print_ssim ............ prints averaged SSIM distortion.
+ -print_lsim ............ prints local-similarity distortion.
+ -d <file.pgm> .......... dump the compressed output (PGM file).
+ -alpha_method <int> .... Transparency-compression method (0..1)
+ -alpha_filter <string> . predictive filtering for alpha plane.
+ One of: none, fast (default) or best.
+ -alpha_cleanup ......... Clean RGB values in transparent area.
+ -noalpha ............... discard any transparency information.
+ -lossless .............. Encode image losslessly.
+ -hint <string> ......... Specify image characteristics hint.
+ One of: photo, picture or graph
+
+ -short ................. condense printed message
+ -quiet ................. don't print anything.
+ -version ............... print version number and exit.
+ -noasm ................. disable all assembly optimizations.
+ -v ..................... verbose, e.g. print encoding/decoding times
+ -progress .............. report encoding progress
+
+Experimental Options:
+ -af .................... auto-adjust filter strength.
+ -pre <int> ............. pre-processing filter
+
+
+The main options you might want to try in order to further tune the
+visual quality are:
+ -preset
+ -sns
+ -f
+ -m
+
+Namely:
+ * 'preset' will set up a default encoding configuration targeting a
+ particular type of input. It should appear first in the list of options,
+ so that subsequent options can take effect on top of this preset.
+ Default value is 'default'.
+ * 'sns' will progressively turn on (when going from 0 to 100) some additional
+ visual optimizations (like: segmentation map re-enforcement). This option
+ will balance the bit allocation differently. It tries to take bits from the
+ "easy" parts of the picture and use them in the "difficult" ones instead.
+ Usually, raising the sns value (at fixed -q value) leads to larger files,
+ but with better quality.
+ Typical value is around '75'.
+ * 'f' option directly links to the filtering strength used by the codec's
+ in-loop processing. The higher the value, the smoother the
+ highly-compressed area will look. This is particularly useful when aiming
+ at very small files. Typical values are around 20-30. Note that using the
+ option -strong will change the type of filtering. Use "-f 0" to turn
+ filtering off.
+ * 'm' controls the trade-off between encoding speed and quality. Default is 4.
+ You can try -m 5 or -m 6 to explore more (time-consuming) encoding
+ possibilities. A lower value will result in faster encoding at the expense
+ of quality.
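
For readers calling the library directly rather than cwebp, the knobs above
correspond to WebPConfig fields (method, sns_strength, filter_strength,
filter_type). A minimal sketch, based on the encode.h declarations in this
change; the helper name and the particular values are illustrative only:

   #include "webp/encode.h"

   // Hypothetical helper: roughly the library-side equivalent of
   // "cwebp -preset photo -q 75 -sns 75 -f 20 -strong -m 5".
   static int SetupTunedConfig(WebPConfig* const config) {
     if (!WebPConfigPreset(config, WEBP_PRESET_PHOTO, 75.f)) {
       return 0;                     // version mismatch
     }
     config->sns_strength = 75;      // -sns 75
     config->filter_strength = 20;   // -f 20
     config->filter_type = 1;        // -strong
     config->method = 5;             // -m 5
     return WebPValidateConfig(config);
   }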
+
+Decoding tool:
+==============
+
+There is a decoding sample in examples/dwebp.c which will take
+a .webp file and decode it to a PNG image file (amongst other formats).
+This is simply to demonstrate the use of the API. You can verify the
+file test.webp decodes to exactly the same as test_ref.ppm by using:
+
+ cd examples
+ ./dwebp test.webp -ppm -o test.ppm
+ diff test.ppm test_ref.ppm
+
+The full list of options is available using -h:
+
+> dwebp -h
+Usage: dwebp in_file [options] [-o out_file]
+
+Decodes the WebP image file to PNG format [Default]
+Use following options to convert into alternate image formats:
+ -pam ......... save the raw RGBA samples as a color PAM
+ -ppm ......... save the raw RGB samples as a color PPM
+ -pgm ......... save the raw YUV samples as a grayscale PGM
+ file with IMC4 layout.
+ Other options are:
+ -version .... print version number and exit.
+ -nofancy ..... don't use the fancy YUV420 upscaler.
+ -nofilter .... disable in-loop filtering.
+ -mt .......... use multi-threading
+ -crop <x> <y> <w> <h> ... crop output with the given rectangle
+ -scale <w> <h> .......... scale the output (*after* any cropping)
+ -alpha ....... only save the alpha plane.
+ -h ....... this help message.
+ -v ....... verbose (e.g. print encoding/decoding times)
+ -noasm ....... disable all assembly optimizations.
+
+Visualization tool:
+===================
+
+There's a little self-serve visualization tool called 'vwebp' under the
+examples/ directory. It uses OpenGL to open a simple drawing window and show
+a decoded WebP file. It's not yet integrated in the automake or makefile.unix
+build system, but you can try to manually compile it using the recommendations
+at the top of the source file.
+
+Usage: 'vwebp my_picture.webp'
+
+
+Encoding API:
+=============
+
+The main encoding functions are available in the header src/webp/encode.h
+The ready-to-use ones are:
+size_t WebPEncodeRGB(const uint8_t* rgb, int width, int height, int stride,
+ float quality_factor, uint8_t** output);
+size_t WebPEncodeBGR(const uint8_t* bgr, int width, int height, int stride,
+ float quality_factor, uint8_t** output);
+size_t WebPEncodeRGBA(const uint8_t* rgba, int width, int height, int stride,
+ float quality_factor, uint8_t** output);
+size_t WebPEncodeBGRA(const uint8_t* bgra, int width, int height, int stride,
+ float quality_factor, uint8_t** output);
+
+They will convert raw RGB samples into WebP data. The only control supplied
+is the quality factor.
+
+There are some variants for using the lossless format:
+
+size_t WebPEncodeLosslessRGB(const uint8_t* rgb, int width, int height,
+ int stride, uint8_t** output);
+size_t WebPEncodeLosslessBGR(const uint8_t* bgr, int width, int height,
+ int stride, uint8_t** output);
+size_t WebPEncodeLosslessRGBA(const uint8_t* rgba, int width, int height,
+ int stride, uint8_t** output);
+size_t WebPEncodeLosslessBGRA(const uint8_t* bgra, int width, int height,
+ int stride, uint8_t** output);
+
+Of course in this case, no quality factor is needed since the compression
+occurs without loss of the input values, at the expense of larger output sizes.
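
A minimal usage sketch for the one-call functions above (the helper name is
illustrative; the caller is assumed to supply a valid RGBA buffer, dimensions
and stride, and to store the compressed bytes somewhere before returning):

   #include <stdlib.h>    // for free()
   #include "webp/encode.h"

   size_t CompressRGBA(const uint8_t* rgba, int width, int height, int stride) {
     uint8_t* output = NULL;
     // Lossy compression at quality 80 (same 0..100 scale as 'cwebp -q'):
     const size_t size =
         WebPEncodeRGBA(rgba, width, height, stride, 80.f, &output);
     if (size == 0) return 0;        // encoding error
     // ... store or transmit the 'size' bytes pointed to by 'output' ...
     free(output);                   // the library allocates 'output' with malloc()
     return size;
   }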
+
+Advanced encoding API:
+----------------------
+
+A more advanced API is based on the WebPConfig and WebPPicture structures.
+
+WebPConfig contains the encoding settings and is not tied to a particular
+picture.
+WebPPicture contains input data, on which some WebPConfig will be used for
+compression.
+The encoding flow looks like:
+
+-------------------------------------- BEGIN PSEUDO EXAMPLE
+
+#include <webp/encode.h>
+
+ // Setup a config, starting from a preset and tuning some additional
+ // parameters
+ WebPConfig config;
+  if (!WebPConfigPreset(&config, WEBP_PRESET_PHOTO, quality_factor)) {
+    return 0;   // version error
+  }
+ // ... additional tuning
+ config.sns_strength = 90;
+ config.filter_sharpness = 6;
+ config_error = WebPValidateConfig(&config); // not mandatory, but useful
+
+ // Setup the input data
+ WebPPicture pic;
+ if (!WebPPictureInit(&pic)) {
+ return 0; // version error
+ }
+ pic.width = width;
+ pic.height = height;
+ // allocated picture of dimension width x height
+ if (!WebPPictureAllocate(&pic)) {
+ return 0; // memory error
+ }
+ // at this point, 'pic' has been initialized as a container,
+ // and can receive the Y/U/V samples.
+ // Alternatively, one could use ready-made import functions like
+ // WebPPictureImportRGB(), which will take care of memory allocation.
+ // In any case, past this point, one will have to call
+ // WebPPictureFree(&pic) to reclaim memory.
+
+ // Set up a byte-output write method. WebPMemoryWriter, for instance.
+ WebPMemoryWriter wrt;
+ pic.writer = MyFileWriter;
+ pic.custom_ptr = my_opaque_structure_to_make_MyFileWriter_work;
+ // initialize 'wrt' here...
+
+ // Compress!
+ int ok = WebPEncode(&config, &pic); // ok = 0 => error occurred!
+ WebPPictureFree(&pic); // must be called independently of the 'ok' result.
+
+ // output data should have been handled by the writer at that point.
+
+-------------------------------------- END PSEUDO EXAMPLE
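
The 'MyFileWriter' callback referenced in the example is user-supplied. One
possible sketch, assuming the WebPWriterFunction signature declared in
src/webp/encode.h (not reproduced in this excerpt) and a plain FILE* handed
over through pic.custom_ptr:

   #include <stdio.h>
   #include "webp/encode.h"

   // Streams each chunk of compressed output to the FILE* stored in
   // picture->custom_ptr. Returns false (0) to abort encoding on write error.
   static int MyFileWriter(const uint8_t* data, size_t data_size,
                           const WebPPicture* picture) {
     FILE* const out = (FILE*)picture->custom_ptr;
     return data_size == 0 || fwrite(data, data_size, 1, out) == 1;
   }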
+
+Decoding API:
+=============
+
+This is mainly just one function to call:
+
+#include "webp/decode.h"
+uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
+ int* width, int* height);
+
+Please have a look at the file src/webp/decode.h for the details.
+There are variants for decoding in BGR/RGBA/ARGB/BGRA order, along with
+decoding to raw Y'CbCr samples. One can also decode the image directly into a
+pre-allocated buffer.
+
+To detect a WebP file and gather the picture's dimensions, the function:
+ int WebPGetInfo(const uint8_t* data, size_t data_size,
+ int* width, int* height);
+is supplied. No decoding is involved when using it.
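
Put together, a basic decode path might look as follows (a sketch only; the
hypothetical helper assumes 'data' holds a complete WebP file already loaded
in memory):

   #include "webp/decode.h"

   uint8_t* DecodeToRGBA(const uint8_t* data, size_t data_size,
                         int* width, int* height) {
     if (!WebPGetInfo(data, data_size, width, height)) {
       return NULL;                  // not a valid WebP header
     }
     // The returned buffer holds width x height RGBA pixels in scan order
     // and must be released by the caller with free().
     return WebPDecodeRGBA(data, data_size, width, height);
   }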
+
+Incremental decoding API:
+=========================
+
+In the case when data is being progressively transmitted, pictures can still
+be incrementally decoded using a slightly more complicated API. Decoder state
+is stored into an instance of the WebPIDecoder object. This object can be
+created with the purpose of decoding either RGB or Y'CbCr samples.
+For instance:
+
+ WebPDecBuffer buffer;
+ WebPInitDecBuffer(&buffer);
+ buffer.colorspace = MODE_BGR;
+ ...
+ WebPIDecoder* idec = WebPINewDecoder(&buffer);
+
+As data is made progressively available, this incremental-decoder object
+can be used to decode the picture further. There are two (mutually exclusive)
+ways to pass freshly arrived data:
+
+either by appending the fresh bytes:
+
+ WebPIAppend(idec, fresh_data, size_of_fresh_data);
+
+or by just mentioning the new size of the transmitted data:
+
+ WebPIUpdate(idec, buffer, size_of_transmitted_buffer);
+
+Note that 'buffer' can be modified between each call to WebPIUpdate, in
+particular when the buffer is resized to accommodate larger data.
+
+These functions will return the decoding status: either VP8_STATUS_SUSPENDED if
+decoding is not finished yet or VP8_STATUS_OK when decoding is done. Any other
+status is an error condition.
+
+The 'idec' object must always be released (even upon an error condition) by
+calling: WebPIDelete(idec).
+
+To retrieve partially decoded picture samples, one must use the corresponding
+method: WebPIDecGetRGB or WebPIDecGetYUVA.
+It will return the last displayable pixel row.
+
+Lastly, note that decoding can also be performed into a pre-allocated pixel
+buffer. This buffer must be passed when creating a WebPIDecoder, calling
+WebPINewRGB() or WebPINewYUVA().
+
+Please have a look at the src/webp/decode.h header for further details.
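
Tying the incremental calls together, a possible driver loop is sketched
below. The read_more() function and the fixed chunk size are placeholders for
whatever transport actually delivers the bitstream:

   #include "webp/decode.h"

   // Placeholder: returns the number of fresh bytes copied into 'buf',
   // or 0 once the transmission has ended.
   extern size_t read_more(uint8_t* buf, size_t max_size);

   int IncrementalDecode(void) {
     uint8_t chunk[4096];
     WebPDecBuffer output;
     WebPInitDecBuffer(&output);
     output.colorspace = MODE_BGR;
     WebPIDecoder* const idec = WebPINewDecoder(&output);
     if (idec == NULL) return 0;

     VP8StatusCode status = VP8_STATUS_SUSPENDED;
     while (status == VP8_STATUS_SUSPENDED) {
       const size_t n = read_more(chunk, sizeof(chunk));
       if (n == 0) break;            // data ran out before the image was complete
       status = WebPIAppend(idec, chunk, n);
       // Rows decoded so far could be fetched here with WebPIDecGetRGB().
     }
     WebPIDelete(idec);              // always required, even on error
     WebPFreeDecBuffer(&output);     // releases the internally allocated pixels
     return (status == VP8_STATUS_OK);
   }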
+
+Advanced Decoding API:
+======================
+
+WebP decoding supports an advanced API which provides on-the-fly cropping and
+rescaling, something of great usefulness in memory-constrained environments like
+mobile phones. Basically, the memory usage will scale with the output's size,
+not the input's, when one only needs a quick preview or a zoomed in portion of
+an otherwise too-large picture. Some CPU can be saved too, incidentally.
+
+-------------------------------------- BEGIN PSEUDO EXAMPLE
+ // A) Init a configuration object
+ WebPDecoderConfig config;
+ CHECK(WebPInitDecoderConfig(&config));
+
+ // B) optional: retrieve the bitstream's features.
+ CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
+
+ // C) Adjust 'config' options, if needed
+ config.options.no_fancy_upsampling = 1;
+ config.options.use_scaling = 1;
+ config.options.scaled_width = scaledWidth();
+ config.options.scaled_height = scaledHeight();
+ // etc.
+
+ // D) Specify 'config' output options, such as the output colorspace.
+ // Optionally the external image decode buffer can also be specified.
+ config.output.colorspace = MODE_BGRA;
+ // Optionally, the config.output can be pointed to an external buffer as
+ // well for decoding the image. This externally supplied memory buffer
+ // should be big enough to store the decoded picture.
+ config.output.u.RGBA.rgba = (uint8_t*) memory_buffer;
+ config.output.u.RGBA.stride = scanline_stride;
+ config.output.u.RGBA.size = total_size_of_the_memory_buffer;
+ config.output.is_external_memory = 1;
+
+ // E) Decode the WebP image. There are two variants w.r.t decoding image.
+ // The first one (E.1) decodes the full image and the second one (E.2) is
+ // used to incrementally decode the image using small input buffers.
+ // Any one of these steps can be used to decode the WebP image.
+
+ // E.1) Decode full image.
+ CHECK(WebPDecode(data, data_size, &config) == VP8_STATUS_OK);
+
+ // E.2) Decode image incrementally.
+ WebPIDecoder* const idec = WebPIDecode(NULL, NULL, &config);
+ CHECK(idec != NULL);
+ while (bytes_remaining > 0) {
+ VP8StatusCode status = WebPIAppend(idec, input, bytes_read);
+ if (status == VP8_STATUS_OK || status == VP8_STATUS_SUSPENDED) {
+ bytes_remaining -= bytes_read;
+ } else {
+ break;
+ }
+ }
+ WebPIDelete(idec);
+
+ // F) Decoded image is now in config.output (and config.output.u.RGBA).
+ // It can be saved, displayed or otherwise processed.
-Decoding example:
-=================
+ // G) Reclaim memory allocated in config's object. It's safe to call
+ // this function even if the memory is external and wasn't allocated
+ // by WebPDecode().
+ WebPFreeDecBuffer(&config.output);
-there's a decoding example in example/dwebp.c which will take a .webp file and
-decode it to a PPM image file. This is simply to demonstrate use of the API.
-You can verify the file test.webp decodes to exactly the same as test_ref.ppm:
- `cd examples && ./dwebp test.webp -o test.ppm && diff test.ppm test_ref.ppm`
+-------------------------------------- END PSEUDO EXAMPLE
Bugs:
=====
@@ -72,3 +536,4 @@ Discuss:
========
Email: webp-discuss@webmproject.org
+Web: http://groups.google.com/a/webmproject.org/group/webp-discuss
diff --git a/README.android b/README.android
index c467de46..96dc4417 100644
--- a/README.android
+++ b/README.android
@@ -23,6 +23,7 @@ Local modifications:
- Fixed the Endian'ness bug for Color-Configs (RGB_565 & ARGB_4444).
The fix is similar to jpeglib handling for JCS_RGB_565 & JCS_RGBA_8888
color configs. Added the code under "ANDROID_WEBP_RGB" flag.
+- Sync-patch with libwebp ver 0.2.0 (head change#I3da2063b).
The Android.mk file creates WebP Decoder and Encoder static libraries which
can be added to any application by Adding to LOCAL_STATIC_LIBRARIES
diff --git a/include/webp/decode.h b/include/webp/decode.h
index ccb4c369..43b6c58f 100644
--- a/include/webp/decode.h
+++ b/include/webp/decode.h
@@ -1,11 +1,11 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
-// Main decoding functions for WEBP images.
+// Main decoding functions for WebP images.
//
// Author: Skal (pascal.massimino@gmail.com)
@@ -18,7 +18,7 @@
extern "C" {
#endif
-#define WEBP_DECODER_ABI_VERSION 0x0002
+#define WEBP_DECODER_ABI_VERSION 0x0200 // MAJOR(8b) + MINOR(8b)
// Return the decoder's version number, packed in hexadecimal using 8bits for
// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
@@ -27,42 +27,46 @@ WEBP_EXTERN(int) WebPGetDecoderVersion(void);
// Retrieve basic header information: width, height.
// This function will also validate the header and return 0 in
// case of formatting error.
-// Pointers *width/*height can be passed NULL if deemed irrelevant.
-WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, uint32_t data_size,
+// Pointers 'width' and 'height' can be passed NULL if deemed irrelevant.
+WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, size_t data_size,
int* width, int* height);
-// Decodes WEBP images pointed to by *data and returns RGB samples, along
-// with the dimensions in *width and *height.
+// Decodes WebP images pointed to by 'data' and returns RGBA samples, along
+// with the dimensions in *width and *height. The ordering of samples in
+// memory is R, G, B, A, R, G, B, A... in scan order (endian-independent).
// The returned pointer should be deleted calling free().
// Returns NULL in case of error.
-WEBP_EXTERN(uint8_t*) WebPDecodeRGB(const uint8_t* data, uint32_t data_size,
- int* width, int* height);
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, size_t data_size,
+ int* width, int* height);
-// Same as WebPDecodeRGB, but returning RGBA data.
-WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, uint32_t data_size,
+// Same as WebPDecodeRGBA, but returning A, R, G, B, A, R, G, B... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeARGB(const uint8_t* data, size_t data_size,
int* width, int* height);
-// Same as WebPDecodeRGBA, but returning ARGB data.
-WEBP_EXTERN(uint8_t*) WebPDecodeARGB(const uint8_t* data, uint32_t data_size,
+// Same as WebPDecodeRGBA, but returning B, G, R, A, B, G, R, A... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRA(const uint8_t* data, size_t data_size,
int* width, int* height);
-// This variant decode to BGR instead of RGB.
-WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, uint32_t data_size,
+// Same as WebPDecodeRGBA, but returning R, G, B, R, G, B... ordered data.
+// If the bitstream contains transparency, it is ignored.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGB(const uint8_t* data, size_t data_size,
int* width, int* height);
-// This variant decodes to BGRA instead of RGBA.
-WEBP_EXTERN(uint8_t*) WebPDecodeBGRA(const uint8_t* data, uint32_t data_size,
- int* width, int* height);
-// Decode WEBP images stored in *data in Y'UV format(*). The pointer returned is
-// the Y samples buffer. Upon return, *u and *v will point to the U and V
-// chroma data. These U and V buffers need NOT be free()'d, unlike the returned
-// Y luma one. The dimension of the U and V planes are both (*width + 1) / 2
-// and (*height + 1)/ 2.
+// Same as WebPDecodeRGB, but returning B, G, R, B, G, R... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size,
+ int* width, int* height);
+
+
+// Decode WebP images pointed to by 'data' to Y'UV format(*). The pointer
+// returned is the Y samples buffer. Upon return, *u and *v will point to
+// the U and V chroma data. These U and V buffers need NOT be free()'d,
+// unlike the returned Y luma one. The dimension of the U and V planes
+// are both (*width + 1) / 2 and (*height + 1)/ 2.
// Upon return, the Y buffer has a stride returned as '*stride', while U and V
// have a common stride returned as '*uv_stride'.
// Return NULL in case of error.
// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
-WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, uint32_t data_size,
+WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, size_t data_size,
int* width, int* height,
uint8_t** u, uint8_t** v,
int* stride, int* uv_stride);
@@ -75,22 +79,24 @@ WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, uint32_t data_size,
// The parameter 'output_stride' specifies the distance (in bytes)
// between scanlines. Hence, output_buffer_size is expected to be at least
// output_stride x picture-height.
-WEBP_EXTERN(uint8_t*) WebPDecodeRGBInto(
- const uint8_t* data, uint32_t data_size,
- uint8_t* output_buffer, int output_buffer_size, int output_stride);
WEBP_EXTERN(uint8_t*) WebPDecodeRGBAInto(
- const uint8_t* data, uint32_t data_size,
- uint8_t* output_buffer, int output_buffer_size, int output_stride);
+ const uint8_t* data, size_t data_size,
+ uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
WEBP_EXTERN(uint8_t*) WebPDecodeARGBInto(
- const uint8_t* data, uint32_t data_size,
- uint8_t* output_buffer, int output_buffer_size, int output_stride);
-// BGR variants
-WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto(
- const uint8_t* data, uint32_t data_size,
- uint8_t* output_buffer, int output_buffer_size, int output_stride);
+ const uint8_t* data, size_t data_size,
+ uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
WEBP_EXTERN(uint8_t*) WebPDecodeBGRAInto(
- const uint8_t* data, uint32_t data_size,
- uint8_t* output_buffer, int output_buffer_size, int output_stride);
+ const uint8_t* data, size_t data_size,
+ uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// RGB and BGR variants. Here too the transparency information, if present,
+// will be dropped and ignored.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBInto(
+ const uint8_t* data, size_t data_size,
+ uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto(
+ const uint8_t* data, size_t data_size,
+ uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
// WebPDecodeYUVInto() is a variant of WebPDecodeYUV() that operates directly
// into pre-allocated luma/chroma plane buffers. This function requires the
@@ -100,29 +106,56 @@ WEBP_EXTERN(uint8_t*) WebPDecodeBGRAInto(
// Pointer to the luma plane ('*luma') is returned or NULL if an error occurred
// during decoding (or because some buffers were found to be too small).
WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto(
- const uint8_t* data, uint32_t data_size,
- uint8_t* luma, int luma_size, int luma_stride,
- uint8_t* u, int u_size, int u_stride,
- uint8_t* v, int v_size, int v_stride);
+ const uint8_t* data, size_t data_size,
+ uint8_t* luma, size_t luma_size, int luma_stride,
+ uint8_t* u, size_t u_size, int u_stride,
+ uint8_t* v, size_t v_size, int v_stride);
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Output colorspaces and buffer
// Colorspaces
+// Note: the naming describes the byte-ordering of packed samples in memory.
+// For instance, MODE_BGRA relates to samples ordered as B,G,R,A,B,G,R,A,...
+// Non-capital names (e.g.: MODE_Argb) relate to pre-multiplied RGB channels.
+// RGB-565 and RGBA-4444 are also endian-agnostic and byte-oriented.
typedef enum { MODE_RGB = 0, MODE_RGBA = 1,
MODE_BGR = 2, MODE_BGRA = 3,
MODE_ARGB = 4, MODE_RGBA_4444 = 5,
MODE_RGB_565 = 6,
+ // RGB-premultiplied transparent modes (alpha value is preserved)
+ MODE_rgbA = 7,
+ MODE_bgrA = 8,
+ MODE_Argb = 9,
+ MODE_rgbA_4444 = 10,
// YUV modes must come after RGB ones.
- MODE_YUV = 7, MODE_YUVA = 8, // yuv 4:2:0
- MODE_LAST = 9
+ MODE_YUV = 11, MODE_YUVA = 12, // yuv 4:2:0
+ MODE_LAST = 13
} WEBP_CSP_MODE;
-// Generic structure for describing the sample buffer.
+// Some useful macros:
+static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) {
+ return (mode == MODE_rgbA || mode == MODE_bgrA || mode == MODE_Argb ||
+ mode == MODE_rgbA_4444);
+}
+
+static WEBP_INLINE int WebPIsAlphaMode(WEBP_CSP_MODE mode) {
+ return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB ||
+ mode == MODE_RGBA_4444 || mode == MODE_YUVA ||
+ WebPIsPremultipliedMode(mode));
+}
+
+static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) {
+ return (mode < MODE_YUV);
+}
+
+//------------------------------------------------------------------------------
+// WebPDecBuffer: Generic structure for describing the output sample buffer.
+
typedef struct { // view as RGBA
uint8_t* rgba; // pointer to RGBA samples
int stride; // stride in bytes from one scanline to the next.
- int size; // total size of the *rgba buffer.
+ size_t size; // total size of the *rgba buffer.
} WebPRGBABuffer;
typedef struct { // view as YUVA
@@ -130,9 +163,9 @@ typedef struct { // view as YUVA
int y_stride; // luma stride
int u_stride, v_stride; // chroma strides
int a_stride; // alpha stride
- int y_size; // luma plane size
- int u_size, v_size; // chroma planes size
- int a_size; // alpha-plane size
+ size_t y_size; // luma plane size
+ size_t u_size, v_size; // chroma planes size
+ size_t a_size; // alpha-plane size
} WebPYUVABuffer;
// Output buffer
@@ -144,25 +177,27 @@ typedef struct {
WebPRGBABuffer RGBA;
WebPYUVABuffer YUVA;
} u; // Nameless union of buffer parameters.
+ uint32_t pad[4]; // padding for later use
+
uint8_t* private_memory; // Internally allocated memory (only when
// is_external_memory is false). Should not be used
// externally, but accessed via the buffer union.
} WebPDecBuffer;
// Internal, version-checked, entry point
-WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer* const, int);
+WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int);
// Initialize the structure as empty. Must be called before any other use.
// Returns false in case of version mismatch
-static inline int WebPInitDecBuffer(WebPDecBuffer* const buffer) {
+static WEBP_INLINE int WebPInitDecBuffer(WebPDecBuffer* buffer) {
return WebPInitDecBufferInternal(buffer, WEBP_DECODER_ABI_VERSION);
}
// Free any memory associated with the buffer. Must always be called last.
// Note: doesn't free the 'buffer' structure itself.
-WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* const buffer);
+WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Enumeration of the status codes
typedef enum {
@@ -176,7 +211,7 @@ typedef enum {
VP8_STATUS_NOT_ENOUGH_DATA
} VP8StatusCode;
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Incremental decoding
//
// This API allows streamlined decoding of partial data.
@@ -185,7 +220,10 @@ typedef enum {
// picture is only partially decoded, pending additional input.
// Code example:
//
-// WebPIDecoder* const idec = WebPINew(mode);
+// WebPInitDecBuffer(&buffer);
+// buffer.colorspace = mode;
+// ...
+// WebPIDecoder* idec = WebPINewDecoder(&buffer);
// while (has_more_data) {
// // ... (get additional data)
// status = WebPIAppend(idec, new_data, new_data_size);
@@ -195,7 +233,7 @@ typedef enum {
//
// // The above call decodes the current available buffer.
// // Part of the image can now be refreshed by calling to
-// // WebPIDecGetRGB()/WebPIDecGetYUV() etc.
+// // WebPIDecGetRGB()/WebPIDecGetYUVA() etc.
// }
// WebPIDelete(idec);
@@ -207,42 +245,47 @@ typedef struct WebPIDecoder WebPIDecoder;
// is kept, which means that the lifespan of 'output_buffer' must be larger than
// that of the returned WebPIDecoder object.
// Returns NULL if the allocation failed.
-WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* const output_buffer);
-
-// Creates a WebPIDecoder object. Returns NULL in case of failure.
-// TODO(skal): DEPRECATED. Prefer using WebPINewDecoder().
-WEBP_EXTERN(WebPIDecoder*) WebPINew(WEBP_CSP_MODE mode);
+WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
// This function allocates and initializes an incremental-decoder object, which
-// will output the r/g/b(/a) samples specified by 'mode' into a preallocated
+// will output the RGB/A samples specified by 'csp' into a preallocated
// buffer 'output_buffer'. The size of this buffer is at least
// 'output_buffer_size' and the stride (distance in bytes between two scanlines)
// is specified by 'output_stride'. Returns NULL if the allocation failed.
WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
- WEBP_CSP_MODE mode,
- uint8_t* output_buffer, int output_buffer_size, int output_stride);
+ WEBP_CSP_MODE csp,
+ uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
// This function allocates and initializes an incremental-decoder object, which
// will output the raw luma/chroma samples into a preallocated planes. The luma
// plane is specified by its pointer 'luma', its size 'luma_size' and its stride
// 'luma_stride'. Similarly, the chroma-u plane is specified by the 'u',
-// 'u_size' and 'u_stride' parameters, and the chroma-v plane by 'v', 'v_size'
-// and 'v_size'.
+// 'u_size' and 'u_stride' parameters, and the chroma-v plane by 'v', 'v_size'
+// and 'v_stride'. Same goes for the alpha plane. The 'a' pointer can be passed
+// NULL in case one is not interested in the transparency plane.
// Returns NULL if the allocation failed.
+WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
+ uint8_t* luma, size_t luma_size, int luma_stride,
+ uint8_t* u, size_t u_size, int u_stride,
+ uint8_t* v, size_t v_size, int v_stride,
+ uint8_t* a, size_t a_size, int a_stride);
+
+// Deprecated version of the above, without the alpha plane.
+// Kept for backward compatibility.
WEBP_EXTERN(WebPIDecoder*) WebPINewYUV(
- uint8_t* luma, int luma_size, int luma_stride,
- uint8_t* u, int u_size, int u_stride,
- uint8_t* v, int v_size, int v_stride);
+ uint8_t* luma, size_t luma_size, int luma_stride,
+ uint8_t* u, size_t u_size, int u_stride,
+ uint8_t* v, size_t v_size, int v_stride);
// Deletes the WebPIDecoder object and associated memory. Must always be called
-// if WebPINew, WebPINewRGB or WebPINewYUV succeeded.
-WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* const idec);
+// if WebPINewDecoder, WebPINewRGB or WebPINewYUV succeeded.
+WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* idec);
// Copies and decodes the next available data. Returns VP8_STATUS_OK when
// the image is successfully decoded. Returns VP8_STATUS_SUSPENDED when more
// data is expected. Returns error in other cases.
WEBP_EXTERN(VP8StatusCode) WebPIAppend(
- WebPIDecoder* const idec, const uint8_t* data, uint32_t data_size);
+ WebPIDecoder* idec, const uint8_t* data, size_t data_size);
// A variant of the above function to be used when data buffer contains
// partial data from the beginning. In this case data buffer is not copied
@@ -250,23 +293,34 @@ WEBP_EXTERN(VP8StatusCode) WebPIAppend(
// Note that the value of the 'data' pointer can change between calls to
// WebPIUpdate, for instance when the data buffer is resized to fit larger data.
WEBP_EXTERN(VP8StatusCode) WebPIUpdate(
- WebPIDecoder* const idec, const uint8_t* data, uint32_t data_size);
-
-// Returns the r/g/b/(a) image decoded so far. Returns NULL if output params
-// are not initialized yet. The r/g/b/(a) output type corresponds to the mode
-// specified in WebPINew()/WebPINewRGB(). *last_y is the index of last decoded
-// row in raster scan order. Some pointers (*last_y, *width etc.) can be NULL if
-// corresponding information is not needed.
+ WebPIDecoder* idec, const uint8_t* data, size_t data_size);
+
+// Returns the RGB/A image decoded so far. Returns NULL if output params
+// are not initialized yet. The RGB/A output type corresponds to the colorspace
+// specified during call to WebPINewDecoder() or WebPINewRGB().
+// *last_y is the index of last decoded row in raster scan order. Some pointers
+// (*last_y, *width etc.) can be NULL if corresponding information is not
+// needed.
WEBP_EXTERN(uint8_t*) WebPIDecGetRGB(
- const WebPIDecoder* const idec, int* last_y,
+ const WebPIDecoder* idec, int* last_y,
int* width, int* height, int* stride);
-// Same as above function to get YUV image. Returns pointer to the luma plane
-// or NULL in case of error.
-WEBP_EXTERN(uint8_t*) WebPIDecGetYUV(
- const WebPIDecoder* const idec, int* last_y,
- uint8_t** u, uint8_t** v,
- int* width, int* height, int* stride, int* uv_stride);
+// Same as above function to get a YUVA image. Returns pointer to the luma
+// plane or NULL in case of error. If there is no alpha information,
+// the alpha pointer '*a' will be set to NULL.
+WEBP_EXTERN(uint8_t*) WebPIDecGetYUVA(
+ const WebPIDecoder* idec, int* last_y,
+ uint8_t** u, uint8_t** v, uint8_t** a,
+ int* width, int* height, int* stride, int* uv_stride, int* a_stride);
+
+// Deprecated alpha-less version of WebPIDecGetYUVA(): it will ignore the
+// alpha information (if present). Kept for backward compatibility.
+static WEBP_INLINE uint8_t* WebPIDecGetYUV(
+ const WebPIDecoder* idec, int* last_y, uint8_t** u, uint8_t** v,
+ int* width, int* height, int* stride, int* uv_stride) {
+ return WebPIDecGetYUVA(idec, last_y, u, v, NULL, width, height,
+ stride, uv_stride, NULL);
+}
// Generic call to retrieve information about the displayable area.
// If non NULL, the left/right/width/height pointers are filled with the visible
@@ -275,11 +329,9 @@ WEBP_EXTERN(uint8_t*) WebPIDecGetYUV(
// Otherwise returns the pointer to the internal representation. This structure
// is read-only, tied to WebPIDecoder's lifespan and should not be modified.
WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
- const WebPIDecoder* const idec,
- int* const left, int* const top,
- int* const width, int* const height);
+ const WebPIDecoder* idec, int* left, int* top, int* width, int* height);
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Advanced decoding parametrization
//
// Code sample for using the advanced decoding API
@@ -314,27 +366,30 @@ WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
// Features gathered from the bitstream
typedef struct {
- int width; // the original width, as read from the bitstream
- int height; // the original height, as read from the bitstream
- int has_alpha; // true if bitstream contains an alpha channel
+ int width; // Width in pixels, as read from the bitstream.
+ int height; // Height in pixels, as read from the bitstream.
+ int has_alpha; // True if the bitstream contains an alpha channel.
+
+ // Unused for now:
+ int bitstream_version; // should be 0 for now. TODO(later)
int no_incremental_decoding; // if true, using incremental decoding is not
// recommended.
int rotate; // TODO(later)
int uv_sampling; // should be 0 for now. TODO(later)
- int bitstream_version; // should be 0 for now. TODO(later)
+ uint32_t pad[3]; // padding for later use
} WebPBitstreamFeatures;
// Internal, version-checked, entry point
WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
- const uint8_t*, uint32_t, WebPBitstreamFeatures* const, int);
+ const uint8_t*, size_t, WebPBitstreamFeatures*, int);
// Retrieve features from the bitstream. The *features structure is filled
// with information gathered from the bitstream.
// Returns false in case of error or version mismatch.
// In case of error, features->bitstream_status will reflect the error code.
-static inline
- VP8StatusCode WebPGetFeatures(const uint8_t* data, uint32_t data_size,
- WebPBitstreamFeatures* const features) {
+static WEBP_INLINE VP8StatusCode WebPGetFeatures(
+ const uint8_t* data, size_t data_size,
+ WebPBitstreamFeatures* features) {
return WebPGetFeaturesInternal(data, data_size, features,
WEBP_DECODER_ABI_VERSION);
}
@@ -349,8 +404,12 @@ typedef struct {
int crop_width, crop_height; // dimension of the cropping area
int use_scaling; // if true, scaling is applied _afterward_
int scaled_width, scaled_height; // final resolution
+ int use_threads; // if true, use multi-threaded decoding
+
+ // Unused for now:
int force_rotation; // forced rotation (to be applied _last_)
int no_enhancement; // if true, discard enhancement layer
+ uint32_t pad[6]; // padding for later use
} WebPDecoderOptions;
// Main object storing the configuration for advanced decoding.
@@ -361,32 +420,32 @@ typedef struct {
} WebPDecoderConfig;
// Internal, version-checked, entry point
-WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig* const, int);
+WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
// Initialize the configuration as empty. This function must always be
// called first, unless WebPGetFeatures() is to be called.
// Returns false in case of mismatched version.
-static inline int WebPInitDecoderConfig(WebPDecoderConfig* const config) {
+static WEBP_INLINE int WebPInitDecoderConfig(WebPDecoderConfig* config) {
return WebPInitDecoderConfigInternal(config, WEBP_DECODER_ABI_VERSION);
}
-// Instantiate a new incremental decoder object with requested configuration.
-// The bitstream can be passed using *data and data_size parameter,
-// in which case the features will be parsed and stored into config->input.
-// Otherwise, 'data' can be NULL and now parsing will occur.
-// Note that 'config' can be NULL too, in which case a default configuration is
-// used.
+// Instantiate a new incremental decoder object with the requested
+// configuration. The bitstream can be passed using 'data' and 'data_size'
+// parameter, in which case the features will be parsed and stored into
+// config->input. Otherwise, 'data' can be NULL and no parsing will occur.
+// Note that 'config' can be NULL too, in which case a default configuration
+// is used.
// The return WebPIDecoder object must always be deleted calling WebPIDelete().
// Returns NULL in case of error (and config->status will then reflect
// the error condition).
-WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, uint32_t data_size,
- WebPDecoderConfig* const config);
+WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, size_t data_size,
+ WebPDecoderConfig* config);
// Non-incremental version. This version decodes the full data at once, taking
-// 'config' into account. Return decoding status (VP8_STATUS_OK if decoding
-// was successful).
-WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, uint32_t data_size,
- WebPDecoderConfig* const config);
+// 'config' into account. Returns decoding status (which should be VP8_STATUS_OK
+// if the decoding was successful).
+WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, size_t data_size,
+ WebPDecoderConfig* config);
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
diff --git a/include/webp/decode_vp8.h b/include/webp/decode_vp8.h
index af276add..cf1654fe 100644
--- a/include/webp/decode_vp8.h
+++ b/include/webp/decode_vp8.h
@@ -1,154 +1,14 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
-// Low-level API for VP8 decoder
-//
-// Author: Skal (pascal.massimino@gmail.com)
+// Dummy file retained to honor decode_vp8.h include in
+// skia/src/images/SkImageDecoder_libwebp.cpp
#ifndef WEBP_WEBP_DECODE_VP8_H_
#define WEBP_WEBP_DECODE_VP8_H_
-#include "./decode.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-//-----------------------------------------------------------------------------
-// Lower-level API
-//
-// These functions provide fine-grained control of the decoding process.
-// The call flow should resemble:
-//
-// VP8Io io;
-// VP8InitIo(&io);
-// io.data = data;
-// io.data_size = size;
-// /* customize io's functions (setup()/put()/teardown()) if needed. */
-//
-// VP8Decoder* dec = VP8New();
-// bool ok = VP8Decode(dec);
-// if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
-// VP8Delete(dec);
-// return ok;
-
-// Input / Output
-typedef struct VP8Io VP8Io;
-typedef int (*VP8IoPutHook)(const VP8Io* io);
-typedef int (*VP8IoSetupHook)(VP8Io* io);
-typedef void (*VP8IoTeardownHook)(const VP8Io* io);
-
-struct VP8Io {
- // set by VP8GetHeaders()
- int width, height; // picture dimensions, in pixels (invariable).
- // These are the original, uncropped dimensions.
- // The actual area passed to put() is stored
- // in mb_w / mb_h fields.
-
- // set before calling put()
- int mb_y; // position of the current rows (in pixels)
- int mb_w; // number of columns in the sample
- int mb_h; // number of rows in the sample
- const uint8_t* y, *u, *v; // rows to copy (in yuv420 format)
- int y_stride; // row stride for luma
- int uv_stride; // row stride for chroma
-
- void* opaque; // user data
-
- // called when fresh samples are available. Currently, samples are in
- // YUV420 format, and can be up to width x 24 in size (depending on the
- // in-loop filtering level, e.g.). Should return false in case of error
- // or abort request. The actual size of the area to update is mb_w x mb_h
- // in size, taking cropping into account.
- VP8IoPutHook put;
-
- // called just before starting to decode the blocks.
- // Should returns 0 in case of error.
- VP8IoSetupHook setup;
-
- // called just after block decoding is finished (or when an error occurred).
- VP8IoTeardownHook teardown;
-
- // this is a recommendation for the user-side yuv->rgb converter. This flag
- // is set when calling setup() hook and can be overwritten by it. It then
- // can be taken into consideration during the put() method.
- int fancy_upsampling;
-
- // Input buffer.
- uint32_t data_size;
- const uint8_t* data;
-
- // If true, in-loop filtering will not be performed even if present in the
- // bitstream. Switching off filtering may speed up decoding at the expense
- // of more visible blocking. Note that output will also be non-compliant
- // with the VP8 specifications.
- int bypass_filtering;
-
- // Cropping parameters.
- int use_cropping;
- int crop_left, crop_right, crop_top, crop_bottom;
-
- // Scaling parameters.
- int use_scaling;
- int scaled_width, scaled_height;
-
- // pointer to the alpha data (if present) corresponding to the rows
- const uint8_t* a;
-};
-
-// Internal, version-checked, entry point
-WEBP_EXTERN(int) VP8InitIoInternal(VP8Io* const, int);
-
-// Set the custom IO function pointers and user-data. The setter for IO hooks
-// should be called before initiating incremental decoding. Returns true if
-// WebPIDecoder object is successfully modified, false otherwise.
-WEBP_EXTERN(int) WebPISetIOHooks(WebPIDecoder* const idec,
- VP8IoPutHook put,
- VP8IoSetupHook setup,
- VP8IoTeardownHook teardown,
- void* user_data);
-
-// Main decoding object. This is an opaque structure.
-typedef struct VP8Decoder VP8Decoder;
-
-// Create a new decoder object.
-WEBP_EXTERN(VP8Decoder*) VP8New(void);
-
-// Must be called to make sure 'io' is initialized properly.
-// Returns false in case of version mismatch. Upon such failure, no other
-// decoding function should be called (VP8Decode, VP8GetHeaders, ...)
-static inline int VP8InitIo(VP8Io* const io) {
- return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION);
-}
-
-// Start decoding a new picture. Returns true if ok.
-WEBP_EXTERN(int) VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io);
-
-// Decode a picture. Will call VP8GetHeaders() if it wasn't done already.
-// Returns false in case of error.
-WEBP_EXTERN(int) VP8Decode(VP8Decoder* const dec, VP8Io* const io);
-
-// Return current status of the decoder:
-WEBP_EXTERN(VP8StatusCode) VP8Status(VP8Decoder* const dec);
-
-// return readable string corresponding to the last status.
-WEBP_EXTERN(const char*) VP8StatusMessage(VP8Decoder* const dec);
-
-// Resets the decoder in its initial state, reclaiming memory.
-// Not a mandatory call between calls to VP8Decode().
-WEBP_EXTERN(void) VP8Clear(VP8Decoder* const dec);
-
-// Destroy the decoder object.
-WEBP_EXTERN(void) VP8Delete(VP8Decoder* const dec);
-
-//-----------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-} // extern "C"
-#endif
-
#endif /* WEBP_WEBP_DECODE_VP8_H_ */
diff --git a/include/webp/encode.h b/include/webp/encode.h
index af6f0a2c..d87426bb 100644
--- a/include/webp/encode.h
+++ b/include/webp/encode.h
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -12,26 +12,27 @@
#ifndef WEBP_WEBP_ENCODE_H_
#define WEBP_WEBP_ENCODE_H_
-#include <stdlib.h>
-
#include "./types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-#define WEBP_ENCODER_ABI_VERSION 0x0002
+#define WEBP_ENCODER_ABI_VERSION 0x0200 // MAJOR(8b) + MINOR(8b)
// Return the encoder's version number, packed in hexadecimal using 8bits for
// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
WEBP_EXTERN(int) WebPGetEncoderVersion(void);
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// One-stop-shop call! No questions asked:
// Returns the size of the compressed data (pointed to by *output), or 0 if
// an error occurred. The compressed data must be released by the caller
// using the call 'free(*output)'.
+// These functions compress using the lossy format, and the quality_factor
+// can go from 0 (smaller output, lower quality) to 100 (best quality,
+// larger output).
WEBP_EXTERN(size_t) WebPEncodeRGB(const uint8_t* rgb,
int width, int height, int stride,
float quality_factor, uint8_t** output);
@@ -45,31 +46,71 @@ WEBP_EXTERN(size_t) WebPEncodeBGRA(const uint8_t* bgra,
int width, int height, int stride,
float quality_factor, uint8_t** output);
-//-----------------------------------------------------------------------------
+// These functions are the equivalent of the above, but compressing in a
+// lossless manner. Files are usually larger than lossy format, but will
+// not suffer any compression loss.
+WEBP_EXTERN(size_t) WebPEncodeLosslessRGB(const uint8_t* rgb,
+ int width, int height, int stride,
+ uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessBGR(const uint8_t* bgr,
+ int width, int height, int stride,
+ uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessRGBA(const uint8_t* rgba,
+ int width, int height, int stride,
+ uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
+ int width, int height, int stride,
+ uint8_t** output);
+
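
A minimal usage sketch of the one-stop-shop calls above, assuming a tightly packed 24-bit RGB buffer (the helper name is illustrative; on success the caller releases *output with free()):

#include "webp/encode.h"

static size_t EncodeBuffer(const uint8_t* rgb, int width, int height,
                           int lossless, uint8_t** output) {
  const int stride = 3 * width;  // no row padding assumed
  return lossless
      ? WebPEncodeLosslessRGB(rgb, width, height, stride, output)
      : WebPEncodeRGB(rgb, width, height, stride, 75.f, output);
}
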
+//------------------------------------------------------------------------------
// Coding parameters
+// Image characteristics hint for the underlying encoder.
+typedef enum {
+ WEBP_HINT_DEFAULT = 0, // default preset.
+ WEBP_HINT_PICTURE, // digital picture, like portrait, inner shot
+ WEBP_HINT_PHOTO, // outdoor photograph, with natural lighting
+ WEBP_HINT_GRAPH, // Discrete tone image (graph, map-tile etc).
+ WEBP_HINT_LAST
+} WebPImageHint;
+
typedef struct {
- float quality; // between 0 (smallest file) and 100 (biggest)
- int target_size; // if non-zero, set the desired target size in bytes.
- // Takes precedence over the 'compression' parameter.
- float target_PSNR; // if non-zero, specifies the minimal distortion to
- // try to achieve. Takes precedence over target_size.
- int method; // quality/speed trade-off (0=fast, 6=slower-better)
- int segments; // maximum number of segments to use, in [1..4]
- int sns_strength; // Spatial Noise Shaping. 0=off, 100=maximum.
- int filter_strength; // range: [0 = off .. 100 = strongest]
- int filter_sharpness; // range: [0 = off .. 7 = least sharp]
- int filter_type; // filtering type: 0 = simple, 1 = strong
- // (only used if filter_strength > 0 or autofilter > 0)
- int autofilter; // Auto adjust filter's strength [0 = off, 1 = on]
- int pass; // number of entropy-analysis passes (in [1..10]).
-
- int show_compressed; // if true, export the compressed picture back.
- // In-loop filtering is not applied.
- int preprocessing; // preprocessing filter (0=none, 1=segment-smooth)
- int partitions; // log2(number of token partitions) in [0..3]
- // Default is set to 0 for easier progressive decoding.
- int alpha_compression; // Algorithm for optimizing the alpha plane (0 = none)
+ int lossless; // Lossless encoding (0=lossy(default), 1=lossless).
+ float quality; // between 0 (smallest file) and 100 (biggest)
+ int method; // quality/speed trade-off (0=fast, 6=slower-better)
+
+ WebPImageHint image_hint; // Hint for image type (lossless only for now).
+
+ // Parameters related to lossy compression only:
+ int target_size; // if non-zero, set the desired target size in bytes.
+ // Takes precedence over the 'compression' parameter.
+ float target_PSNR; // if non-zero, specifies the minimal distortion to
+ // try to achieve. Takes precedence over target_size.
+ int segments; // maximum number of segments to use, in [1..4]
+ int sns_strength; // Spatial Noise Shaping. 0=off, 100=maximum.
+ int filter_strength; // range: [0 = off .. 100 = strongest]
+ int filter_sharpness; // range: [0 = off .. 7 = least sharp]
+ int filter_type; // filtering type: 0 = simple, 1 = strong (only used
+ // if filter_strength > 0 or autofilter > 0)
+ int autofilter; // Auto adjust filter's strength [0 = off, 1 = on]
+ int alpha_compression; // Algorithm for encoding the alpha plane (0 = none,
+ // 1 = compressed with WebP lossless). Default is 1.
+ int alpha_filtering; // Predictive filtering method for alpha plane.
+ // 0: none, 1: fast, 2: best. Default is 1.
+ int alpha_quality; // Between 0 (smallest size) and 100 (lossless).
+ // Default is 100.
+ int pass; // number of entropy-analysis passes (in [1..10]).
+
+ int show_compressed; // if true, export the compressed picture back.
+ // In-loop filtering is not applied.
+ int preprocessing; // preprocessing filter (0=none, 1=segment-smooth)
+ int partitions; // log2(number of token partitions) in [0..3]. Default
+ // is set to 0 for easier progressive decoding.
+ int partition_limit; // quality degradation allowed to fit the 512k limit
+ // on prediction modes coding (0: no degradation,
+ // 100: maximum possible degradation).
+
+ uint32_t pad[8]; // padding for later use
} WebPConfig;
// Enumerate some predefined settings for WebPConfig, depending on the type
@@ -84,13 +125,13 @@ typedef enum {
} WebPPreset;
// Internal, version-checked, entry point
-WEBP_EXTERN(int) WebPConfigInitInternal(
- WebPConfig* const, WebPPreset, float, int);
+WEBP_EXTERN(int) WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
// Should always be called, to initialize a fresh WebPConfig structure before
-// modification. Returns 0 in case of version mismatch. WebPConfigInit() must
-// have succeeded before using the 'config' object.
-static inline int WebPConfigInit(WebPConfig* const config) {
+// modification. Returns false in case of version mismatch. WebPConfigInit()
+// must have succeeded before using the 'config' object.
+// Note that the default values are lossless=0 and quality=75.
+static WEBP_INLINE int WebPConfigInit(WebPConfig* config) {
return WebPConfigInitInternal(config, WEBP_PRESET_DEFAULT, 75.f,
WEBP_ENCODER_ABI_VERSION);
}
@@ -98,25 +139,27 @@ static inline int WebPConfigInit(WebPConfig* const config) {
// This function will initialize the configuration according to a predefined
// set of parameters (referred to by 'preset') and a given quality factor.
// This function can be called as a replacement to WebPConfigInit(). Will
-// return 0 in case of error.
-static inline int WebPConfigPreset(WebPConfig* const config,
- WebPPreset preset, float quality) {
+// return false in case of error.
+static WEBP_INLINE int WebPConfigPreset(WebPConfig* config,
+ WebPPreset preset, float quality) {
return WebPConfigInitInternal(config, preset, quality,
WEBP_ENCODER_ABI_VERSION);
}
-// Returns 1 if all parameters are in valid range and the configuration is OK.
-WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* const config);
+// Returns true if 'config' is non-NULL and all configuration parameters are
+// within their valid ranges.
+WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* config);
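
As an illustration of the configuration calls above, a sketch that prepares and validates a WebPConfig (the method/quality values chosen here are arbitrary):

#include "webp/encode.h"

static int SetupConfig(WebPConfig* config, int lossless) {
  if (!WebPConfigPreset(config, WEBP_PRESET_DEFAULT, 80.f)) {
    return 0;  // ABI version mismatch
  }
  config->lossless = lossless;
  config->method = 4;          // speed vs. size trade-off
  return WebPValidateConfig(config);
}
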
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Input / Output
typedef struct WebPPicture WebPPicture; // main structure for I/O
-// non-essential structure for storing auxiliary statistics
+// Structure for storing auxiliary statistics (mostly for lossy encoding).
typedef struct {
- float PSNR[4]; // peak-signal-to-noise ratio for Y/U/V/All
int coded_size; // final size
+
+ float PSNR[5]; // peak-signal-to-noise ratio for Y/U/V/All/Alpha
int block_count[3]; // number of intra4/intra16/skipped macroblocks
int header_bytes[2]; // approximate number of bytes spent for header
// and mode-partition #0
@@ -128,13 +171,46 @@ typedef struct {
int alpha_data_size; // size of the transparency data
int layer_data_size; // size of the enhancement layer data
+
+ // lossless encoder statistics
+ uint32_t lossless_features; // bit0:predictor bit1:cross-color transform
+ // bit2:subtract-green bit3:color indexing
+ int histogram_bits; // number of precision bits of histogram
+ int transform_bits; // precision bits for transform
+ int cache_bits; // number of bits for color cache lookup
+ int palette_size; // number of colors in palette, if used
+ int lossless_size; // final lossless size
+
+ uint32_t pad[4]; // padding for later use
} WebPAuxStats;
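
As a small sketch of the bit layout documented in 'lossless_features' above (the helper is hypothetical):

#include <stdio.h>
#include "webp/encode.h"

static void PrintLosslessFeatures(const WebPAuxStats* stats) {
  printf("predictor:%d cross-color:%d subtract-green:%d color-indexing:%d\n",
         !!(stats->lossless_features & 1), !!(stats->lossless_features & 2),
         !!(stats->lossless_features & 4), !!(stats->lossless_features & 8));
}
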
-// Signature for output function. Should return 1 if writing was successful.
+// Signature for output function. Should return true if writing was successful.
// data/data_size is the segment of data to write, and 'picture' is for
// reference (and so one can make use of picture->custom_ptr).
typedef int (*WebPWriterFunction)(const uint8_t* data, size_t data_size,
- const WebPPicture* const picture);
+ const WebPPicture* picture);
+
+// WebPMemoryWrite: a special WebPWriterFunction that writes to memory using
+// the following WebPMemoryWriter object (to be set as a custom_ptr).
+typedef struct {
+ uint8_t* mem; // final buffer (of size 'max_size', larger than 'size').
+ size_t size; // final size
+ size_t max_size; // total capacity
+ uint32_t pad[1]; // padding for later use
+} WebPMemoryWriter;
+
+// The following must be called first before any use.
+WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer);
+
+// The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon
+// completion, writer.mem and writer.size will hold the coded data.
+WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
+ const WebPPicture* picture);
+
+// Progress hook, called from time to time to report progress. It can return
+// false to request an abort of the encoding process, or true otherwise if
+// everything is OK.
+typedef int (*WebPProgressHook)(int percent, const WebPPicture* picture);
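
For instance, a trivial progress hook might look as follows (a sketch; it is installed through picture->progress_hook, and returning 0 aborts the encoding with VP8_ENC_ERROR_USER_ABORT):

#include <stdio.h>
#include "webp/encode.h"

static int ReportProgress(int percent, const WebPPicture* picture) {
  (void)picture;   // picture->user_data could carry application state
  fprintf(stderr, "encoding... %d%%\r", percent);
  return 1;        // keep going; 0 would request an abort
}
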
typedef enum {
// chroma sampling
@@ -159,25 +235,49 @@ typedef enum {
VP8_ENC_ERROR_NULL_PARAMETER, // a pointer parameter is NULL
VP8_ENC_ERROR_INVALID_CONFIGURATION, // configuration is invalid
VP8_ENC_ERROR_BAD_DIMENSION, // picture has invalid width/height
- VP8_ENC_ERROR_PARTITION0_OVERFLOW, // partition is too bigger than 16M
- VP8_ENC_ERROR_PARTITION_OVERFLOW, // partition is too bigger than 512k
+ VP8_ENC_ERROR_PARTITION0_OVERFLOW, // partition is bigger than 512k
+ VP8_ENC_ERROR_PARTITION_OVERFLOW, // partition is bigger than 16M
VP8_ENC_ERROR_BAD_WRITE, // error while flushing bytes
+ VP8_ENC_ERROR_FILE_TOO_BIG, // file is bigger than 4G
+ VP8_ENC_ERROR_USER_ABORT, // abort request by user
+ VP8_ENC_ERROR_LAST // list terminator. always last.
} WebPEncodingError;
+// maximum width/height allowed (inclusive), in pixels
+#define WEBP_MAX_DIMENSION 16383
+
+// Main exchange structure (input samples, output bytes, statistics)
struct WebPPicture {
- // input
+
+ // INPUT
+ //////////////
+ // Main flag for encoder selecting between ARGB or YUV input.
+ // It is recommended to use ARGB input (*argb, argb_stride) for lossless
+ // compression, and YUV input (*y, *u, *v, etc.) for lossy compression
+ // since these are the respective native colorspaces for these formats.
+ int use_argb;
+
+ // YUV input (mostly used for input to lossy compression)
WebPEncCSP colorspace; // colorspace: should be YUV420 for now (=Y'CbCr).
- int width, height; // dimensions.
+ int width, height; // dimensions (less than or equal to WEBP_MAX_DIMENSION)
uint8_t *y, *u, *v; // pointers to luma/chroma planes.
int y_stride, uv_stride; // luma/chroma strides.
- uint8_t *a; // pointer to the alpha plane
+ uint8_t* a; // pointer to the alpha plane
int a_stride; // stride of the alpha plane
+ uint32_t pad1[2]; // padding for later use
+
+ // ARGB input (mostly used for input to lossless compression)
+ uint32_t* argb; // Pointer to argb (32 bit) plane.
+ int argb_stride; // This is stride in pixels units, not bytes.
+ uint32_t pad2[3]; // padding for later use
- // output
+ // OUTPUT
+ ///////////////
+ // Byte-emission hook, to store compressed bytes as they are ready.
WebPWriterFunction writer; // can be NULL
void* custom_ptr; // can be used by the writer.
- // map for extra information
+ // map for extra information (only for lossy compression mode)
int extra_info_type; // 1: intra type, 2: segment, 3: quant
// 4: intra-16 prediction mode,
// 5: chroma prediction mode,
@@ -187,84 +287,174 @@ struct WebPPicture {
// will be filled with a macroblock map, depending
// on extra_info_type.
- // where to store statistics, if not NULL:
+ // STATS AND REPORTS
+ ///////////////////////////
+ // Pointer to side statistics (updated only if not NULL)
WebPAuxStats* stats;
- // original samples (for non-YUV420 modes)
+ // Error code for the latest error encountered during encoding
+ WebPEncodingError error_code;
+
+ // If not NULL, report progress during encoding.
+ WebPProgressHook progress_hook;
+
+ void* user_data; // this field is free to be set to any value and
+ // used during callbacks (like progress-report e.g.).
+
+ uint32_t pad3[3]; // padding for later use
+
+ // Unused for now: original samples (for non-YUV420 modes)
uint8_t *u0, *v0;
int uv0_stride;
- WebPEncodingError error_code; // error code in case of problem.
+ uint32_t pad4[7]; // padding for later use
+
+ // PRIVATE FIELDS
+ ////////////////////
+ void* memory_; // row chunk of memory for yuva planes
+ void* memory_argb_; // and for argb too.
+ void* pad5[2]; // padding for later use
};
// Internal, version-checked, entry point
-WEBP_EXTERN(int) WebPPictureInitInternal(WebPPicture* const, int);
+WEBP_EXTERN(int) WebPPictureInitInternal(WebPPicture*, int);
-// Should always be called, to initialize the structure. Returns 0 in case of
-// version mismatch. WebPPictureInit() must have succeeded before using the
+// Should always be called, to initialize the structure. Returns false in case
+// of version mismatch. WebPPictureInit() must have succeeded before using the
// 'picture' object.
-static inline int WebPPictureInit(WebPPicture* const picture) {
+// Note that, by default, use_argb is false and colorspace is WEBP_YUV420.
+static WEBP_INLINE int WebPPictureInit(WebPPicture* picture) {
return WebPPictureInitInternal(picture, WEBP_ENCODER_ABI_VERSION);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// WebPPicture utils
// Convenience allocation / deallocation based on picture->width/height:
// Allocate y/u/v buffers as per colorspace/width/height specification.
// Note! This function will free the previous buffer if needed.
-// Returns 0 in case of memory error.
-WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* const picture);
-
-// Release memory allocated by WebPPictureAlloc() or WebPPictureImport*()
-// Note that this function does _not_ free the memory pointed to by 'picture'.
-WEBP_EXTERN(void) WebPPictureFree(WebPPicture* const picture);
-
-// Copy the pixels of *src into *dst, using WebPPictureAlloc.
-// Returns 0 in case of memory allocation error.
-WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* const src,
- WebPPicture* const dst);
+// Returns false in case of memory error.
+WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* picture);
+
+// Release the memory allocated by WebPPictureAlloc() or WebPPictureImport*().
+// Note that this function does _not_ free the memory used by the 'picture'
+// object itself.
+// Besides memory (which is reclaimed) all other fields of 'picture' are
+// preserved.
+WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
+
+// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return,
+// *dst will fully own the copied pixels (this is not a view).
+// Returns false in case of memory allocation error.
+WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
+
+// Compute PSNR, SSIM or LSIM distortion metric between two pictures.
+// Result is in dB, stored in result[] in the Y/U/V/Alpha/All order.
+// Returns false in case of error (pic1 and pic2 don't have the same dimensions, ...).
+// Warning: this function is rather CPU-intensive.
+WEBP_EXTERN(int) WebPPictureDistortion(
+ const WebPPicture* pic1, const WebPPicture* pic2,
+ int metric_type, // 0 = PSNR, 1 = SSIM, 2 = LSIM
+ float result[5]);
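
For example, a sketch that extracts the overall PSNR between a reference picture and its decoded round-trip (metric_type 0 selects PSNR; index 4 of result[] is the 'All' value):

#include "webp/encode.h"

static float OverallPSNR(const WebPPicture* ref, const WebPPicture* test) {
  float distortion[5];
  if (!WebPPictureDistortion(ref, test, 0 /* PSNR */, distortion)) {
    return -1.f;   // e.g. mismatched dimensions
  }
  return distortion[4];
}
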
// self-crops a picture to the rectangle defined by top/left/width/height.
-// Returns 0 in case of memory allocation error, or if the rectangle is
+// Returns false in case of memory allocation error, or if the rectangle is
// outside of the source picture.
-WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* const picture,
+// The rectangle for the view is defined by the top-left corner pixel
+// coordinates (left, top) as well as its width and height. This rectangle
+// must be fully contained inside the 'src' source picture. If the source
+// picture uses the YUV420 colorspace, the top and left coordinates will be
+// snapped to even values.
+WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
int left, int top, int width, int height);
+// Extracts a view from 'src' picture into 'dst'. The rectangle for the view
+// is defined by the top-left corner pixel coordinates (left, top) as well
+// as its width and height. This rectangle must be fully contained inside
+// the 'src' source picture. If the source picture uses the YUV420 colorspace,
+// the top and left coordinates will be snapped to even values.
+// Picture 'src' must outlive the 'dst' picture. Self-extraction of a view is
+// allowed ('src' equal to 'dst') as a means of fast cropping (but note that
+// doing so, the original dimensions will be lost).
+// Returns false in case of memory allocation error or invalid parameters.
+WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
+ int left, int top, int width, int height,
+ WebPPicture* dst);
+
+// Returns true if the 'picture' is actually a view and therefore does
+// not own the memory for pixels.
+WEBP_EXTERN(int) WebPPictureIsView(const WebPPicture* picture);
+
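
A sketch of zero-copy cropping with a view (the helper is illustrative and assumes w/h fit inside 'src'; left/top may get snapped to even values for YUV420 input, and 'src' must outlive 'view'):

#include "webp/encode.h"

static int CenteredView(const WebPPicture* src, int w, int h,
                        WebPPicture* view) {
  const int left = (src->width - w) / 2;
  const int top = (src->height - h) / 2;
  if (!WebPPictureInit(view)) return 0;
  return WebPPictureView(src, left, top, w, h, view);
}
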
// Rescale a picture to new dimensions width x height.
// No gamma correction is applied.
// Returns false in case of error (invalid parameter or insufficient memory).
-WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* const pic,
- int width, int height);
+WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* pic, int width, int height);
// Colorspace conversion function to import RGB samples.
// Previous buffer will be free'd, if any.
// *rgb buffer should have a size of at least height * rgb_stride.
-// Returns 0 in case of memory error.
+// Returns false in case of memory error.
WEBP_EXTERN(int) WebPPictureImportRGB(
- WebPPicture* const picture, const uint8_t* const rgb, int rgb_stride);
-// Same, but for RGBA buffer
+ WebPPicture* picture, const uint8_t* rgb, int rgb_stride);
+// Same, but for RGBA buffer.
WEBP_EXTERN(int) WebPPictureImportRGBA(
- WebPPicture* const picture, const uint8_t* const rgba, int rgba_stride);
-
-// Variant of the above, but taking BGR(A) input:
+ WebPPicture* picture, const uint8_t* rgba, int rgba_stride);
+// Same, but for a 32-bit RGBX buffer: imports the RGB samples directly,
+// ignoring the 4th (alpha-like) channel. Avoids needing to copy the data
+// to a temporary 24-bit RGB buffer just to import the RGB only.
+WEBP_EXTERN(int) WebPPictureImportRGBX(
+ WebPPicture* picture, const uint8_t* rgbx, int rgbx_stride);
+
+// Variants of the above, but taking BGR(A|X) input.
WEBP_EXTERN(int) WebPPictureImportBGR(
- WebPPicture* const picture, const uint8_t* const bgr, int bgr_stride);
+ WebPPicture* picture, const uint8_t* bgr, int bgr_stride);
WEBP_EXTERN(int) WebPPictureImportBGRA(
- WebPPicture* const picture, const uint8_t* const bgra, int bgra_stride);
-
-//-----------------------------------------------------------------------------
+ WebPPicture* picture, const uint8_t* bgra, int bgra_stride);
+WEBP_EXTERN(int) WebPPictureImportBGRX(
+ WebPPicture* picture, const uint8_t* bgrx, int bgrx_stride);
+
+// Converts picture->argb data to the YUVA format specified by 'colorspace'.
+// Upon return, picture->use_argb is set to false. The presence of real
+// non-opaque transparent values is detected, and 'colorspace' will be
+// adjusted accordingly. Note that this method is lossy.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture,
+ WebPEncCSP colorspace);
+
+// Converts picture->yuv to picture->argb and sets picture->use_argb to true.
+// The input format must be YUV_420 or YUV_420A.
+// Note that the use of this method is discouraged if one has access to the
+// raw ARGB samples, since using YUV420 is comparatively lossy. Also, the
+// conversion from YUV420 to ARGB incurs a small loss too.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureYUVAToARGB(WebPPicture* picture);
+
+// Helper function: given a width x height plane of YUV(A) samples
+// (with stride 'stride'), clean up the YUV samples under fully transparent
+// areas, to help compressibility (no guarantee, though).
+WEBP_EXTERN(void) WebPCleanupTransparentArea(WebPPicture* picture);
+
+// Scan the picture 'picture' for the presence of non-fully-opaque alpha values.
+// Returns true in such a case. Otherwise returns false (indicating, for
+// example, that the alpha plane can be ignored altogether).
+WEBP_EXTERN(int) WebPPictureHasTransparency(const WebPPicture* picture);
+
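
A sketch combining the helpers above to prepare an ARGB picture for lossy encoding: convert to YUV(A) first, then optionally smooth the samples hidden behind fully transparent pixels:

#include "webp/encode.h"

static int PrepareForLossy(WebPPicture* pic) {
  if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
  if (WebPPictureHasTransparency(pic)) {
    WebPCleanupTransparentArea(pic);   // best-effort, no guarantee
  }
  return 1;
}
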
+//------------------------------------------------------------------------------
// Main call
// Main encoding call, after config and picture have been initialized.
-// 'picture' must be less than 16384x16384 in dimension, and the 'config' object
-// must be a valid one.
+// 'picture' must be less than 16384x16384 in dimension (cf WEBP_MAX_DIMENSION),
+// and the 'config' object must be a valid one.
// Returns false in case of error, true otherwise.
// In case of error, picture->error_code is updated accordingly.
-WEBP_EXTERN(int) WebPEncode(
- const WebPConfig* const config, WebPPicture* const picture);
-
-//-----------------------------------------------------------------------------
+// 'picture' can hold the source samples in either YUV(A) or ARGB form, depending
+// on the value of 'picture->use_argb'. It is highly recommended to use
+// the former for lossy encoding, and the latter for lossless encoding
+// (when config.lossless is true). Automatic conversion from one format to
+// the other is provided, but both directions incur some loss.
+WEBP_EXTERN(int) WebPEncode(const WebPConfig* config, WebPPicture* picture);
+
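
Putting the pieces together, a sketch of a complete lossy encode of an RGB buffer into memory (minimal error handling; the caller eventually releases wrt->mem with free()):

#include "webp/encode.h"

static int CompressRGB(const uint8_t* rgb, int width, int height, int stride,
                       float quality, WebPMemoryWriter* wrt) {
  WebPConfig config;
  WebPPicture pic;
  int ok;
  if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, quality)) return 0;
  if (!WebPPictureInit(&pic)) return 0;
  pic.width = width;
  pic.height = height;
  if (!WebPPictureImportRGB(&pic, rgb, stride)) return 0;
  WebPMemoryWriterInit(wrt);
  pic.writer = WebPMemoryWrite;   // send compressed bytes to 'wrt'
  pic.custom_ptr = wrt;
  ok = WebPEncode(&config, &pic); // output lands in wrt->mem / wrt->size
  WebPPictureFree(&pic);          // releases the YUV planes, not wrt->mem
  return ok;
}
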
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
diff --git a/include/webp/format_constants.h b/include/webp/format_constants.h
new file mode 100644
index 00000000..7ce498f6
--- /dev/null
+++ b/include/webp/format_constants.h
@@ -0,0 +1,90 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Internal header for constants related to WebP file format.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_WEBP_FORMAT_CONSTANTS_H_
+#define WEBP_WEBP_FORMAT_CONSTANTS_H_
+
+// VP8 related constants.
+#define VP8_SIGNATURE 0x9d012a // Signature in VP8 data.
+#define VP8_MAX_PARTITION0_SIZE (1 << 19) // max size of mode partition
+#define VP8_MAX_PARTITION_SIZE (1 << 24) // max size for token partition
+#define VP8_FRAME_HEADER_SIZE 10 // Size of the frame header within VP8 data.
+
+// VP8L related constants.
+#define VP8L_SIGNATURE_SIZE 1 // VP8L signature size.
+#define VP8L_MAGIC_BYTE 0x2f // VP8L signature byte.
+#define VP8L_IMAGE_SIZE_BITS 14 // Number of bits used to store
+ // width and height.
+#define VP8L_VERSION_BITS 3 // 3 bits reserved for version.
+#define VP8L_VERSION 0 // version 0
+#define VP8L_FRAME_HEADER_SIZE 5 // Size of the VP8L frame header.
+
+#define MAX_PALETTE_SIZE 256
+#define MAX_CACHE_BITS 11
+#define HUFFMAN_CODES_PER_META_CODE 5
+#define ARGB_BLACK 0xff000000
+
+#define DEFAULT_CODE_LENGTH 8
+#define MAX_ALLOWED_CODE_LENGTH 15
+
+#define NUM_LITERAL_CODES 256
+#define NUM_LENGTH_CODES 24
+#define NUM_DISTANCE_CODES 40
+#define CODE_LENGTH_CODES 19
+
+#define MIN_HUFFMAN_BITS 2 // min number of Huffman bits
+#define MAX_HUFFMAN_BITS 9 // max number of Huffman bits
+
+#define TRANSFORM_PRESENT 1 // The bit to be written when next data
+ // to be read is a transform.
+#define NUM_TRANSFORMS 4 // Maximum number of allowed transforms
+ // in a bitstream.
+typedef enum {
+ PREDICTOR_TRANSFORM = 0,
+ CROSS_COLOR_TRANSFORM = 1,
+ SUBTRACT_GREEN = 2,
+ COLOR_INDEXING_TRANSFORM = 3
+} VP8LImageTransformType;
+
+// Alpha related constants.
+#define ALPHA_HEADER_LEN 1
+#define ALPHA_NO_COMPRESSION 0
+#define ALPHA_LOSSLESS_COMPRESSION 1
+#define ALPHA_PREPROCESSED_LEVELS 1
+
+// Mux related constants.
+#define TAG_SIZE 4 // Size of a chunk tag (e.g. "VP8L").
+#define CHUNK_SIZE_BYTES 4 // Size needed to store chunk's size.
+#define CHUNK_HEADER_SIZE 8 // Size of a chunk header.
+#define RIFF_HEADER_SIZE 12 // Size of the RIFF header ("RIFFnnnnWEBP").
+#define FRAME_CHUNK_SIZE 15 // Size of a FRM chunk.
+#define LOOP_CHUNK_SIZE 2 // Size of a LOOP chunk.
+#define TILE_CHUNK_SIZE 6 // Size of a TILE chunk.
+#define VP8X_CHUNK_SIZE 10 // Size of a VP8X chunk.
+
+#define TILING_FLAG_BIT 0x01 // Set if tiles are possibly used.
+#define ANIMATION_FLAG_BIT 0x02 // Set if some animation is expected
+#define ICC_FLAG_BIT 0x04 // Whether ICC is present or not.
+#define METADATA_FLAG_BIT 0x08 // Set if some META chunk is possibly present.
+#define ALPHA_FLAG_BIT 0x10 // Should be same as the ALPHA_FLAG in mux.h
+#define ROTATION_FLAG_BITS 0xe0 // all 3 bits for rotation + symmetry
+
+#define MAX_CANVAS_SIZE (1 << 24) // 24-bit max for VP8X width/height.
+#define MAX_IMAGE_AREA (1ULL << 32) // 32-bit max for width x height.
+#define MAX_LOOP_COUNT (1 << 16) // maximum value for loop-count
+#define MAX_DURATION (1 << 24) // maximum duration
+#define MAX_POSITION_OFFSET (1 << 24) // maximum frame/tile x/y offset
+
+// Maximum chunk payload is such that adding the header and padding won't
+// overflow a uint32_t.
+#define MAX_CHUNK_PAYLOAD (~0U - CHUNK_HEADER_SIZE - 1)
+
+#endif /* WEBP_WEBP_FORMAT_CONSTANTS_H_ */
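
A small usage sketch for the feature-flag bits above, testing a flags value read from a (hypothetical, already parsed) VP8X chunk:

#include "webp/format_constants.h"
#include "webp/types.h"

static int HasAlphaAndAnimation(uint32_t vp8x_flags) {
  return (vp8x_flags & ALPHA_FLAG_BIT) != 0 &&
         (vp8x_flags & ANIMATION_FLAG_BIT) != 0;
}
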
diff --git a/include/webp/types.h b/include/webp/types.h
index c48527b6..3e27190b 100644
--- a/include/webp/types.h
+++ b/include/webp/types.h
@@ -1,4 +1,4 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -12,11 +12,15 @@
#ifndef WEBP_WEBP_TYPES_H_
#define WEBP_WEBP_TYPES_H_
+#include <stddef.h> // for size_t
+
#ifndef _MSC_VER
#include <inttypes.h>
-#ifdef ANSI
-#define inline
-#endif /* ANSI */
+#ifdef __STRICT_ANSI__
+#define WEBP_INLINE
+#else /* __STRICT_ANSI__ */
+#define WEBP_INLINE inline
+#endif
#else
typedef signed char int8_t;
typedef unsigned char uint8_t;
@@ -26,7 +30,7 @@ typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
typedef long long int int64_t;
-#define inline __forceinline
+#define WEBP_INLINE __forceinline
#endif /* _MSC_VER */
#ifndef WEBP_EXTERN
@@ -35,4 +39,7 @@ typedef long long int int64_t;
#define WEBP_EXTERN(type) extern type
#endif /* WEBP_EXTERN */
+// Macro to check ABI compatibility (same major revision number)
+#define WEBP_ABI_IS_INCOMPATIBLE(a, b) (((a) >> 8) != ((b) >> 8))
+
#endif /* WEBP_WEBP_TYPES_H_ */
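
A sketch of how a version-checked entry point typically uses this macro (the function name and ABI constant are hypothetical; compare with WebPInitDecBufferInternal() later in this change):

#include "webp/types.h"

#define MY_ABI_VERSION 0x0200   // MAJOR(8b) + MINOR(8b), illustrative

int MyInitInternal(void* object, int version) {
  if (WEBP_ABI_IS_INCOMPATIBLE(version, MY_ABI_VERSION)) {
    return 0;   // major revision mismatch: refuse to initialize
  }
  return (object != NULL);      // normal initialization would go here
}
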
diff --git a/src/dec/Android.mk b/src/dec/Android.mk
index 2f15866a..ab795aee 100644
--- a/src/dec/Android.mk
+++ b/src/dec/Android.mk
@@ -16,27 +16,43 @@ LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
- alpha.c \
- bits.c \
- buffer.c \
- dsp.c \
- dsp_sse2.c \
- frame.c \
- idec.c \
- io.c \
- io_sse2.c \
- layer.c \
- quant.c \
- tree.c \
- vp8.c \
- webp.c \
- yuv.c
+ alpha.c \
+ buffer.c \
+ frame.c \
+ idec.c \
+ io.c \
+ layer.c \
+ quant.c \
+ tree.c \
+ vp8.c \
+ vp8l.c \
+ webp.c \
+ ../dsp/cpu.c \
+ ../dsp/dec.c \
+ ../dsp/dec_neon.c \
+ ../dsp/dec_sse2.c \
+ ../dsp/enc.c \
+ ../dsp/enc_sse2.c \
+ ../dsp/lossless.c \
+ ../dsp/upsampling.c \
+ ../dsp/upsampling_sse2.c \
+ ../dsp/yuv.c \
+ ../utils/bit_reader.c \
+ ../utils/bit_writer.c \
+ ../utils/color_cache.c \
+ ../utils/filters.c \
+ ../utils/huffman.c \
+ ../utils/huffman_encode.c \
+ ../utils/quant_levels.c \
+ ../utils/rescaler.c \
+ ../utils/thread.c \
+ ../utils/utils.c
LOCAL_CFLAGS := -DANDROID
LOCAL_C_INCLUDES += \
- $(LOCAL_PATH) \
- $(LOCAL_PATH)/../../include
+ $(LOCAL_PATH) \
+ $(LOCAL_PATH)/../../include
LOCAL_MODULE:= libwebp-decode
diff --git a/src/dec/alpha.c b/src/dec/alpha.c
index 585695e2..3c19a892 100644
--- a/src/dec/alpha.c
+++ b/src/dec/alpha.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -10,60 +10,132 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
-#include "vp8i.h"
-
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-
-#include "zlib.h"
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "../utils/filters.h"
+#include "../utils/quant_levels.h"
+#include "webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+// TODO(skal): move to dsp/ ?
+static void CopyPlane(const uint8_t* src, int src_stride,
+ uint8_t* dst, int dst_stride, int width, int height) {
+ while (height-- > 0) {
+ memcpy(dst, src, width);
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+//------------------------------------------------------------------------------
+// Decodes the compressed alpha data 'data' of size 'data_size' into 'output'.
+// The 'output' buffer should be pre-allocated and must be of dimensions
+// 'height' x 'stride', like those of the image.
+//
+// Returns 1 if the compressed alpha is decoded successfully, and 0 if either:
+//   there is an error in the bit-stream header (invalid compression method
+//   or filter), or
+//   the appropriate decompression method returns an error.
+
+static int DecodeAlpha(const uint8_t* data, size_t data_size,
+ int width, int height, int stride, uint8_t* output) {
+ uint8_t* decoded_data = NULL;
+ const size_t decoded_size = height * width;
+ uint8_t* unfiltered_data = NULL;
+ WEBP_FILTER_TYPE filter;
+ int pre_processing;
+ int rsrv;
+ int ok = 0;
+ int method;
+
+ assert(width > 0 && height > 0 && stride >= width);
+ assert(data != NULL && output != NULL);
+
+ if (data_size <= ALPHA_HEADER_LEN) {
+ return 0;
+ }
+
+ method = (data[0] >> 0) & 0x03;
+ filter = (data[0] >> 2) & 0x03;
+ pre_processing = (data[0] >> 4) & 0x03;
+ rsrv = (data[0] >> 6) & 0x03;
+ if (method < ALPHA_NO_COMPRESSION ||
+ method > ALPHA_LOSSLESS_COMPRESSION ||
+ filter >= WEBP_FILTER_LAST ||
+ pre_processing > ALPHA_PREPROCESSED_LEVELS ||
+ rsrv != 0) {
+ return 0;
+ }
+
+ if (method == ALPHA_NO_COMPRESSION) {
+ ok = (data_size >= decoded_size);
+ decoded_data = (uint8_t*)data + ALPHA_HEADER_LEN;
+ } else {
+ decoded_data = (uint8_t*)malloc(decoded_size);
+ if (decoded_data == NULL) return 0;
+ ok = VP8LDecodeAlphaImageStream(width, height,
+ data + ALPHA_HEADER_LEN,
+ data_size - ALPHA_HEADER_LEN,
+ decoded_data);
+ }
+
+ if (ok) {
+ WebPFilterFunc unfilter_func = WebPUnfilters[filter];
+ if (unfilter_func != NULL) {
+ unfiltered_data = (uint8_t*)malloc(decoded_size);
+ if (unfiltered_data == NULL) {
+ ok = 0;
+ goto Error;
+ }
+ // TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
+ // and apply filter per image-row.
+ unfilter_func(decoded_data, width, height, 1, width, unfiltered_data);
+ // Construct raw_data (height x stride) from alpha data (height x width).
+ CopyPlane(unfiltered_data, width, output, stride, width, height);
+ free(unfiltered_data);
+ } else {
+ // Construct raw_data (height x stride) from alpha data (height x width).
+ CopyPlane(decoded_data, width, output, stride, width, height);
+ }
+ if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
+ ok = DequantizeLevels(decoded_data, width, height);
+ }
+ }
+
+ Error:
+ if (method != ALPHA_NO_COMPRESSION) {
+ free(decoded_data);
+ }
+ return ok;
+}
+
+//------------------------------------------------------------------------------
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
int row, int num_rows) {
- uint8_t* output = dec->alpha_plane_;
const int stride = dec->pic_hdr_.width_;
- if (row < 0 || row + num_rows > dec->pic_hdr_.height_) {
- return NULL; // sanity check
+
+ if (row < 0 || num_rows < 0 || row + num_rows > dec->pic_hdr_.height_) {
+ return NULL; // sanity check.
}
- if (row == 0) {
- // TODO(skal): for now, we just decompress everything during the first call.
- // Later, we'll decode progressively, but we need to store the
- // z_stream state.
- const uint8_t* data = dec->alpha_data_;
- size_t data_size = dec->alpha_data_size_;
- const size_t output_size = stride * dec->pic_hdr_.height_;
- int ret = Z_OK;
- z_stream strm;
-
- memset(&strm, 0, sizeof(strm));
- if (inflateInit(&strm) != Z_OK) {
- return 0;
- }
- strm.avail_in = data_size;
- strm.next_in = (unsigned char*)data;
- do {
- strm.avail_out = output_size;
- strm.next_out = output;
- ret = inflate(&strm, Z_NO_FLUSH);
- if (ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) {
- break;
- }
- } while (strm.avail_out == 0);
- inflateEnd(&strm);
- if (ret != Z_STREAM_END) {
- return NULL; // error
+ if (row == 0) {
+ // Decode everything during the first call.
+ if (!DecodeAlpha(dec->alpha_data_, (size_t)dec->alpha_data_size_,
+ dec->pic_hdr_.width_, dec->pic_hdr_.height_, stride,
+ dec->alpha_plane_)) {
+ // TODO(urvang): Add a test where DecodeAlpha fails to test this.
+ return NULL; // Error.
}
}
- return output + row * stride;
+
+ // Return a pointer to the current decoded row.
+ return dec->alpha_plane_ + row * stride;
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
-
-#endif // WEBP_EXPERIMENTAL_FEATURES
diff --git a/src/dec/bits.c b/src/dec/bits.c
deleted file mode 100644
index da3b777f..00000000
--- a/src/dec/bits.c
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright 2010 Google Inc.
-//
-// This code is licensed under the same terms as WebM:
-// Software License Agreement: http://www.webmproject.org/license/software/
-// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
-// -----------------------------------------------------------------------------
-//
-// Boolean decoder
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include "bits.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-//-----------------------------------------------------------------------------
-// VP8BitReader
-
-void VP8InitBitReader(VP8BitReader* const br,
- const uint8_t* const start, const uint8_t* const end) {
- assert(br);
- assert(start);
- assert(start <= end);
- br->range_ = 255 - 1;
- br->buf_ = start;
- br->buf_end_ = end;
- br->value_ = 0;
- br->missing_ = 8;
- br->eof_ = 0;
-}
-
-const uint8_t kVP8Log2Range[128] = {
- 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 0
-};
-
-// range = ((range + 1) << kVP8Log2Range[range]) - 1
-const uint8_t kVP8NewRange[128] = {
- 127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239,
- 127, 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239,
- 247, 127, 131, 135, 139, 143, 147, 151, 155, 159, 163, 167, 171, 175, 179,
- 183, 187, 191, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239,
- 243, 247, 251, 127, 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149,
- 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, 175, 177, 179,
- 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, 205, 207, 209,
- 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239,
- 241, 243, 245, 247, 249, 251, 253, 127
-};
-
-//-----------------------------------------------------------------------------
-// Higher-level calls
-
-uint32_t VP8GetValue(VP8BitReader* const br, int bits) {
- uint32_t v = 0;
- while (bits-- > 0) {
- v |= VP8GetBit(br, 0x80) << bits;
- }
- return v;
-}
-
-int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
- const int value = VP8GetValue(br, bits);
- return VP8Get(br) ? -value : value;
-}
-
-//-----------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-} // extern "C"
-#endif
diff --git a/src/dec/bits.h b/src/dec/bits.h
deleted file mode 100644
index e33b0f87..00000000
--- a/src/dec/bits.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright 2010 Google Inc.
-//
-// This code is licensed under the same terms as WebM:
-// Software License Agreement: http://www.webmproject.org/license/software/
-// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
-// -----------------------------------------------------------------------------
-//
-// Boolean decoder
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#ifndef WEBP_DEC_BITS_H_
-#define WEBP_DEC_BITS_H_
-
-#include <assert.h>
-#include "webp/decode_vp8.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-//-----------------------------------------------------------------------------
-// Bitreader and code-tree reader
-
-typedef struct {
- const uint8_t* buf_; // next byte to be read
- const uint8_t* buf_end_; // end of read buffer
- int eof_; // true if input is exhausted
-
- // boolean decoder
- uint32_t range_; // current range minus 1. In [127, 254] interval.
- uint32_t value_; // current value
- int missing_; // number of missing bits in value_ (8bit)
-} VP8BitReader;
-
-// Initialize the bit reader and the boolean decoder.
-void VP8InitBitReader(VP8BitReader* const br,
- const uint8_t* const start, const uint8_t* const end);
-
-// return the next value made of 'num_bits' bits
-uint32_t VP8GetValue(VP8BitReader* const br, int num_bits);
-static inline uint32_t VP8Get(VP8BitReader* const br) {
- return VP8GetValue(br, 1);
-}
-
-// return the next value with sign-extension.
-int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
-
-// Read a bit with proba 'prob'. Speed-critical function!
-extern const uint8_t kVP8Log2Range[128];
-extern const uint8_t kVP8NewRange[128];
-static inline uint32_t VP8GetByte(VP8BitReader* const br) {
- assert(br);
- if (br->buf_ < br->buf_end_) {
- assert(br->buf_);
- return *br->buf_++;
- }
- br->eof_ = 1;
- return 0xff;
-}
-
-static inline uint32_t VP8BitUpdate(VP8BitReader* const br, uint32_t split) {
- uint32_t bit;
- const uint32_t value_split = (split + 1) << 8;
- // Make sure we have a least 8 bits in 'value_'
- if (br->missing_ > 0) {
- br->value_ |= VP8GetByte(br) << br->missing_;
- br->missing_ -= 8;
- }
- bit = (br->value_ >= value_split);
- if (bit) {
- br->range_ -= split + 1;
- br->value_ -= value_split;
- } else {
- br->range_ = split;
- }
- return bit;
-}
-
-static inline void VP8Shift(VP8BitReader* const br) {
- // range_ is in [0..127] interval here.
- const int shift = kVP8Log2Range[br->range_];
- br->range_ = kVP8NewRange[br->range_];
- br->value_ <<= shift;
- br->missing_ += shift;
-}
-
-static inline uint32_t VP8GetBit(VP8BitReader* const br, int prob) {
- const uint32_t split = (br->range_ * prob) >> 8;
- const uint32_t bit = VP8BitUpdate(br, split);
- if (br->range_ < 0x7f) {
- VP8Shift(br);
- }
- return bit;
-}
-
-static inline int VP8GetSigned(VP8BitReader* const br, int v) {
- const uint32_t split = br->range_ >> 1;
- const uint32_t bit = VP8BitUpdate(br, split);
- VP8Shift(br);
- return bit ? -v : v;
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
-} // extern "C"
-#endif
-
-#endif // WEBP_DEC_BITS_H_
diff --git a/src/dec/buffer.c b/src/dec/buffer.c
index c433d633..c159f6f2 100644
--- a/src/dec/buffer.c
+++ b/src/dec/buffer.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -10,44 +10,64 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
-#include "vp8i.h"
-#include "webpi.h"
+
+#include "./vp8i.h"
+#include "./webpi.h"
+#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// WebPDecBuffer
// Number of bytes per pixel for the different color-spaces.
-static const int kModeBpp[MODE_LAST] = { 3, 4, 3, 4, 4, 2, 2, 1, 1 };
+static const int kModeBpp[MODE_LAST] = {
+ 3, 4, 3, 4, 4, 2, 2,
+ 4, 4, 4, 2, // pre-multiplied modes
+ 1, 1 };
+
+// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
+// Convert to an integer to handle both the unsigned/signed enum cases
+// without the need for casting to remove type limit warnings.
+static int IsValidColorspace(int webp_csp_mode) {
+ return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
+}
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
int ok = 1;
- WEBP_CSP_MODE mode = buffer->colorspace;
+ const WEBP_CSP_MODE mode = buffer->colorspace;
const int width = buffer->width;
const int height = buffer->height;
- if (mode >= MODE_YUV) { // YUV checks
+ if (!IsValidColorspace(mode)) {
+ ok = 0;
+ } else if (!WebPIsRGBMode(mode)) { // YUV checks
const WebPYUVABuffer* const buf = &buffer->u.YUVA;
- const int size = buf->y_stride * height;
- const int u_size = buf->u_stride * ((height + 1) / 2);
- const int v_size = buf->v_stride * ((height + 1) / 2);
- const int a_size = buf->a_stride * height;
- ok &= (size <= buf->y_size);
+ const uint64_t y_size = (uint64_t)buf->y_stride * height;
+ const uint64_t u_size = (uint64_t)buf->u_stride * ((height + 1) / 2);
+ const uint64_t v_size = (uint64_t)buf->v_stride * ((height + 1) / 2);
+ const uint64_t a_size = (uint64_t)buf->a_stride * height;
+ ok &= (y_size <= buf->y_size);
ok &= (u_size <= buf->u_size);
ok &= (v_size <= buf->v_size);
- ok &= (a_size <= buf->a_size);
ok &= (buf->y_stride >= width);
ok &= (buf->u_stride >= (width + 1) / 2);
ok &= (buf->v_stride >= (width + 1) / 2);
- if (buf->a) {
+ ok &= (buf->y != NULL);
+ ok &= (buf->u != NULL);
+ ok &= (buf->v != NULL);
+ if (mode == MODE_YUVA) {
ok &= (buf->a_stride >= width);
+ ok &= (a_size <= buf->a_size);
+ ok &= (buf->a != NULL);
}
} else { // RGB checks
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
- ok &= (buf->stride * height <= buf->size);
+ const uint64_t size = (uint64_t)buf->stride * height;
+ ok &= (size <= buf->size);
ok &= (buf->stride >= width * kModeBpp[mode]);
+ ok &= (buf->rgba != NULL);
}
return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
}
@@ -55,24 +75,22 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
const int w = buffer->width;
const int h = buffer->height;
+ const WEBP_CSP_MODE mode = buffer->colorspace;
- if (w <= 0 || h <= 0) {
+ if (w <= 0 || h <= 0 || !IsValidColorspace(mode)) {
return VP8_STATUS_INVALID_PARAM;
}
if (!buffer->is_external_memory && buffer->private_memory == NULL) {
uint8_t* output;
- WEBP_CSP_MODE mode = buffer->colorspace;
- int stride;
int uv_stride = 0, a_stride = 0;
- int uv_size = 0;
- uint64_t size, a_size = 0, total_size;
+ uint64_t uv_size = 0, a_size = 0, total_size;
// We need memory and it hasn't been allocated yet.
// => initialize output buffer, now that dimensions are known.
- stride = w * kModeBpp[mode];
- size = (uint64_t)stride * h;
+ const int stride = w * kModeBpp[mode];
+ const uint64_t size = (uint64_t)stride * h;
- if (mode >= MODE_YUV) {
+ if (!WebPIsRGBMode(mode)) {
uv_stride = (w + 1) / 2;
uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
if (mode == MODE_YUVA) {
@@ -83,36 +101,33 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
total_size = size + 2 * uv_size + a_size;
// Security/sanity checks
- if (((size_t)total_size != total_size) || (total_size >= (1ULL << 40))) {
- return VP8_STATUS_INVALID_PARAM;
- }
-
- buffer->private_memory = output = (uint8_t*)malloc((size_t)total_size);
+ output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output));
if (output == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
+ buffer->private_memory = output;
- if (mode >= MODE_YUV) { // YUVA initialization
+ if (!WebPIsRGBMode(mode)) { // YUVA initialization
WebPYUVABuffer* const buf = &buffer->u.YUVA;
buf->y = output;
buf->y_stride = stride;
- buf->y_size = size;
+ buf->y_size = (size_t)size;
buf->u = output + size;
buf->u_stride = uv_stride;
- buf->u_size = uv_size;
+ buf->u_size = (size_t)uv_size;
buf->v = output + size + uv_size;
buf->v_stride = uv_stride;
- buf->v_size = uv_size;
+ buf->v_size = (size_t)uv_size;
if (mode == MODE_YUVA) {
buf->a = output + size + 2 * uv_size;
}
- buf->a_size = a_size;
+ buf->a_size = (size_t)a_size;
buf->a_stride = a_stride;
} else { // RGBA initialization
WebPRGBABuffer* const buf = &buffer->u.RGBA;
buf->rgba = output;
buf->stride = stride;
- buf->size = size;
+ buf->size = (size_t)size;
}
}
return CheckDecBuffer(buffer);
@@ -140,7 +155,7 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h,
if (options->scaled_width <= 0 || options->scaled_height <= 0) {
return VP8_STATUS_INVALID_PARAM;
}
- w = options->scaled_width;
+ w = options->scaled_width;
h = options->scaled_height;
}
}
@@ -151,18 +166,20 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h,
return AllocateBuffer(out);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// constructors / destructors
-int WebPInitDecBufferInternal(WebPDecBuffer* const buffer, int version) {
- if (version != WEBP_DECODER_ABI_VERSION) return 0; // version mismatch
- if (!buffer) return 0;
+int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
+ if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
+ return 0; // version mismatch
+ }
+ if (buffer == NULL) return 0;
memset(buffer, 0, sizeof(*buffer));
return 1;
}
-void WebPFreeDecBuffer(WebPDecBuffer* const buffer) {
- if (buffer) {
+void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
+ if (buffer != NULL) {
if (!buffer->is_external_memory)
free(buffer->private_memory);
buffer->private_memory = NULL;
@@ -171,9 +188,9 @@ void WebPFreeDecBuffer(WebPDecBuffer* const buffer) {
void WebPCopyDecBuffer(const WebPDecBuffer* const src,
WebPDecBuffer* const dst) {
- if (src && dst) {
+ if (src != NULL && dst != NULL) {
*dst = *src;
- if (src->private_memory) {
+ if (src->private_memory != NULL) {
dst->is_external_memory = 1; // dst buffer doesn't own the memory.
dst->private_memory = NULL;
}
@@ -182,16 +199,16 @@ void WebPCopyDecBuffer(const WebPDecBuffer* const src,
// Copy and transfer ownership from src to dst (beware of parameter order!)
void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
- if (src && dst) {
+ if (src != NULL && dst != NULL) {
*dst = *src;
- if (src->private_memory) {
+ if (src->private_memory != NULL) {
src->is_external_memory = 1; // src relinquishes ownership
src->private_memory = NULL;
}
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
diff --git a/src/dec/decode_vp8.h b/src/dec/decode_vp8.h
new file mode 100644
index 00000000..ee914c8d
--- /dev/null
+++ b/src/dec/decode_vp8.h
@@ -0,0 +1,182 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Low-level API for VP8 decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_DECODE_VP8_H_
+#define WEBP_WEBP_DECODE_VP8_H_
+
+#include "webp/decode.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Lower-level API
+//
+// These functions provide fine-grained control of the decoding process.
+// The call flow should resemble:
+//
+// VP8Io io;
+// VP8InitIo(&io);
+// io.data = data;
+// io.data_size = size;
+// /* customize io's functions (setup()/put()/teardown()) if needed. */
+//
+// VP8Decoder* dec = VP8New();
+// bool ok = VP8Decode(dec);
+// if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
+// VP8Delete(dec);
+// return ok;
+
+// Input / Output
+typedef struct VP8Io VP8Io;
+typedef int (*VP8IoPutHook)(const VP8Io* io);
+typedef int (*VP8IoSetupHook)(VP8Io* io);
+typedef void (*VP8IoTeardownHook)(const VP8Io* io);
+
+struct VP8Io {
+ // set by VP8GetHeaders()
+ int width, height; // picture dimensions, in pixels (invariable).
+ // These are the original, uncropped dimensions.
+ // The actual area passed to put() is stored
+ // in mb_w / mb_h fields.
+
+ // set before calling put()
+ int mb_y; // position of the current rows (in pixels)
+ int mb_w; // number of columns in the sample
+ int mb_h; // number of rows in the sample
+ const uint8_t* y, *u, *v; // rows to copy (in yuv420 format)
+ int y_stride; // row stride for luma
+ int uv_stride; // row stride for chroma
+
+ void* opaque; // user data
+
+ // called when fresh samples are available. Currently, samples are in
+ // YUV420 format, and can be up to width x 24 in size (depending on the
+ // in-loop filtering level, e.g.). Should return false in case of error
+ // or abort request. The actual area to update is mb_w x mb_h in size,
+ // taking cropping into account.
+ VP8IoPutHook put;
+
+ // called just before starting to decode the blocks.
+ // Must return false in case of setup error, true otherwise. If false is
+ // returned, teardown() will NOT be called. But if the setup succeeded
+ // and true is returned, then teardown() will always be called afterward.
+ VP8IoSetupHook setup;
+
+ // Called just after block decoding is finished (or when an error occurred
+ // during put()). Is NOT called if setup() failed.
+ VP8IoTeardownHook teardown;
+
+ // this is a recommendation for the user-side yuv->rgb converter. This flag
+ // is set when calling the setup() hook and can be overwritten by it. It then
+ // can be taken into consideration during the put() method.
+ int fancy_upsampling;
+
+ // Input buffer.
+ size_t data_size;
+ const uint8_t* data;
+
+ // If true, in-loop filtering will not be performed even if present in the
+ // bitstream. Switching off filtering may speed up decoding at the expense
+ // of more visible blocking. Note that output will also be non-compliant
+ // with the VP8 specifications.
+ int bypass_filtering;
+
+ // Cropping parameters.
+ int use_cropping;
+ int crop_left, crop_right, crop_top, crop_bottom;
+
+ // Scaling parameters.
+ int use_scaling;
+ int scaled_width, scaled_height;
+
+ // If non-NULL, pointer to the alpha data (if present) corresponding to the
+ // start of the current row (that is, it is pre-offset by mb_y and takes
+ // cropping into account).
+ const uint8_t* a;
+};
+
+// Internal, version-checked, entry point
+int VP8InitIoInternal(VP8Io* const, int);
+
+// Set the custom IO function pointers and user-data. The setter for IO hooks
+// should be called before initiating incremental decoding. Returns true if
+// WebPIDecoder object is successfully modified, false otherwise.
+int WebPISetIOHooks(WebPIDecoder* const idec,
+ VP8IoPutHook put,
+ VP8IoSetupHook setup,
+ VP8IoTeardownHook teardown,
+ void* user_data);
+
+// Main decoding object. This is an opaque structure.
+typedef struct VP8Decoder VP8Decoder;
+
+// Create a new decoder object.
+VP8Decoder* VP8New(void);
+
+// Must be called to make sure 'io' is initialized properly.
+// Returns false in case of version mismatch. Upon such failure, no other
+// decoding function should be called (VP8Decode, VP8GetHeaders, ...)
+static WEBP_INLINE int VP8InitIo(VP8Io* const io) {
+ return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION);
+}
+
+// Start decoding a new picture. Returns true if ok.
+int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io);
+
+// Decode a picture. Will call VP8GetHeaders() if it wasn't done already.
+// Returns false in case of error.
+int VP8Decode(VP8Decoder* const dec, VP8Io* const io);
+
+// Return current status of the decoder:
+VP8StatusCode VP8Status(VP8Decoder* const dec);
+
+// return readable string corresponding to the last status.
+const char* VP8StatusMessage(VP8Decoder* const dec);
+
+// Resets the decoder in its initial state, reclaiming memory.
+// Not a mandatory call between calls to VP8Decode().
+void VP8Clear(VP8Decoder* const dec);
+
+// Destroy the decoder object.
+void VP8Delete(VP8Decoder* const dec);
+
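
Expanding the call-flow comment at the top of this header, a sketch of a complete low-level decode (a real caller would also install put()/setup()/teardown() hooks on 'io' before decoding):

#include <stdio.h>
#include "./decode_vp8.h"

static int DecodeLowLevel(const uint8_t* data, size_t data_size) {
  VP8Io io;
  VP8Decoder* dec;
  int ok;
  if (!VP8InitIo(&io)) return 0;   // ABI version mismatch
  io.data = data;
  io.data_size = data_size;
  dec = VP8New();
  if (dec == NULL) return 0;
  ok = VP8GetHeaders(dec, &io) && VP8Decode(dec, &io);
  if (!ok) fprintf(stderr, "decode error: %s\n", VP8StatusMessage(dec));
  VP8Delete(dec);
  return ok;
}
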
+//------------------------------------------------------------------------------
+// Miscellaneous VP8/VP8L bitstream probing functions.
+
+// Returns true if the next 3 bytes in data contain the VP8 signature.
+WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
+
+// Validates the VP8 data-header and retrieves basic header information, namely
+// width and height. Returns 0 in case of formatting error. *width/*height
+// can be passed NULL.
+WEBP_EXTERN(int) VP8GetInfo(
+ const uint8_t* data,
+ size_t data_size, // data available so far
+ size_t chunk_size, // total data size expected in the chunk
+ int* const width, int* const height);
+
+// Returns true if the next byte(s) in data is a VP8L signature.
+WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
+
+// Validates the VP8L data-header and retrieves basic header information, namely
+// width, height and alpha. Returns 0 in case of formatting error.
+// width/height/has_alpha can be passed NULL.
+WEBP_EXTERN(int) VP8LGetInfo(
+ const uint8_t* data, size_t data_size, // data available so far
+ int* const width, int* const height, int* const has_alpha);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_WEBP_DECODE_VP8_H_ */
diff --git a/src/dec/frame.c b/src/dec/frame.c
index 29a0f757..9c91a48e 100644
--- a/src/dec/frame.c
+++ b/src/dec/frame.c
@@ -1,4 +1,4 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include "./vp8i.h"
+#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -19,7 +20,7 @@ extern "C" {
#define ALIGN_MASK (32 - 1)
//------------------------------------------------------------------------------
-// Memory setup
+// Filtering
// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
// for caching, given a filtering level.
@@ -28,99 +29,7 @@ extern "C" {
// U/V, so it's 8 samples total (because of the 2x upsampling).
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
-int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
- const int mb_w = dec->mb_w_;
- const int intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
- const int top_size = (16 + 8 + 8) * mb_w;
- const int info_size = (mb_w + 1) * sizeof(VP8MB);
- const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
- const int coeffs_size = 384 * sizeof(*dec->coeffs_);
- const int cache_height = (16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
- const int cache_size = top_size * cache_height;
- const int alpha_size =
- dec->alpha_data_ ? (dec->pic_hdr_.width_ * dec->pic_hdr_.height_) : 0;
- const int needed = intra_pred_mode_size
- + top_size + info_size
- + yuv_size + coeffs_size
- + cache_size + alpha_size + ALIGN_MASK;
- uint8_t* mem;
-
- if (needed > dec->mem_size_) {
- free(dec->mem_);
- dec->mem_size_ = 0;
- dec->mem_ = (uint8_t*)malloc(needed);
- if (dec->mem_ == NULL) {
- return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
- "no memory during frame initialization.");
- }
- dec->mem_size_ = needed;
- }
-
- mem = (uint8_t*)dec->mem_;
- dec->intra_t_ = (uint8_t*)mem;
- mem += intra_pred_mode_size;
-
- dec->y_t_ = (uint8_t*)mem;
- mem += 16 * mb_w;
- dec->u_t_ = (uint8_t*)mem;
- mem += 8 * mb_w;
- dec->v_t_ = (uint8_t*)mem;
- mem += 8 * mb_w;
-
- dec->mb_info_ = ((VP8MB*)mem) + 1;
- mem += info_size;
-
- mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
- assert((yuv_size & ALIGN_MASK) == 0);
- dec->yuv_b_ = (uint8_t*)mem;
- mem += yuv_size;
-
- dec->coeffs_ = (int16_t*)mem;
- mem += coeffs_size;
-
- dec->cache_y_stride_ = 16 * mb_w;
- dec->cache_uv_stride_ = 8 * mb_w;
- {
- const int extra_rows = kFilterExtraRows[dec->filter_type_];
- const int extra_y = extra_rows * dec->cache_y_stride_;
- const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
- dec->cache_y_ = ((uint8_t*)mem) + extra_y;
- dec->cache_u_ = dec->cache_y_ + 16 * dec->cache_y_stride_ + extra_uv;
- dec->cache_v_ = dec->cache_u_ + 8 * dec->cache_uv_stride_ + extra_uv;
- }
- mem += cache_size;
-
- // alpha plane
- dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
- mem += alpha_size;
-
- // note: left-info is initialized once for all.
- memset(dec->mb_info_ - 1, 0, (mb_w + 1) * sizeof(*dec->mb_info_));
-
- // initialize top
- memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
-
- // prepare 'io'
- io->mb_y = 0;
- io->y = dec->cache_y_;
- io->u = dec->cache_u_;
- io->v = dec->cache_v_;
- io->y_stride = dec->cache_y_stride_;
- io->uv_stride = dec->cache_uv_stride_;
- io->fancy_upsampling = 0; // default
- io->a = NULL;
-
- // Init critical function pointers and look-up tables.
- VP8DspInitTables();
- VP8DspInit();
-
- return 1;
-}
-
-//------------------------------------------------------------------------------
-// Filtering
-
-static inline int hev_thresh_from_level(int level, int keyframe) {
+static WEBP_INLINE int hev_thresh_from_level(int level, int keyframe) {
if (keyframe) {
return (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
} else {
@@ -129,11 +38,12 @@ static inline int hev_thresh_from_level(int level, int keyframe) {
}
static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
- VP8MB* const mb = dec->mb_info_ + mb_x;
- uint8_t* const y_dst = dec->cache_y_ + mb_x * 16;
+ const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int y_bps = dec->cache_y_stride_;
- const int level = mb->f_level_;
- const int ilevel = mb->f_ilevel_;
+ VP8FInfo* const f_info = ctx->f_info_ + mb_x;
+ uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16;
+ const int level = f_info->f_level_;
+ const int ilevel = f_info->f_ilevel_;
const int limit = 2 * level + ilevel;
if (level == 0) {
return;
@@ -142,26 +52,26 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
if (mb_x > 0) {
VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
}
- if (mb->f_inner_) {
+ if (f_info->f_inner_) {
VP8SimpleHFilter16i(y_dst, y_bps, limit);
}
if (mb_y > 0) {
VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
}
- if (mb->f_inner_) {
+ if (f_info->f_inner_) {
VP8SimpleVFilter16i(y_dst, y_bps, limit);
}
} else { // complex
- uint8_t* const u_dst = dec->cache_u_ + mb_x * 8;
- uint8_t* const v_dst = dec->cache_v_ + mb_x * 8;
const int uv_bps = dec->cache_uv_stride_;
+ uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
+ uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
const int hev_thresh =
hev_thresh_from_level(level, dec->frm_hdr_.key_frame_);
if (mb_x > 0) {
VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
}
- if (mb->f_inner_) {
+ if (f_info->f_inner_) {
VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
}
@@ -169,21 +79,20 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
}
- if (mb->f_inner_) {
+ if (f_info->f_inner_) {
VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
}
}
}
-void VP8FilterRow(const VP8Decoder* const dec) {
+// Filter the decoded macroblock row (if needed)
+static void FilterRow(const VP8Decoder* const dec) {
int mb_x;
- assert(dec->filter_type_ > 0);
- if (dec->mb_y_ < dec->tl_mb_y_ || dec->mb_y_ > dec->br_mb_y_) {
- return;
- }
+ const int mb_y = dec->thread_ctx_.mb_y_;
+ assert(dec->thread_ctx_.filter_row_);
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
- DoFilter(dec, mb_x, dec->mb_y_);
+ DoFilter(dec, mb_x, mb_y);
}
}
@@ -191,7 +100,8 @@ void VP8FilterRow(const VP8Decoder* const dec) {
void VP8StoreBlock(VP8Decoder* const dec) {
if (dec->filter_type_ > 0) {
- VP8MB* const info = dec->mb_info_ + dec->mb_x_;
+ VP8FInfo* const info = dec->f_info_ + dec->mb_x_;
+ const int skip = dec->mb_info_[dec->mb_x_].skip_;
int level = dec->filter_levels_[dec->segment_];
if (dec->filter_hdr_.use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
@@ -215,14 +125,16 @@ void VP8StoreBlock(VP8Decoder* const dec) {
}
info->f_ilevel_ = (level < 1) ? 1 : level;
- info->f_inner_ = (!info->skip_ || dec->is_i4x4_);
+ info->f_inner_ = (!skip || dec->is_i4x4_);
}
{
// Transfer samples to row cache
int y;
- uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16;
- uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8;
- uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8;
+ const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
+ const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
+ uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
+ uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
+ uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
for (y = 0; y < 16; ++y) {
memcpy(ydst + y * dec->cache_y_stride_,
dec->yuv_b_ + Y_OFF + y * BPS, 16);
@@ -249,17 +161,27 @@ void VP8StoreBlock(VP8Decoder* const dec) {
#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
-int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
+// Finalize and transmit a complete row. Return false in case of user-abort.
+static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
+ int ok = 1;
+ const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
const int ysize = extra_y_rows * dec->cache_y_stride_;
const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
- uint8_t* const ydst = dec->cache_y_ - ysize;
- uint8_t* const udst = dec->cache_u_ - uvsize;
- uint8_t* const vdst = dec->cache_v_ - uvsize;
- const int first_row = (dec->mb_y_ == 0);
- const int last_row = (dec->mb_y_ >= dec->br_mb_y_ - 1);
- int y_start = MACROBLOCK_VPOS(dec->mb_y_);
- int y_end = MACROBLOCK_VPOS(dec->mb_y_ + 1);
+ const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_;
+ const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_;
+ uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
+ uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
+ uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
+ const int first_row = (ctx->mb_y_ == 0);
+ const int last_row = (ctx->mb_y_ >= dec->br_mb_y_ - 1);
+ int y_start = MACROBLOCK_VPOS(ctx->mb_y_);
+ int y_end = MACROBLOCK_VPOS(ctx->mb_y_ + 1);
+
+ if (ctx->filter_row_) {
+ FilterRow(dec);
+ }
+
if (io->put) {
if (!first_row) {
y_start -= extra_y_rows;
@@ -267,9 +189,9 @@ int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
io->u = udst;
io->v = vdst;
} else {
- io->y = dec->cache_y_;
- io->u = dec->cache_u_;
- io->v = dec->cache_v_;
+ io->y = dec->cache_y_ + y_offset;
+ io->u = dec->cache_u_ + uv_offset;
+ io->v = dec->cache_v_ + uv_offset;
}
if (!last_row) {
@@ -279,15 +201,18 @@ int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
y_end = io->crop_bottom; // make sure we don't overflow on last row.
}
io->a = NULL;
-#ifdef WEBP_EXPERIMENTAL_FEATURES
- if (dec->alpha_data_) {
+ if (dec->alpha_data_ != NULL && y_start < y_end) {
+ // TODO(skal): several things to correct here:
+ // * testing presence of alpha with dec->alpha_data_ is not a good idea
+      // * we're actually decompressing the full plane only once. It should be
+      //   more obvious from the signature.
+      // * we could free alpha_data_ right after this call, but we don't own it.
io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
if (io->a == NULL) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Could not decode alpha data.");
}
}
-#endif
if (y_start < io->crop_top) {
const int delta_y = io->crop_top - y_start;
y_start = io->crop_top;
@@ -295,7 +220,7 @@ int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
io->y += dec->cache_y_stride_ * delta_y;
io->u += dec->cache_uv_stride_ * (delta_y >> 1);
io->v += dec->cache_uv_stride_ * (delta_y >> 1);
- if (io->a) {
+ if (io->a != NULL) {
io->a += io->width * delta_y;
}
}
@@ -303,33 +228,69 @@ int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
io->y += io->crop_left;
io->u += io->crop_left >> 1;
io->v += io->crop_left >> 1;
- if (io->a) {
+ if (io->a != NULL) {
io->a += io->crop_left;
}
io->mb_y = y_start - io->crop_top;
io->mb_w = io->crop_right - io->crop_left;
io->mb_h = y_end - y_start;
- if (!io->put(io)) {
- return 0;
- }
+ ok = io->put(io);
}
}
- // rotate top samples
- if (!last_row) {
- memcpy(ydst, ydst + 16 * dec->cache_y_stride_, ysize);
- memcpy(udst, udst + 8 * dec->cache_uv_stride_, uvsize);
- memcpy(vdst, vdst + 8 * dec->cache_uv_stride_, uvsize);
+ // rotate top samples if needed
+ if (ctx->id_ + 1 == dec->num_caches_) {
+ if (!last_row) {
+ memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
+ memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
+ memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
+ }
}
- return 1;
+
+ return ok;
}
#undef MACROBLOCK_VPOS
//------------------------------------------------------------------------------
+
+int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
+ int ok = 1;
+ VP8ThreadContext* const ctx = &dec->thread_ctx_;
+ if (!dec->use_threads_) {
+ // ctx->id_ and ctx->f_info_ are already set
+ ctx->mb_y_ = dec->mb_y_;
+ ctx->filter_row_ = dec->filter_row_;
+ ok = FinishRow(dec, io);
+ } else {
+ WebPWorker* const worker = &dec->worker_;
+ // Finish previous job *before* updating context
+ ok &= WebPWorkerSync(worker);
+ assert(worker->status_ == OK);
+ if (ok) { // spawn a new deblocking/output job
+ ctx->io_ = *io;
+ ctx->id_ = dec->cache_id_;
+ ctx->mb_y_ = dec->mb_y_;
+ ctx->filter_row_ = dec->filter_row_;
+ if (ctx->filter_row_) { // just swap filter info
+ VP8FInfo* const tmp = ctx->f_info_;
+ ctx->f_info_ = dec->f_info_;
+ dec->f_info_ = tmp;
+ }
+ WebPWorkerLaunch(worker);
+ if (++dec->cache_id_ == dec->num_caches_) {
+ dec->cache_id_ = 0;
+ }
+ }
+ }
+ return ok;
+}
+
+//------------------------------------------------------------------------------
// Finish setting up the decoding parameter once user's setup() is called.
-VP8StatusCode VP8FinishFrameSetup(VP8Decoder* const dec, VP8Io* const io) {
+VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
// Call setup() first. This may trigger additional decoding features on 'io'.
+  // Note: Afterward, we must call teardown() no matter what.
if (io->setup && !io->setup(io)) {
VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
return dec->status_;
@@ -360,8 +321,13 @@ VP8StatusCode VP8FinishFrameSetup(VP8Decoder* const dec, VP8Io* const io) {
dec->tl_mb_y_ = 0;
} else {
// For simple filter, we can filter only the cropped region.
- dec->tl_mb_y_ = io->crop_top >> 4;
- dec->tl_mb_x_ = io->crop_left >> 4;
+ // We include 'extra_pixels' on the other side of the boundary, since
+ // vertical or horizontal filtering of the previous macroblock can
+ // modify some abutting pixels.
+ dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;
+ dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;
+ if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
+ if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
}
// We need some 'extra' pixels on the right/bottom.
dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
@@ -376,6 +342,189 @@ VP8StatusCode VP8FinishFrameSetup(VP8Decoder* const dec, VP8Io* const io) {
return VP8_STATUS_OK;
}
+int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
+ int ok = 1;
+ if (dec->use_threads_) {
+ ok = WebPWorkerSync(&dec->worker_);
+ }
+
+ if (io->teardown) {
+ io->teardown(io);
+ }
+ return ok;
+}
+
+//------------------------------------------------------------------------------
+// For multi-threaded decoding we need to use 3 rows of 16 pixels as a delay
+// line.
+//
+// The reason is that the deblocking filter cannot deblock the bottom
+// horizontal edges immediately, and needs to wait for the first few rows of
+// the next macroblock to be decoded. Hence, deblocking lags behind by 4 or 8
+// pixels (depending on strength).
+// With two threads, the vertical positions of the rows being decoded are:
+// Decode: [ 0..15][16..31][32..47][48..63][64..79][...
+// Deblock: [ 0..11][12..27][28..43][44..59][...
+// If we use two threads and two caches of 16 pixels, the sequence would be:
+// Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
+// Deblock: [ 0..11][12..27!!][-4..11][12..27][...
+// The problem is that during rows [12..15!!] both the decoding and
+// deblocking threads are writing simultaneously.
+// With 3 cache lines, one gets a safe write pattern:
+// Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
+// Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28...
+// Note that multi-threaded output _without_ deblocking can make use of only
+// two cache lines of 16 pixels, since there's no lag. The decoding and output
+// processes then never write concurrently:
+// Decode: [ 0..15][16..31][ 0..15][16..31][...
+// io->put: [ 0..15][16..31][ 0..15][...
+
+#define MT_CACHE_LINES 3
+#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case
+
+// Initialize multi/single-thread worker
+static int InitThreadContext(VP8Decoder* const dec) {
+ dec->cache_id_ = 0;
+ if (dec->use_threads_) {
+ WebPWorker* const worker = &dec->worker_;
+ if (!WebPWorkerReset(worker)) {
+ return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
+ "thread initialization failed.");
+ }
+ worker->data1 = dec;
+ worker->data2 = (void*)&dec->thread_ctx_.io_;
+ worker->hook = (WebPWorkerHook)FinishRow;
+ dec->num_caches_ =
+ (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
+ } else {
+ dec->num_caches_ = ST_CACHE_LINES;
+ }
+ return 1;
+}
+
+#undef MT_CACHE_LINES
+#undef ST_CACHE_LINES
+
+//------------------------------------------------------------------------------
+// Memory setup
+
+static int AllocateMemory(VP8Decoder* const dec) {
+ const int num_caches = dec->num_caches_;
+ const int mb_w = dec->mb_w_;
+ // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
+ const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
+ const size_t top_size = (16 + 8 + 8) * mb_w;
+ const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
+ const size_t f_info_size =
+ (dec->filter_type_ > 0) ?
+ mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
+ : 0;
+ const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
+ const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);
+ const size_t cache_height = (16 * num_caches
+ + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
+ const size_t cache_size = top_size * cache_height;
+ // alpha_size is the only one that scales as width x height.
+ const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
+ (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
+ const uint64_t needed = (uint64_t)intra_pred_mode_size
+ + top_size + mb_info_size + f_info_size
+ + yuv_size + coeffs_size
+ + cache_size + alpha_size + ALIGN_MASK;
+ uint8_t* mem;
+
+ if (needed != (size_t)needed) return 0; // check for overflow
+ if (needed > dec->mem_size_) {
+ free(dec->mem_);
+ dec->mem_size_ = 0;
+ dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
+ if (dec->mem_ == NULL) {
+ return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
+ "no memory during frame initialization.");
+ }
+    // down-cast is ok, thanks to WebPSafeMalloc() above.
+ dec->mem_size_ = (size_t)needed;
+ }
+
+ mem = (uint8_t*)dec->mem_;
+ dec->intra_t_ = (uint8_t*)mem;
+ mem += intra_pred_mode_size;
+
+ dec->y_t_ = (uint8_t*)mem;
+ mem += 16 * mb_w;
+ dec->u_t_ = (uint8_t*)mem;
+ mem += 8 * mb_w;
+ dec->v_t_ = (uint8_t*)mem;
+ mem += 8 * mb_w;
+
+ dec->mb_info_ = ((VP8MB*)mem) + 1;
+ mem += mb_info_size;
+
+ dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
+ mem += f_info_size;
+ dec->thread_ctx_.id_ = 0;
+ dec->thread_ctx_.f_info_ = dec->f_info_;
+ if (dec->use_threads_) {
+    // secondary cache line. The deblocking process needs to make use of the
+    // filtering strength from the previous macroblock row, while new rows
+    // are being decoded in parallel. We'll just swap the pointers.
+ dec->thread_ctx_.f_info_ += mb_w;
+ }
+
+ mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
+ assert((yuv_size & ALIGN_MASK) == 0);
+ dec->yuv_b_ = (uint8_t*)mem;
+ mem += yuv_size;
+
+ dec->coeffs_ = (int16_t*)mem;
+ mem += coeffs_size;
+
+ dec->cache_y_stride_ = 16 * mb_w;
+ dec->cache_uv_stride_ = 8 * mb_w;
+ {
+ const int extra_rows = kFilterExtraRows[dec->filter_type_];
+ const int extra_y = extra_rows * dec->cache_y_stride_;
+ const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
+ dec->cache_y_ = ((uint8_t*)mem) + extra_y;
+ dec->cache_u_ = dec->cache_y_
+ + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
+ dec->cache_v_ = dec->cache_u_
+ + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
+ dec->cache_id_ = 0;
+ }
+ mem += cache_size;
+
+ // alpha plane
+ dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
+ mem += alpha_size;
+
+ // note: left-info is initialized once for all.
+ memset(dec->mb_info_ - 1, 0, mb_info_size);
+
+ // initialize top
+ memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
+
+ return 1;
+}
+
+static void InitIo(VP8Decoder* const dec, VP8Io* io) {
+ // prepare 'io'
+ io->mb_y = 0;
+ io->y = dec->cache_y_;
+ io->u = dec->cache_u_;
+ io->v = dec->cache_v_;
+ io->y_stride = dec->cache_y_stride_;
+ io->uv_stride = dec->cache_uv_stride_;
+ io->a = NULL;
+}
+
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
+ if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_.
+ if (!AllocateMemory(dec)) return 0;
+ InitIo(dec, io);
+ VP8DspInit(); // Init critical function pointers and look-up tables.
+ return 1;
+}
+
//------------------------------------------------------------------------------
// Main reconstruction function.
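AllocateMemory() above accumulates every per-plane size into a single uint64_t total and only down-casts it once the 'needed != (size_t)needed' guard has proven it fits; alpha_size is the one term that scales as width x height and can realistically overflow on 32-bit targets. A minimal sketch of that guard, with plain malloc() standing in for WebPSafeMalloc():

    #include <stdint.h>
    #include <stdlib.h>

    // Sum a few per-plane byte counts in 64 bits, then verify the total still
    // fits in size_t before allocating (the same check AllocateMemory() uses).
    static void* AllocPlanes(size_t yuv_size, size_t cache_size,
                             uint64_t alpha_size /* width x height */) {
      const uint64_t needed = (uint64_t)yuv_size + cache_size + alpha_size;
      if (needed != (size_t)needed) return NULL;   // would overflow on 32-bit
      return malloc((size_t)needed);
    }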
@@ -386,7 +535,7 @@ static const int kScan[16] = {
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
};
-static inline int CheckMode(VP8Decoder* const dec, int mode) {
+static WEBP_INLINE int CheckMode(VP8Decoder* const dec, int mode) {
if (mode == B_DC_PRED) {
if (dec->mb_x_ == 0) {
return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
@@ -397,7 +546,7 @@ static inline int CheckMode(VP8Decoder* const dec, int mode) {
return mode;
}
-static inline void Copy32b(uint8_t* dst, uint8_t* src) {
+static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
*(uint32_t*)dst = *(uint32_t*)src;
}
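Taken together, the frame.c changes replace the old VP8FilterRow()/VP8FinishRow() pair with one per-row entry point (VP8ProcessRow) bracketed by VP8EnterCritical()/VP8ExitCritical(). A condensed sketch of the calling order, mirroring DecodePartition0()/DecodeRemaining() in idec.c below (error paths trimmed, macroblock decoding elided):

    #include "./vp8i.h"   // internal API: VP8Decoder, VP8Io, VP8ProcessRow(), ...

    // Sketch only: decode all macroblock rows and emit them through io->put().
    static int DecodeAllRows(VP8Decoder* const dec, VP8Io* const io) {
      if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) return 0;  // calls io->setup()
      if (!VP8InitFrame(dec, io)) {          // thread ctx + row caches + dsp init
        VP8ExitCritical(dec, io);
        return 0;
      }
      for (dec->mb_y_ = 0; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
        // ... decode the macroblocks of row mb_y_ here ...
        if (!VP8ProcessRow(dec, io)) {       // filter + emit, possibly on a worker
          VP8ExitCritical(dec, io);
          return 0;
        }
      }
      return VP8ExitCritical(dec, io);       // sync worker, call io->teardown()
    }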
diff --git a/src/dec/idec.c b/src/dec/idec.c
index 1e51f0d7..7df790ce 100644
--- a/src/dec/idec.c
+++ b/src/dec/idec.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -13,16 +13,16 @@
#include <string.h>
#include <stdlib.h>
-#include "webpi.h"
-#include "vp8i.h"
+#include "./webpi.h"
+#include "./vp8i.h"
+#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-#define RIFF_HEADER_SIZE 20
-#define VP8_HEADER_SIZE 10
-#define WEBP_HEADER_SIZE (RIFF_HEADER_SIZE + VP8_HEADER_SIZE)
+// In append mode, buffer allocations increase as multiples of this value.
+// Needs to be a power of 2.
#define CHUNK_SIZE 4096
#define MAX_MB_SIZE 4096
@@ -31,21 +31,29 @@ extern "C" {
// Decoding states. State normally flows like HEADER->PARTS0->DATA->DONE.
// If there is any error the decoder goes into state ERROR.
-typedef enum { STATE_HEADER = 0, STATE_PARTS0 = 1,
- STATE_DATA = 2, STATE_DONE = 3,
- STATE_ERROR = 4
+typedef enum {
+ STATE_PRE_VP8, // All data before that of the first VP8 chunk.
+ STATE_VP8_FRAME_HEADER, // For VP8 Frame header (within VP8 chunk).
+ STATE_VP8_PARTS0,
+ STATE_VP8_DATA,
+ STATE_VP8L_HEADER,
+ STATE_VP8L_DATA,
+ STATE_DONE,
+ STATE_ERROR
} DecState;
// Operating state for the MemBuffer
-typedef enum { MEM_MODE_NONE = 0,
- MEM_MODE_APPEND, MEM_MODE_MAP
+typedef enum {
+ MEM_MODE_NONE = 0,
+ MEM_MODE_APPEND,
+ MEM_MODE_MAP
} MemBufferMode;
// storage for partition #0 and partial data (in a rolling fashion)
typedef struct {
MemBufferMode mode_; // Operation mode
- uint32_t start_; // start location of the data to be decoded
- uint32_t end_; // end location
+ size_t start_; // start location of the data to be decoded
+ size_t end_; // end location
size_t buf_size_; // size of the allocated buffer
uint8_t* buf_; // We don't own this buffer in case WebPIUpdate()
@@ -56,11 +64,13 @@ typedef struct {
struct WebPIDecoder {
DecState state_; // current decoding state
WebPDecParams params_; // Params to store output info
- VP8Decoder* dec_;
+ int is_lossless_; // for down-casting 'dec_'.
+ void* dec_; // either a VP8Decoder or a VP8LDecoder instance
VP8Io io_;
MemBuffer mem_; // input memory buffer.
WebPDecBuffer output_; // output buffer (when no external one is supplied)
+ size_t chunk_size_; // Compressed VP8/VP8L size extracted from Header.
};
// MB context to restore in case VP8DecodeMB() fails
@@ -76,102 +86,105 @@ typedef struct {
//------------------------------------------------------------------------------
// MemBuffer: incoming data handling
-#define REMAP(PTR, OLD_BASE, NEW_BASE) (PTR) = (NEW_BASE) + ((PTR) - OLD_BASE)
+static void RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) {
+ if (br->buf_ != NULL) {
+ br->buf_ += offset;
+ br->buf_end_ += offset;
+ }
+}
-static inline size_t MemDataSize(const MemBuffer* mem) {
+static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
return (mem->end_ - mem->start_);
}
+static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
+ MemBuffer* const mem = &idec->mem_;
+ const uint8_t* const new_base = mem->buf_ + mem->start_;
+ // note: for VP8, setting up idec->io_ is only really needed at the beginning
+ // of the decoding, till partition #0 is complete.
+ idec->io_.data = new_base;
+ idec->io_.data_size = MemDataSize(mem);
+
+ if (idec->dec_ != NULL) {
+ if (!idec->is_lossless_) {
+ VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+ const int last_part = dec->num_parts_ - 1;
+ if (offset != 0) {
+ int p;
+ for (p = 0; p <= last_part; ++p) {
+ RemapBitReader(dec->parts_ + p, offset);
+ }
+ // Remap partition #0 data pointer to new offset, but only in MAP
+        // mode (in APPEND mode, partition #0 is copied into fixed memory).
+ if (mem->mode_ == MEM_MODE_MAP) {
+ RemapBitReader(&dec->br_, offset);
+ }
+ }
+ assert(last_part >= 0);
+ dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
+ } else { // Resize lossless bitreader
+ VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+ VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
+ }
+ }
+}
+
// Appends data to the end of MemBuffer->buf_. It expands the allocated memory
// size if required and also updates VP8BitReader's if new memory is allocated.
static int AppendToMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data, size_t data_size) {
MemBuffer* const mem = &idec->mem_;
- VP8Decoder* const dec = idec->dec_;
- const int last_part = dec->num_parts_ - 1;
+ const uint8_t* const old_base = mem->buf_ + mem->start_;
assert(mem->mode_ == MEM_MODE_APPEND);
+ if (data_size > MAX_CHUNK_PAYLOAD) {
+    // security safeguard: trying to allocate more than what the format
+    // allows for a chunk should be treated as a red flag.
+ return 0;
+ }
if (mem->end_ + data_size > mem->buf_size_) { // Need some free memory
- int p;
- uint8_t* new_buf = NULL;
- const int num_chunks = (MemDataSize(mem) + data_size + CHUNK_SIZE - 1)
- / CHUNK_SIZE;
- const size_t new_size = num_chunks * CHUNK_SIZE;
- const uint8_t* const base = mem->buf_ + mem->start_;
-
- new_buf = (uint8_t*)malloc(new_size);
- if (!new_buf) return 0;
- memcpy(new_buf, base, MemDataSize(mem));
-
- // adjust VP8BitReader pointers
- for (p = 0; p <= last_part; ++p) {
- if (dec->parts_[p].buf_) {
- REMAP(dec->parts_[p].buf_, base, new_buf);
- REMAP(dec->parts_[p].buf_end_, base, new_buf);
- }
- }
-
- // adjust memory pointers
+ const size_t current_size = MemDataSize(mem);
+ const uint64_t new_size = (uint64_t)current_size + data_size;
+ const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
+ uint8_t* const new_buf =
+ (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
+ if (new_buf == NULL) return 0;
+ memcpy(new_buf, old_base, current_size);
free(mem->buf_);
mem->buf_ = new_buf;
- mem->buf_size_ = new_size;
-
- mem->end_ = MemDataSize(mem);
+ mem->buf_size_ = (size_t)extra_size;
mem->start_ = 0;
+ mem->end_ = current_size;
}
memcpy(mem->buf_ + mem->end_, data, data_size);
mem->end_ += data_size;
assert(mem->end_ <= mem->buf_size_);
- dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
- // note: setting up idec->io_ is only really needed at the beginning
- // of the decoding, till partition #0 is complete.
- idec->io_.data = mem->buf_ + mem->start_;
- idec->io_.data_size = MemDataSize(mem);
+ DoRemap(idec, mem->buf_ + mem->start_ - old_base);
return 1;
}
static int RemapMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data, size_t data_size) {
- int p;
MemBuffer* const mem = &idec->mem_;
- VP8Decoder* const dec = idec->dec_;
- const int last_part = dec->num_parts_ - 1;
- const uint8_t* base = mem->buf_;
-
+ const uint8_t* const old_base = mem->buf_ + mem->start_;
assert(mem->mode_ == MEM_MODE_MAP);
- if (data_size < mem->buf_size_) {
- return 0; // we cannot remap to a shorter buffer!
- }
- for (p = 0; p <= last_part; ++p) {
- if (dec->parts_[p].buf_) {
- REMAP(dec->parts_[p].buf_, base, data);
- REMAP(dec->parts_[p].buf_end_, base, data);
- }
- }
- dec->parts_[last_part].buf_end_ = data + data_size;
-
- // Remap partition #0 data pointer to new offset.
- if (dec->br_.buf_) {
- REMAP(dec->br_.buf_, base, data);
- REMAP(dec->br_.buf_end_, base, data);
- }
+ if (data_size < mem->buf_size_) return 0; // can't remap to a shorter buffer!
mem->buf_ = (uint8_t*)data;
mem->end_ = mem->buf_size_ = data_size;
- idec->io_.data = data;
- idec->io_.data_size = data_size;
+ DoRemap(idec, mem->buf_ + mem->start_ - old_base);
return 1;
}
static void InitMemBuffer(MemBuffer* const mem) {
mem->mode_ = MEM_MODE_NONE;
- mem->buf_ = 0;
+ mem->buf_ = NULL;
mem->buf_size_ = 0;
- mem->part0_buf_ = 0;
+ mem->part0_buf_ = NULL;
mem->part0_size_ = 0;
}
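The DoRemap()/RemapBitReader() pair above replaces the old REMAP macro: whenever the backing buffer moves (reallocation in APPEND mode, a new mapping in MAP mode), every live bit-reader window is shifted by the signed distance between the old and new base addresses. The idea in isolation, using a hypothetical Reader type as a stand-in for VP8BitReader:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct { const uint8_t* buf; const uint8_t* buf_end; } Reader;

    // Shift a reader's window after its backing buffer moved by 'offset' bytes
    // (offset = new_base - old_base; it may be negative).
    static void RemapReader(Reader* const r, ptrdiff_t offset) {
      if (r->buf != NULL) {
        r->buf += offset;
        r->buf_end += offset;
      }
    }

AppendToMemBuffer() and RemapMemBuffer() both compute the offset as mem->buf_ + mem->start_ - old_base, so the readers keep pointing at the same logical bytes after the data is copied or remapped.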
@@ -193,8 +206,6 @@ static int CheckMemBufferMode(MemBuffer* const mem, MemBufferMode expected) {
return 1;
}
-#undef REMAP
-
//------------------------------------------------------------------------------
// Macroblock-decoding contexts
@@ -228,8 +239,8 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
//------------------------------------------------------------------------------
-static VP8StatusCode IDecError(WebPIDecoder* idec, VP8StatusCode error) {
- if (idec->state_ == STATE_DATA) {
+static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
+ if (idec->state_ == STATE_VP8_DATA) {
VP8Io* const io = &idec->io_;
if (io->teardown) {
io->teardown(io);
@@ -239,65 +250,112 @@ static VP8StatusCode IDecError(WebPIDecoder* idec, VP8StatusCode error) {
return error;
}
-// Header
-static VP8StatusCode DecodeHeader(WebPIDecoder* const idec) {
- uint32_t riff_header_size, bits;
+static void ChangeState(WebPIDecoder* const idec, DecState new_state,
+ size_t consumed_bytes) {
+ MemBuffer* const mem = &idec->mem_;
+ idec->state_ = new_state;
+ mem->start_ += consumed_bytes;
+ assert(mem->start_ <= mem->end_);
+ idec->io_.data = mem->buf_ + mem->start_;
+ idec->io_.data_size = MemDataSize(mem);
+}
+
+// Headers
+static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
+ MemBuffer* const mem = &idec->mem_;
+ const uint8_t* data = mem->buf_ + mem->start_;
+ size_t curr_size = MemDataSize(mem);
+ VP8StatusCode status;
+ WebPHeaderStructure headers;
+
+ headers.data = data;
+ headers.data_size = curr_size;
+ status = WebPParseHeaders(&headers);
+ if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+ return VP8_STATUS_SUSPENDED; // We haven't found a VP8 chunk yet.
+ } else if (status != VP8_STATUS_OK) {
+ return IDecError(idec, status);
+ }
+
+ idec->chunk_size_ = headers.compressed_size;
+ idec->is_lossless_ = headers.is_lossless;
+ if (!idec->is_lossless_) {
+ VP8Decoder* const dec = VP8New();
+ if (dec == NULL) {
+ return VP8_STATUS_OUT_OF_MEMORY;
+ }
+ idec->dec_ = dec;
+#ifdef WEBP_USE_THREAD
+ dec->use_threads_ = (idec->params_.options != NULL) &&
+ (idec->params_.options->use_threads > 0);
+#else
+ dec->use_threads_ = 0;
+#endif
+ dec->alpha_data_ = headers.alpha_data;
+ dec->alpha_data_size_ = headers.alpha_data_size;
+ ChangeState(idec, STATE_VP8_FRAME_HEADER, headers.offset);
+ } else {
+ VP8LDecoder* const dec = VP8LNew();
+ if (dec == NULL) {
+ return VP8_STATUS_OUT_OF_MEMORY;
+ }
+ idec->dec_ = dec;
+ ChangeState(idec, STATE_VP8L_HEADER, headers.offset);
+ }
+ return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
- uint32_t curr_size = MemDataSize(&idec->mem_);
- uint32_t chunk_size;
+ const size_t curr_size = MemDataSize(&idec->mem_);
+ uint32_t bits;
- if (curr_size < WEBP_HEADER_SIZE) {
+ if (curr_size < VP8_FRAME_HEADER_SIZE) {
+ // Not enough data bytes to extract VP8 Frame Header.
return VP8_STATUS_SUSPENDED;
}
-
- // Validate and Skip over RIFF header
- chunk_size = WebPCheckRIFFHeader(&data, &curr_size);
- if (chunk_size == 0 ||
- curr_size < VP8_HEADER_SIZE ||
- !VP8GetInfo(data, curr_size, chunk_size, NULL, NULL, NULL)) {
+ if (!VP8GetInfo(data, curr_size, idec->chunk_size_, NULL, NULL)) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
- riff_header_size = idec->mem_.end_ - curr_size;
bits = data[0] | (data[1] << 8) | (data[2] << 16);
+ idec->mem_.part0_size_ = (bits >> 5) + VP8_FRAME_HEADER_SIZE;
- idec->mem_.part0_size_ = (bits >> 5) + VP8_HEADER_SIZE;
- idec->mem_.start_ += riff_header_size;
- assert(idec->mem_.start_ <= idec->mem_.end_);
-
- idec->io_.data_size -= riff_header_size;
idec->io_.data = data;
- idec->state_ = STATE_PARTS0;
+ idec->io_.data_size = curr_size;
+ idec->state_ = STATE_VP8_PARTS0;
return VP8_STATUS_OK;
}
// Partition #0
-static int CopyParts0Data(WebPIDecoder* idec) {
- VP8BitReader* const br = &idec->dec_->br_;
+static int CopyParts0Data(WebPIDecoder* const idec) {
+ VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+ VP8BitReader* const br = &dec->br_;
const size_t psize = br->buf_end_ - br->buf_;
MemBuffer* const mem = &idec->mem_;
- assert(!mem->part0_buf_);
+ assert(!idec->is_lossless_);
+ assert(mem->part0_buf_ == NULL);
assert(psize > 0);
- assert(psize <= mem->part0_size_);
+ assert(psize <= mem->part0_size_); // Format limit: no need for runtime check
if (mem->mode_ == MEM_MODE_APPEND) {
// We copy and grab ownership of the partition #0 data.
uint8_t* const part0_buf = (uint8_t*)malloc(psize);
- if (!part0_buf) {
+ if (part0_buf == NULL) {
return 0;
}
memcpy(part0_buf, br->buf_, psize);
mem->part0_buf_ = part0_buf;
- mem->start_ += psize;
br->buf_ = part0_buf;
br->buf_end_ = part0_buf + psize;
} else {
// Else: just keep pointers to the partition #0's data in dec_->br_.
}
+ mem->start_ += psize;
return 1;
}
static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
- VP8Decoder* const dec = idec->dec_;
+ VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8Io* const io = &idec->io_;
const WebPDecParams* const params = &idec->params_;
WebPDecBuffer* const output = params->output;
@@ -324,43 +382,37 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
return IDecError(idec, dec->status_);
}
- // Allocate memory and prepare everything.
- if (!VP8InitFrame(dec, io)) {
- return IDecError(idec, dec->status_);
- }
-
if (!CopyParts0Data(idec)) {
return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
}
- // Finish setting up the decoding parameters.
- if (VP8FinishFrameSetup(dec, io) != VP8_STATUS_OK) {
+ // Finish setting up the decoding parameters. Will call io->setup().
+ if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
}
+
// Note: past this point, teardown() must always be called
// in case of error.
- idec->state_ = STATE_DATA;
+ idec->state_ = STATE_VP8_DATA;
+ // Allocate memory and prepare everything.
+ if (!VP8InitFrame(dec, io)) {
+ return IDecError(idec, dec->status_);
+ }
return VP8_STATUS_OK;
}
// Remaining partitions
static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
- VP8BitReader* br;
- VP8Decoder* const dec = idec->dec_;
+ VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8Io* const io = &idec->io_;
assert(dec->ready_);
- br = &dec->br_;
for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
VP8BitReader* token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
if (dec->mb_x_ == 0) {
- VP8MB* const left = dec->mb_info_ - 1;
- left->nz_ = 0;
- left->dc_nz_ = 0;
- memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
+ VP8InitScanline(dec);
}
-
for (; dec->mb_x_ < dec->mb_w_; dec->mb_x_++) {
MBContext context;
SaveContext(dec, token_br, &context);
@@ -383,17 +435,14 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
assert(idec->mem_.start_ <= idec->mem_.end_);
}
}
- if (dec->filter_type_ > 0) {
- VP8FilterRow(dec);
- }
- if (!VP8FinishRow(dec, io)) {
+ if (!VP8ProcessRow(dec, io)) {
return IDecError(idec, VP8_STATUS_USER_ABORT);
}
dec->mb_x_ = 0;
}
-
- if (io->teardown) {
- io->teardown(io);
+ // Synchronize the thread and check for errors.
+ if (!VP8ExitCritical(dec, io)) {
+ return IDecError(idec, VP8_STATUS_USER_ABORT);
}
dec->ready_ = 0;
idec->state_ = STATE_DONE;
@@ -401,39 +450,99 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
return VP8_STATUS_OK;
}
+static int ErrorStatusLossless(WebPIDecoder* const idec, VP8StatusCode status) {
+ if (status == VP8_STATUS_SUSPENDED || status == VP8_STATUS_NOT_ENOUGH_DATA) {
+ return VP8_STATUS_SUSPENDED;
+ }
+ return IDecError(idec, status);
+}
+
+static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
+ VP8Io* const io = &idec->io_;
+ VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+ const WebPDecParams* const params = &idec->params_;
+ WebPDecBuffer* const output = params->output;
+ size_t curr_size = MemDataSize(&idec->mem_);
+ assert(idec->is_lossless_);
+
+  // Wait until there's enough data to decode the header.
+ if (curr_size < (idec->chunk_size_ >> 3)) {
+ return VP8_STATUS_SUSPENDED;
+ }
+ if (!VP8LDecodeHeader(dec, io)) {
+ return ErrorStatusLossless(idec, dec->status_);
+ }
+ // Allocate/verify output buffer now.
+ dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
+ output);
+ if (dec->status_ != VP8_STATUS_OK) {
+ return IDecError(idec, dec->status_);
+ }
+
+ idec->state_ = STATE_VP8L_DATA;
+ return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
+ VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+ const size_t curr_size = MemDataSize(&idec->mem_);
+ assert(idec->is_lossless_);
+
+  // At present the lossless decoder can't decode the image incrementally, so
+  // wait until all the image data has been aggregated before decoding.
+ if (curr_size < idec->chunk_size_) {
+ return VP8_STATUS_SUSPENDED;
+ }
+
+ if (!VP8LDecodeImage(dec)) {
+ return ErrorStatusLossless(idec, dec->status_);
+ }
+
+ idec->state_ = STATE_DONE;
+
+ return VP8_STATUS_OK;
+}
+
// Main decoding loop
static VP8StatusCode IDecode(WebPIDecoder* idec) {
VP8StatusCode status = VP8_STATUS_SUSPENDED;
- assert(idec->dec_);
- if (idec->state_ == STATE_HEADER) {
- status = DecodeHeader(idec);
+ if (idec->state_ == STATE_PRE_VP8) {
+ status = DecodeWebPHeaders(idec);
+ } else {
+ if (idec->dec_ == NULL) {
+ return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
+ }
}
- if (idec->state_ == STATE_PARTS0) {
+ if (idec->state_ == STATE_VP8_FRAME_HEADER) {
+ status = DecodeVP8FrameHeader(idec);
+ }
+ if (idec->state_ == STATE_VP8_PARTS0) {
status = DecodePartition0(idec);
}
- if (idec->state_ == STATE_DATA) {
+ if (idec->state_ == STATE_VP8_DATA) {
status = DecodeRemaining(idec);
}
+ if (idec->state_ == STATE_VP8L_HEADER) {
+ status = DecodeVP8LHeader(idec);
+ }
+ if (idec->state_ == STATE_VP8L_DATA) {
+ status = DecodeVP8LData(idec);
+ }
return status;
}
//------------------------------------------------------------------------------
// Public functions
-WebPIDecoder* WebPINewDecoder(WebPDecBuffer* const output_buffer) {
- WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(WebPIDecoder));
+WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
+ WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(*idec));
if (idec == NULL) {
return NULL;
}
- idec->dec_ = VP8New();
- if (idec->dec_ == NULL) {
- free(idec);
- return NULL;
- }
-
- idec->state_ = STATE_HEADER;
+ idec->state_ = STATE_PRE_VP8;
+ idec->chunk_size_ = 0;
InitMemBuffer(&idec->mem_);
WebPInitDecBuffer(&idec->output_);
@@ -446,8 +555,8 @@ WebPIDecoder* WebPINewDecoder(WebPDecBuffer* const output_buffer) {
return idec;
}
-WebPIDecoder* WebPIDecode(const uint8_t* data, uint32_t data_size,
- WebPDecoderConfig* const config) {
+WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
+ WebPDecoderConfig* config) {
WebPIDecoder* idec;
// Parse the bitstream's features, if requested:
@@ -458,7 +567,7 @@ WebPIDecoder* WebPIDecode(const uint8_t* data, uint32_t data_size,
}
// Create an instance of the incremental decoder
idec = WebPINewDecoder(config ? &config->output : NULL);
- if (!idec) {
+ if (idec == NULL) {
return NULL;
}
// Finish initialization
@@ -468,9 +577,15 @@ WebPIDecoder* WebPIDecode(const uint8_t* data, uint32_t data_size,
return idec;
}
-void WebPIDelete(WebPIDecoder* const idec) {
- if (!idec) return;
- VP8Delete(idec->dec_);
+void WebPIDelete(WebPIDecoder* idec) {
+ if (idec == NULL) return;
+ if (idec->dec_ != NULL) {
+ if (!idec->is_lossless_) {
+ VP8Delete(idec->dec_);
+ } else {
+ VP8LDelete(idec->dec_);
+ }
+ }
ClearMemBuffer(&idec->mem_);
WebPFreeDecBuffer(&idec->output_);
free(idec);
@@ -479,19 +594,12 @@ void WebPIDelete(WebPIDecoder* const idec) {
//------------------------------------------------------------------------------
// Wrapper toward WebPINewDecoder
-WebPIDecoder* WebPINew(WEBP_CSP_MODE mode) {
- WebPIDecoder* const idec = WebPINewDecoder(NULL);
- if (!idec) return NULL;
- idec->output_.colorspace = mode;
- return idec;
-}
-
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
- int output_buffer_size, int output_stride) {
+ size_t output_buffer_size, int output_stride) {
WebPIDecoder* idec;
if (mode >= MODE_YUV) return NULL;
idec = WebPINewDecoder(NULL);
- if (!idec) return NULL;
+ if (idec == NULL) return NULL;
idec->output_.colorspace = mode;
idec->output_.is_external_memory = 1;
idec->output_.u.RGBA.rgba = output_buffer;
@@ -500,12 +608,13 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
return idec;
}
-WebPIDecoder* WebPINewYUV(uint8_t* luma, int luma_size, int luma_stride,
- uint8_t* u, int u_size, int u_stride,
- uint8_t* v, int v_size, int v_stride) {
+WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
+ uint8_t* u, size_t u_size, int u_stride,
+ uint8_t* v, size_t v_size, int v_stride,
+ uint8_t* a, size_t a_size, int a_stride) {
WebPIDecoder* const idec = WebPINewDecoder(NULL);
- if (!idec) return NULL;
- idec->output_.colorspace = MODE_YUV;
+ if (idec == NULL) return NULL;
+ idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
idec->output_.is_external_memory = 1;
idec->output_.u.YUVA.y = luma;
idec->output_.u.YUVA.y_stride = luma_stride;
@@ -516,16 +625,25 @@ WebPIDecoder* WebPINewYUV(uint8_t* luma, int luma_size, int luma_stride,
idec->output_.u.YUVA.v = v;
idec->output_.u.YUVA.v_stride = v_stride;
idec->output_.u.YUVA.v_size = v_size;
+ idec->output_.u.YUVA.a = a;
+ idec->output_.u.YUVA.a_stride = a_stride;
+ idec->output_.u.YUVA.a_size = a_size;
return idec;
}
+WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
+ uint8_t* u, size_t u_size, int u_stride,
+ uint8_t* v, size_t v_size, int v_stride) {
+ return WebPINewYUVA(luma, luma_size, luma_stride,
+ u, u_size, u_stride,
+ v, v_size, v_stride,
+ NULL, 0, 0);
+}
+
//------------------------------------------------------------------------------
static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
assert(idec);
- if (idec->dec_ == NULL) {
- return VP8_STATUS_USER_ABORT;
- }
if (idec->state_ == STATE_ERROR) {
return VP8_STATUS_BITSTREAM_ERROR;
}
@@ -535,8 +653,8 @@ static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
return VP8_STATUS_SUSPENDED;
}
-VP8StatusCode WebPIAppend(WebPIDecoder* const idec, const uint8_t* data,
- uint32_t data_size) {
+VP8StatusCode WebPIAppend(WebPIDecoder* idec,
+ const uint8_t* data, size_t data_size) {
VP8StatusCode status;
if (idec == NULL || data == NULL) {
return VP8_STATUS_INVALID_PARAM;
@@ -556,8 +674,8 @@ VP8StatusCode WebPIAppend(WebPIDecoder* const idec, const uint8_t* data,
return IDecode(idec);
}
-VP8StatusCode WebPIUpdate(WebPIDecoder* const idec, const uint8_t* data,
- uint32_t data_size) {
+VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
+ const uint8_t* data, size_t data_size) {
VP8StatusCode status;
if (idec == NULL || data == NULL) {
return VP8_STATUS_INVALID_PARAM;
@@ -580,61 +698,67 @@ VP8StatusCode WebPIUpdate(WebPIDecoder* const idec, const uint8_t* data,
//------------------------------------------------------------------------------
static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
- if (!idec || !idec->dec_ || idec->state_ <= STATE_PARTS0) {
+ if (idec == NULL || idec->dec_ == NULL) {
+ return NULL;
+ }
+ if (idec->state_ <= STATE_VP8_PARTS0) {
return NULL;
}
return idec->params_.output;
}
-const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* const idec,
- int* const left, int* const top,
- int* const width, int* const height) {
+const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
+ int* left, int* top,
+ int* width, int* height) {
const WebPDecBuffer* const src = GetOutputBuffer(idec);
- if (left) *left = 0;
- if (top) *top = 0;
+ if (left != NULL) *left = 0;
+ if (top != NULL) *top = 0;
// TODO(skal): later include handling of rotations.
if (src) {
- if (width) *width = src->width;
- if (height) *height = idec->params_.last_y;
+ if (width != NULL) *width = src->width;
+ if (height != NULL) *height = idec->params_.last_y;
} else {
- if (width) *width = 0;
- if (height) *height = 0;
+ if (width != NULL) *width = 0;
+ if (height != NULL) *height = 0;
}
return src;
}
-uint8_t* WebPIDecGetRGB(const WebPIDecoder* const idec, int* last_y,
+uint8_t* WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y,
int* width, int* height, int* stride) {
const WebPDecBuffer* const src = GetOutputBuffer(idec);
- if (!src) return NULL;
+ if (src == NULL) return NULL;
if (src->colorspace >= MODE_YUV) {
return NULL;
}
- if (last_y) *last_y = idec->params_.last_y;
- if (width) *width = src->width;
- if (height) *height = src->height;
- if (stride) *stride = src->u.RGBA.stride;
+ if (last_y != NULL) *last_y = idec->params_.last_y;
+ if (width != NULL) *width = src->width;
+ if (height != NULL) *height = src->height;
+ if (stride != NULL) *stride = src->u.RGBA.stride;
return src->u.RGBA.rgba;
}
-uint8_t* WebPIDecGetYUV(const WebPIDecoder* const idec, int* last_y,
- uint8_t** u, uint8_t** v,
- int* width, int* height, int *stride, int* uv_stride) {
+uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
+ uint8_t** u, uint8_t** v, uint8_t** a,
+ int* width, int* height,
+ int* stride, int* uv_stride, int* a_stride) {
const WebPDecBuffer* const src = GetOutputBuffer(idec);
- if (!src) return NULL;
+ if (src == NULL) return NULL;
if (src->colorspace < MODE_YUV) {
return NULL;
}
- if (last_y) *last_y = idec->params_.last_y;
- if (u) *u = src->u.YUVA.u;
- if (v) *v = src->u.YUVA.v;
- if (width) *width = src->width;
- if (height) *height = src->height;
- if (stride) *stride = src->u.YUVA.y_stride;
- if (uv_stride) *uv_stride = src->u.YUVA.u_stride;
+ if (last_y != NULL) *last_y = idec->params_.last_y;
+ if (u != NULL) *u = src->u.YUVA.u;
+ if (v != NULL) *v = src->u.YUVA.v;
+ if (a != NULL) *a = src->u.YUVA.a;
+ if (width != NULL) *width = src->width;
+ if (height != NULL) *height = src->height;
+ if (stride != NULL) *stride = src->u.YUVA.y_stride;
+ if (uv_stride != NULL) *uv_stride = src->u.YUVA.u_stride;
+ if (a_stride != NULL) *a_stride = src->u.YUVA.a_stride;
return src->u.YUVA.y;
}
@@ -644,7 +768,7 @@ int WebPISetIOHooks(WebPIDecoder* const idec,
VP8IoSetupHook setup,
VP8IoTeardownHook teardown,
void* user_data) {
- if (!idec || !idec->dec_ || idec->state_ > STATE_HEADER) {
+ if (idec == NULL || idec->state_ > STATE_PRE_VP8) {
return 0;
}
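With the lossless branch folded in, the public incremental API keeps the same shape: create a decoder, append bytes as they arrive, and poll until the status flips from SUSPENDED to OK. A minimal consumer sketch, where ReadMore() is a hypothetical callback that fills 'buf' and returns the number of bytes read (0 at end of input):

    #include <stddef.h>
    #include <stdint.h>
    #include "webp/decode.h"

    // Feed a stream into the incremental decoder. Returns 1 on success.
    static int DecodeIncrementally(size_t (*ReadMore)(uint8_t* buf, size_t max)) {
      uint8_t buf[4096];
      WebPIDecoder* const idec = WebPINewDecoder(NULL);  // decoder-owned output
      if (idec == NULL) return 0;
      while (1) {
        const size_t n = ReadMore(buf, sizeof(buf));
        if (n == 0) {                // ran out of input before the image ended
          WebPIDelete(idec);
          return 0;
        }
        const VP8StatusCode status = WebPIAppend(idec, buf, n);
        if (status == VP8_STATUS_OK) break;         // fully decoded
        if (status != VP8_STATUS_SUSPENDED) {       // hard error
          WebPIDelete(idec);
          return 0;
        }
        // VP8_STATUS_SUSPENDED: more data needed; rows decoded so far are
        // reachable through WebPIDecGetRGB()/WebPIDecodedArea().
      }
      WebPIDelete(idec);
      return 1;
    }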
diff --git a/src/dec/io.c b/src/dec/io.c
index f1137e48..c5746f74 100644
--- a/src/dec/io.c
+++ b/src/dec/io.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -11,224 +11,15 @@
#include <assert.h>
#include <stdlib.h>
-#include "vp8i.h"
-#include "webpi.h"
-#include "yuv.h"
+#include "../dec/vp8i.h"
+#include "./webpi.h"
+#include "../dsp/dsp.h"
+#include "../dsp/yuv.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-#define FANCY_UPSAMPLING // undefined to remove fancy upsampling support
-
-//------------------------------------------------------------------------------
-// Fancy upsampler
-
-#ifdef FANCY_UPSAMPLING
-
-// Given samples laid out in a square as:
-// [a b]
-// [c d]
-// we interpolate u/v as:
-// ([9*a + 3*b + 3*c + d 3*a + 9*b + 3*c + d] + [8 8]) / 16
-// ([3*a + b + 9*c + 3*d a + 3*b + 3*c + 9*d] [8 8]) / 16
-
-// We process u and v together stashed into 32bit (16bit each).
-#define LOAD_UV(u,v) ((u) | ((v) << 16))
-
-#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
-static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
- const uint8_t* top_u, const uint8_t* top_v, \
- const uint8_t* cur_u, const uint8_t* cur_v, \
- uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
- int x; \
- const int last_pixel_pair = (len - 1) >> 1; \
- uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \
- uint32_t l_uv = LOAD_UV(cur_u[0], cur_v[0]); /* left-sample */ \
- if (top_y) { \
- const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \
- FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \
- } \
- if (bottom_y) { \
- const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \
- FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst); \
- } \
- for (x = 1; x <= last_pixel_pair; ++x) { \
- const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]); /* top sample */ \
- const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]); /* sample */ \
- /* precompute invariant values associated with first and second diagonals*/\
- const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u; \
- const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3; \
- const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3; \
- if (top_y) { \
- const uint32_t uv0 = (diag_12 + tl_uv) >> 1; \
- const uint32_t uv1 = (diag_03 + t_uv) >> 1; \
- FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \
- top_dst + (2 * x - 1) * XSTEP); \
- FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), \
- top_dst + (2 * x - 0) * XSTEP); \
- } \
- if (bottom_y) { \
- const uint32_t uv0 = (diag_03 + l_uv) >> 1; \
- const uint32_t uv1 = (diag_12 + uv) >> 1; \
- FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \
- bottom_dst + (2 * x - 1) * XSTEP); \
- FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), \
- bottom_dst + (2 * x + 0) * XSTEP); \
- } \
- tl_uv = t_uv; \
- l_uv = uv; \
- } \
- if (!(len & 1)) { \
- if (top_y) { \
- const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \
- FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), \
- top_dst + (len - 1) * XSTEP); \
- } \
- if (bottom_y) { \
- const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \
- FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), \
- bottom_dst + (len - 1) * XSTEP); \
- } \
- } \
-}
-
-// All variants implemented.
-UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3)
-UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3)
-UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
-UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
-UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
-UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
-UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
-// These two don't erase the alpha value
-UPSAMPLE_FUNC(UpsampleRgbKeepAlphaLinePair, VP8YuvToRgb, 4)
-UPSAMPLE_FUNC(UpsampleBgrKeepAlphaLinePair, VP8YuvToBgr, 4)
-UPSAMPLE_FUNC(UpsampleArgbKeepAlphaLinePair, VP8YuvToArgbKeepA, 4)
-UPSAMPLE_FUNC(UpsampleRgba4444KeepAlphaLinePair, VP8YuvToRgba4444KeepA, 2)
-
-#undef LOAD_UV
-#undef UPSAMPLE_FUNC
-
-// Fancy upsampling functions to convert YUV to RGB
-WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
-WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[MODE_LAST];
-
-static void InitUpsamplers(void) {
- WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
- WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
- WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
- WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
- WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
- WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
- WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
-
- WebPUpsamplersKeepAlpha[MODE_RGB] = UpsampleRgbLinePair;
- WebPUpsamplersKeepAlpha[MODE_RGBA] = UpsampleRgbKeepAlphaLinePair;
- WebPUpsamplersKeepAlpha[MODE_BGR] = UpsampleBgrLinePair;
- WebPUpsamplersKeepAlpha[MODE_BGRA] = UpsampleBgrKeepAlphaLinePair;
- WebPUpsamplersKeepAlpha[MODE_ARGB] = UpsampleArgbKeepAlphaLinePair;
- WebPUpsamplersKeepAlpha[MODE_RGBA_4444] = UpsampleRgba4444KeepAlphaLinePair;
- WebPUpsamplersKeepAlpha[MODE_RGB_565] = UpsampleRgb565LinePair;
-
- // If defined, use CPUInfo() to overwrite some pointers with faster versions.
- if (VP8DecGetCPUInfo) {
- if (VP8DecGetCPUInfo(kSSE2)) {
-#if defined(__SSE2__) || defined(_MSC_VER)
- WebPInitUpsamplersSSE2();
-#endif
- }
- }
-}
-
-#endif // FANCY_UPSAMPLING
-
-//------------------------------------------------------------------------------
-// simple point-sampling
-
-#define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
-static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
- const uint8_t* u, const uint8_t* v, \
- uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
- int i; \
- for (i = 0; i < len - 1; i += 2) { \
- FUNC(top_y[0], u[0], v[0], top_dst); \
- FUNC(top_y[1], u[0], v[0], top_dst + XSTEP); \
- FUNC(bottom_y[0], u[0], v[0], bottom_dst); \
- FUNC(bottom_y[1], u[0], v[0], bottom_dst + XSTEP); \
- top_y += 2; \
- bottom_y += 2; \
- u++; \
- v++; \
- top_dst += 2 * XSTEP; \
- bottom_dst += 2 * XSTEP; \
- } \
- if (i == len - 1) { /* last one */ \
- FUNC(top_y[0], u[0], v[0], top_dst); \
- FUNC(bottom_y[0], u[0], v[0], bottom_dst); \
- } \
-}
-
-// All variants implemented.
-SAMPLE_FUNC(SampleRgbLinePair, VP8YuvToRgb, 3)
-SAMPLE_FUNC(SampleBgrLinePair, VP8YuvToBgr, 3)
-SAMPLE_FUNC(SampleRgbaLinePair, VP8YuvToRgba, 4)
-SAMPLE_FUNC(SampleBgraLinePair, VP8YuvToBgra, 4)
-SAMPLE_FUNC(SampleArgbLinePair, VP8YuvToArgb, 4)
-SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2)
-SAMPLE_FUNC(SampleRgb565LinePair, VP8YuvToRgb565, 2)
-
-#undef SAMPLE_FUNC
-
-// Main methods.
-typedef void (*SampleLinePairFunc)(
- const uint8_t* top_y, const uint8_t* bottom_y,
- const uint8_t* u, const uint8_t* v,
- uint8_t* top_dst, uint8_t* bottom_dst, int len);
-
-static const SampleLinePairFunc kSamplers[MODE_LAST] = {
- SampleRgbLinePair, // MODE_RGB
- SampleRgbaLinePair, // MODE_RGBA
- SampleBgrLinePair, // MODE_BGR
- SampleBgraLinePair, // MODE_BGRA
- SampleArgbLinePair, // MODE_ARGB
- SampleRgba4444LinePair, // MODE_RGBA_4444
- SampleRgb565LinePair // MODE_RGB_565
-};
-
-//------------------------------------------------------------------------------
-// YUV444 converter
-
-#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
-static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
- uint8_t* dst, int len) { \
- int i; \
- for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \
-}
-
-YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb, 3)
-YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr, 3)
-YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba, 4)
-YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra, 4)
-YUV444_FUNC(Yuv444ToArgb, VP8YuvToArgb, 4)
-YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2)
-YUV444_FUNC(Yuv444ToRgb565, VP8YuvToRgb565, 2)
-
-#undef YUV444_FUNC
-
-typedef void (*YUV444Func)(const uint8_t* y, const uint8_t* u, const uint8_t* v,
- uint8_t* dst, int len);
-
-static const YUV444Func kYUV444Converters[MODE_LAST] = {
- Yuv444ToRgb, // MODE_RGB
- Yuv444ToRgba, // MODE_RGBA
- Yuv444ToBgr, // MODE_BGR
- Yuv444ToBgra, // MODE_BGRA
- Yuv444ToArgb, // MODE_ARGB
- Yuv444ToRgba4444, // MODE_RGBA_4444
- Yuv444ToRgb565 // MODE_RGB_565
-};
-
//------------------------------------------------------------------------------
// Main YUV<->RGB conversion functions
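The block removed above is the fancy upsampler itself; io.c now reaches it only through the WebPUpsamplers[] table (see EmitFancyRGB() below). For reference, the chroma interpolation it implements weights the four neighbouring U/V samples 9:3:3:1 with a +8 rounding term, as described in the deleted comment. A scalar, single-channel sketch of one output sample (the real macro packs U and V into one 32-bit value):

    #include <stdint.h>

    // Interpolate the up-sampled chroma value nearest to sample 'a' of the
    // 2x2 neighbourhood [a b; c d]: (9*a + 3*b + 3*c + d + 8) / 16.
    static uint8_t UpsampleNearA(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
      return (uint8_t)((9 * a + 3 * b + 3 * c + d + 8) >> 4);
    }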
@@ -241,11 +32,12 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
const int mb_w = io->mb_w;
const int mb_h = io->mb_h;
const int uv_w = (mb_w + 1) / 2;
+ const int uv_h = (mb_h + 1) / 2;
int j;
for (j = 0; j < mb_h; ++j) {
memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w);
}
- for (j = 0; j < (mb_h + 1) / 2; ++j) {
+ for (j = 0; j < uv_h; ++j) {
memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w);
memcpy(v_dst + j * buf->v_stride, io->v + j * io->uv_stride, uv_w);
}
@@ -260,7 +52,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
const uint8_t* y_src = io->y;
const uint8_t* u_src = io->u;
const uint8_t* v_src = io->v;
- const SampleLinePairFunc sample = kSamplers[output->colorspace];
+ const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace];
const int mb_w = io->mb_w;
const int last = io->mb_h - 1;
int j;
@@ -289,7 +81,7 @@ static int EmitRGB(const VP8Io* const io, WebPDecParams* const p) {
const uint8_t* y_src = io->y;
const uint8_t* u_src = io->u;
const uint8_t* v_src = io->v;
- const YUV444Func convert = kYUV444Converters[output->colorspace];
+ const WebPYUV444Converter convert = WebPYUV444Converters[output->colorspace];
const int mb_w = io->mb_w;
const int last = io->mb_h;
int j;
@@ -312,9 +104,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
int num_lines_out = io->mb_h; // a priori guess
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
- const WebPUpsampleLinePairFunc upsample =
- io->a ? WebPUpsamplersKeepAlpha[p->output->colorspace]
- : WebPUpsamplers[p->output->colorspace];
+ WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
const uint8_t* cur_y = io->y;
const uint8_t* cur_u = io->u;
const uint8_t* cur_v = io->v;
@@ -330,11 +120,9 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w);
} else {
// We can finish the left-over line from previous call.
- // Warning! Don't overwrite the alpha values (if any), as they
- // are not lagging one line behind but are already written.
upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
dst - buf->stride, dst, mb_w);
- num_lines_out++;
+ ++num_lines_out;
}
// Loop over each output pairs of row.
for (; y + 2 < y_end; y += 2) {
@@ -372,151 +160,133 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
//------------------------------------------------------------------------------
-#ifdef WEBP_EXPERIMENTAL_FEATURES
static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
+ const uint8_t* alpha = io->a;
+ const WebPYUVABuffer* const buf = &p->output->u.YUVA;
const int mb_w = io->mb_w;
const int mb_h = io->mb_h;
- int j;
- const WebPYUVABuffer* const buf = &p->output->u.YUVA;
uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
- const uint8_t* alpha = io->a;
- if (alpha) {
+ int j;
+
+ if (alpha != NULL) {
for (j = 0; j < mb_h; ++j) {
memcpy(dst, alpha, mb_w * sizeof(*dst));
alpha += io->width;
dst += buf->a_stride;
}
+ } else if (buf->a != NULL) {
+    // the user requested alpha, but there is none; set it to opaque.
+ for (j = 0; j < mb_h; ++j) {
+ memset(dst, 0xff, mb_w * sizeof(*dst));
+ dst += buf->a_stride;
+ }
}
return 0;
}
+static int GetAlphaSourceRow(const VP8Io* const io,
+ const uint8_t** alpha, int* const num_rows) {
+ int start_y = io->mb_y;
+ *num_rows = io->mb_h;
+
+ // Compensate for the 1-line delay of the fancy upscaler.
+ // This is similar to EmitFancyRGB().
+ if (io->fancy_upsampling) {
+ if (start_y == 0) {
+ // We don't process the last row yet. It'll be done during the next call.
+ --*num_rows;
+ } else {
+ --start_y;
+ // Fortunately, *alpha data is persistent, so we can go back
+ // one row and finish alpha blending, now that the fancy upscaler
+ // completed the YUV->RGB interpolation.
+ *alpha -= io->width;
+ }
+ if (io->crop_top + io->mb_y + io->mb_h == io->crop_bottom) {
+      // If it's the very last call, we process all the remaining rows!
+ *num_rows = io->crop_bottom - io->crop_top - start_y;
+ }
+ }
+ return start_y;
+}
+
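The bookkeeping above is easiest to see with concrete numbers. Below is a standalone sketch that replays the same arithmetic, assuming a 40-row image delivered in 16-row bands with no cropping (illustration only, not part of this change):

    #include <stdio.h>

    /* Replays the GetAlphaSourceRow() arithmetic under fancy upsampling.
     * Assumptions: rows arrive in 16-row bands, crop_top == 0 and
     * crop_bottom == height. */
    int main(void) {
      const int height = 40;
      int mb_y;
      for (mb_y = 0; mb_y < height; mb_y += 16) {
        const int mb_h = (height - mb_y < 16) ? height - mb_y : 16;
        int start_y = mb_y;
        int num_rows = mb_h;
        if (start_y == 0) {
          --num_rows;                    /* the last row lags one call behind */
        } else {
          --start_y;                     /* go back one row and finish it now */
        }
        if (mb_y + mb_h == height) {
          num_rows = height - start_y;   /* very last call: flush the rest */
        }
        printf("band at mb_y=%2d -> alpha rows [%2d..%2d)\n",
               mb_y, start_y, start_y + num_rows);
      }
      return 0;
    }

The printed ranges [0..15), [15..31) and [31..40) tile the image exactly once, which is why the callers below can use start_y and num_rows as-is.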
static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
- const int mb_w = io->mb_w;
- const int mb_h = io->mb_h;
- int i, j;
- const WebPRGBABuffer* const buf = &p->output->u.RGBA;
- uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
const uint8_t* alpha = io->a;
- if (alpha) {
- for (j = 0; j < mb_h; ++j) {
- for (i = 0; i < mb_w; ++i) {
- dst[4 * i + 3] = alpha[i];
+ if (alpha != NULL) {
+ const int mb_w = io->mb_w;
+ int i, j;
+ const WEBP_CSP_MODE colorspace = p->output->colorspace;
+ const int alpha_first =
+ (colorspace == MODE_ARGB || colorspace == MODE_Argb);
+ const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+ int num_rows;
+ const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
+ uint32_t alpha_mask = 0xff;
+
+ {
+ uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+ uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
+ for (j = 0; j < num_rows; ++j) {
+ for (i = 0; i < mb_w; ++i) {
+ const uint32_t alpha_value = alpha[i];
+ dst[4 * i] = alpha_value;
+ alpha_mask &= alpha_value;
+ }
+ alpha += io->width;
+ dst += buf->stride;
+ }
+ // alpha_mask is < 0xff if there's non-trivial alpha to premultiply with.
+ if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) {
+ WebPApplyAlphaMultiply(base_rgba, alpha_first,
+ mb_w, num_rows, buf->stride);
}
- alpha += io->width;
- dst += buf->stride;
}
}
return 0;
}
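Side detail on the alpha_mask accumulator used above: AND-ing every alpha value into a register initialised to 0xff leaves 0xff only if the whole block is fully opaque, so the premultiply pass can be skipped in the common case. A minimal standalone check (illustration only):

    #include <assert.h>

    int main(void) {
      const unsigned char opaque[4]      = { 0xff, 0xff, 0xff, 0xff };
      const unsigned char translucent[4] = { 0xff, 0x80, 0xff, 0xff };
      unsigned mask_opaque = 0xff, mask_translucent = 0xff;
      int i;
      for (i = 0; i < 4; ++i) {
        mask_opaque      &= opaque[i];
        mask_translucent &= translucent[i];
      }
      assert(mask_opaque == 0xff);        /* premultiplication can be skipped */
      assert(mask_translucent != 0xff);   /* at least one pixel needs it */
      return 0;
    }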
-#endif /* WEBP_EXPERIMENTAL_FEATURES */
-
-//------------------------------------------------------------------------------
-// Simple picture rescaler
-
-// TODO(skal): start a common library for encoder and decoder, and factorize
-// this code in.
-
-#define RFIX 30
-#define MULT(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
-
-static void InitRescaler(WebPRescaler* const wrk,
- int src_width, int src_height,
- uint8_t* dst,
- int dst_width, int dst_height, int dst_stride,
- int x_add, int x_sub, int y_add, int y_sub,
- int32_t* work) {
- wrk->x_expand = (src_width < dst_width);
- wrk->src_width = src_width;
- wrk->src_height = src_height;
- wrk->dst_width = dst_width;
- wrk->dst_height = dst_height;
- wrk->dst = dst;
- wrk->dst_stride = dst_stride;
- // for 'x_expand', we use bilinear interpolation
- wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
- wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
- wrk->y_accum = y_add;
- wrk->y_add = y_add;
- wrk->y_sub = y_sub;
- wrk->fx_scale = (1 << RFIX) / x_sub;
- wrk->fy_scale = (1 << RFIX) / y_sub;
- wrk->fxy_scale = wrk->x_expand ?
- ((int64_t)dst_height << RFIX) / (x_sub * src_height) :
- ((int64_t)dst_height << RFIX) / (x_add * src_height);
- wrk->irow = work;
- wrk->frow = work + dst_width;
-}
-
-static inline void ImportRow(const uint8_t* const src,
- WebPRescaler* const wrk) {
- int x_in = 0;
- int x_out;
- int accum = 0;
- if (!wrk->x_expand) {
- int sum = 0;
- for (x_out = 0; x_out < wrk->dst_width; ++x_out) {
- accum += wrk->x_add;
- for (; accum > 0; accum -= wrk->x_sub) {
- sum += src[x_in++];
+static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
+ const uint8_t* alpha = io->a;
+ if (alpha != NULL) {
+ const int mb_w = io->mb_w;
+ int i, j;
+ const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+ int num_rows;
+ const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
+ uint32_t alpha_mask = 0x0f;
+
+ {
+ uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+ uint8_t* alpha_dst = base_rgba + 1;
+ for (j = 0; j < num_rows; ++j) {
+ for (i = 0; i < mb_w; ++i) {
+ // Fill in the alpha value (converted to 4 bits).
+ const uint32_t alpha_value = alpha[i] >> 4;
+ alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
+ alpha_mask &= alpha_value;
+ }
+ alpha += io->width;
+ alpha_dst += buf->stride;
}
- { // Emit next horizontal pixel.
- const int32_t base = src[x_in++];
- const int32_t frac = base * (-accum);
- wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
- // fresh fractional start for next pixel
- sum = MULT(frac, wrk->fx_scale);
+ if (alpha_mask != 0x0f && p->output->colorspace == MODE_rgbA_4444) {
+ WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
}
}
- } else { // simple bilinear interpolation
- int left = src[0], right = src[0];
- for (x_out = 0; x_out < wrk->dst_width; ++x_out) {
- if (accum < 0) {
- left = right;
- right = src[++x_in];
- accum += wrk->x_add;
- }
- wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
- accum -= wrk->x_sub;
- }
- }
- // Accumulate the new row's contribution
- for (x_out = 0; x_out < wrk->dst_width; ++x_out) {
- wrk->irow[x_out] += wrk->frow[x_out];
}
+ return 0;
}
-static void ExportRow(WebPRescaler* const wrk) {
- int x_out;
- const int yscale = wrk->fy_scale * (-wrk->y_accum);
- assert(wrk->y_accum <= 0);
- for (x_out = 0; x_out < wrk->dst_width; ++x_out) {
- const int frac = MULT(wrk->frow[x_out], yscale);
- const int v = MULT(wrk->irow[x_out] - frac, wrk->fxy_scale);
- wrk->dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
- wrk->irow[x_out] = frac; // new fractional start
- }
- wrk->y_accum += wrk->y_add;
- wrk->dst += wrk->dst_stride;
-}
-
-#undef MULT
-#undef RFIX
-
//------------------------------------------------------------------------------
// YUV rescaling (no final RGB conversion needed)
static int Rescale(const uint8_t* src, int src_stride,
int new_lines, WebPRescaler* const wrk) {
int num_lines_out = 0;
- while (new_lines-- > 0) { // import new contribution of one source row.
- ImportRow(src, wrk);
- src += src_stride;
- wrk->y_accum -= wrk->y_sub;
- while (wrk->y_accum <= 0) { // emit output row(s)
- ExportRow(wrk);
- num_lines_out++;
- }
+ while (new_lines > 0) { // import new contributions of source rows.
+ const int lines_in = WebPRescalerImport(wrk, new_lines, src, src_stride);
+ src += lines_in * src_stride;
+ new_lines -= lines_in;
+ num_lines_out += WebPRescalerExport(wrk); // emit output row(s)
}
return num_lines_out;
}
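This import/export loop is the general usage pattern of the new WebPRescaler API. A sketch that shrinks one 8-bit plane the same way; the WebPRescalerInit() argument order, the include path and the 2 * dst_width work-buffer size are inferred from the calls in this patch (see InitYUVRescaler and AllocateAndInitRescaler below), not checked against the headers:

    #include <stdint.h>
    #include <stdlib.h>
    #include "../utils/rescaler.h"   /* assumed include path */

    /* Shrink a grayscale plane of src_w x src_h down to dst_w x dst_h. */
    static int ShrinkPlane(const uint8_t* src, int src_w, int src_h,
                           uint8_t* dst, int dst_w, int dst_h) {
      WebPRescaler rescaler;
      int32_t* const work = (int32_t*)calloc(2 * dst_w, sizeof(*work));
      int y = 0;
      if (work == NULL) return 0;
      WebPRescalerInit(&rescaler, src_w, src_h, dst, dst_w, dst_h,
                       dst_w /* dst_stride */, 1 /* one channel */,
                       src_w, dst_w, src_h, dst_h, work);
      while (y < src_h) {
        const int lines_in = WebPRescalerImport(&rescaler, src_h - y,
                                                src + y * src_w, src_w);
        y += lines_in;
        WebPRescalerExport(&rescaler);   /* emits whatever rows are ready */
      }
      free(work);
      return 1;
    }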
@@ -531,19 +301,14 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
}
static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
- if (io->a) {
+ if (io->a != NULL) {
Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
}
return 0;
}
-static int IsAlphaMode(WEBP_CSP_MODE mode) {
- return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB ||
- mode == MODE_RGBA_4444 || mode == MODE_YUVA);
-}
-
static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
- const int has_alpha = IsAlphaMode(p->output->colorspace);
+ const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
const int out_width = io->scaled_width;
const int out_height = io->scaled_height;
@@ -565,26 +330,27 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
return 0; // memory error
}
work = (int32_t*)p->memory;
- InitRescaler(&p->scaler_y, io->mb_w, io->mb_h,
- buf->y, out_width, out_height, buf->y_stride,
- io->mb_w, out_width, io->mb_h, out_height,
- work);
- InitRescaler(&p->scaler_u, uv_in_width, uv_in_height,
- buf->u, uv_out_width, uv_out_height, buf->u_stride,
- uv_in_width, uv_out_width,
- uv_in_height, uv_out_height,
- work + work_size);
- InitRescaler(&p->scaler_v, uv_in_width, uv_in_height,
- buf->v, uv_out_width, uv_out_height, buf->v_stride,
- uv_in_width, uv_out_width,
- uv_in_height, uv_out_height,
- work + work_size + uv_work_size);
+ WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
+ buf->y, out_width, out_height, buf->y_stride, 1,
+ io->mb_w, out_width, io->mb_h, out_height,
+ work);
+ WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
+ buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
+ uv_in_width, uv_out_width,
+ uv_in_height, uv_out_height,
+ work + work_size);
+ WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
+ buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
+ uv_in_width, uv_out_width,
+ uv_in_height, uv_out_height,
+ work + work_size + uv_work_size);
p->emit = EmitRescaledYUV;
+
if (has_alpha) {
- InitRescaler(&p->scaler_a, io->mb_w, io->mb_h,
- buf->a, out_width, out_height, buf->a_stride,
- io->mb_w, out_width, io->mb_h, out_height,
- work + work_size + 2 * uv_work_size);
+ WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
+ buf->a, out_width, out_height, buf->a_stride, 1,
+ io->mb_w, out_width, io->mb_h, out_height,
+ work + work_size + 2 * uv_work_size);
p->emit_alpha = EmitRescaledAlphaYUV;
}
return 1;
@@ -593,37 +359,25 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
//------------------------------------------------------------------------------
// RGBA rescaling
-// import new contributions until one row is ready to be output, or all input
-// is consumed.
-static int Import(const uint8_t* src, int src_stride,
- int new_lines, WebPRescaler* const wrk) {
- int num_lines_in = 0;
- while (num_lines_in < new_lines && wrk->y_accum > 0) {
- ImportRow(src, wrk);
- src += src_stride;
- ++num_lines_in;
- wrk->y_accum -= wrk->y_sub;
- }
- return num_lines_in;
-}
-
static int ExportRGB(WebPDecParams* const p, int y_pos) {
- const YUV444Func convert = kYUV444Converters[p->output->colorspace];
+ const WebPYUV444Converter convert =
+ WebPYUV444Converters[p->output->colorspace];
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride;
int num_lines_out = 0;
// For RGB rescaling, because of the YUV420, current scan position
// U/V can be +1/-1 line from the Y one. Hence the double test.
- while (p->scaler_y.y_accum <= 0 && p->scaler_u.y_accum <= 0) {
+ while (WebPRescalerHasPendingOutput(&p->scaler_y) &&
+ WebPRescalerHasPendingOutput(&p->scaler_u)) {
assert(p->last_y + y_pos + num_lines_out < p->output->height);
assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
- ExportRow(&p->scaler_y);
- ExportRow(&p->scaler_u);
- ExportRow(&p->scaler_v);
+ WebPRescalerExportRow(&p->scaler_y);
+ WebPRescalerExportRow(&p->scaler_u);
+ WebPRescalerExportRow(&p->scaler_v);
convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst,
dst, p->scaler_y.dst_width);
dst += buf->stride;
- num_lines_out++;
+ ++num_lines_out;
}
return num_lines_out;
}
@@ -634,12 +388,15 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
int j = 0, uv_j = 0;
int num_lines_out = 0;
while (j < mb_h) {
- const int y_lines_in = Import(io->y + j * io->y_stride, io->y_stride,
- mb_h - j, &p->scaler_y);
- const int u_lines_in = Import(io->u + uv_j * io->uv_stride, io->uv_stride,
- uv_mb_h - uv_j, &p->scaler_u);
- const int v_lines_in = Import(io->v + uv_j * io->uv_stride, io->uv_stride,
- uv_mb_h - uv_j, &p->scaler_v);
+ const int y_lines_in =
+ WebPRescalerImport(&p->scaler_y, mb_h - j,
+ io->y + j * io->y_stride, io->y_stride);
+ const int u_lines_in =
+ WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j,
+ io->u + uv_j * io->uv_stride, io->uv_stride);
+ const int v_lines_in =
+ WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j,
+ io->v + uv_j * io->uv_stride, io->uv_stride);
(void)v_lines_in; // remove a gcc warning
assert(u_lines_in == v_lines_in);
j += y_lines_in;
@@ -651,34 +408,80 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
static int ExportAlpha(WebPDecParams* const p, int y_pos) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
- uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride;
+ uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;
+ const WEBP_CSP_MODE colorspace = p->output->colorspace;
+ const int alpha_first =
+ (colorspace == MODE_ARGB || colorspace == MODE_Argb);
+ uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
int num_lines_out = 0;
- while (p->scaler_a.y_accum <= 0) {
+ const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+ uint32_t alpha_mask = 0xff;
+ const int width = p->scaler_a.dst_width;
+
+ while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
int i;
assert(p->last_y + y_pos + num_lines_out < p->output->height);
- ExportRow(&p->scaler_a);
- for (i = 0; i < p->scaler_a.dst_width; ++i) {
- dst[4 * i + 3] = p->scaler_a.dst[i];
+ WebPRescalerExportRow(&p->scaler_a);
+ for (i = 0; i < width; ++i) {
+ const uint32_t alpha_value = p->scaler_a.dst[i];
+ dst[4 * i] = alpha_value;
+ alpha_mask &= alpha_value;
}
dst += buf->stride;
- num_lines_out++;
+ ++num_lines_out;
+ }
+ if (is_premult_alpha && alpha_mask != 0xff) {
+ WebPApplyAlphaMultiply(base_rgba, alpha_first,
+ width, num_lines_out, buf->stride);
+ }
+ return num_lines_out;
+}
+
+static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
+ const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+ uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;
+ uint8_t* alpha_dst = base_rgba + 1;
+ int num_lines_out = 0;
+ const WEBP_CSP_MODE colorspace = p->output->colorspace;
+ const int width = p->scaler_a.dst_width;
+ const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+ uint32_t alpha_mask = 0x0f;
+
+ while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
+ int i;
+ assert(p->last_y + y_pos + num_lines_out < p->output->height);
+ WebPRescalerExportRow(&p->scaler_a);
+ for (i = 0; i < width; ++i) {
+ // Fill in the alpha value (converted to 4 bits).
+ const uint32_t alpha_value = p->scaler_a.dst[i] >> 4;
+ alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
+ alpha_mask &= alpha_value;
+ }
+ alpha_dst += buf->stride;
+ ++num_lines_out;
+ }
+ if (is_premult_alpha && alpha_mask != 0x0f) {
+ WebPApplyAlphaMultiply4444(base_rgba, width, num_lines_out, buf->stride);
}
return num_lines_out;
}
static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
- if (io->a) {
- int j = 0, pos = 0;
+ if (io->a != NULL) {
+ WebPRescaler* const scaler = &p->scaler_a;
+ int j = 0;
+ int pos = 0;
while (j < io->mb_h) {
- j += Import(io->a + j * io->width, io->width, io->mb_h - j, &p->scaler_a);
- pos += ExportAlpha(p, pos);
+ j += WebPRescalerImport(scaler, io->mb_h - j,
+ io->a + j * io->width, io->width);
+ pos += p->emit_alpha_row(p, pos);
}
}
return 0;
}
static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
- const int has_alpha = IsAlphaMode(p->output->colorspace);
+ const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
const int out_width = io->scaled_width;
const int out_height = io->scaled_height;
const int uv_in_width = (io->mb_w + 1) >> 1;
@@ -701,26 +504,32 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
}
work = (int32_t*)p->memory;
tmp = (uint8_t*)(work + tmp_size1);
- InitRescaler(&p->scaler_y, io->mb_w, io->mb_h,
- tmp + 0 * out_width, out_width, out_height, 0,
- io->mb_w, out_width, io->mb_h, out_height,
- work + 0 * work_size);
- InitRescaler(&p->scaler_u, uv_in_width, uv_in_height,
- tmp + 1 * out_width, out_width, out_height, 0,
- io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
- work + 1 * work_size);
- InitRescaler(&p->scaler_v, uv_in_width, uv_in_height,
- tmp + 2 * out_width, out_width, out_height, 0,
- io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
- work + 2 * work_size);
+ WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
+ tmp + 0 * out_width, out_width, out_height, 0, 1,
+ io->mb_w, out_width, io->mb_h, out_height,
+ work + 0 * work_size);
+ WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
+ tmp + 1 * out_width, out_width, out_height, 0, 1,
+ io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
+ work + 1 * work_size);
+ WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
+ tmp + 2 * out_width, out_width, out_height, 0, 1,
+ io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
+ work + 2 * work_size);
p->emit = EmitRescaledRGB;
if (has_alpha) {
- InitRescaler(&p->scaler_a, io->mb_w, io->mb_h,
- tmp + 3 * out_width, out_width, out_height, 0,
- io->mb_w, out_width, io->mb_h, out_height,
- work + 3 * work_size);
+ WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
+ tmp + 3 * out_width, out_width, out_height, 0, 1,
+ io->mb_w, out_width, io->mb_h, out_height,
+ work + 3 * work_size);
p->emit_alpha = EmitRescaledAlphaRGB;
+ if (p->output->colorspace == MODE_RGBA_4444 ||
+ p->output->colorspace == MODE_rgbA_4444) {
+ p->emit_alpha_row = ExportAlphaRGBA4444;
+ } else {
+ p->emit_alpha_row = ExportAlpha;
+ }
}
return 1;
}
@@ -728,67 +537,17 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
//------------------------------------------------------------------------------
// Default custom functions
-// Setup crop_xxx fields, mb_w and mb_h
-static int InitFromOptions(const WebPDecoderOptions* const options,
- VP8Io* const io) {
- const int W = io->width;
- const int H = io->height;
- int x = 0, y = 0, w = W, h = H;
-
- // Cropping
- io->use_cropping = (options != NULL) && (options->use_cropping > 0);
- if (io->use_cropping) {
- w = options->crop_width;
- h = options->crop_height;
- // TODO(skal): take colorspace into account. Don't assume YUV420.
- x = options->crop_left & ~1;
- y = options->crop_top & ~1;
- if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
- return 0; // out of frame boundary error
- }
- }
- io->crop_left = x;
- io->crop_top = y;
- io->crop_right = x + w;
- io->crop_bottom = y + h;
- io->mb_w = w;
- io->mb_h = h;
-
- // Scaling
- io->use_scaling = (options != NULL) && (options->use_scaling > 0);
- if (io->use_scaling) {
- if (options->scaled_width <= 0 || options->scaled_height <= 0) {
- return 0;
- }
- io->scaled_width = options->scaled_width;
- io->scaled_height = options->scaled_height;
- }
-
- // Filter
- io->bypass_filtering = options && options->bypass_filtering;
-
- // Fancy upsampler
-#ifdef FANCY_UPSAMPLING
- io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling);
-#endif
-
- if (io->use_scaling) {
- // disable filter (only for large downscaling ratio).
- io->bypass_filtering = (io->scaled_width < W * 3 / 4) &&
- (io->scaled_height < H * 3 / 4);
- io->fancy_upsampling = 0;
- }
- return 1;
-}
-
static int CustomSetup(VP8Io* io) {
WebPDecParams* const p = (WebPDecParams*)io->opaque;
- const int is_rgb = (p->output->colorspace < MODE_YUV);
+ const WEBP_CSP_MODE colorspace = p->output->colorspace;
+ const int is_rgb = WebPIsRGBMode(colorspace);
+ const int is_alpha = WebPIsAlphaMode(colorspace);
p->memory = NULL;
p->emit = NULL;
p->emit_alpha = NULL;
- if (!InitFromOptions(p->options, io)) {
+ p->emit_alpha_row = NULL;
+ if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) {
return 0;
}
@@ -811,18 +570,20 @@ static int CustomSetup(VP8Io* io) {
p->tmp_u = p->tmp_y + io->mb_w;
p->tmp_v = p->tmp_u + uv_width;
p->emit = EmitFancyRGB;
- InitUpsamplers();
+ WebPInitUpsamplers();
}
#endif
} else {
p->emit = EmitYUV;
}
-#ifdef WEBP_EXPERIMENTAL_FEATURES
- if (IsAlphaMode(p->output->colorspace)) {
- // We need transparency output
- p->emit_alpha = is_rgb ? EmitAlphaRGB : EmitAlphaYUV;
+ if (is_alpha) { // need transparency output
+ if (WebPIsPremultipliedMode(colorspace)) WebPInitPremultiply();
+ p->emit_alpha =
+ (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
+ EmitAlphaRGBA4444
+ : is_rgb ? EmitAlphaRGB
+ : EmitAlphaYUV;
}
-#endif
}
if (is_rgb) {
diff --git a/src/dec/layer.c b/src/dec/layer.c
index 357ad21d..a3a5bdcf 100644
--- a/src/dec/layer.c
+++ b/src/dec/layer.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -11,13 +11,14 @@
#include <assert.h>
#include <stdlib.h>
-#include "vp8i.h"
+
+#include "./vp8i.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
int VP8DecodeLayer(VP8Decoder* const dec) {
assert(dec);
diff --git a/src/dec/quant.c b/src/dec/quant.c
index 47edbf5d..d54097af 100644
--- a/src/dec/quant.c
+++ b/src/dec/quant.c
@@ -1,4 +1,4 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,13 +9,13 @@
//
// Author: Skal (pascal.massimino@gmail.com)
-#include "vp8i.h"
+#include "./vp8i.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-static inline int clip(int v, int M) {
+static WEBP_INLINE int clip(int v, int M) {
return v < 0 ? 0 : v > M ? M : v;
}
@@ -58,7 +58,7 @@ static const uint16_t kAcTable[128] = {
249, 254, 259, 264, 269, 274, 279, 284
};
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Paragraph 9.6
void VP8ParseQuant(VP8Decoder* const dec) {
@@ -94,8 +94,10 @@ void VP8ParseQuant(VP8Decoder* const dec) {
m->y1_mat_[1] = kAcTable[clip(q + 0, 127)];
m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
- // TODO(skal): make it another table?
- m->y2_mat_[1] = kAcTable[clip(q + dqy2_ac, 127)] * 155 / 100;
+ // For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
+ // The smallest precision for that is '(x*6349) >> 12' but 16 is a good
+ // word size.
+ m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8;
m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
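The fixed-point substitution above can be verified exhaustively over the range of kAcTable values with a throwaway program (not part of the patch):

    #include <assert.h>
    #include <stdio.h>

    int main(void) {
      int x;
      for (x = 0; x <= 284; ++x) {
        assert(x * 155 / 100 == (x * 101581) >> 16);  /* the form used above */
        assert(x * 155 / 100 == (x * 6349) >> 12);    /* the 12-bit variant  */
      }
      printf("both fixed-point forms match x*155/100 on [0..284]\n");
      return 0;
    }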
@@ -104,7 +106,7 @@ void VP8ParseQuant(VP8Decoder* const dec) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
diff --git a/src/dec/tree.c b/src/dec/tree.c
index ed6caad2..82484e4c 100644
--- a/src/dec/tree.c
+++ b/src/dec/tree.c
@@ -1,4 +1,4 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -59,13 +59,13 @@ static const int8_t kMVRef[8] = {
};
static const int8_t kMVRef4[6] = {
- -LEFT4, 1
- -ABOVE4, 2
+ -LEFT4, 1,
+ -ABOVE4, 2,
-ZERO4, -NEW4
};
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Default probabilities
// Inter
@@ -385,7 +385,7 @@ void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) {
: VP8GetBit(br, 183) ? TM_PRED : H_PRED;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Paragraph 13
static const uint8_t
diff --git a/src/dec/vp8.c b/src/dec/vp8.c
index 1f1ce29d..b0ccfa2a 100644
--- a/src/dec/vp8.c
+++ b/src/dec/vp8.c
@@ -1,4 +1,4 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -10,19 +10,23 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
-#include "vp8i.h"
+
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "./webpi.h"
+#include "../utils/bit_reader.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
int WebPGetDecoderVersion(void) {
return (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// VP8Decoder
static void SetOk(VP8Decoder* const dec) {
@@ -31,19 +35,22 @@ static void SetOk(VP8Decoder* const dec) {
}
int VP8InitIoInternal(VP8Io* const io, int version) {
- if (version != WEBP_DECODER_ABI_VERSION)
+ if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return 0; // mismatch error
- if (io) {
+ }
+ if (io != NULL) {
memset(io, 0, sizeof(*io));
}
return 1;
}
VP8Decoder* VP8New(void) {
- VP8Decoder* dec = (VP8Decoder*)calloc(1, sizeof(VP8Decoder));
- if (dec) {
+ VP8Decoder* const dec = (VP8Decoder*)calloc(1, sizeof(*dec));
+ if (dec != NULL) {
SetOk(dec);
+ WebPWorkerInit(&dec->worker_);
dec->ready_ = 0;
+ dec->num_parts_ = 1;
}
return dec;
}
@@ -54,36 +61,46 @@ VP8StatusCode VP8Status(VP8Decoder* const dec) {
}
const char* VP8StatusMessage(VP8Decoder* const dec) {
- if (!dec) return "no object";
+ if (dec == NULL) return "no object";
if (!dec->error_msg_) return "OK";
return dec->error_msg_;
}
void VP8Delete(VP8Decoder* const dec) {
- if (dec) {
+ if (dec != NULL) {
VP8Clear(dec);
free(dec);
}
}
int VP8SetError(VP8Decoder* const dec,
- VP8StatusCode error, const char * const msg) {
- dec->status_ = error;
- dec->error_msg_ = msg;
- dec->ready_ = 0;
+ VP8StatusCode error, const char* const msg) {
+  // TODO: This check would be unnecessary if alpha decompression were separated
+ // from VP8ProcessRow/FinishRow. This avoids setting 'dec->status_' to
+ // something other than VP8_STATUS_BITSTREAM_ERROR on alpha decompression
+ // failure.
+ if (dec->status_ == VP8_STATUS_OK) {
+ dec->status_ = error;
+ dec->error_msg_ = msg;
+ dec->ready_ = 0;
+ }
return 0;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+
+int VP8CheckSignature(const uint8_t* const data, size_t data_size) {
+ return (data_size >= 3 &&
+ data[0] == 0x9d && data[1] == 0x01 && data[2] == 0x2a);
+}
-int VP8GetInfo(const uint8_t* data,
- uint32_t data_size, uint32_t chunk_size,
- int* width, int* height, int* has_alpha) {
- if (data_size < 10) {
+int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
+ int* const width, int* const height) {
+ if (data == NULL || data_size < VP8_FRAME_HEADER_SIZE) {
return 0; // not enough data
}
// check signature
- if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a) {
+ if (!VP8CheckSignature(data + 3, data_size - 3)) {
return 0; // Wrong signature.
} else {
const uint32_t bits = data[0] | (data[1] << 8) | (data[2] << 16);
@@ -91,14 +108,6 @@ int VP8GetInfo(const uint8_t* data,
const int w = ((data[7] << 8) | data[6]) & 0x3fff;
const int h = ((data[9] << 8) | data[8]) & 0x3fff;
- if (has_alpha) {
-#ifdef WEBP_EXPERIMENTAL_FEATURES
- if (data_size < 11) return 0;
- *has_alpha = !!(data[10] & 0x80); // the colorspace_ bit
-#else
- *has_alpha = 0;
-#endif
- }
if (!key_frame) { // Not a keyframe.
return 0;
}
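For reference, the fields VP8GetInfo() reads from the first ten bytes are laid out as: a 3-byte frame tag, the 0x9d 0x01 0x2a start code, then 14-bit little-endian width and height. A sketch with made-up byte values (illustration only):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const uint8_t hdr[10] = { 0x50, 0x2a, 0x00,   /* frame tag (hypothetical) */
                                0x9d, 0x01, 0x2a,   /* keyframe start code      */
                                0x40, 0x01,         /* width  = 320             */
                                0xf0, 0x00 };       /* height = 240             */
      const uint32_t bits = hdr[0] | (hdr[1] << 8) | (hdr[2] << 16);
      const int key_frame = !(bits & 1);
      const int w = ((hdr[7] << 8) | hdr[6]) & 0x3fff;
      const int h = ((hdr[9] << 8) | hdr[8]) & 0x3fff;
      printf("key_frame=%d, %dx%d\n", key_frame, w, h);  /* key_frame=1, 320x240 */
      return 0;
    }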
@@ -124,11 +133,11 @@ int VP8GetInfo(const uint8_t* data,
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Header parsing
static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
- assert(hdr);
+ assert(hdr != NULL);
hdr->use_segment_ = 0;
hdr->update_map_ = 0;
hdr->absolute_delta_ = 1;
@@ -139,8 +148,8 @@ static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
// Paragraph 9.3
static int ParseSegmentHeader(VP8BitReader* br,
VP8SegmentHeader* hdr, VP8Proba* proba) {
- assert(br);
- assert(hdr);
+ assert(br != NULL);
+ assert(hdr != NULL);
hdr->use_segment_ = VP8Get(br);
if (hdr->use_segment_) {
hdr->update_map_ = VP8Get(br);
@@ -176,7 +185,7 @@ static int ParseSegmentHeader(VP8BitReader* br,
// is returned, and this is an unrecoverable error.
// If the partitions were positioned ok, VP8_STATUS_OK is returned.
static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
- const uint8_t* buf, uint32_t size) {
+ const uint8_t* buf, size_t size) {
VP8BitReader* const br = &dec->br_;
const uint8_t* sz = buf;
const uint8_t* buf_end = buf + size;
@@ -244,18 +253,15 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
return !br->eof_;
}
-static inline uint32_t get_le32(const uint8_t* const data) {
- return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
-}
-
// Topmost call
int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
- uint8_t* buf;
- uint32_t buf_size;
+ const uint8_t* buf;
+ size_t buf_size;
VP8FrameHeader* frm_hdr;
VP8PictureHeader* pic_hdr;
VP8BitReader* br;
VP8StatusCode status;
+ WebPHeaderStructure headers;
if (dec == NULL) {
return 0;
@@ -266,41 +272,35 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
"null VP8Io passed to VP8GetHeaders()");
}
- buf = (uint8_t*)io->data;
- buf_size = io->data_size;
- if (buf == NULL || buf_size <= 4) {
- return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
- "Not enough data to parse frame header");
+ // Process Pre-VP8 chunks.
+ headers.data = io->data;
+ headers.data_size = io->data_size;
+ status = WebPParseHeaders(&headers);
+ if (status != VP8_STATUS_OK) {
+ return VP8SetError(dec, status, "Incorrect/incomplete header.");
+ }
+ if (headers.is_lossless) {
+ return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+ "Unexpected lossless format encountered.");
}
- // Skip over valid RIFF headers
- if (!memcmp(buf, "RIFF", 4)) {
- uint32_t riff_size;
- uint32_t chunk_size;
- if (buf_size < 20 + 4) {
- return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
- "RIFF: Truncated header.");
- }
- if (memcmp(buf + 8, "WEBP", 4)) { // wrong image file signature
- return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
- "RIFF: WEBP signature not found.");
- }
- riff_size = get_le32(buf + 4);
- if (riff_size < 12) {
- return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
- "RIFF: Truncated header.");
- }
- if (memcmp(buf + 12, "VP8 ", 4)) {
- return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
- "RIFF: Invalid compression format.");
- }
- chunk_size = get_le32(buf + 16);
- if (chunk_size > riff_size - 12) {
- return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
- "RIFF: Inconsistent size information.");
- }
- buf += 20;
- buf_size -= 20;
+ if (dec->alpha_data_ == NULL) {
+ assert(dec->alpha_data_size_ == 0);
+ // We have NOT set alpha data yet. Set it now.
+ // (This is to ensure that dec->alpha_data_ is NOT reset to NULL if
+    // WebPParseHeaders() is called more than once, as in the incremental decoding
+ // case.)
+ dec->alpha_data_ = headers.alpha_data;
+ dec->alpha_data_size_ = headers.alpha_data_size;
+ }
+
+ // Process the VP8 frame header.
+ buf = headers.data + headers.offset;
+ buf_size = headers.data_size - headers.offset;
+ assert(headers.data_size >= headers.offset); // WebPParseHeaders' guarantee
+ if (buf_size < 4) {
+ return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+ "Truncated header.");
}
// Paragraph 9.1
@@ -328,7 +328,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"cannot parse picture header");
}
- if (buf[0] != 0x9d || buf[1] != 0x01 || buf[2] != 0x2a) {
+ if (!VP8CheckSignature(buf, buf_size)) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Bad code word");
}
@@ -365,9 +365,6 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
"bad partition length");
}
- dec->alpha_data_ = NULL;
- dec->alpha_data_size_ = 0;
-
br = &dec->br_;
VP8InitBitReader(br, buf, buf + frm_hdr->partition_length_);
buf += frm_hdr->partition_length_;
@@ -436,22 +433,14 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
if (dec->pic_hdr_.colorspace_) {
const size_t kTrailerSize = 8;
const uint8_t kTrailerMarker = 0x01;
- uint8_t* const ext_buf = buf - kTrailerSize;
+ const uint8_t* ext_buf = buf - kTrailerSize;
size_t size;
if (frm_hdr->partition_length_ < kTrailerSize ||
ext_buf[kTrailerSize - 1] != kTrailerMarker) {
- Error:
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"RIFF: Inconsistent extra information.");
}
- // Alpha
- size = (ext_buf[4] << 0) | (ext_buf[5] << 8) | (ext_buf[6] << 16);
- if (frm_hdr->partition_length_ < size + kTrailerSize) {
- goto Error;
- }
- dec->alpha_data_ = (size > 0) ? ext_buf - size : NULL;
- dec->alpha_data_size_ = size;
// Layer
size = (ext_buf[0] << 0) | (ext_buf[1] << 8) | (ext_buf[2] << 16);
@@ -466,7 +455,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
return 1;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Residual decoding (Paragraph 13.2 / 13.3)
static const uint8_t kBands[16 + 1] = {
@@ -489,8 +478,9 @@ typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting
// Returns the position of the last non-zero coeff plus one
// (and 0 if there's no coeff at all)
static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
- int ctx, const uint16_t dq[2], int n, int16_t* out) {
- const uint8_t* p = prob[kBands[n]][ctx];
+ int ctx, const quant_t dq, int n, int16_t* out) {
+ // n is either 0 or 1 here. kBands[n] is not necessary for extracting '*p'.
+ const uint8_t* p = prob[n][ctx];
if (!VP8GetBit(br, p[0])) { // first EOB is more a 'CBP' bit.
return 0;
}
@@ -579,6 +569,7 @@ static void ParseResiduals(VP8Decoder* const dec,
uint32_t non_zero_dc = 0;
int x, y, ch;
+ nz_dc.i32 = nz_ac.i32 = 0;
memset(dst, 0, 384 * sizeof(*dst));
if (!dec->is_i4x4_) { // parse DC
int16_t dc[16] = { 0 };
@@ -645,7 +636,7 @@ static void ParseResiduals(VP8Decoder* const dec,
}
#undef PACK
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Main loop
int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
@@ -682,15 +673,21 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
return (!token_br->eof_);
}
+void VP8InitScanline(VP8Decoder* const dec) {
+ VP8MB* const left = dec->mb_info_ - 1;
+ left->nz_ = 0;
+ left->dc_nz_ = 0;
+ memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
+ dec->filter_row_ =
+ (dec->filter_type_ > 0) &&
+ (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
+}
+
static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
- VP8MB* const left = dec->mb_info_ - 1;
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
- left->nz_ = 0;
- left->dc_nz_ = 0;
- memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
-
+ VP8InitScanline(dec);
for (dec->mb_x_ = 0; dec->mb_x_ < dec->mb_w_; dec->mb_x_++) {
if (!VP8DecodeMB(dec, token_br)) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
@@ -701,13 +698,13 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
// Store data and save block's filtering params
VP8StoreBlock(dec);
}
- if (dec->filter_type_ > 0) {
- VP8FilterRow(dec);
- }
- if (!VP8FinishRow(dec, io)) {
+ if (!VP8ProcessRow(dec, io)) {
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
}
}
+ if (dec->use_threads_ && !WebPWorkerSync(&dec->worker_)) {
+ return 0;
+ }
// Finish
#ifndef ONLY_KEYFRAME_CODE
@@ -729,6 +726,7 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
// Main entry point
int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
+ int ok = 0;
if (dec == NULL) {
return 0;
}
@@ -744,38 +742,35 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
}
assert(dec->ready_);
- // Will allocate memory and prepare everything.
- if (!VP8InitFrame(dec, io)) {
- VP8Clear(dec);
- return 0;
+ // Finish setting up the decoding parameter. Will call io->setup().
+ ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
+ if (ok) { // good to go.
+ // Will allocate memory and prepare everything.
+ if (ok) ok = VP8InitFrame(dec, io);
+
+ // Main decoding loop
+ if (ok) ok = ParseFrame(dec, io);
+
+ // Exit.
+ ok &= VP8ExitCritical(dec, io);
}
- // Finish setting up the decoding parameter
- if (VP8FinishFrameSetup(dec, io) != VP8_STATUS_OK) {
+ if (!ok) {
VP8Clear(dec);
return 0;
}
- // Main decoding loop
- {
- const int ret = ParseFrame(dec, io);
- if (io->teardown) {
- io->teardown(io);
- }
- if (!ret) {
- VP8Clear(dec);
- return 0;
- }
- }
-
dec->ready_ = 0;
- return 1;
+ return ok;
}
void VP8Clear(VP8Decoder* const dec) {
if (dec == NULL) {
return;
}
+ if (dec->use_threads_) {
+ WebPWorkerEnd(&dec->worker_);
+ }
if (dec->mem_) {
free(dec->mem_);
}
@@ -785,7 +780,7 @@ void VP8Clear(VP8Decoder* const dec) {
dec->ready_ = 0;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h
index 587b1cb4..4382edfd 100644
--- a/src/dec/vp8i.h
+++ b/src/dec/vp8i.h
@@ -1,4 +1,4 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -13,19 +13,22 @@
#define WEBP_DEC_VP8I_H_
#include <string.h> // for memcpy()
-#include "bits.h"
+#include "./vp8li.h"
+#include "../utils/bit_reader.h"
+#include "../utils/thread.h"
+#include "../dsp/dsp.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Various defines and enums
// version numbers
#define DEC_MAJ_VERSION 0
-#define DEC_MIN_VERSION 1
-#define DEC_REV_VERSION 2
+#define DEC_MIN_VERSION 2
+#define DEC_REV_VERSION 0
#define ONLY_KEYFRAME_CODE // to remove any code related to P-Frames
@@ -95,7 +98,7 @@ enum { MB_FEATURE_TREE_PROBS = 3,
#define U_OFF (Y_OFF + BPS * 16 + BPS)
#define V_OFF (U_OFF + 16)
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Headers
typedef struct {
@@ -144,27 +147,37 @@ typedef struct {
int mode_lf_delta_[NUM_MODE_LF_DELTAS];
} VP8FilterHeader;
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Information about the macroblocks.
-typedef struct {
- // block type
- uint8_t skip_:1;
- // filter specs
- uint8_t f_level_:6; // filter strength: 0..63
- uint8_t f_ilevel_:6; // inner limit: 1..63
- uint8_t f_inner_:1; // do inner filtering?
- // cbp
- uint8_t nz_; // non-zero AC/DC coeffs
- uint8_t dc_nz_; // non-zero DC coeffs
+typedef struct { // filter specs
+ unsigned int f_level_:6; // filter strength: 0..63
+ unsigned int f_ilevel_:6; // inner limit: 1..63
+ unsigned int f_inner_:1; // do inner filtering?
+} VP8FInfo;
+
+typedef struct { // used for syntax-parsing
+ unsigned int nz_; // non-zero AC/DC coeffs
+ unsigned int dc_nz_:1; // non-zero DC coeffs
+ unsigned int skip_:1; // block type
} VP8MB;
// Dequantization matrices
+typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower).
typedef struct {
- uint16_t y1_mat_[2], y2_mat_[2], uv_mat_[2]; // [DC / AC]
+ quant_t y1_mat_, y2_mat_, uv_mat_;
} VP8QuantMatrix;
-//-----------------------------------------------------------------------------
+// Persistent information needed by the parallel processing
+typedef struct {
+ int id_; // cache row to process (in [0..2])
+ int mb_y_; // macroblock position of the row
+ int filter_row_; // true if row-filtering is needed
+ VP8FInfo* f_info_; // filter strengths
+ VP8Io io_; // copy of the VP8Io to pass to put()
+} VP8ThreadContext;
+
+//------------------------------------------------------------------------------
// VP8Decoder: the main opaque structure handed over to user
struct VP8Decoder {
@@ -181,6 +194,13 @@ struct VP8Decoder {
VP8FilterHeader filter_hdr_;
VP8SegmentHeader segment_hdr_;
+ // Worker
+ WebPWorker worker_;
+ int use_threads_; // use multi-thread
+ int cache_id_; // current cache row
+ int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3)
+ VP8ThreadContext thread_ctx_; // Thread context
+
// dimension, in macroblock units.
int mb_w_, mb_h_;
@@ -219,7 +239,8 @@ struct VP8Decoder {
uint8_t* y_t_; // top luma samples: 16 * mb_w_
uint8_t* u_t_, *v_t_; // top u/v samples: 8 * mb_w_ each
- VP8MB* mb_info_; // contextual macroblock infos (mb_w_ + 1)
+ VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1)
+ VP8FInfo* f_info_; // filter strength info
uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE)
int16_t* coeffs_; // 384 coeffs = (16+8+8) * 4*4
@@ -231,7 +252,7 @@ struct VP8Decoder {
// main memory chunk for the above data. Persistent.
void* mem_;
- int mem_size_;
+ size_t mem_size_;
// Per macroblock non-persistent infos.
int mb_x_, mb_y_; // current position, in macroblock units
@@ -249,31 +270,25 @@ struct VP8Decoder {
// Filtering side-info
int filter_type_; // 0=off, 1=simple, 2=complex
+ int filter_row_; // per-row flag
uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment
// extensions
const uint8_t* alpha_data_; // compressed alpha data (if present)
size_t alpha_data_size_;
- uint8_t* alpha_plane_; // output
+ uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
int layer_colorspace_;
const uint8_t* layer_data_; // compressed layer data (if present)
size_t layer_data_size_;
};
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// internal functions. Not public.
// in vp8.c
int VP8SetError(VP8Decoder* const dec,
- VP8StatusCode error, const char * const msg);
-// Validates the VP8 data-header and retrieve basic header information viz width
-// and height. Returns 0 in case of formatting error. *width/*height/*has_alpha
-// can be passed NULL.
-int VP8GetInfo(const uint8_t* data,
- uint32_t data_size, // data available so far
- uint32_t chunk_size, // total data size expect in the chunk
- int *width, int *height, int *has_alpha);
+ VP8StatusCode error, const char* const msg);
// in tree.c
void VP8ResetProba(VP8Proba* const proba);
@@ -288,13 +303,19 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io);
// Predict a block and add residual
void VP8ReconstructBlock(VP8Decoder* const dec);
// Call io->setup() and finish setting up scan parameters.
-VP8StatusCode VP8FinishFrameSetup(VP8Decoder* const dec, VP8Io* const io);
-// Filter the decoded macroblock row (if needed)
-void VP8FilterRow(const VP8Decoder* const dec);
+// After this call returns, one must always call VP8ExitCritical() with the
+// same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK
+// if ok, otherwise sets and returns the error status on *dec.
+VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
+// Must always be called in pair with VP8EnterCritical().
+// Returns false in case of error.
+int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
+// Process the last decoded row (filtering + output)
+int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
// Store a block, along with filtering params
void VP8StoreBlock(VP8Decoder* const dec);
-// Finalize and transmit a complete row. Return false in case of user-abort.
-int VP8FinishRow(VP8Decoder* const dec, VP8Io* const io);
+// To be called at the start of a new scanline, to initialize predictors.
+void VP8InitScanline(VP8Decoder* const dec);
// Decode one macroblock. Returns false if there is not enough data.
int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
@@ -305,62 +326,10 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
// in layer.c
int VP8DecodeLayer(VP8Decoder* const dec);
-// in dsp.c
-typedef void (*VP8Idct)(const int16_t* coeffs, uint8_t* dst);
-// when doing two transforms, coeffs is actually int16_t[2][16].
-typedef void (*VP8Idct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
-extern VP8Idct2 VP8Transform;
-extern VP8Idct VP8TransformUV;
-extern VP8Idct VP8TransformDC;
-extern VP8Idct VP8TransformDCUV;
-extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
-
-// *dst is the destination block, with stride BPS. Boundary samples are
-// assumed accessible when needed.
-typedef void (*VP8PredFunc)(uint8_t* dst);
-extern VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
-extern VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
-extern VP8PredFunc VP8PredLuma4[NUM_BMODES];
-
-void VP8DspInit(void); // must be called before anything using the above
-void VP8DspInitTables(void); // needs to be called no matter what.
-
-// simple filter (only for luma)
-typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
-extern VP8SimpleFilterFunc VP8SimpleVFilter16;
-extern VP8SimpleFilterFunc VP8SimpleHFilter16;
-extern VP8SimpleFilterFunc VP8SimpleVFilter16i; // filter 3 inner edges
-extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
-
-// regular filter (on both macroblock edges and inner edges)
-typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
- int thresh, int ithresh, int hev_t);
-typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
- int thresh, int ithresh, int hev_t);
-// on outter edge
-extern VP8LumaFilterFunc VP8VFilter16;
-extern VP8LumaFilterFunc VP8HFilter16;
-extern VP8ChromaFilterFunc VP8VFilter8;
-extern VP8ChromaFilterFunc VP8HFilter8;
-
-// on inner edge
-extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether
-extern VP8LumaFilterFunc VP8HFilter16i;
-extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether
-extern VP8ChromaFilterFunc VP8HFilter8i;
-
-typedef enum {
- kSSE2,
- kSSE3
-} CPUFeature;
-// returns true if the CPU supports the feature.
-typedef int (*VP8CPUInfo)(CPUFeature feature);
-extern VP8CPUInfo VP8DecGetCPUInfo;
-
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
-#endif // WEBP_DEC_VP8I_H_
+#endif /* WEBP_DEC_VP8I_H_ */
diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
new file mode 100644
index 00000000..897e4395
--- /dev/null
+++ b/src/dec/vp8l.c
@@ -0,0 +1,1200 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// main entry for the decoder
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+// Jyrki Alakuijala (jyrki@google.com)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "./vp8li.h"
+#include "../dsp/lossless.h"
+#include "../dsp/yuv.h"
+#include "../utils/huffman.h"
+#include "../utils/utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define NUM_ARGB_CACHE_ROWS 16
+
+static const int kCodeLengthLiterals = 16;
+static const int kCodeLengthRepeatCode = 16;
+static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };
+static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
+
+// -----------------------------------------------------------------------------
+// Five Huffman codes are used at each meta code:
+// 1. green + length prefix codes + color cache codes,
+// 2. alpha,
+// 3. red,
+// 4. blue, and,
+// 5. distance prefix codes.
+typedef enum {
+ GREEN = 0,
+ RED = 1,
+ BLUE = 2,
+ ALPHA = 3,
+ DIST = 4
+} HuffIndex;
+
+static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = {
+ NUM_LITERAL_CODES + NUM_LENGTH_CODES,
+ NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,
+ NUM_DISTANCE_CODES
+};
+
+
+#define NUM_CODE_LENGTH_CODES 19
+static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
+ 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+
+#define CODE_TO_PLANE_CODES 120
+static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = {
+ 0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
+ 0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
+ 0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
+ 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
+ 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
+ 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
+ 0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
+ 0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
+ 0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
+ 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
+ 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
+ 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
+};
+
+static int DecodeImageStream(int xsize, int ysize,
+ int is_level0,
+ VP8LDecoder* const dec,
+ uint32_t** const decoded_data);
+
+//------------------------------------------------------------------------------
+
+int VP8LCheckSignature(const uint8_t* const data, size_t size) {
+ return (size >= 1) && (data[0] == VP8L_MAGIC_BYTE);
+}
+
+static int ReadImageInfo(VP8LBitReader* const br,
+ int* const width, int* const height,
+ int* const has_alpha) {
+ const uint8_t signature = VP8LReadBits(br, 8);
+ if (!VP8LCheckSignature(&signature, 1)) {
+ return 0;
+ }
+ *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
+ *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
+ *has_alpha = VP8LReadBits(br, 1);
+ VP8LReadBits(br, VP8L_VERSION_BITS); // Read/ignore the version number.
+ return 1;
+}
+
+int VP8LGetInfo(const uint8_t* data, size_t data_size,
+ int* const width, int* const height, int* const has_alpha) {
+ if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) {
+ return 0; // not enough data
+ } else {
+ int w, h, a;
+ VP8LBitReader br;
+ VP8LInitBitReader(&br, data, data_size);
+ if (!ReadImageInfo(&br, &w, &h, &a)) {
+ return 0;
+ }
+ if (width != NULL) *width = w;
+ if (height != NULL) *height = h;
+ if (has_alpha != NULL) *has_alpha = a;
+ return 1;
+ }
+}
+
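ReadImageInfo() above consumes, after the one-byte signature, a 14-bit (width - 1), a 14-bit (height - 1), one alpha bit and a 3-bit version. A sketch of that packing, assuming a least-significant-bit-first order for the fields (illustration only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const int width = 320, height = 240, has_alpha = 1;
      /* Pack the 32 post-signature header bits the way the reader consumes
       * them: width-1 in bits 0..13, height-1 in bits 14..27, alpha in bit 28,
       * version (0) in bits 29..31. */
      const uint32_t packed = (uint32_t)(width - 1) |
                              ((uint32_t)(height - 1) << 14) |
                              ((uint32_t)has_alpha << 28);
      const int w = (int)(packed & 0x3fff) + 1;
      const int h = (int)((packed >> 14) & 0x3fff) + 1;
      const int a = (int)((packed >> 28) & 1);
      printf("%dx%d alpha=%d\n", w, h, a);   /* 320x240 alpha=1 */
      return 0;
    }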
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int GetCopyDistance(int distance_symbol,
+ VP8LBitReader* const br) {
+ int extra_bits, offset;
+ if (distance_symbol < 4) {
+ return distance_symbol + 1;
+ }
+ extra_bits = (distance_symbol - 2) >> 1;
+ offset = (2 + (distance_symbol & 1)) << extra_bits;
+ return offset + VP8LReadBits(br, extra_bits) + 1;
+}
+
+static WEBP_INLINE int GetCopyLength(int length_symbol,
+ VP8LBitReader* const br) {
+ // Length and distance prefixes are encoded the same way.
+ return GetCopyDistance(length_symbol, br);
+}
+
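GetCopyDistance()/GetCopyLength() implement the usual prefix-code scheme: the first four symbols stand for the literal values 1..4, and each later pair of symbols adds one extra bit and doubles the covered range. Printing the ranges makes the mapping concrete (standalone, no bit reader involved; illustration only):

    #include <stdio.h>

    int main(void) {
      int sym;
      for (sym = 0; sym < 8; ++sym) {
        if (sym < 4) {
          printf("symbol %d -> value %d (0 extra bits)\n", sym, sym + 1);
        } else {
          const int extra_bits = (sym - 2) >> 1;
          const int offset = (2 + (sym & 1)) << extra_bits;
          printf("symbol %d -> values %d..%d (%d extra bits)\n",
                 sym, offset + 1, offset + (1 << extra_bits), extra_bits);
        }
      }
      return 0;
    }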
+static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
+ if (plane_code > CODE_TO_PLANE_CODES) {
+ return plane_code - CODE_TO_PLANE_CODES;
+ } else {
+ const int dist_code = code_to_plane_lut[plane_code - 1];
+ const int yoffset = dist_code >> 4;
+ const int xoffset = 8 - (dist_code & 0xf);
+ const int dist = yoffset * xsize + xoffset;
+ return (dist >= 1) ? dist : 1;
+ }
+}
+
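The short plane codes map to 2-D offsets around the current pixel; converted back to a 1-D distance, the first few codes land on the immediate neighbours. A worked example for a hypothetical 100-pixel-wide image, using a local copy of the first four LUT entries (illustration only, not part of the patch):

    #include <stdio.h>

    static const unsigned char kFirstCodes[4] = { 0x18, 0x07, 0x17, 0x19 };

    static int ToDistance(int xsize, int plane_code) {
      const int dist_code = kFirstCodes[plane_code - 1];
      const int yoffset = dist_code >> 4;
      const int xoffset = 8 - (dist_code & 0xf);
      const int dist = yoffset * xsize + xoffset;
      return (dist >= 1) ? dist : 1;
    }

    int main(void) {
      const int xsize = 100;
      printf("code 1 -> %d (pixel directly above)\n", ToDistance(xsize, 1)); /* 100 */
      printf("code 2 -> %d (pixel to the left)\n",    ToDistance(xsize, 2)); /*   1 */
      printf("code 3 -> %d (above-left)\n",           ToDistance(xsize, 3)); /* 101 */
      printf("code 4 -> %d (above-right)\n",          ToDistance(xsize, 4)); /*  99 */
      return 0;
    }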
+//------------------------------------------------------------------------------
+// Decodes the next Huffman code from bit-stream.
+// FillBitWindow(br) needs to be called at minimum every second call
+// to ReadSymbolUnsafe.
+static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) {
+ const HuffmanTreeNode* node = tree->root_;
+ assert(node != NULL);
+ while (!HuffmanTreeNodeIsLeaf(node)) {
+ node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br));
+ }
+ return node->symbol_;
+}
+
+static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
+ VP8LBitReader* const br) {
+ const int read_safe = (br->pos_ + 8 > br->len_);
+ if (!read_safe) {
+ return ReadSymbolUnsafe(tree, br);
+ } else {
+ const HuffmanTreeNode* node = tree->root_;
+ assert(node != NULL);
+ while (!HuffmanTreeNodeIsLeaf(node)) {
+ node = HuffmanTreeNextNode(node, VP8LReadOneBit(br));
+ }
+ return node->symbol_;
+ }
+}
+
+static int ReadHuffmanCodeLengths(
+ VP8LDecoder* const dec, const int* const code_length_code_lengths,
+ int num_symbols, int* const code_lengths) {
+ int ok = 0;
+ VP8LBitReader* const br = &dec->br_;
+ int symbol;
+ int max_symbol;
+ int prev_code_len = DEFAULT_CODE_LENGTH;
+ HuffmanTree tree;
+
+ if (!HuffmanTreeBuildImplicit(&tree, code_length_code_lengths,
+ NUM_CODE_LENGTH_CODES)) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ return 0;
+ }
+
+ if (VP8LReadBits(br, 1)) { // use length
+ const int length_nbits = 2 + 2 * VP8LReadBits(br, 3);
+ max_symbol = 2 + VP8LReadBits(br, length_nbits);
+ if (max_symbol > num_symbols) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ goto End;
+ }
+ } else {
+ max_symbol = num_symbols;
+ }
+
+ symbol = 0;
+ while (symbol < num_symbols) {
+ int code_len;
+ if (max_symbol-- == 0) break;
+ VP8LFillBitWindow(br);
+ code_len = ReadSymbol(&tree, br);
+ if (code_len < kCodeLengthLiterals) {
+ code_lengths[symbol++] = code_len;
+ if (code_len != 0) prev_code_len = code_len;
+ } else {
+ const int use_prev = (code_len == kCodeLengthRepeatCode);
+ const int slot = code_len - kCodeLengthLiterals;
+ const int extra_bits = kCodeLengthExtraBits[slot];
+ const int repeat_offset = kCodeLengthRepeatOffsets[slot];
+ int repeat = VP8LReadBits(br, extra_bits) + repeat_offset;
+ if (symbol + repeat > num_symbols) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ goto End;
+ } else {
+ const int length = use_prev ? prev_code_len : 0;
+ while (repeat-- > 0) code_lengths[symbol++] = length;
+ }
+ }
+ }
+ ok = 1;
+
+ End:
+ HuffmanTreeRelease(&tree);
+ return ok;
+}
+
+static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
+ HuffmanTree* const tree) {
+ int ok = 0;
+ VP8LBitReader* const br = &dec->br_;
+ const int simple_code = VP8LReadBits(br, 1);
+
+ if (simple_code) { // Read symbols, codes & code lengths directly.
+ int symbols[2];
+ int codes[2];
+ int code_lengths[2];
+ const int num_symbols = VP8LReadBits(br, 1) + 1;
+ const int first_symbol_len_code = VP8LReadBits(br, 1);
+    // The first code is either a 1-bit or an 8-bit code.
+ symbols[0] = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
+ codes[0] = 0;
+ code_lengths[0] = num_symbols - 1;
+    // The second code (if present) is always 8 bits long.
+ if (num_symbols == 2) {
+ symbols[1] = VP8LReadBits(br, 8);
+ codes[1] = 1;
+ code_lengths[1] = num_symbols - 1;
+ }
+ ok = HuffmanTreeBuildExplicit(tree, code_lengths, codes, symbols,
+ alphabet_size, num_symbols);
+ } else { // Decode Huffman-coded code lengths.
+ int* code_lengths = NULL;
+ int i;
+ int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
+ const int num_codes = VP8LReadBits(br, 4) + 4;
+ if (num_codes > NUM_CODE_LENGTH_CODES) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ return 0;
+ }
+
+ code_lengths =
+ (int*)WebPSafeCalloc((uint64_t)alphabet_size, sizeof(*code_lengths));
+ if (code_lengths == NULL) {
+ dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+ return 0;
+ }
+
+ for (i = 0; i < num_codes; ++i) {
+ code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3);
+ }
+ ok = ReadHuffmanCodeLengths(dec, code_length_code_lengths, alphabet_size,
+ code_lengths);
+ if (ok) {
+ ok = HuffmanTreeBuildImplicit(tree, code_lengths, alphabet_size);
+ }
+ free(code_lengths);
+ }
+ ok = ok && !br->error_;
+ if (!ok) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ return 0;
+ }
+ return 1;
+}
+
+static void DeleteHtreeGroups(HTreeGroup* htree_groups, int num_htree_groups) {
+ if (htree_groups != NULL) {
+ int i, j;
+ for (i = 0; i < num_htree_groups; ++i) {
+ HuffmanTree* const htrees = htree_groups[i].htrees_;
+ for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+ HuffmanTreeRelease(&htrees[j]);
+ }
+ }
+ free(htree_groups);
+ }
+}
+
+static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
+ int color_cache_bits, int allow_recursion) {
+ int i, j;
+ VP8LBitReader* const br = &dec->br_;
+ VP8LMetadata* const hdr = &dec->hdr_;
+ uint32_t* huffman_image = NULL;
+ HTreeGroup* htree_groups = NULL;
+ int num_htree_groups = 1;
+
+ if (allow_recursion && VP8LReadBits(br, 1)) {
+ // use meta Huffman codes.
+ const int huffman_precision = VP8LReadBits(br, 3) + 2;
+ const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
+ const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
+ const int huffman_pixs = huffman_xsize * huffman_ysize;
+ if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec,
+ &huffman_image)) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ goto Error;
+ }
+ hdr->huffman_subsample_bits_ = huffman_precision;
+ for (i = 0; i < huffman_pixs; ++i) {
+ // The Huffman data is stored in the red and green bytes.
+ const int index = (huffman_image[i] >> 8) & 0xffff;
+ huffman_image[i] = index;
+ if (index >= num_htree_groups) {
+ num_htree_groups = index + 1;
+ }
+ }
+ }
+
+ if (br->error_) goto Error;
+
+ assert(num_htree_groups <= 0x10000);
+ htree_groups =
+ (HTreeGroup*)WebPSafeCalloc((uint64_t)num_htree_groups,
+ sizeof(*htree_groups));
+ if (htree_groups == NULL) {
+ dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ for (i = 0; i < num_htree_groups; ++i) {
+ HuffmanTree* const htrees = htree_groups[i].htrees_;
+ for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+ int alphabet_size = kAlphabetSize[j];
+ if (j == 0 && color_cache_bits > 0) {
+ alphabet_size += 1 << color_cache_bits;
+ }
+ if (!ReadHuffmanCode(alphabet_size, dec, htrees + j)) goto Error;
+ }
+ }
+
+ // All OK. Finalize pointers and return.
+ hdr->huffman_image_ = huffman_image;
+ hdr->num_htree_groups_ = num_htree_groups;
+ hdr->htree_groups_ = htree_groups;
+ return 1;
+
+ Error:
+ free(huffman_image);
+ DeleteHtreeGroups(htree_groups, num_htree_groups);
+ return 0;
+}
+
+//------------------------------------------------------------------------------
+// Scaling.
+
+static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
+ const int num_channels = 4;
+ const int in_width = io->mb_w;
+ const int out_width = io->scaled_width;
+ const int in_height = io->mb_h;
+ const int out_height = io->scaled_height;
+ const uint64_t work_size = 2 * num_channels * (uint64_t)out_width;
+ int32_t* work; // Rescaler work area.
+ const uint64_t scaled_data_size = num_channels * (uint64_t)out_width;
+ uint32_t* scaled_data; // Temporary storage for scaled BGRA data.
+ const uint64_t memory_size = sizeof(*dec->rescaler) +
+ work_size * sizeof(*work) +
+ scaled_data_size * sizeof(*scaled_data);
+ uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory));
+ if (memory == NULL) {
+ dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+ return 0;
+ }
+ assert(dec->rescaler_memory == NULL);
+ dec->rescaler_memory = memory;
+
+ dec->rescaler = (WebPRescaler*)memory;
+ memory += sizeof(*dec->rescaler);
+ work = (int32_t*)memory;
+ memory += work_size * sizeof(*work);
+ scaled_data = (uint32_t*)memory;
+
+ WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data,
+ out_width, out_height, 0, num_channels,
+ in_width, out_width, in_height, out_height, work);
+ return 1;
+}
+
+//------------------------------------------------------------------------------
+// Export to ARGB
+
+// We have a special "export" function since we need to convert from BGRA.
+static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
+ int rgba_stride, uint8_t* const rgba) {
+ const uint32_t* const src = (const uint32_t*)rescaler->dst;
+ const int dst_width = rescaler->dst_width;
+ int num_lines_out = 0;
+ while (WebPRescalerHasPendingOutput(rescaler)) {
+ uint8_t* const dst = rgba + num_lines_out * rgba_stride;
+ WebPRescalerExportRow(rescaler);
+ VP8LConvertFromBGRA(src, dst_width, colorspace, dst);
+ ++num_lines_out;
+ }
+ return num_lines_out;
+}
+
+// Emit scaled rows.
+static int EmitRescaledRows(const VP8LDecoder* const dec,
+ const uint32_t* const data, int in_stride, int mb_h,
+ uint8_t* const out, int out_stride) {
+ const WEBP_CSP_MODE colorspace = dec->output_->colorspace;
+ const uint8_t* const in = (const uint8_t*)data;
+ int num_lines_in = 0;
+ int num_lines_out = 0;
+ while (num_lines_in < mb_h) {
+ const uint8_t* const row_in = in + num_lines_in * in_stride;
+ uint8_t* const row_out = out + num_lines_out * out_stride;
+ num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in,
+ row_in, in_stride);
+ num_lines_out += Export(dec->rescaler, colorspace, out_stride, row_out);
+ }
+ return num_lines_out;
+}
+
+// Emit rows without any scaling.
+static int EmitRows(WEBP_CSP_MODE colorspace,
+ const uint32_t* const data, int in_stride,
+ int mb_w, int mb_h,
+ uint8_t* const out, int out_stride) {
+ int lines = mb_h;
+ const uint8_t* row_in = (const uint8_t*)data;
+ uint8_t* row_out = out;
+ while (lines-- > 0) {
+ VP8LConvertFromBGRA((const uint32_t*)row_in, mb_w, colorspace, row_out);
+ row_in += in_stride;
+ row_out += out_stride;
+ }
+ return mb_h; // Num rows out == num rows in.
+}
+
+//------------------------------------------------------------------------------
+// Export to YUVA
+
+static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos,
+ const WebPDecBuffer* const output) {
+ const WebPYUVABuffer* const buf = &output->u.YUVA;
+ // first, the luma plane
+ {
+ int i;
+ uint8_t* const y = buf->y + y_pos * buf->y_stride;
+ for (i = 0; i < width; ++i) {
+ const uint32_t p = src[i];
+ y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff);
+ }
+ }
+
+ // then U/V planes
+ {
+ uint8_t* const u = buf->u + (y_pos >> 1) * buf->u_stride;
+ uint8_t* const v = buf->v + (y_pos >> 1) * buf->v_stride;
+ const int uv_width = width >> 1;
+ int i;
+ for (i = 0; i < uv_width; ++i) {
+ const uint32_t v0 = src[2 * i + 0];
+ const uint32_t v1 = src[2 * i + 1];
+ // VP8RGBToU/V expects four accumulated pixels. Hence we need to
+ // scale the r/g/b values by a factor of 2. We just shift v0/v1 one bit less.
+ const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe);
+ const int g = ((v0 >> 7) & 0x1fe) + ((v1 >> 7) & 0x1fe);
+ const int b = ((v0 << 1) & 0x1fe) + ((v1 << 1) & 0x1fe);
+ if (!(y_pos & 1)) { // even lines: store values
+ u[i] = VP8RGBToU(r, g, b);
+ v[i] = VP8RGBToV(r, g, b);
+ } else { // odd lines: average with previous values
+ const int tmp_u = VP8RGBToU(r, g, b);
+ const int tmp_v = VP8RGBToV(r, g, b);
+ // Approximate average-of-four, but the difference is acceptable.
+ u[i] = (u[i] + tmp_u + 1) >> 1;
+ v[i] = (v[i] + tmp_v + 1) >> 1;
+ }
+ }
+ if (width & 1) { // last pixel
+ const uint32_t v0 = src[2 * i + 0];
+ const int r = (v0 >> 14) & 0x3fc;
+ const int g = (v0 >> 6) & 0x3fc;
+ const int b = (v0 << 2) & 0x3fc;
+ if (!(y_pos & 1)) { // even lines
+ u[i] = VP8RGBToU(r, g, b);
+ v[i] = VP8RGBToV(r, g, b);
+ } else { // odd lines (note: we could just skip this)
+ const int tmp_u = VP8RGBToU(r, g, b);
+ const int tmp_v = VP8RGBToV(r, g, b);
+ u[i] = (u[i] + tmp_u + 1) >> 1;
+ v[i] = (v[i] + tmp_v + 1) >> 1;
+ }
+ }
+ }
+ // Lastly, store alpha if needed.
+ if (buf->a != NULL) {
+ int i;
+ uint8_t* const a = buf->a + y_pos * buf->a_stride;
+ for (i = 0; i < width; ++i) a[i] = (src[i] >> 24);
+ }
+}
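Editor's note (observation on the U/V loop above, not part of this change):

// r = 2*R(v0) + 2*R(v1), g = 2*G(v0) + 2*G(v1), b = 2*B(v0) + 2*B(v1),
// i.e. the two pixels of the current row are each counted twice, standing in
// for the four accumulated pixels (a full 2x2 block) that VP8RGBToU/V
// expects; odd lines then average with the value stored on the preceding
// even line.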
+
+static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
+ WebPRescaler* const rescaler = dec->rescaler;
+ const uint32_t* const src = (const uint32_t*)rescaler->dst;
+ const int dst_width = rescaler->dst_width;
+ int num_lines_out = 0;
+ while (WebPRescalerHasPendingOutput(rescaler)) {
+ WebPRescalerExportRow(rescaler);
+ ConvertToYUVA(src, dst_width, y_pos, dec->output_);
+ ++y_pos;
+ ++num_lines_out;
+ }
+ return num_lines_out;
+}
+
+static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
+ const uint32_t* const data,
+ int in_stride, int mb_h) {
+ const uint8_t* const in = (const uint8_t*)data;
+ int num_lines_in = 0;
+ int y_pos = dec->last_out_row_;
+ while (num_lines_in < mb_h) {
+ const uint8_t* const row_in = in + num_lines_in * in_stride;
+ num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in,
+ row_in, in_stride);
+ y_pos += ExportYUVA(dec, y_pos);
+ }
+ return y_pos;
+}
+
+static int EmitRowsYUVA(const VP8LDecoder* const dec,
+ const uint32_t* const data, int in_stride,
+ int mb_w, int num_rows) {
+ int y_pos = dec->last_out_row_;
+ const uint8_t* row_in = (const uint8_t*)data;
+ while (num_rows-- > 0) {
+ ConvertToYUVA((const uint32_t*)row_in, mb_w, y_pos, dec->output_);
+ row_in += in_stride;
+ ++y_pos;
+ }
+ return y_pos;
+}
+
+//------------------------------------------------------------------------------
+// Cropping.
+
+// Sets io->mb_y, io->mb_h & io->mb_w according to start row, end row and
+// crop options. Also updates the input data pointer, so that it points to the
+// start of the cropped window.
+// Note that 'pixel_stride' is in units of 'uint32_t' (and not bytes).
+// Returns true if the crop window is not empty.
+static int SetCropWindow(VP8Io* const io, int y_start, int y_end,
+ const uint32_t** const in_data, int pixel_stride) {
+ assert(y_start < y_end);
+ assert(io->crop_left < io->crop_right);
+ if (y_end > io->crop_bottom) {
+ y_end = io->crop_bottom; // make sure we don't overflow on last row.
+ }
+ if (y_start < io->crop_top) {
+ const int delta = io->crop_top - y_start;
+ y_start = io->crop_top;
+ *in_data += pixel_stride * delta;
+ }
+ if (y_start >= y_end) return 0; // Crop window is empty.
+
+ *in_data += io->crop_left;
+
+ io->mb_y = y_start - io->crop_top;
+ io->mb_w = io->crop_right - io->crop_left;
+ io->mb_h = y_end - y_start;
+ return 1; // Non-empty crop window.
+}
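Editor's note, with hypothetical numbers (not part of this change), tracing SetCropWindow():

// crop_top == 8, crop_bottom == 64, crop_left == 10; call with y_start == 4,
// y_end == 20:
//   y_end stays 20 (below crop_bottom), y_start is clamped to 8 (delta == 4),
//   *in_data advances by 4 * pixel_stride + 10 uint32_t entries, and the
//   function returns 1 with io->mb_y == 0 and io->mb_h == 12.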
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int GetMetaIndex(
+ const uint32_t* const image, int xsize, int bits, int x, int y) {
+ if (bits == 0) return 0;
+ return image[xsize * (y >> bits) + (x >> bits)];
+}
+
+static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
+ int x, int y) {
+ const int meta_index = GetMetaIndex(hdr->huffman_image_, hdr->huffman_xsize_,
+ hdr->huffman_subsample_bits_, x, y);
+ assert(meta_index < hdr->num_htree_groups_);
+ return hdr->htree_groups_ + meta_index;
+}
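Editor's note, with hypothetical numbers (not part of this change), tracing the tile lookup above:

// huffman_subsample_bits_ == 4 (16x16 tiles), huffman_xsize_ == 5:
//   the pixel at (x == 37, y == 21) lies in tile (37 >> 4, 21 >> 4) == (2, 1),
//   so GetMetaIndex() returns huffman_image_[5 * 1 + 2] == huffman_image_[7],
//   which selects the HTreeGroup used to decode that pixel.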
+
+//------------------------------------------------------------------------------
+// Main loop, with custom row-processing function
+
+typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
+
+static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
+ const uint32_t* const rows) {
+ int n = dec->next_transform_;
+ const int cache_pixs = dec->width_ * num_rows;
+ const int start_row = dec->last_row_;
+ const int end_row = start_row + num_rows;
+ const uint32_t* rows_in = rows;
+ uint32_t* const rows_out = dec->argb_cache_;
+
+ // Inverse transforms.
+ // TODO: most transforms only need to operate on the cropped region.
+ memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
+ while (n-- > 0) {
+ VP8LTransform* const transform = &dec->transforms_[n];
+ VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out);
+ rows_in = rows_out;
+ }
+}
+
+// Processes (transforms, scales & color-converts) the rows decoded after the
+// last call.
+static void ProcessRows(VP8LDecoder* const dec, int row) {
+ const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_;
+ const int num_rows = row - dec->last_row_;
+
+ if (num_rows <= 0) return; // Nothing to be done.
+ ApplyInverseTransforms(dec, num_rows, rows);
+
+ // Emit output.
+ {
+ VP8Io* const io = dec->io_;
+ const uint32_t* rows_data = dec->argb_cache_;
+ if (!SetCropWindow(io, dec->last_row_, row, &rows_data, io->width)) {
+ // Nothing to output (this time).
+ } else {
+ const WebPDecBuffer* const output = dec->output_;
+ const int in_stride = io->width * sizeof(*rows_data);
+ if (output->colorspace < MODE_YUV) { // convert to RGBA
+ const WebPRGBABuffer* const buf = &output->u.RGBA;
+ uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
+ const int num_rows_out = io->use_scaling ?
+ EmitRescaledRows(dec, rows_data, in_stride, io->mb_h,
+ rgba, buf->stride) :
+ EmitRows(output->colorspace, rows_data, in_stride,
+ io->mb_w, io->mb_h, rgba, buf->stride);
+ // Update 'last_out_row_'.
+ dec->last_out_row_ += num_rows_out;
+ } else { // convert to YUVA
+ dec->last_out_row_ = io->use_scaling ?
+ EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h) :
+ EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
+ }
+ assert(dec->last_out_row_ <= output->height);
+ }
+ }
+
+ // Update 'last_row_'.
+ dec->last_row_ = row;
+ assert(dec->last_row_ <= dec->height_);
+}
+
+static int DecodeImageData(VP8LDecoder* const dec,
+ uint32_t* const data, int width, int height,
+ ProcessRowsFunc process_func) {
+ int ok = 1;
+ int col = 0, row = 0;
+ VP8LBitReader* const br = &dec->br_;
+ VP8LMetadata* const hdr = &dec->hdr_;
+ HTreeGroup* htree_group = hdr->htree_groups_;
+ uint32_t* src = data;
+ uint32_t* last_cached = data;
+ uint32_t* const src_end = data + width * height;
+ const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
+ const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
+ VP8LColorCache* const color_cache =
+ (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
+ const int mask = hdr->huffman_mask_;
+
+ assert(htree_group != NULL);
+
+ while (!br->eos_ && src < src_end) {
+ int code;
+ // Only update when changing tile. Note we could use the following test:
+ // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
+ // but that's actually slower and requires storing the previous col/row
+ if ((col & mask) == 0) {
+ htree_group = GetHtreeGroupForPos(hdr, col, row);
+ }
+ VP8LFillBitWindow(br);
+ code = ReadSymbol(&htree_group->htrees_[GREEN], br);
+ if (code < NUM_LITERAL_CODES) { // Literal.
+ int red, green, blue, alpha;
+ red = ReadSymbol(&htree_group->htrees_[RED], br);
+ green = code;
+ VP8LFillBitWindow(br);
+ blue = ReadSymbol(&htree_group->htrees_[BLUE], br);
+ alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);
+ *src = (alpha << 24) + (red << 16) + (green << 8) + blue;
+ AdvanceByOne:
+ ++src;
+ ++col;
+ if (col >= width) {
+ col = 0;
+ ++row;
+ if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
+ process_func(dec, row);
+ }
+ if (color_cache != NULL) {
+ while (last_cached < src) {
+ VP8LColorCacheInsert(color_cache, *last_cached++);
+ }
+ }
+ }
+ } else if (code < len_code_limit) { // Backward reference
+ int dist_code, dist;
+ const int length_sym = code - NUM_LITERAL_CODES;
+ const int length = GetCopyLength(length_sym, br);
+ const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);
+ VP8LFillBitWindow(br);
+ dist_code = GetCopyDistance(dist_symbol, br);
+ dist = PlaneCodeToDistance(width, dist_code);
+ if (src - data < dist || src_end - src < length) {
+ ok = 0;
+ goto End;
+ }
+ {
+ int i;
+ for (i = 0; i < length; ++i) src[i] = src[i - dist];
+ src += length;
+ }
+ col += length;
+ while (col >= width) {
+ col -= width;
+ ++row;
+ if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
+ process_func(dec, row);
+ }
+ }
+ if (src < src_end) {
+ htree_group = GetHtreeGroupForPos(hdr, col, row);
+ if (color_cache != NULL) {
+ while (last_cached < src) {
+ VP8LColorCacheInsert(color_cache, *last_cached++);
+ }
+ }
+ }
+ } else if (code < color_cache_limit) { // Color cache.
+ const int key = code - len_code_limit;
+ assert(color_cache != NULL);
+ while (last_cached < src) {
+ VP8LColorCacheInsert(color_cache, *last_cached++);
+ }
+ *src = VP8LColorCacheLookup(color_cache, key);
+ goto AdvanceByOne;
+ } else { // Not reached.
+ ok = 0;
+ goto End;
+ }
+ ok = !br->error_;
+ if (!ok) goto End;
+ }
+ // Process the remaining rows corresponding to last row-block.
+ if (process_func != NULL) process_func(dec, row);
+
+ End:
+ if (br->error_ || !ok || (br->eos_ && src < src_end)) {
+ ok = 0;
+ dec->status_ = (!br->eos_) ?
+ VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED;
+ } else if (src == src_end) {
+ dec->state_ = READ_DATA;
+ }
+
+ return ok;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LTransform
+
+static void ClearTransform(VP8LTransform* const transform) {
+ free(transform->data_);
+ transform->data_ = NULL;
+}
+
+// For security reasons, we need to remap the color map to span
+// the total possible bundled values, and not just the num_colors.
+static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
+ int i;
+ const int final_num_colors = 1 << (8 >> transform->bits_);
+ uint32_t* const new_color_map =
+ (uint32_t*)WebPSafeMalloc((uint64_t)final_num_colors,
+ sizeof(*new_color_map));
+ if (new_color_map == NULL) {
+ return 0;
+ } else {
+ uint8_t* const data = (uint8_t*)transform->data_;
+ uint8_t* const new_data = (uint8_t*)new_color_map;
+ new_color_map[0] = transform->data_[0];
+ for (i = 4; i < 4 * num_colors; ++i) {
+ // Equivalent to AddPixelEq(), on a byte-basis.
+ new_data[i] = (data[i] + new_data[i - 4]) & 0xff;
+ }
+ for (; i < 4 * final_num_colors; ++i)
+ new_data[i] = 0; // black tail.
+ free(transform->data_);
+ transform->data_ = new_color_map;
+ }
+ return 1;
+}
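Editor's note (illustration, not part of this change) for the expansion above:

// With bits_ == 1 the expanded map has 1 << (8 >> 1) == 16 entries. If
// num_colors == 5, entry 0 is copied verbatim, entries 1..4 are rebuilt by
// adding each transmitted per-channel delta to the previous entry (byte-wise,
// mod 256), and entries 5..15 are zero-filled so that any 4-bit index read
// from the bundled image stays within the map.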
+
+static int ReadTransform(int* const xsize, int const* ysize,
+ VP8LDecoder* const dec) {
+ int ok = 1;
+ VP8LBitReader* const br = &dec->br_;
+ VP8LTransform* transform = &dec->transforms_[dec->next_transform_];
+ const VP8LImageTransformType type =
+ (VP8LImageTransformType)VP8LReadBits(br, 2);
+
+ // Each transform type can only be present once in the stream.
+ if (dec->transforms_seen_ & (1U << type)) {
+ return 0; // Already present: don't accept a second transform of this type.
+ }
+ dec->transforms_seen_ |= (1U << type);
+
+ transform->type_ = type;
+ transform->xsize_ = *xsize;
+ transform->ysize_ = *ysize;
+ transform->data_ = NULL;
+ ++dec->next_transform_;
+ assert(dec->next_transform_ <= NUM_TRANSFORMS);
+
+ switch (type) {
+ case PREDICTOR_TRANSFORM:
+ case CROSS_COLOR_TRANSFORM:
+ transform->bits_ = VP8LReadBits(br, 3) + 2;
+ ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize_,
+ transform->bits_),
+ VP8LSubSampleSize(transform->ysize_,
+ transform->bits_),
+ 0, dec, &transform->data_);
+ break;
+ case COLOR_INDEXING_TRANSFORM: {
+ const int num_colors = VP8LReadBits(br, 8) + 1;
+ const int bits = (num_colors > 16) ? 0
+ : (num_colors > 4) ? 1
+ : (num_colors > 2) ? 2
+ : 3;
+ *xsize = VP8LSubSampleSize(transform->xsize_, bits);
+ transform->bits_ = bits;
+ ok = DecodeImageStream(num_colors, 1, 0, dec, &transform->data_);
+ ok = ok && ExpandColorMap(num_colors, transform);
+ break;
+ }
+ case SUBTRACT_GREEN:
+ break;
+ default:
+ assert(0); // can't happen
+ break;
+ }
+
+ return ok;
+}
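Editor's note (illustration, not part of this change) for the color-indexing case above:

// num_colors == 5 gives bits == 1: two 4-bit palette indices are bundled into
// each green byte, and *xsize shrinks to VP8LSubSampleSize(xsize, 1), i.e.
// (xsize + 1) >> 1. With more than 16 colors, bits == 0 and the indices
// remain one per pixel.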
+
+// -----------------------------------------------------------------------------
+// VP8LMetadata
+
+static void InitMetadata(VP8LMetadata* const hdr) {
+ assert(hdr);
+ memset(hdr, 0, sizeof(*hdr));
+}
+
+static void ClearMetadata(VP8LMetadata* const hdr) {
+ assert(hdr);
+
+ free(hdr->huffman_image_);
+ DeleteHtreeGroups(hdr->htree_groups_, hdr->num_htree_groups_);
+ VP8LColorCacheClear(&hdr->color_cache_);
+ InitMetadata(hdr);
+}
+
+// -----------------------------------------------------------------------------
+// VP8LDecoder
+
+VP8LDecoder* VP8LNew(void) {
+ VP8LDecoder* const dec = (VP8LDecoder*)calloc(1, sizeof(*dec));
+ if (dec == NULL) return NULL;
+ dec->status_ = VP8_STATUS_OK;
+ dec->action_ = READ_DIM;
+ dec->state_ = READ_DIM;
+ return dec;
+}
+
+void VP8LClear(VP8LDecoder* const dec) {
+ int i;
+ if (dec == NULL) return;
+ ClearMetadata(&dec->hdr_);
+
+ free(dec->argb_);
+ dec->argb_ = NULL;
+ for (i = 0; i < dec->next_transform_; ++i) {
+ ClearTransform(&dec->transforms_[i]);
+ }
+ dec->next_transform_ = 0;
+ dec->transforms_seen_ = 0;
+
+ free(dec->rescaler_memory);
+ dec->rescaler_memory = NULL;
+
+ dec->output_ = NULL; // leave no trace behind
+}
+
+void VP8LDelete(VP8LDecoder* const dec) {
+ if (dec != NULL) {
+ VP8LClear(dec);
+ free(dec);
+ }
+}
+
+static void UpdateDecoder(VP8LDecoder* const dec, int width, int height) {
+ VP8LMetadata* const hdr = &dec->hdr_;
+ const int num_bits = hdr->huffman_subsample_bits_;
+ dec->width_ = width;
+ dec->height_ = height;
+
+ hdr->huffman_xsize_ = VP8LSubSampleSize(width, num_bits);
+ hdr->huffman_mask_ = (num_bits == 0) ? ~0 : (1 << num_bits) - 1;
+}
+
+static int DecodeImageStream(int xsize, int ysize,
+ int is_level0,
+ VP8LDecoder* const dec,
+ uint32_t** const decoded_data) {
+ int ok = 1;
+ int transform_xsize = xsize;
+ int transform_ysize = ysize;
+ VP8LBitReader* const br = &dec->br_;
+ VP8LMetadata* const hdr = &dec->hdr_;
+ uint32_t* data = NULL;
+ int color_cache_bits = 0;
+
+ // Read the transforms (may recurse).
+ if (is_level0) {
+ while (ok && VP8LReadBits(br, 1)) {
+ ok = ReadTransform(&transform_xsize, &transform_ysize, dec);
+ }
+ }
+
+ // Color cache
+ if (ok && VP8LReadBits(br, 1)) {
+ color_cache_bits = VP8LReadBits(br, 4);
+ ok = (color_cache_bits >= 1 && color_cache_bits <= MAX_CACHE_BITS);
+ if (!ok) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ goto End;
+ }
+ }
+
+ // Read the Huffman codes (may recurse).
+ ok = ok && ReadHuffmanCodes(dec, transform_xsize, transform_ysize,
+ color_cache_bits, is_level0);
+ if (!ok) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ goto End;
+ }
+
+ // Finish setting up the color-cache
+ if (color_cache_bits > 0) {
+ hdr->color_cache_size_ = 1 << color_cache_bits;
+ if (!VP8LColorCacheInit(&hdr->color_cache_, color_cache_bits)) {
+ dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+ ok = 0;
+ goto End;
+ }
+ } else {
+ hdr->color_cache_size_ = 0;
+ }
+ UpdateDecoder(dec, transform_xsize, transform_ysize);
+
+ if (is_level0) { // level 0 complete
+ dec->state_ = READ_HDR;
+ goto End;
+ }
+
+ {
+ const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize;
+ data = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*data));
+ if (data == NULL) {
+ dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+ ok = 0;
+ goto End;
+ }
+ }
+
+ // Use the Huffman trees to decode the LZ77 encoded data.
+ ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, NULL);
+ ok = ok && !br->error_;
+
+ End:
+
+ if (!ok) {
+ free(data);
+ ClearMetadata(hdr);
+ // If the bitstream error was caused by a lack of data (br.eos_), update
+ // the status accordingly.
+ if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR && dec->br_.eos_) {
+ dec->status_ = VP8_STATUS_SUSPENDED;
+ }
+ } else {
+ if (decoded_data != NULL) {
+ *decoded_data = data;
+ } else {
+ // We allocate image data in this function only for transforms. At level 0
+ // (that is: not the transforms), we shouldn't have allocated anything.
+ assert(data == NULL);
+ assert(is_level0);
+ }
+ if (!is_level0) ClearMetadata(hdr); // Clean up temporary data behind.
+ }
+ return ok;
+}
+
+//------------------------------------------------------------------------------
+// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_
+
+static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) {
+ const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_;
+ // Scratch buffer corresponding to top-prediction row for transforming the
+ // first row in the row-blocks.
+ const uint64_t cache_top_pixels = final_width;
+ // Scratch buffer for temporary BGRA storage.
+ const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
+ const uint64_t total_num_pixels =
+ num_pixels + cache_top_pixels + cache_pixels;
+
+ assert(dec->width_ <= final_width);
+ dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_));
+ if (dec->argb_ == NULL) {
+ dec->argb_cache_ = NULL; // for sanity check
+ dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+ return 0;
+ }
+ dec->argb_cache_ = dec->argb_ + num_pixels + cache_top_pixels;
+ return 1;
+}
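Editor's note, with hypothetical numbers (not part of this change), assuming NUM_ARGB_CACHE_ROWS == 16:

// For a 100x80 image decoded with final_width == 100, the single allocation
// above holds 100*80 + 100 + 100*16 uint32_t values; argb_cache_ points just
// past the image pixels and the one-row top-prediction scratch area.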
+
+//------------------------------------------------------------------------------
+// Special row-processing that only stores the alpha data.
+
+static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
+ const int num_rows = row - dec->last_row_;
+ const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_;
+
+ if (num_rows <= 0) return; // Nothing to be done.
+ ApplyInverseTransforms(dec, num_rows, in);
+
+ // Extract alpha (which is stored in the green plane).
+ {
+ const int width = dec->io_->width; // the final width (!= dec->width_)
+ const int cache_pixs = width * num_rows;
+ uint8_t* const dst = (uint8_t*)dec->io_->opaque + width * dec->last_row_;
+ const uint32_t* const src = dec->argb_cache_;
+ int i;
+ for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
+ }
+
+ dec->last_row_ = dec->last_out_row_ = row;
+}
+
+int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
+ size_t data_size, uint8_t* const output) {
+ VP8Io io;
+ int ok = 0;
+ VP8LDecoder* const dec = VP8LNew();
+ if (dec == NULL) return 0;
+
+ dec->width_ = width;
+ dec->height_ = height;
+ dec->io_ = &io;
+
+ VP8InitIo(&io);
+ WebPInitCustomIo(NULL, &io); // Just a sanity Init. io won't be used.
+ io.opaque = output;
+ io.width = width;
+ io.height = height;
+
+ dec->status_ = VP8_STATUS_OK;
+ VP8LInitBitReader(&dec->br_, data, data_size);
+
+ dec->action_ = READ_HDR;
+ if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err;
+
+ // Allocate output (note that dec->width_ may have changed here).
+ if (!AllocateARGBBuffers(dec, width)) goto Err;
+
+ // Decode (with special row processing).
+ dec->action_ = READ_DATA;
+ ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
+ ExtractAlphaRows);
+
+ Err:
+ VP8LDelete(dec);
+ return ok;
+}
+
+//------------------------------------------------------------------------------
+
+int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
+ int width, height, has_alpha;
+
+ if (dec == NULL) return 0;
+ if (io == NULL) {
+ dec->status_ = VP8_STATUS_INVALID_PARAM;
+ return 0;
+ }
+
+ dec->io_ = io;
+ dec->status_ = VP8_STATUS_OK;
+ VP8LInitBitReader(&dec->br_, io->data, io->data_size);
+ if (!ReadImageInfo(&dec->br_, &width, &height, &has_alpha)) {
+ dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+ goto Error;
+ }
+ dec->state_ = READ_DIM;
+ io->width = width;
+ io->height = height;
+
+ dec->action_ = READ_HDR;
+ if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error;
+ return 1;
+
+ Error:
+ VP8LClear(dec);
+ assert(dec->status_ != VP8_STATUS_OK);
+ return 0;
+}
+
+int VP8LDecodeImage(VP8LDecoder* const dec) {
+ VP8Io* io = NULL;
+ WebPDecParams* params = NULL;
+
+ // Sanity checks.
+ if (dec == NULL) return 0;
+
+ io = dec->io_;
+ assert(io != NULL);
+ params = (WebPDecParams*)io->opaque;
+ assert(params != NULL);
+ dec->output_ = params->output;
+ assert(dec->output_ != NULL);
+
+ // Initialization.
+ if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
+ dec->status_ = VP8_STATUS_INVALID_PARAM;
+ goto Err;
+ }
+
+ if (!AllocateARGBBuffers(dec, io->width)) goto Err;
+
+ if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
+
+ // Decode.
+ dec->action_ = READ_DATA;
+ if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
+ ProcessRows)) {
+ goto Err;
+ }
+
+ // Cleanup.
+ params->last_y = dec->last_out_row_;
+ VP8LClear(dec);
+ return 1;
+
+ Err:
+ VP8LClear(dec);
+ assert(dec->status_ != VP8_STATUS_OK);
+ return 0;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/dec/vp8li.h b/src/dec/vp8li.h
new file mode 100644
index 00000000..be50e45b
--- /dev/null
+++ b/src/dec/vp8li.h
@@ -0,0 +1,121 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Lossless decoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+// Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_DEC_VP8LI_H_
+#define WEBP_DEC_VP8LI_H_
+
+#include <string.h> // for memcpy()
+#include "./webpi.h"
+#include "../utils/bit_reader.h"
+#include "../utils/color_cache.h"
+#include "../utils/huffman.h"
+#include "webp/format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef enum {
+ READ_DATA = 0,
+ READ_HDR = 1,
+ READ_DIM = 2
+} VP8LDecodeState;
+
+typedef struct VP8LTransform VP8LTransform;
+struct VP8LTransform {
+ VP8LImageTransformType type_; // transform type.
+ int bits_; // subsampling bits defining transform window.
+ int xsize_; // transform window X index.
+ int ysize_; // transform window Y index.
+ uint32_t *data_; // transform data.
+};
+
+typedef struct {
+ HuffmanTree htrees_[HUFFMAN_CODES_PER_META_CODE];
+} HTreeGroup;
+
+typedef struct {
+ int color_cache_size_;
+ VP8LColorCache color_cache_;
+
+ int huffman_mask_;
+ int huffman_subsample_bits_;
+ int huffman_xsize_;
+ uint32_t *huffman_image_;
+ int num_htree_groups_;
+ HTreeGroup *htree_groups_;
+} VP8LMetadata;
+
+typedef struct {
+ VP8StatusCode status_;
+ VP8LDecodeState action_;
+ VP8LDecodeState state_;
+ VP8Io *io_;
+
+ const WebPDecBuffer *output_; // shortcut to io->opaque->output
+
+ uint32_t *argb_; // Internal data: always in BGRA color mode.
+ uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage.
+
+ VP8LBitReader br_;
+
+ int width_;
+ int height_;
+ int last_row_; // last input row decoded so far.
+ int last_out_row_; // last row output so far.
+
+ VP8LMetadata hdr_;
+
+ int next_transform_;
+ VP8LTransform transforms_[NUM_TRANSFORMS];
+ // OR'd bitset storing the transform types.
+ uint32_t transforms_seen_;
+
+ uint8_t *rescaler_memory; // Working memory for rescaling.
+ WebPRescaler *rescaler; // Common rescaler for all channels.
+} VP8LDecoder;
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// in vp8l.c
+
+// Decodes a raw image stream (without header) and stores the alpha data
+// into *output, which must be of size width x height. Returns false in case
+// of error.
+int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
+ size_t data_size, uint8_t* const output);
+
+// Allocates and initializes a new lossless decoder instance.
+VP8LDecoder* VP8LNew(void);
+
+// Decodes the image header. Returns false in case of error.
+int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
+
+// Decodes an image. It's required to decode the lossless header before calling
+// this function. Returns false in case of error, with updated dec->status_.
+int VP8LDecodeImage(VP8LDecoder* const dec);
+
+// Resets the decoder to its initial state, reclaiming memory.
+// Preserves the dec->status_ value.
+void VP8LClear(VP8LDecoder* const dec);
+
+// Clears and deallocates a lossless decoder instance.
+void VP8LDelete(VP8LDecoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_DEC_VP8LI_H_ */
diff --git a/src/dec/webp.c b/src/dec/webp.c
index eea5e6ee..54cb6d32 100644
--- a/src/dec/webp.c
+++ b/src/dec/webp.c
@@ -1,4 +1,4 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -10,60 +10,367 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
-#include "vp8i.h"
-#include "webpi.h"
+
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "./webpi.h"
+#include "webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// RIFF layout is:
-// 0ffset tag
+// Offset tag
// 0...3 "RIFF" 4-byte tag
// 4...7 size of image data (including metadata) starting at offset 8
// 8...11 "WEBP" our form-type signature
-// 12..15 "VP8 ": 4-bytes tags, describing the raw video format used
+// The RIFF container (12 bytes) is followed by appropriate chunks:
+// 12..15 "VP8 ": 4-bytes tags, signaling the use of VP8 video format
// 16..19 size of the raw VP8 image data, starting at offset 20
// 20.... the VP8 bytes
-// There can be extra chunks after the "VP8 " chunk (ICMT, ICOP, ...)
-// All 32-bits sizes are in little-endian order.
-// Note: chunk data must be padded to multiple of 2 in size
+// Or,
+// 12..15 "VP8L": 4-bytes tags, signaling the use of VP8L lossless format
+// 16..19 size of the raw VP8L image data, starting at offset 20
+// 20.... the VP8L bytes
+// Or,
+// 12..15 "VP8X": 4-bytes tags, describing the extended-VP8 chunk.
+// 16..19 size of the VP8X chunk starting at offset 20.
+// 20..23 VP8X flags bit-map corresponding to the chunk-types present.
+// 24..26 Width of the Canvas Image.
+// 27..29 Height of the Canvas Image.
+// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8,
+// META ...)
+// All sizes are in little-endian order.
+// Note: chunk data must be padded to an even size when written.
+
+static WEBP_INLINE uint32_t get_le24(const uint8_t* const data) {
+ return data[0] | (data[1] << 8) | (data[2] << 16);
+}
-static inline uint32_t get_le32(const uint8_t* const data) {
- return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
+static WEBP_INLINE uint32_t get_le32(const uint8_t* const data) {
+ return (uint32_t)get_le24(data) | (data[3] << 24);
}
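Editor's note, with hypothetical sizes (not part of this change), showing the layout described above:

// A simple lossy file with a 3456-byte VP8 payload starts with the 20 bytes
//   'R','I','F','F', le32(3468), 'W','E','B','P', 'V','P','8',' ', le32(3456)
// where le32(3468) is the byte sequence {0x8C, 0x0D, 0x00, 0x00}, which
// get_le32() reads back as 3468 (the RIFF size: everything after the first
// 8 bytes).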
-// If a RIFF container is detected, validate it and skip over it.
-uint32_t WebPCheckRIFFHeader(const uint8_t** data_ptr,
- uint32_t* data_size_ptr) {
- uint32_t chunk_size = 0xffffffffu;
- if (*data_size_ptr >= 10 + 20 && !memcmp(*data_ptr, "RIFF", 4)) {
- if (memcmp(*data_ptr + 8, "WEBP", 4)) {
- return 0; // wrong image file signature
+// Validates the RIFF container (if detected) and skips over it.
+// If a RIFF container is detected,
+// Returns VP8_STATUS_BITSTREAM_ERROR for invalid header, and
+// VP8_STATUS_OK otherwise.
+// If there are not enough bytes (partial RIFF container), *riff_size is set
+// to 0; otherwise it is set to the RIFF size extracted from the header.
+static VP8StatusCode ParseRIFF(const uint8_t** const data,
+ size_t* const data_size,
+ size_t* const riff_size) {
+ assert(data != NULL);
+ assert(data_size != NULL);
+ assert(riff_size != NULL);
+
+ *riff_size = 0; // Default: no RIFF present.
+ if (*data_size >= RIFF_HEADER_SIZE && !memcmp(*data, "RIFF", TAG_SIZE)) {
+ if (memcmp(*data + 8, "WEBP", TAG_SIZE)) {
+ return VP8_STATUS_BITSTREAM_ERROR; // Wrong image file signature.
} else {
- const uint32_t riff_size = get_le32(*data_ptr + 4);
- if (riff_size < 12) {
- return 0; // we should have at least one chunk
- }
- if (memcmp(*data_ptr + 12, "VP8 ", 4)) {
- return 0; // invalid compression format
- }
- chunk_size = get_le32(*data_ptr + 16);
- if (chunk_size > riff_size - 12) {
- return 0; // inconsistent size information.
+ const uint32_t size = get_le32(*data + TAG_SIZE);
+ // Check that we have at least one chunk (i.e "WEBP" + "VP8?nnnn").
+ if (size < TAG_SIZE + CHUNK_HEADER_SIZE) {
+ return VP8_STATUS_BITSTREAM_ERROR;
}
// We have a RIFF container. Skip it.
- *data_ptr += 20;
- *data_size_ptr -= 20;
- // Note: we don't report error for odd-sized chunks.
+ *riff_size = size;
+ *data += RIFF_HEADER_SIZE;
+ *data_size -= RIFF_HEADER_SIZE;
+ }
+ }
+ return VP8_STATUS_OK;
+}
+
+// Validates the VP8X header and skips over it.
+// Returns VP8_STATUS_BITSTREAM_ERROR for invalid VP8X header,
+// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+// VP8_STATUS_OK otherwise.
+// If a VP8X chunk is found, found_vp8x is set to true and *width_ptr,
+// *height_ptr and *flags_ptr are set to the corresponding values extracted
+// from the VP8X chunk.
+static VP8StatusCode ParseVP8X(const uint8_t** const data,
+ size_t* const data_size,
+ int* const found_vp8x,
+ int* const width_ptr, int* const height_ptr,
+ uint32_t* const flags_ptr) {
+ const uint32_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+ assert(data != NULL);
+ assert(data_size != NULL);
+ assert(found_vp8x != NULL);
+
+ *found_vp8x = 0;
+
+ if (*data_size < CHUNK_HEADER_SIZE) {
+ return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data.
+ }
+
+ if (!memcmp(*data, "VP8X", TAG_SIZE)) {
+ int width, height;
+ uint32_t flags;
+ const uint32_t chunk_size = get_le32(*data + TAG_SIZE);
+ if (chunk_size != VP8X_CHUNK_SIZE) {
+ return VP8_STATUS_BITSTREAM_ERROR; // Wrong chunk size.
+ }
+
+ // Verify if enough data is available to validate the VP8X chunk.
+ if (*data_size < vp8x_size) {
+ return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data.
+ }
+ flags = get_le32(*data + 8);
+ width = 1 + get_le24(*data + 12);
+ height = 1 + get_le24(*data + 15);
+ if (width * (uint64_t)height >= MAX_IMAGE_AREA) {
+ return VP8_STATUS_BITSTREAM_ERROR; // image is too large
+ }
+
+ if (flags_ptr != NULL) *flags_ptr = flags;
+ if (width_ptr != NULL) *width_ptr = width;
+ if (height_ptr != NULL) *height_ptr = height;
+ // Skip over VP8X header bytes.
+ *data += vp8x_size;
+ *data_size -= vp8x_size;
+ *found_vp8x = 1;
+ }
+ return VP8_STATUS_OK;
+}
+
+// Skips to the next VP8/VP8L chunk header in the data given the size of the
+// RIFF chunk 'riff_size'.
+// Returns VP8_STATUS_BITSTREAM_ERROR if any invalid chunk size is encountered,
+// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+// VP8_STATUS_OK otherwise.
+// If an alpha chunk is found, *alpha_data and *alpha_size are set
+// appropriately.
+static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
+ size_t* const data_size,
+ size_t const riff_size,
+ const uint8_t** const alpha_data,
+ size_t* const alpha_size) {
+ const uint8_t* buf;
+ size_t buf_size;
+ uint32_t total_size = TAG_SIZE + // "WEBP".
+ CHUNK_HEADER_SIZE + // "VP8Xnnnn".
+ VP8X_CHUNK_SIZE; // data.
+ assert(data != NULL);
+ assert(data_size != NULL);
+ buf = *data;
+ buf_size = *data_size;
+
+ assert(alpha_data != NULL);
+ assert(alpha_size != NULL);
+ *alpha_data = NULL;
+ *alpha_size = 0;
+
+ while (1) {
+ uint32_t chunk_size;
+ uint32_t disk_chunk_size; // chunk_size with padding
+
+ *data = buf;
+ *data_size = buf_size;
+
+ if (buf_size < CHUNK_HEADER_SIZE) { // Insufficient data.
+ return VP8_STATUS_NOT_ENOUGH_DATA;
+ }
+
+ chunk_size = get_le32(buf + TAG_SIZE);
+ // For odd-sized chunk-payload, there's one byte padding at the end.
+ disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1;
+ total_size += disk_chunk_size;
+
+ // Check that total bytes skipped so far does not exceed riff_size.
+ if (riff_size > 0 && (total_size > riff_size)) {
+ return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
+ }
+
+ if (buf_size < disk_chunk_size) { // Insufficient data.
+ return VP8_STATUS_NOT_ENOUGH_DATA;
+ }
+
+ if (!memcmp(buf, "ALPH", TAG_SIZE)) { // A valid ALPH header.
+ *alpha_data = buf + CHUNK_HEADER_SIZE;
+ *alpha_size = chunk_size;
+ } else if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
+ !memcmp(buf, "VP8L", TAG_SIZE)) { // A valid VP8/VP8L header.
+ return VP8_STATUS_OK; // Found.
+ }
+
+ // We have a full and valid chunk; skip it.
+ buf += disk_chunk_size;
+ buf_size -= disk_chunk_size;
+ }
+}
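Editor's note (illustration, not part of this change) for the padding rule above:

// A chunk with chunk_size == 7 occupies (8 + 7 + 1) & ~1 == 16 bytes on disk
// (one pad byte); chunk_size == 8 also occupies 16 bytes (no pad byte),
// with CHUNK_HEADER_SIZE == 8 covering the tag and the size field.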
+
+// Validates the VP8/VP8L Header ("VP8 nnnn" or "VP8L nnnn") and skips over it.
+// Returns VP8_STATUS_BITSTREAM_ERROR for invalid (chunk larger than
+// riff_size) VP8/VP8L header,
+// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+// VP8_STATUS_OK otherwise.
+// If a VP8/VP8L chunk is found, *chunk_size is set to the total number of bytes
+// extracted from the VP8/VP8L chunk header.
+// The flag '*is_lossless' is set to 1 in case of VP8L chunk / raw VP8L data.
+static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
+ size_t* const data_size,
+ size_t riff_size,
+ size_t* const chunk_size,
+ int* const is_lossless) {
+ const uint8_t* const data = *data_ptr;
+ const int is_vp8 = !memcmp(data, "VP8 ", TAG_SIZE);
+ const int is_vp8l = !memcmp(data, "VP8L", TAG_SIZE);
+ const uint32_t minimal_size =
+ TAG_SIZE + CHUNK_HEADER_SIZE; // "WEBP" + "VP8 nnnn" OR
+ // "WEBP" + "VP8Lnnnn"
+ assert(data != NULL);
+ assert(data_size != NULL);
+ assert(chunk_size != NULL);
+ assert(is_lossless != NULL);
+
+ if (*data_size < CHUNK_HEADER_SIZE) {
+ return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data.
+ }
+
+ if (is_vp8 || is_vp8l) {
+ // Bitstream contains VP8/VP8L header.
+ const uint32_t size = get_le32(data + TAG_SIZE);
+ if ((riff_size >= minimal_size) && (size > riff_size - minimal_size)) {
+ return VP8_STATUS_BITSTREAM_ERROR; // Inconsistent size information.
+ }
+ // Skip over CHUNK_HEADER_SIZE bytes from VP8/VP8L Header.
+ *chunk_size = size;
+ *data_ptr += CHUNK_HEADER_SIZE;
+ *data_size -= CHUNK_HEADER_SIZE;
+ *is_lossless = is_vp8l;
+ } else {
+ // Raw VP8/VP8L bitstream (no header).
+ *is_lossless = VP8LCheckSignature(data, *data_size);
+ *chunk_size = *data_size;
+ }
+
+ return VP8_STATUS_OK;
+}
+
+//------------------------------------------------------------------------------
+
+// Fetch '*width', '*height', '*has_alpha' and fill out 'headers' based on
+// 'data'. All the output parameters may be NULL. If 'headers' is NULL only the
+// minimal amount will be read to fetch the remaining parameters.
+// If 'headers' is non-NULL this function will attempt to locate both alpha
+// data (with or without a VP8X chunk) and the bitstream chunk (VP8/VP8L).
+// Note: The following chunk sequences (before the raw VP8/VP8L data) are
+// considered valid by this function:
+// RIFF + VP8(L)
+// RIFF + VP8X + (optional chunks) + VP8(L)
+// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
+// VP8(L) <-- Not a valid WebP format: only allowed for internal purpose.
+static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
+ size_t data_size,
+ int* const width,
+ int* const height,
+ int* const has_alpha,
+ WebPHeaderStructure* const headers) {
+ int found_riff = 0;
+ int found_vp8x = 0;
+ VP8StatusCode status;
+ WebPHeaderStructure hdrs;
+
+ if (data == NULL || data_size < RIFF_HEADER_SIZE) {
+ return VP8_STATUS_NOT_ENOUGH_DATA;
+ }
+ memset(&hdrs, 0, sizeof(hdrs));
+ hdrs.data = data;
+ hdrs.data_size = data_size;
+
+ // Skip over RIFF header.
+ status = ParseRIFF(&data, &data_size, &hdrs.riff_size);
+ if (status != VP8_STATUS_OK) {
+ return status; // Wrong RIFF header / insufficient data.
+ }
+ found_riff = (hdrs.riff_size > 0);
+
+ // Skip over VP8X.
+ {
+ uint32_t flags = 0;
+ status = ParseVP8X(&data, &data_size, &found_vp8x, width, height, &flags);
+ if (status != VP8_STATUS_OK) {
+ return status; // Wrong VP8X / insufficient data.
+ }
+ if (!found_riff && found_vp8x) {
+ // Note: This restriction may be removed in the future, if it becomes
+ // necessary to send VP8X chunk to the decoder.
+ return VP8_STATUS_BITSTREAM_ERROR;
+ }
+ if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG_BIT);
+ if (found_vp8x && headers == NULL) {
+ return VP8_STATUS_OK; // Return features from VP8X header.
+ }
+ }
+
+ if (data_size < TAG_SIZE) return VP8_STATUS_NOT_ENOUGH_DATA;
+
+ // Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH".
+ if ((found_riff && found_vp8x) ||
+ (!found_riff && !found_vp8x && !memcmp(data, "ALPH", TAG_SIZE))) {
+ status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size,
+ &hdrs.alpha_data, &hdrs.alpha_data_size);
+ if (status != VP8_STATUS_OK) {
+ return status; // Found an invalid chunk size / insufficient data.
+ }
+ }
+
+ // Skip over VP8/VP8L header.
+ status = ParseVP8Header(&data, &data_size, hdrs.riff_size,
+ &hdrs.compressed_size, &hdrs.is_lossless);
+ if (status != VP8_STATUS_OK) {
+ return status; // Wrong VP8/VP8L chunk-header / insufficient data.
+ }
+ if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) {
+ return VP8_STATUS_BITSTREAM_ERROR;
+ }
+
+ if (!hdrs.is_lossless) {
+ if (data_size < VP8_FRAME_HEADER_SIZE) {
+ return VP8_STATUS_NOT_ENOUGH_DATA;
+ }
+ // Validates raw VP8 data.
+ if (!VP8GetInfo(data, data_size,
+ (uint32_t)hdrs.compressed_size, width, height)) {
+ return VP8_STATUS_BITSTREAM_ERROR;
}
- return chunk_size;
+ } else {
+ if (data_size < VP8L_FRAME_HEADER_SIZE) {
+ return VP8_STATUS_NOT_ENOUGH_DATA;
+ }
+ // Validates raw VP8L data.
+ if (!VP8LGetInfo(data, data_size, width, height, has_alpha)) {
+ return VP8_STATUS_BITSTREAM_ERROR;
+ }
+ }
+
+ if (has_alpha != NULL) {
+ // If the data did not contain a VP8X/VP8L chunk the only definitive way
+ // to set this is by looking for alpha data (from an ALPH chunk).
+ *has_alpha |= (hdrs.alpha_data != NULL);
+ }
+ if (headers != NULL) {
+ *headers = hdrs;
+ headers->offset = data - headers->data;
+ assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
+ assert(headers->offset == headers->data_size - data_size);
}
- return *data_size_ptr;
+ return VP8_STATUS_OK; // Return features from VP8 header.
+}
+
+VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
+ assert(headers != NULL);
+ // fill out headers, ignore width/height/has_alpha.
+ return ParseHeadersInternal(headers->data, headers->data_size,
+ NULL, NULL, NULL, headers);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// WebPDecParams
void WebPResetDecParams(WebPDecParams* const params) {
@@ -72,41 +379,76 @@ void WebPResetDecParams(WebPDecParams* const params) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// "Into" decoding variants
// Main flow
-static VP8StatusCode DecodeInto(const uint8_t* data, uint32_t data_size,
+static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
WebPDecParams* const params) {
- VP8Decoder* dec = VP8New();
- VP8StatusCode status = VP8_STATUS_OK;
+ VP8StatusCode status;
VP8Io io;
+ WebPHeaderStructure headers;
- assert(params);
- if (dec == NULL) {
- return VP8_STATUS_INVALID_PARAM;
+ headers.data = data;
+ headers.data_size = data_size;
+ status = WebPParseHeaders(&headers); // Process Pre-VP8 chunks.
+ if (status != VP8_STATUS_OK) {
+ return status;
}
+ assert(params != NULL);
VP8InitIo(&io);
- io.data = data;
- io.data_size = data_size;
+ io.data = headers.data + headers.offset;
+ io.data_size = headers.data_size - headers.offset;
WebPInitCustomIo(params, &io); // Plug the I/O functions.
- // Decode bitstream header, update io->width/io->height.
- if (!VP8GetHeaders(dec, &io)) {
- status = VP8_STATUS_BITSTREAM_ERROR;
+ if (!headers.is_lossless) {
+ VP8Decoder* const dec = VP8New();
+ if (dec == NULL) {
+ return VP8_STATUS_OUT_OF_MEMORY;
+ }
+#ifdef WEBP_USE_THREAD
+ dec->use_threads_ = params->options && (params->options->use_threads > 0);
+#else
+ dec->use_threads_ = 0;
+#endif
+ dec->alpha_data_ = headers.alpha_data;
+ dec->alpha_data_size_ = headers.alpha_data_size;
+
+ // Decode bitstream header, update io->width/io->height.
+ if (!VP8GetHeaders(dec, &io)) {
+ status = dec->status_; // An error occurred. Grab error status.
+ } else {
+ // Allocate/check output buffers.
+ status = WebPAllocateDecBuffer(io.width, io.height, params->options,
+ params->output);
+ if (status == VP8_STATUS_OK) { // Decode
+ if (!VP8Decode(dec, &io)) {
+ status = dec->status_;
+ }
+ }
+ }
+ VP8Delete(dec);
} else {
- // Allocate/check output buffers.
- status = WebPAllocateDecBuffer(io.width, io.height, params->options,
- params->output);
- if (status == VP8_STATUS_OK) {
- // Decode
- if (!VP8Decode(dec, &io)) {
- status = dec->status_;
+ VP8LDecoder* const dec = VP8LNew();
+ if (dec == NULL) {
+ return VP8_STATUS_OUT_OF_MEMORY;
+ }
+ if (!VP8LDecodeHeader(dec, &io)) {
+ status = dec->status_; // An error occurred. Grab error status.
+ } else {
+ // Allocate/check output buffers.
+ status = WebPAllocateDecBuffer(io.width, io.height, params->options,
+ params->output);
+ if (status == VP8_STATUS_OK) { // Decode
+ if (!VP8LDecodeImage(dec)) {
+ status = dec->status_;
+ }
}
}
+ VP8LDelete(dec);
}
- VP8Delete(dec);
+
if (status != VP8_STATUS_OK) {
WebPFreeDecBuffer(params->output);
}
@@ -115,8 +457,10 @@ static VP8StatusCode DecodeInto(const uint8_t* data, uint32_t data_size,
// Helpers
static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace,
- const uint8_t* data, uint32_t data_size,
- uint8_t* rgba, int stride, int size) {
+ const uint8_t* const data,
+ size_t data_size,
+ uint8_t* const rgba,
+ int stride, size_t size) {
WebPDecParams params;
WebPDecBuffer buf;
if (rgba == NULL) {
@@ -136,35 +480,35 @@ static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace,
return rgba;
}
-uint8_t* WebPDecodeRGBInto(const uint8_t* data, uint32_t data_size,
- uint8_t* output, int size, int stride) {
+uint8_t* WebPDecodeRGBInto(const uint8_t* data, size_t data_size,
+ uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_RGB, data, data_size, output, stride, size);
}
-uint8_t* WebPDecodeRGBAInto(const uint8_t* data, uint32_t data_size,
- uint8_t* output, int size, int stride) {
+uint8_t* WebPDecodeRGBAInto(const uint8_t* data, size_t data_size,
+ uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_RGBA, data, data_size, output, stride, size);
}
-uint8_t* WebPDecodeARGBInto(const uint8_t* data, uint32_t data_size,
- uint8_t* output, int size, int stride) {
+uint8_t* WebPDecodeARGBInto(const uint8_t* data, size_t data_size,
+ uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_ARGB, data, data_size, output, stride, size);
}
-uint8_t* WebPDecodeBGRInto(const uint8_t* data, uint32_t data_size,
- uint8_t* output, int size, int stride) {
+uint8_t* WebPDecodeBGRInto(const uint8_t* data, size_t data_size,
+ uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_BGR, data, data_size, output, stride, size);
}
-uint8_t* WebPDecodeBGRAInto(const uint8_t* data, uint32_t data_size,
- uint8_t* output, int size, int stride) {
+uint8_t* WebPDecodeBGRAInto(const uint8_t* data, size_t data_size,
+ uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size);
}
-uint8_t* WebPDecodeYUVInto(const uint8_t* data, uint32_t data_size,
- uint8_t* luma, int luma_size, int luma_stride,
- uint8_t* u, int u_size, int u_stride,
- uint8_t* v, int v_size, int v_stride) {
+uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size,
+ uint8_t* luma, size_t luma_size, int luma_stride,
+ uint8_t* u, size_t u_size, int u_stride,
+ uint8_t* v, size_t v_size, int v_stride) {
WebPDecParams params;
WebPDecBuffer output;
if (luma == NULL) return NULL;
@@ -188,11 +532,11 @@ uint8_t* WebPDecodeYUVInto(const uint8_t* data, uint32_t data_size,
return luma;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
-static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* data,
- uint32_t data_size, int* width, int* height,
- WebPDecBuffer* keep_info) {
+static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* const data,
+ size_t data_size, int* const width, int* const height,
+ WebPDecBuffer* const keep_info) {
WebPDecParams params;
WebPDecBuffer output;
@@ -205,53 +549,53 @@ static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* data,
if (!WebPGetInfo(data, data_size, &output.width, &output.height)) {
return NULL;
}
- if (width) *width = output.width;
- if (height) *height = output.height;
+ if (width != NULL) *width = output.width;
+ if (height != NULL) *height = output.height;
// Decode
if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
return NULL;
}
- if (keep_info) { // keep track of the side-info
+ if (keep_info != NULL) { // keep track of the side-info
WebPCopyDecBuffer(&output, keep_info);
}
// return decoded samples (don't clear 'output'!)
- return (mode >= MODE_YUV) ? output.u.YUVA.y : output.u.RGBA.rgba;
+ return WebPIsRGBMode(mode) ? output.u.RGBA.rgba : output.u.YUVA.y;
}
-uint8_t* WebPDecodeRGB(const uint8_t* data, uint32_t data_size,
+uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_RGB, data, data_size, width, height, NULL);
}
-uint8_t* WebPDecodeRGBA(const uint8_t* data, uint32_t data_size,
+uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_RGBA, data, data_size, width, height, NULL);
}
-uint8_t* WebPDecodeARGB(const uint8_t* data, uint32_t data_size,
+uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_ARGB, data, data_size, width, height, NULL);
}
-uint8_t* WebPDecodeBGR(const uint8_t* data, uint32_t data_size,
+uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_BGR, data, data_size, width, height, NULL);
}
-uint8_t* WebPDecodeBGRA(const uint8_t* data, uint32_t data_size,
+uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_BGRA, data, data_size, width, height, NULL);
}
-uint8_t* WebPDecodeYUV(const uint8_t* data, uint32_t data_size,
+uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
int* width, int* height, uint8_t** u, uint8_t** v,
int* stride, int* uv_stride) {
WebPDecBuffer output; // only to preserve the side-infos
uint8_t* const out = Decode(MODE_YUV, data, data_size,
width, height, &output);
- if (out) {
+ if (out != NULL) {
const WebPYUVABuffer* const buf = &output.u.YUVA;
*u = buf->u;
*v = buf->v;
@@ -262,52 +606,52 @@ uint8_t* WebPDecodeYUV(const uint8_t* data, uint32_t data_size,
return out;
}
-//-----------------------------------------------------------------------------
-// WebPGetInfo()
-
-int WebPGetInfo(const uint8_t* data, uint32_t data_size,
- int* width, int* height) {
- const uint32_t chunk_size = WebPCheckRIFFHeader(&data, &data_size);
- if (!chunk_size) {
- return 0; // unsupported RIFF header
- }
- // Validate raw video data
- return VP8GetInfo(data, data_size, chunk_size, width, height, NULL);
-}
-
static void DefaultFeatures(WebPBitstreamFeatures* const features) {
- assert(features);
+ assert(features != NULL);
memset(features, 0, sizeof(*features));
features->bitstream_version = 0;
}
-static VP8StatusCode GetFeatures(const uint8_t** data, uint32_t* data_size,
+static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
WebPBitstreamFeatures* const features) {
- uint32_t chunk_size;
- if (features == NULL) {
+ if (features == NULL || data == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
DefaultFeatures(features);
- if (data == NULL || *data == NULL || data_size == 0) {
- return VP8_STATUS_INVALID_PARAM;
+
+ // Only parse enough of the data to retrieve width/height/has_alpha.
+ return ParseHeadersInternal(data, data_size,
+ &features->width, &features->height,
+ &features->has_alpha, NULL);
+}
+
+//------------------------------------------------------------------------------
+// WebPGetInfo()
+
+int WebPGetInfo(const uint8_t* data, size_t data_size,
+ int* width, int* height) {
+ WebPBitstreamFeatures features;
+
+ if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) {
+ return 0;
}
- chunk_size = WebPCheckRIFFHeader(data, data_size);
- if (chunk_size == 0) {
- return VP8_STATUS_BITSTREAM_ERROR; // unsupported RIFF header
+
+ if (width != NULL) {
+ *width = features.width;
}
- if (!VP8GetInfo(*data, *data_size, chunk_size,
- &features->width, &features->height, &features->has_alpha)) {
- return VP8_STATUS_BITSTREAM_ERROR;
+ if (height != NULL) {
+ *height = features.height;
}
- return VP8_STATUS_OK;
+
+ return 1;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Advanced decoding API
-int WebPInitDecoderConfigInternal(WebPDecoderConfig* const config,
+int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
int version) {
- if (version != WEBP_DECODER_ABI_VERSION) {
+ if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return 0; // version mismatch
}
if (config == NULL) {
@@ -319,29 +663,38 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* const config,
return 1;
}
-VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, uint32_t data_size,
- WebPBitstreamFeatures* const features,
- int version) {
- if (version != WEBP_DECODER_ABI_VERSION) {
+VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
+ WebPBitstreamFeatures* features,
+ int version) {
+ VP8StatusCode status;
+ if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return VP8_STATUS_INVALID_PARAM; // version mismatch
}
if (features == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
- return GetFeatures(&data, &data_size, features);
+
+ status = GetFeatures(data, data_size, features);
+ if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+ return VP8_STATUS_BITSTREAM_ERROR; // Not-enough-data treated as error.
+ }
+ return status;
}
-VP8StatusCode WebPDecode(const uint8_t* data, uint32_t data_size,
- WebPDecoderConfig* const config) {
+VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
+ WebPDecoderConfig* config) {
WebPDecParams params;
VP8StatusCode status;
- if (!config) {
+ if (config == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
- status = GetFeatures(&data, &data_size, &config->input);
+ status = GetFeatures(data, data_size, &config->input);
if (status != VP8_STATUS_OK) {
+ if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+ return VP8_STATUS_BITSTREAM_ERROR; // Not-enough-data treated as error.
+ }
return status;
}
@@ -353,6 +706,66 @@ VP8StatusCode WebPDecode(const uint8_t* data, uint32_t data_size,
return status;
}
+//------------------------------------------------------------------------------
+// Cropping and rescaling.
+
+int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
+ VP8Io* const io, WEBP_CSP_MODE src_colorspace) {
+ const int W = io->width;
+ const int H = io->height;
+ int x = 0, y = 0, w = W, h = H;
+
+ // Cropping
+ io->use_cropping = (options != NULL) && (options->use_cropping > 0);
+ if (io->use_cropping) {
+ w = options->crop_width;
+ h = options->crop_height;
+ x = options->crop_left;
+ y = options->crop_top;
+ if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420 or YUV422
+ x &= ~1;
+ y &= ~1; // TODO(later): only for YUV420, not YUV422.
+ }
+ if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
+ return 0; // out of frame boundary error
+ }
+ }
+ io->crop_left = x;
+ io->crop_top = y;
+ io->crop_right = x + w;
+ io->crop_bottom = y + h;
+ io->mb_w = w;
+ io->mb_h = h;
+
+ // Scaling
+ io->use_scaling = (options != NULL) && (options->use_scaling > 0);
+ if (io->use_scaling) {
+ if (options->scaled_width <= 0 || options->scaled_height <= 0) {
+ return 0;
+ }
+ io->scaled_width = options->scaled_width;
+ io->scaled_height = options->scaled_height;
+ }
+
+ // Filter
+ io->bypass_filtering = options && options->bypass_filtering;
+
+ // Fancy upsampler
+#ifdef FANCY_UPSAMPLING
+ io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling);
+#endif
+
+ if (io->use_scaling) {
+ // disable filter (only for large downscaling ratio).
+ io->bypass_filtering = (io->scaled_width < W * 3 / 4) &&
+ (io->scaled_height < H * 3 / 4);
+ io->fancy_upsampling = 0;
+ }
+ return 1;
+}
+
+//------------------------------------------------------------------------------
+
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
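
The webp.c changes above switch the public entry points to size_t sizes and route
both WebPGetInfo() and WebPDecode() through GetFeatures(), with cropping and
scaling handled by the new WebPIoInitFromOptions(). A minimal caller-side sketch,
assuming the usual decode.h wrappers (WebPInitDecoderConfig, WebPFreeDecBuffer)
and the conventional WebPDecoderConfig layout with 'input', 'options' and
'output' members:

    #include "webp/decode.h"

    // Sketch only: decode the top-left quarter of an image via cropping options.
    static int DecodeTopLeftQuarter(const uint8_t* data, size_t data_size) {
      int width, height;
      WebPDecoderConfig config;
      if (!WebPGetInfo(data, data_size, &width, &height)) return 0;
      if (!WebPInitDecoderConfig(&config)) return 0;  // also checks the ABI version
      config.options.use_cropping = 1;       // consumed by WebPIoInitFromOptions()
      config.options.crop_left = 0;
      config.options.crop_top = 0;           // snapped to even values for YUV modes
      config.options.crop_width = width / 2;
      config.options.crop_height = height / 2;
      if (WebPDecode(data, data_size, &config) != VP8_STATUS_OK) return 0;
      // ... use config.output ...
      WebPFreeDecBuffer(&config.output);
      return 1;
    }

A crop rectangle that falls outside the reported width/height (or has a
non-positive size) makes WebPIoInitFromOptions() return 0, so the values above
must stay within the frame.
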
diff --git a/src/dec/webpi.h b/src/dec/webpi.h
index 6f6a72fb..44e57444 100644
--- a/src/dec/webpi.h
+++ b/src/dec/webpi.h
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,36 +9,22 @@
//
// Author: somnath@google.com (Somnath Banerjee)
-#ifndef WEBP_DEC_WEBPI_H
-#define WEBP_DEC_WEBPI_H
+#ifndef WEBP_DEC_WEBPI_H_
+#define WEBP_DEC_WEBPI_H_
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-#include "webp/decode_vp8.h"
+#include "../utils/rescaler.h"
+#include "./decode_vp8.h"
//------------------------------------------------------------------------------
-// WebPDecParams: Decoding output parameters. Transcient internal object.
+// WebPDecParams: Decoding output parameters. Transient internal object.
typedef struct WebPDecParams WebPDecParams;
typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p);
-
-// Structure use for on-the-fly rescaling
-typedef struct {
- int x_expand; // true if we're expanding in the x direction
- int fy_scale, fx_scale; // fixed-point scaling factor
- int64_t fxy_scale; // ''
- // we need hpel-precise add/sub increments, for the downsampled U/V planes.
- int y_accum; // vertical accumulator
- int y_add, y_sub; // vertical increments (add ~= src, sub ~= dst)
- int x_add, x_sub; // horizontal increments (add ~= src, sub ~= dst)
- int src_width, src_height; // source dimensions
- int dst_width, dst_height; // destination dimensions
- uint8_t* dst;
- int dst_stride;
- int32_t* irow, *frow; // work buffer
-} WebPRescaler;
+typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos);
struct WebPDecParams {
WebPDecBuffer* output; // output buffer.
@@ -49,42 +35,51 @@ struct WebPDecParams {
const WebPDecoderOptions* options; // if not NULL, use alt decoding features
// rescalers
WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a;
- void* memory; // overall scratch memory for the output work.
- OutputFunc emit; // output RGB or YUV samples
- OutputFunc emit_alpha; // output alpha channel
+ void* memory; // overall scratch memory for the output work.
+
+ OutputFunc emit; // output RGB or YUV samples
+ OutputFunc emit_alpha; // output alpha channel
+ OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values
};
// Should be called first, before any use of the WebPDecParams object.
void WebPResetDecParams(WebPDecParams* const params);
//------------------------------------------------------------------------------
-// Upsampler function to overwrite fancy upsampler.
-
-typedef void (*WebPUpsampleLinePairFunc)(
- const uint8_t* top_y, const uint8_t* bottom_y,
- const uint8_t* top_u, const uint8_t* top_v,
- const uint8_t* cur_u, const uint8_t* cur_v,
- uint8_t* top_dst, uint8_t* bottom_dst, int len);
-
-// Upsampler functions to be used to convert YUV to RGB(A) modes
-extern WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
-extern WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[MODE_LAST];
+// Header parsing helpers
-// Initializes SSE2 version of the fancy upsamplers.
-void WebPInitUpsamplersSSE2(void);
+// Structure storing a description of the RIFF headers.
+typedef struct {
+ const uint8_t* data; // input buffer
+ size_t data_size; // input buffer size
+ size_t offset; // offset to main data chunk (VP8 or VP8L)
+ const uint8_t* alpha_data; // points to alpha chunk (if present)
+ size_t alpha_data_size; // alpha chunk size
+ size_t compressed_size; // VP8/VP8L compressed data size
+ size_t riff_size; // size of the riff payload (or 0 if absent)
+ int is_lossless; // true if a VP8L chunk is present
+} WebPHeaderStructure;
+
+// Skips over all valid chunks prior to the first VP8/VP8L frame header.
+// Returns VP8_STATUS_OK on success,
+// VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and
+// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data.
+// In 'headers', the compressed_size, offset, alpha_data, alpha_data_size and
+// is_lossless fields are updated appropriately upon success.
+VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
//------------------------------------------------------------------------------
// Misc utils
-// If a RIFF container is detected, validate it and skip over it. Returns
-// VP8 bit-stream size if RIFF header is valid else returns 0
-uint32_t WebPCheckRIFFHeader(const uint8_t** data_ptr,
- uint32_t* data_size_ptr);
-
// Initializes VP8Io with custom setup, io and teardown functions. The default
// hooks will use the supplied 'params' as io->opaque handle.
void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io);
+// Sets up the crop_xxx fields, mb_w and mb_h in io. 'src_colorspace' refers
+// to the *compressed* format, not the output one.
+int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
+ VP8Io* const io, WEBP_CSP_MODE src_colorspace);
+
//------------------------------------------------------------------------------
// Internal functions regarding WebPDecBuffer memory (in buffer.c).
// Don't really need to be externally visible for now.
@@ -108,10 +103,12 @@ void WebPCopyDecBuffer(const WebPDecBuffer* const src,
// Copy and transfer ownership from src to dst (beware of parameter order!)
void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
+
+
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
-#endif // WEBP_DEC_WEBPI_H
+#endif /* WEBP_DEC_WEBPI_H_ */
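
WebPParseHeaders() and the WebPHeaderStructure above replace the old
WebPCheckRIFFHeader() helper: the caller fills in 'data'/'data_size' and, on
success, reads the chunk layout back from the other fields. A hedged sketch of
the intended use (the Decode* helpers are hypothetical placeholders for the
VP8/VP8L decoders):

    #include <string.h>
    #include "./webpi.h"

    // Hypothetical helpers, shown only to indicate which fields get consumed.
    VP8StatusCode DecodeLossy(const uint8_t* bits, size_t size,
                              const uint8_t* alpha, size_t alpha_size);
    VP8StatusCode DecodeLossless(const uint8_t* bits, size_t size);

    static VP8StatusCode ParseAndDecode(const uint8_t* data, size_t data_size) {
      WebPHeaderStructure headers;
      memset(&headers, 0, sizeof(headers));
      headers.data = data;
      headers.data_size = data_size;
      {
        const VP8StatusCode status = WebPParseHeaders(&headers);
        if (status != VP8_STATUS_OK) return status;  // bad chunk, or not enough data
      }
      return headers.is_lossless
          ? DecodeLossless(headers.data + headers.offset, headers.compressed_size)
          : DecodeLossy(headers.data + headers.offset, headers.compressed_size,
                        headers.alpha_data, headers.alpha_data_size);
    }
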
diff --git a/src/dsp/cpu.c b/src/dsp/cpu.c
new file mode 100644
index 00000000..10e0936c
--- /dev/null
+++ b/src/dsp/cpu.c
@@ -0,0 +1,86 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// CPU detection
+//
+// Author: Christian Duvivier (cduvivier@google.com)
+
+#include "./dsp.h"
+
+//#if defined(__ANDROID__)
+//#include <cpu-features.h>
+//#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// SSE2 detection.
+//
+
+// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
+#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+ __asm__ volatile (
+ "mov %%ebx, %%edi\n"
+ "cpuid\n"
+ "xchg %%edi, %%ebx\n"
+ : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+ : "a"(info_type));
+}
+#elif defined(__i386__) || defined(__x86_64__)
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+ __asm__ volatile (
+ "cpuid\n"
+ : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+ : "a"(info_type));
+}
+#elif defined(WEBP_MSC_SSE2)
+#define GetCPUInfo __cpuid
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
+static int x86CPUInfo(CPUFeature feature) {
+ int cpu_info[4];
+ GetCPUInfo(cpu_info, 1);
+ if (feature == kSSE2) {
+ return 0 != (cpu_info[3] & 0x04000000);
+ }
+ if (feature == kSSE3) {
+ return 0 != (cpu_info[2] & 0x00000001);
+ }
+ return 0;
+}
+VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
+#elif defined(WEBP_ANDROID_NEON)
+static int AndroidCPUInfo(CPUFeature feature) {
+// const AndroidCpuFamily cpu_family = android_getCpuFamily();
+// const uint64_t cpu_features = android_getCpuFeatures();
+// if (feature == kNEON) {
+// return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
+// 0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
+// }
+// return 0;
+ return 1;
+}
+VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
+#elif defined(__ARM_NEON__)
+// Define a dummy function to enable turning off NEON at runtime by setting
+// VP8GetCPUInfo = NULL.
+static int armCPUInfo(CPUFeature feature) {
+ (void)feature;
+ return 1;
+}
+VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
+#else
+VP8CPUInfo VP8GetCPUInfo = NULL;
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
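
The new cpu.c funnels all runtime CPU detection through the single
VP8GetCPUInfo pointer declared in dsp.h (note that this Android copy hard-wires
AndroidCPUInfo() to return 1, with the cpu-features probe commented out above).
A small sketch of how a client or a test can query or disable it:

    #include <stdio.h>
    #include "./dsp.h"

    static void ReportCpuFeatures(void) {
      // VP8GetCPUInfo is NULL when no detection method was compiled in.
      const int has_sse2 = (VP8GetCPUInfo != NULL) && VP8GetCPUInfo(kSSE2);
      const int has_neon = (VP8GetCPUInfo != NULL) && VP8GetCPUInfo(kNEON);
      printf("SSE2: %d, NEON: %d\n", has_sse2, has_neon);
      // Setting VP8GetCPUInfo = NULL before VP8DspInit()/VP8EncDspInit() forces
      // the plain C code paths regardless of what the hardware supports.
    }
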
diff --git a/src/dec/dsp.c b/src/dsp/dec.c
index 0dea42a4..9ae7b6fa 100644
--- a/src/dec/dsp.c
+++ b/src/dsp/dec.c
@@ -1,21 +1,22 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
-// speed-critical functions.
+// Speed-critical decoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
-#include "vp8i.h"
+#include "./dsp.h"
+#include "../dec/vp8i.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// run-time tables (~4k)
static uint8_t abs0[255 + 255 + 1]; // abs(i)
@@ -28,7 +29,7 @@ static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
-void VP8DspInitTables(void) {
+static void DspInitTables(void) {
if (!tables_ok) {
int i;
for (i = -255; i <= 255; ++i) {
@@ -48,11 +49,11 @@ void VP8DspInitTables(void) {
}
}
-static inline uint8_t clip_8b(int v) {
+static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
#define STORE(x, y, v) \
@@ -133,13 +134,7 @@ static void TransformDCUV(const int16_t* in, uint8_t* dst) {
#undef STORE
-// default C implementations:
-VP8Idct2 VP8Transform = TransformTwo;
-VP8Idct VP8TransformUV = TransformUV;
-VP8Idct VP8TransformDC = TransformDC;
-VP8Idct VP8TransformDCUV = TransformDCUV;
-
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Paragraph 14.3
static void TransformWHT(const int16_t* in, int16_t* out) {
@@ -171,12 +166,12 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT;
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Intra predictions
-#define OUT(x, y) dst[(x) + (y) * BPS]
+#define DST(x, y) dst[(x) + (y) * BPS]
-static inline void TrueMotion(uint8_t *dst, int size) {
+static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) {
const uint8_t* top = dst - BPS;
const uint8_t* const clip0 = clip1 + 255 - top[-1];
int y;
@@ -193,7 +188,7 @@ static void TM4(uint8_t *dst) { TrueMotion(dst, 4); }
static void TM8uv(uint8_t *dst) { TrueMotion(dst, 8); }
static void TM16(uint8_t *dst) { TrueMotion(dst, 16); }
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// 16x16
static void VE16(uint8_t *dst) { // vertical
@@ -211,7 +206,7 @@ static void HE16(uint8_t *dst) { // horizontal
}
}
-static inline void Put16(int v, uint8_t* dst) {
+static WEBP_INLINE void Put16(int v, uint8_t* dst) {
int j;
for (j = 0; j < 16; ++j) {
memset(dst + j * BPS, v, 16);
@@ -249,7 +244,7 @@ static void DC16NoTopLeft(uint8_t *dst) { // DC with no top and left samples
Put16(0x80, dst);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// 4x4
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
@@ -299,13 +294,13 @@ static void RD4(uint8_t *dst) { // Down-right
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
const int D = dst[3 - BPS];
- OUT(0, 3) = AVG3(J, K, L);
- OUT(0, 2) = OUT(1, 3) = AVG3(I, J, K);
- OUT(0, 1) = OUT(1, 2) = OUT(2, 3) = AVG3(X, I, J);
- OUT(0, 0) = OUT(1, 1) = OUT(2, 2) = OUT(3, 3) = AVG3(A, X, I);
- OUT(1, 0) = OUT(2, 1) = OUT(3, 2) = AVG3(B, A, X);
- OUT(2, 0) = OUT(3, 1) = AVG3(C, B, A);
- OUT(3, 0) = AVG3(D, C, B);
+ DST(0, 3) = AVG3(J, K, L);
+ DST(0, 2) = DST(1, 3) = AVG3(I, J, K);
+ DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J);
+ DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
+ DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X);
+ DST(2, 0) = DST(3, 1) = AVG3(C, B, A);
+ DST(3, 0) = AVG3(D, C, B);
}
static void LD4(uint8_t *dst) { // Down-Left
@@ -317,13 +312,13 @@ static void LD4(uint8_t *dst) { // Down-Left
const int F = dst[5 - BPS];
const int G = dst[6 - BPS];
const int H = dst[7 - BPS];
- OUT(0, 0) = AVG3(A, B, C);
- OUT(1, 0) = OUT(0, 1) = AVG3(B, C, D);
- OUT(2, 0) = OUT(1, 1) = OUT(0, 2) = AVG3(C, D, E);
- OUT(3, 0) = OUT(2, 1) = OUT(1, 2) = OUT(0, 3) = AVG3(D, E, F);
- OUT(3, 1) = OUT(2, 2) = OUT(1, 3) = AVG3(E, F, G);
- OUT(3, 2) = OUT(2, 3) = AVG3(F, G, H);
- OUT(3, 3) = AVG3(G, H, H);
+ DST(0, 0) = AVG3(A, B, C);
+ DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+ DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
+ DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+ DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+ DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+ DST(3, 3) = AVG3(G, H, H);
}
static void VR4(uint8_t *dst) { // Vertical-Right
@@ -335,17 +330,17 @@ static void VR4(uint8_t *dst) { // Vertical-Right
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
const int D = dst[3 - BPS];
- OUT(0, 0) = OUT(1, 2) = AVG2(X, A);
- OUT(1, 0) = OUT(2, 2) = AVG2(A, B);
- OUT(2, 0) = OUT(3, 2) = AVG2(B, C);
- OUT(3, 0) = AVG2(C, D);
+ DST(0, 0) = DST(1, 2) = AVG2(X, A);
+ DST(1, 0) = DST(2, 2) = AVG2(A, B);
+ DST(2, 0) = DST(3, 2) = AVG2(B, C);
+ DST(3, 0) = AVG2(C, D);
- OUT(0, 3) = AVG3(K, J, I);
- OUT(0, 2) = AVG3(J, I, X);
- OUT(0, 1) = OUT(1, 3) = AVG3(I, X, A);
- OUT(1, 1) = OUT(2, 3) = AVG3(X, A, B);
- OUT(2, 1) = OUT(3, 3) = AVG3(A, B, C);
- OUT(3, 1) = AVG3(B, C, D);
+ DST(0, 3) = AVG3(K, J, I);
+ DST(0, 2) = AVG3(J, I, X);
+ DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+ DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+ DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+ DST(3, 1) = AVG3(B, C, D);
}
static void VL4(uint8_t *dst) { // Vertical-Left
@@ -357,17 +352,17 @@ static void VL4(uint8_t *dst) { // Vertical-Left
const int F = dst[5 - BPS];
const int G = dst[6 - BPS];
const int H = dst[7 - BPS];
- OUT(0, 0) = AVG2(A, B);
- OUT(1, 0) = OUT(0, 2) = AVG2(B, C);
- OUT(2, 0) = OUT(1, 2) = AVG2(C, D);
- OUT(3, 0) = OUT(2, 2) = AVG2(D, E);
+ DST(0, 0) = AVG2(A, B);
+ DST(1, 0) = DST(0, 2) = AVG2(B, C);
+ DST(2, 0) = DST(1, 2) = AVG2(C, D);
+ DST(3, 0) = DST(2, 2) = AVG2(D, E);
- OUT(0, 1) = AVG3(A, B, C);
- OUT(1, 1) = OUT(0, 3) = AVG3(B, C, D);
- OUT(2, 1) = OUT(1, 3) = AVG3(C, D, E);
- OUT(3, 1) = OUT(2, 3) = AVG3(D, E, F);
- OUT(3, 2) = AVG3(E, F, G);
- OUT(3, 3) = AVG3(F, G, H);
+ DST(0, 1) = AVG3(A, B, C);
+ DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+ DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+ DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+ DST(3, 2) = AVG3(E, F, G);
+ DST(3, 3) = AVG3(F, G, H);
}
static void HU4(uint8_t *dst) { // Horizontal-Up
@@ -375,14 +370,14 @@ static void HU4(uint8_t *dst) { // Horizontal-Up
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
const int L = dst[-1 + 3 * BPS];
- OUT(0, 0) = AVG2(I, J);
- OUT(2, 0) = OUT(0, 1) = AVG2(J, K);
- OUT(2, 1) = OUT(0, 2) = AVG2(K, L);
- OUT(1, 0) = AVG3(I, J, K);
- OUT(3, 0) = OUT(1, 1) = AVG3(J, K, L);
- OUT(3, 1) = OUT(1, 2) = AVG3(K, L, L);
- OUT(3, 2) = OUT(2, 2) =
- OUT(0, 3) = OUT(1, 3) = OUT(2, 3) = OUT(3, 3) = L;
+ DST(0, 0) = AVG2(I, J);
+ DST(2, 0) = DST(0, 1) = AVG2(J, K);
+ DST(2, 1) = DST(0, 2) = AVG2(K, L);
+ DST(1, 0) = AVG3(I, J, K);
+ DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+ DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+ DST(3, 2) = DST(2, 2) =
+ DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static void HD4(uint8_t *dst) { // Horizontal-Down
@@ -395,23 +390,24 @@ static void HD4(uint8_t *dst) { // Horizontal-Down
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
- OUT(0, 0) = OUT(2, 1) = AVG2(I, X);
- OUT(0, 1) = OUT(2, 2) = AVG2(J, I);
- OUT(0, 2) = OUT(2, 3) = AVG2(K, J);
- OUT(0, 3) = AVG2(L, K);
+ DST(0, 0) = DST(2, 1) = AVG2(I, X);
+ DST(0, 1) = DST(2, 2) = AVG2(J, I);
+ DST(0, 2) = DST(2, 3) = AVG2(K, J);
+ DST(0, 3) = AVG2(L, K);
- OUT(3, 0) = AVG3(A, B, C);
- OUT(2, 0) = AVG3(X, A, B);
- OUT(1, 0) = OUT(3, 1) = AVG3(I, X, A);
- OUT(1, 1) = OUT(3, 2) = AVG3(J, I, X);
- OUT(1, 2) = OUT(3, 3) = AVG3(K, J, I);
- OUT(1, 3) = AVG3(L, K, J);
+ DST(3, 0) = AVG3(A, B, C);
+ DST(2, 0) = AVG3(X, A, B);
+ DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+ DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+ DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+ DST(1, 3) = AVG3(L, K, J);
}
+#undef DST
#undef AVG3
#undef AVG2
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Chroma
static void VE8uv(uint8_t *dst) { // vertical
@@ -430,7 +426,7 @@ static void HE8uv(uint8_t *dst) { // horizontal
}
// helper for chroma-DC predictions
-static inline void Put8x8uv(uint64_t v, uint8_t* dst) {
+static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) {
int j;
for (j = 0; j < 8; ++j) {
*(uint64_t*)(dst + j * BPS) = v;
@@ -468,28 +464,28 @@ static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing
Put8x8uv(0x8080808080808080ULL, dst);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// default C implementations
-VP8PredFunc VP8PredLuma4[NUM_BMODES] = {
+const VP8PredFunc VP8PredLuma4[NUM_BMODES] = {
DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4
};
-VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = {
+const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = {
DC16, TM16, VE16, HE16,
DC16NoTop, DC16NoLeft, DC16NoTopLeft
};
-VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
+const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
DC8uv, TM8uv, VE8uv, HE8uv,
DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft
};
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Edge filtering functions
// 4 pixels in, 2 pixels out
-static inline void do_filter2(uint8_t* p, int step) {
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1];
const int a1 = sclip2[112 + ((a + 4) >> 3)];
@@ -499,7 +495,7 @@ static inline void do_filter2(uint8_t* p, int step) {
}
// 4 pixels in, 4 pixels out
-static inline void do_filter4(uint8_t* p, int step) {
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0);
const int a1 = sclip2[112 + ((a + 4) >> 3)];
@@ -512,7 +508,7 @@ static inline void do_filter4(uint8_t* p, int step) {
}
// 6 pixels in, 6 pixels out
-static inline void do_filter6(uint8_t* p, int step) {
+static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2*step];
const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]];
@@ -527,17 +523,18 @@ static inline void do_filter6(uint8_t* p, int step) {
p[ 2*step] = clip1[255 + q2 - a3];
}
-static inline int hev(const uint8_t* p, int step, int thresh) {
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh);
}
-static inline int needs_filter(const uint8_t* p, int step, int thresh) {
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh;
}
-static inline int needs_filter2(const uint8_t* p, int step, int t, int it) {
+static WEBP_INLINE int needs_filter2(const uint8_t* p,
+ int step, int t, int it) {
const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step];
if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t)
@@ -547,7 +544,7 @@ static inline int needs_filter2(const uint8_t* p, int step, int t, int it) {
abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
@@ -584,11 +581,12 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Complex In-loop filtering (Paragraph 15.3)
-static inline void FilterLoop26(uint8_t* p, int hstride, int vstride, int size,
- int thresh, int ithresh, int hev_thresh) {
+static WEBP_INLINE void FilterLoop26(uint8_t* p,
+ int hstride, int vstride, int size,
+ int thresh, int ithresh, int hev_thresh) {
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
@@ -601,8 +599,9 @@ static inline void FilterLoop26(uint8_t* p, int hstride, int vstride, int size,
}
}
-static inline void FilterLoop24(uint8_t* p, int hstride, int vstride, int size,
- int thresh, int ithresh, int hev_thresh) {
+static WEBP_INLINE void FilterLoop24(uint8_t* p,
+ int hstride, int vstride, int size,
+ int thresh, int ithresh, int hev_thresh) {
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
@@ -670,78 +669,61 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
-//-----------------------------------------------------------------------------
-
-void (*VP8VFilter16)(uint8_t*, int, int, int, int) = VFilter16;
-void (*VP8HFilter16)(uint8_t*, int, int, int, int) = HFilter16;
-void (*VP8VFilter8)(uint8_t*, uint8_t*, int, int, int, int) = VFilter8;
-void (*VP8HFilter8)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8;
-void (*VP8VFilter16i)(uint8_t*, int, int, int, int) = VFilter16i;
-void (*VP8HFilter16i)(uint8_t*, int, int, int, int) = HFilter16i;
-void (*VP8VFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = VFilter8i;
-void (*VP8HFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8i;
-
-void (*VP8SimpleVFilter16)(uint8_t*, int, int) = SimpleVFilter16;
-void (*VP8SimpleHFilter16)(uint8_t*, int, int) = SimpleHFilter16;
-void (*VP8SimpleVFilter16i)(uint8_t*, int, int) = SimpleVFilter16i;
-void (*VP8SimpleHFilter16i)(uint8_t*, int, int) = SimpleHFilter16i;
-
-//-----------------------------------------------------------------------------
-// SSE2 detection.
-//
-
-#if defined(__pic__) && defined(__i386__)
-static inline void GetCPUInfo(int cpu_info[4], int info_type) {
- __asm__ volatile (
- "mov %%ebx, %%edi\n"
- "cpuid\n"
- "xchg %%edi, %%ebx\n"
- : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
- : "a"(info_type));
-}
-#elif defined(__i386__) || defined(__x86_64__)
-static inline void GetCPUInfo(int cpu_info[4], int info_type) {
- __asm__ volatile (
- "cpuid\n"
- : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
- : "a"(info_type));
-}
-#elif defined(_MSC_VER) // Visual C++
-#define GetCPUInfo __cpuid
-#endif
+//------------------------------------------------------------------------------
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
-static int x86CPUInfo(CPUFeature feature) {
- int cpu_info[4];
- GetCPUInfo(cpu_info, 1);
- if (feature == kSSE2) {
- return 0 != (cpu_info[3] & 0x04000000);
- }
- if (feature == kSSE3) {
- return 0 != (cpu_info[2] & 0x00000001);
- }
- return 0;
-}
-VP8CPUInfo VP8DecGetCPUInfo = x86CPUInfo;
-#else
-VP8CPUInfo VP8DecGetCPUInfo = NULL;
-#endif
+VP8DecIdct2 VP8Transform;
+VP8DecIdct VP8TransformUV;
+VP8DecIdct VP8TransformDC;
+VP8DecIdct VP8TransformDCUV;
-//-----------------------------------------------------------------------------
+VP8LumaFilterFunc VP8VFilter16;
+VP8LumaFilterFunc VP8HFilter16;
+VP8ChromaFilterFunc VP8VFilter8;
+VP8ChromaFilterFunc VP8HFilter8;
+VP8LumaFilterFunc VP8VFilter16i;
+VP8LumaFilterFunc VP8HFilter16i;
+VP8ChromaFilterFunc VP8VFilter8i;
+VP8ChromaFilterFunc VP8HFilter8i;
+VP8SimpleFilterFunc VP8SimpleVFilter16;
+VP8SimpleFilterFunc VP8SimpleHFilter16;
+VP8SimpleFilterFunc VP8SimpleVFilter16i;
+VP8SimpleFilterFunc VP8SimpleHFilter16i;
extern void VP8DspInitSSE2(void);
+extern void VP8DspInitNEON(void);
void VP8DspInit(void) {
+ DspInitTables();
+
+ VP8Transform = TransformTwo;
+ VP8TransformUV = TransformUV;
+ VP8TransformDC = TransformDC;
+ VP8TransformDCUV = TransformDCUV;
+
+ VP8VFilter16 = VFilter16;
+ VP8HFilter16 = HFilter16;
+ VP8VFilter8 = VFilter8;
+ VP8HFilter8 = HFilter8;
+ VP8VFilter16i = VFilter16i;
+ VP8HFilter16i = HFilter16i;
+ VP8VFilter8i = VFilter8i;
+ VP8HFilter8i = HFilter8i;
+ VP8SimpleVFilter16 = SimpleVFilter16;
+ VP8SimpleHFilter16 = SimpleHFilter16;
+ VP8SimpleVFilter16i = SimpleVFilter16i;
+ VP8SimpleHFilter16i = SimpleHFilter16i;
+
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
- if (VP8DecGetCPUInfo) {
- if (VP8DecGetCPUInfo(kSSE2)) {
-#if defined(__SSE2__) || defined(_MSC_VER)
+ if (VP8GetCPUInfo) {
+#if defined(WEBP_USE_SSE2)
+ if (VP8GetCPUInfo(kSSE2)) {
VP8DspInitSSE2();
-#endif
}
- if (VP8DecGetCPUInfo(kSSE3)) {
- // later we'll plug some SSE3 variant here
+#elif defined(WEBP_USE_NEON)
+ if (VP8GetCPUInfo(kNEON)) {
+ VP8DspInitNEON();
}
+#endif
}
}
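
After this rewrite every decoder hot function (idct, predictors, loop filters)
is reached through a function pointer that VP8DspInit() binds, first to the C
versions and then, when VP8GetCPUInfo allows it, to the SSE2 or NEON overrides.
A hedged usage sketch ('dst' is assumed to point into a reconstruction buffer
whose row stride is the decoder's BPS, as described in dsp.h):

    #include "./dsp.h"

    // Sketch only: run the inverse transform for one (or two) 4x4 blocks.
    // 'coeffs' holds 16 (or 2 x 16) dequantized coefficients.
    static void ReconstructBlocks(const int16_t* coeffs, uint8_t* dst, int do_two) {
      VP8Transform(coeffs, dst, do_two);   // dispatches to C, SSE2 or NEON code
    }

    // At startup, exactly once, before any of the pointers above are used:
    //   VP8DspInit();
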
diff --git a/src/dsp/dec_neon.c b/src/dsp/dec_neon.c
new file mode 100644
index 00000000..ec824b79
--- /dev/null
+++ b/src/dsp/dec_neon.c
@@ -0,0 +1,329 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// ARM NEON version of dsp functions and loop filtering.
+//
+// Authors: Somnath Banerjee (somnath@google.com)
+// Johann Koenig (johannkoenig@google.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include "../dec/vp8i.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define QRegs "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", \
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+
+#define FLIP_SIGN_BIT2(a, b, s) \
+ "veor " #a "," #a "," #s " \n" \
+ "veor " #b "," #b "," #s " \n" \
+
+#define FLIP_SIGN_BIT4(a, b, c, d, s) \
+ FLIP_SIGN_BIT2(a, b, s) \
+ FLIP_SIGN_BIT2(c, d, s) \
+
+#define NEEDS_FILTER(p1, p0, q0, q1, thresh, mask) \
+ "vabd.u8 q15," #p0 "," #q0 " \n" /* abs(p0 - q0) */ \
+ "vabd.u8 q14," #p1 "," #q1 " \n" /* abs(p1 - q1) */ \
+ "vqadd.u8 q15, q15, q15 \n" /* abs(p0 - q0) * 2 */ \
+ "vshr.u8 q14, q14, #1 \n" /* abs(p1 - q1) / 2 */ \
+ "vqadd.u8 q15, q15, q14 \n" /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \
+ "vdup.8 q14, " #thresh " \n" \
+ "vcge.u8 " #mask ", q14, q15 \n" /* mask <= thresh */
+
+#define GET_BASE_DELTA(p1, p0, q0, q1, o) \
+ "vqsub.s8 q15," #q0 "," #p0 " \n" /* (q0 - p0) */ \
+ "vqsub.s8 " #o "," #p1 "," #q1 " \n" /* (p1 - q1) */ \
+ "vqadd.s8 " #o "," #o ", q15 \n" /* (p1 - q1) + 1 * (p0 - q0) */ \
+ "vqadd.s8 " #o "," #o ", q15 \n" /* (p1 - q1) + 2 * (p0 - q0) */ \
+ "vqadd.s8 " #o "," #o ", q15 \n" /* (p1 - q1) + 3 * (p0 - q0) */
+
+#define DO_SIMPLE_FILTER(p0, q0, fl) \
+ "vmov.i8 q15, #0x03 \n" \
+ "vqadd.s8 q15, q15, " #fl " \n" /* filter1 = filter + 3 */ \
+ "vshr.s8 q15, q15, #3 \n" /* filter1 >> 3 */ \
+ "vqadd.s8 " #p0 "," #p0 ", q15 \n" /* p0 += filter1 */ \
+ \
+ "vmov.i8 q15, #0x04 \n" \
+ "vqadd.s8 q15, q15, " #fl " \n" /* filter1 = filter + 4 */ \
+ "vshr.s8 q15, q15, #3 \n" /* filter2 >> 3 */ \
+ "vqsub.s8 " #q0 "," #q0 ", q15 \n" /* q0 -= filter2 */
+
+// Applies filter on 2 pixels (p0 and q0)
+#define DO_FILTER2(p1, p0, q0, q1, thresh) \
+ NEEDS_FILTER(p1, p0, q0, q1, thresh, q9) /* filter mask in q9 */ \
+ "vmov.i8 q10, #0x80 \n" /* sign bit */ \
+ FLIP_SIGN_BIT4(p1, p0, q0, q1, q10) /* convert to signed value */ \
+ GET_BASE_DELTA(p1, p0, q0, q1, q11) /* get filter level */ \
+ "vand q9, q9, q11 \n" /* apply filter mask */ \
+ DO_SIMPLE_FILTER(p0, q0, q9) /* apply filter */ \
+ FLIP_SIGN_BIT2(p0, q0, q10)
+
+// Load/Store vertical edge
+#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride) \
+ "vld4.8 {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \
+ "vld4.8 {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \
+ "vld4.8 {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \
+ "vld4.8 {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \
+ "vld4.8 {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \
+ "vld4.8 {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \
+ "vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
+ "vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
+
+#define STORE8x2(c1, c2, p,stride) \
+ "vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \
+ "vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \
+ "vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \
+ "vst2.8 {" #c1"[3], " #c2"[3]}," #p "," #stride " \n" \
+ "vst2.8 {" #c1"[4], " #c2"[4]}," #p "," #stride " \n" \
+ "vst2.8 {" #c1"[5], " #c2"[5]}," #p "," #stride " \n" \
+ "vst2.8 {" #c1"[6], " #c2"[6]}," #p "," #stride " \n" \
+ "vst2.8 {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
+
+//-----------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16NEON(uint8_t* p, int stride, int thresh) {
+ __asm__ volatile (
+ "sub %[p], %[p], %[stride], lsl #1 \n" // p -= 2 * stride
+
+ "vld1.u8 {q1}, [%[p]], %[stride] \n" // p1
+ "vld1.u8 {q2}, [%[p]], %[stride] \n" // p0
+ "vld1.u8 {q3}, [%[p]], %[stride] \n" // q0
+ "vld1.u8 {q4}, [%[p]] \n" // q1
+
+ DO_FILTER2(q1, q2, q3, q4, %[thresh])
+
+ "sub %[p], %[p], %[stride], lsl #1 \n" // p -= 2 * stride
+
+ "vst1.u8 {q2}, [%[p]], %[stride] \n" // store op0
+ "vst1.u8 {q3}, [%[p]] \n" // store oq0
+ : [p] "+r"(p)
+ : [stride] "r"(stride), [thresh] "r"(thresh)
+ : "memory", QRegs
+ );
+}
+
+static void SimpleHFilter16NEON(uint8_t* p, int stride, int thresh) {
+ __asm__ volatile (
+ "sub r4, %[p], #2 \n" // base1 = p - 2
+ "lsl r6, %[stride], #1 \n" // r6 = 2 * stride
+ "add r5, r4, %[stride] \n" // base2 = base1 + stride
+
+ LOAD8x4(d2, d3, d4, d5, [r4], [r5], r6)
+ LOAD8x4(d6, d7, d8, d9, [r4], [r5], r6)
+ "vswp d3, d6 \n" // p1:q1 p0:q3
+ "vswp d5, d8 \n" // q0:q2 q1:q4
+ "vswp q2, q3 \n" // p1:q1 p0:q2 q0:q3 q1:q4
+
+ DO_FILTER2(q1, q2, q3, q4, %[thresh])
+
+ "sub %[p], %[p], #1 \n" // p - 1
+
+ "vswp d5, d6 \n"
+ STORE8x2(d4, d5, [%[p]], %[stride])
+ STORE8x2(d6, d7, [%[p]], %[stride])
+
+ : [p] "+r"(p)
+ : [stride] "r"(stride), [thresh] "r"(thresh)
+ : "memory", "r4", "r5", "r6", QRegs
+ );
+}
+
+static void SimpleVFilter16iNEON(uint8_t* p, int stride, int thresh) {
+ int k;
+ for (k = 3; k > 0; --k) {
+ p += 4 * stride;
+ SimpleVFilter16NEON(p, stride, thresh);
+ }
+}
+
+static void SimpleHFilter16iNEON(uint8_t* p, int stride, int thresh) {
+ int k;
+ for (k = 3; k > 0; --k) {
+ p += 4;
+ SimpleHFilter16NEON(p, stride, thresh);
+ }
+}
+
+static void TransformOneNEON(const int16_t *in, uint8_t *dst) {
+ const int kBPS = BPS;
+ const int16_t constants[] = {20091, 17734, 0, 0};
+ /* kC1, kC2. Padded because vld1.16 loads 8 bytes
+ * Technically these are unsigned but vqdmulh is only available in signed.
+ * vqdmulh returns high half (effectively >> 16) but also doubles the value,
+ * changing the >> 16 to >> 15 and requiring an additional >> 1.
+ * We use this to our advantage with kC2. The canonical value is 35468.
+ * However, the high bit is set so treating it as signed will give incorrect
+ * results. We avoid this by down shifting by 1 here to clear the highest bit.
+ * Combined with the doubling effect of vqdmulh we get >> 16.
+ * This can not be applied to kC1 because the lowest bit is set. Down shifting
+ * the constant would reduce precision.
+ */
+
+ /* libwebp uses a trick to avoid some extra addition that libvpx does.
+ * Instead of:
+ * temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+ * libwebp adds 1 << 16 to cospi8sqrt2minus1 (kC1). However, this causes the
+ * same issue with kC1 and vqdmulh that we work around by down shifting kC2
+ */
+
+ /* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
+ __asm__ volatile (
+ "vld1.16 {q1, q2}, [%[in]] \n"
+ "vld1.16 {d0}, [%[constants]] \n"
+
+ /* d2: in[0]
+ * d3: in[8]
+ * d4: in[4]
+ * d5: in[12]
+ */
+ "vswp d3, d4 \n"
+
+ /* q8 = {in[4], in[12]} * kC1 * 2 >> 16
+ * q9 = {in[4], in[12]} * kC2 >> 16
+ */
+ "vqdmulh.s16 q8, q2, d0[0] \n"
+ "vqdmulh.s16 q9, q2, d0[1] \n"
+
+ /* d22 = a = in[0] + in[8]
+ * d23 = b = in[0] - in[8]
+ */
+ "vqadd.s16 d22, d2, d3 \n"
+ "vqsub.s16 d23, d2, d3 \n"
+
+ /* The multiplication should be x * kC1 >> 16
+ * However, with vqdmulh we get x * kC1 * 2 >> 16
+ * (multiply, double, return high half)
+ * We avoided this in kC2 by pre-shifting the constant.
+ * q8 = in[4]/[12] * kC1 >> 16
+ */
+ "vshr.s16 q8, q8, #1 \n"
+
+ /* Add {in[4], in[12]} back after the multiplication. This is handled by
+ * adding 1 << 16 to kC1 in the libwebp C code.
+ */
+ "vqadd.s16 q8, q2, q8 \n"
+
+ /* d20 = c = in[4]*kC2 - in[12]*kC1
+ * d21 = d = in[4]*kC1 + in[12]*kC2
+ */
+ "vqsub.s16 d20, d18, d17 \n"
+ "vqadd.s16 d21, d19, d16 \n"
+
+ /* d2 = tmp[0] = a + d
+ * d3 = tmp[1] = b + c
+ * d4 = tmp[2] = b - c
+ * d5 = tmp[3] = a - d
+ */
+ "vqadd.s16 d2, d22, d21 \n"
+ "vqadd.s16 d3, d23, d20 \n"
+ "vqsub.s16 d4, d23, d20 \n"
+ "vqsub.s16 d5, d22, d21 \n"
+
+ "vzip.16 q1, q2 \n"
+ "vzip.16 q1, q2 \n"
+
+ "vswp d3, d4 \n"
+
+ /* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+ * q9 = {tmp[4], tmp[12]} * kC2 >> 16
+ */
+ "vqdmulh.s16 q8, q2, d0[0] \n"
+ "vqdmulh.s16 q9, q2, d0[1] \n"
+
+ /* d22 = a = tmp[0] + tmp[8]
+ * d23 = b = tmp[0] - tmp[8]
+ */
+ "vqadd.s16 d22, d2, d3 \n"
+ "vqsub.s16 d23, d2, d3 \n"
+
+ /* See long winded explanations prior */
+ "vshr.s16 q8, q8, #1 \n"
+ "vqadd.s16 q8, q2, q8 \n"
+
+ /* d20 = c = in[4]*kC2 - in[12]*kC1
+ * d21 = d = in[4]*kC1 + in[12]*kC2
+ */
+ "vqsub.s16 d20, d18, d17 \n"
+ "vqadd.s16 d21, d19, d16 \n"
+
+ /* d2 = tmp[0] = a + d
+ * d3 = tmp[1] = b + c
+ * d4 = tmp[2] = b - c
+ * d5 = tmp[3] = a - d
+ */
+ "vqadd.s16 d2, d22, d21 \n"
+ "vqadd.s16 d3, d23, d20 \n"
+ "vqsub.s16 d4, d23, d20 \n"
+ "vqsub.s16 d5, d22, d21 \n"
+
+ "vld1.32 d6[0], [%[dst]], %[kBPS] \n"
+ "vld1.32 d6[1], [%[dst]], %[kBPS] \n"
+ "vld1.32 d7[0], [%[dst]], %[kBPS] \n"
+ "vld1.32 d7[1], [%[dst]], %[kBPS] \n"
+
+ "sub %[dst], %[dst], %[kBPS], lsl #2 \n"
+
+ /* (val) + 4 >> 3 */
+ "vrshr.s16 d2, d2, #3 \n"
+ "vrshr.s16 d3, d3, #3 \n"
+ "vrshr.s16 d4, d4, #3 \n"
+ "vrshr.s16 d5, d5, #3 \n"
+
+ "vzip.16 q1, q2 \n"
+ "vzip.16 q1, q2 \n"
+
+ /* Must accumulate before saturating */
+ "vmovl.u8 q8, d6 \n"
+ "vmovl.u8 q9, d7 \n"
+
+ "vqadd.s16 q1, q1, q8 \n"
+ "vqadd.s16 q2, q2, q9 \n"
+
+ "vqmovun.s16 d0, q1 \n"
+ "vqmovun.s16 d1, q2 \n"
+
+ "vst1.32 d0[0], [%[dst]], %[kBPS] \n"
+ "vst1.32 d0[1], [%[dst]], %[kBPS] \n"
+ "vst1.32 d1[0], [%[dst]], %[kBPS] \n"
+ "vst1.32 d1[1], [%[dst]] \n"
+
+ : [in] "+r"(in), [dst] "+r"(dst) /* modified registers */
+ : [kBPS] "r"(kBPS), [constants] "r"(constants) /* constants */
+ : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" /* clobbered */
+ );
+}
+
+static void TransformTwoNEON(const int16_t* in, uint8_t* dst, int do_two) {
+ TransformOneNEON(in, dst);
+ if (do_two) {
+ TransformOneNEON(in + 16, dst + 4);
+ }
+}
+
+extern void VP8DspInitNEON(void);
+
+void VP8DspInitNEON(void) {
+ VP8Transform = TransformTwoNEON;
+
+ VP8SimpleVFilter16 = SimpleVFilter16NEON;
+ VP8SimpleHFilter16 = SimpleHFilter16NEON;
+ VP8SimpleVFilter16i = SimpleVFilter16iNEON;
+ VP8SimpleHFilter16i = SimpleHFilter16iNEON;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // WEBP_USE_NEON
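
The constants[] = {20091, 17734, 0, 0} table in TransformOneNEON encodes the
trick spelled out in its comment: vqdmulh returns (x * c * 2) >> 16, so the
canonical kC2 = 35468 is stored pre-shifted as 17734, while kC1 = 20091 keeps
its extra ">> 1" as the vshr.s16 instruction in the code. A small host-side
check of the kC2 identity (plain C, not part of the library):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      int32_t x;
      for (x = -2048; x <= 2047; ++x) {  // a plausible range of dequantized coeffs
        const int32_t direct  = (x * 35468) >> 16;      // x * kC2 >> 16 (canonical)
        const int32_t doubled = (x * 17734 * 2) >> 16;  // what vqdmulh computes
        assert(direct == doubled);       // identical for every x, sign included
      }
      return 0;
    }
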
diff --git a/src/dec/dsp_sse2.c b/src/dsp/dec_sse2.c
index 785f02e5..472b68ec 100644
--- a/src/dec/dsp_sse2.c
+++ b/src/dsp/dec_sse2.c
@@ -1,25 +1,27 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
-// SSE2 version of dsp functions and loop filtering.
+// SSE2 version of some decoding functions (idct, loop filtering).
//
// Author: somnath@google.com (Somnath Banerjee)
// cduvivier@google.com (Christian Duvivier)
-#if defined(__SSE2__) || defined(_MSC_VER)
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
-#include "vp8i.h"
+#include "../dec/vp8i.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
@@ -240,7 +242,7 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Loop Filter (Paragraph 15)
// Compute abs(p - q) = subs(p - q) OR subs(q - p)
@@ -337,12 +339,12 @@ static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,
*mask = _mm_cmpeq_epi8(*mask, _mm_setzero_si128());
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Edge filtering functions
// Applies filter on 2 pixels (p0 and q0)
-static inline void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
- const __m128i* q1, int thresh) {
+static WEBP_INLINE void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
+ const __m128i* q1, int thresh) {
__m128i a, mask;
const __m128i sign_bit = _mm_set1_epi8(0x80);
const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
@@ -362,8 +364,9 @@ static inline void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
}
// Applies filter on 4 pixels (p1, p0, q0 and q1)
-static inline void DoFilter4(__m128i* p1, __m128i *p0, __m128i* q0, __m128i* q1,
- const __m128i* mask, int hev_thresh) {
+static WEBP_INLINE void DoFilter4(__m128i* p1, __m128i *p0,
+ __m128i* q0, __m128i* q1,
+ const __m128i* mask, int hev_thresh) {
__m128i not_hev;
__m128i t1, t2, t3;
const __m128i sign_bit = _mm_set1_epi8(0x80);
@@ -408,9 +411,9 @@ static inline void DoFilter4(__m128i* p1, __m128i *p0, __m128i* q0, __m128i* q1,
}
// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
-static inline void DoFilter6(__m128i *p2, __m128i* p1, __m128i *p0,
- __m128i* q0, __m128i* q1, __m128i *q2,
- const __m128i* mask, int hev_thresh) {
+static WEBP_INLINE void DoFilter6(__m128i *p2, __m128i* p1, __m128i *p0,
+ __m128i* q0, __m128i* q1, __m128i *q2,
+ const __m128i* mask, int hev_thresh) {
__m128i a, not_hev;
const __m128i sign_bit = _mm_set1_epi8(0x80);
@@ -466,8 +469,8 @@ static inline void DoFilter6(__m128i *p2, __m128i* p1, __m128i *p0,
//
// TODO(somnath): Investigate _mm_shuffle* also see if it can be broken into
// two Load4x4() to avoid code duplication.
-static inline void Load8x4(const uint8_t* b, int stride,
- __m128i* p, __m128i* q) {
+static WEBP_INLINE void Load8x4(const uint8_t* b, int stride,
+ __m128i* p, __m128i* q) {
__m128i t1, t2;
// Load 0th, 1st, 4th and 5th rows
@@ -506,9 +509,10 @@ static inline void Load8x4(const uint8_t* b, int stride,
*q = _mm_unpackhi_epi32(t1, t2);
}
-static inline void Load16x4(const uint8_t* r0, const uint8_t* r8, int stride,
- __m128i* p1, __m128i* p0,
- __m128i* q0, __m128i* q1) {
+static WEBP_INLINE void Load16x4(const uint8_t* r0, const uint8_t* r8,
+ int stride,
+ __m128i* p1, __m128i* p0,
+ __m128i* q0, __m128i* q1) {
__m128i t1, t2;
// Assume the pixels around the edge (|) are numbered as follows
// 00 01 | 02 03
@@ -540,7 +544,7 @@ static inline void Load16x4(const uint8_t* r0, const uint8_t* r8, int stride,
*q1 = _mm_unpackhi_epi64(t2, *q1);
}
-static inline void Store4x4(__m128i* x, uint8_t* dst, int stride) {
+static WEBP_INLINE void Store4x4(__m128i* x, uint8_t* dst, int stride) {
int i;
for (i = 0; i < 4; ++i, dst += stride) {
*((int32_t*)dst) = _mm_cvtsi128_si32(*x);
@@ -549,8 +553,9 @@ static inline void Store4x4(__m128i* x, uint8_t* dst, int stride) {
}
// Transpose back and store
-static inline void Store16x4(uint8_t* r0, uint8_t* r8, int stride, __m128i* p1,
- __m128i* p0, __m128i* q0, __m128i* q1) {
+static WEBP_INLINE void Store16x4(uint8_t* r0, uint8_t* r8, int stride,
+ __m128i* p1, __m128i* p0,
+ __m128i* q0, __m128i* q1) {
__m128i t1;
// p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
@@ -586,7 +591,7 @@ static inline void Store16x4(uint8_t* r0, uint8_t* r8, int stride, __m128i* p1,
Store4x4(q1, r8, stride);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16SSE2(uint8_t* p, int stride, int thresh) {
@@ -629,7 +634,7 @@ static void SimpleHFilter16iSSE2(uint8_t* p, int stride, int thresh) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Complex In-loop filtering (Paragraph 15.3)
#define MAX_DIFF1(p3, p2, p1, p0, m) { \
@@ -895,4 +900,4 @@ void VP8DspInitSSE2(void) {
} // extern "C"
#endif
-#endif //__SSE2__ || _MSC_VER
+#endif // WEBP_USE_SSE2
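
dec_sse2.c now has the same shape as dec_neon.c: the whole file is gated on a
WEBP_USE_* macro from dsp.h, and it exports a VP8DspInit*() hook that
VP8DspInit() calls when the matching CPU feature is detected at runtime. A
hedged skeleton of that pattern for a new SIMD port (every name that is not a
VP8*/WEBP_* identifier from this patch is hypothetical):

    #include "./dsp.h"

    #if defined(WEBP_USE_SSE2)   // compile-time gate, defined in dsp.h

    #include <emmintrin.h>
    #include "../dec/vp8i.h"

    // Placeholder body: a real port would put its SIMD filter code here.
    static void SimpleVFilter16MySSE2(uint8_t* p, int stride, int thresh) {
      (void)p; (void)stride; (void)thresh;
    }

    void VP8DspInitMySSE2(void) {  // would be invoked from VP8DspInit()
      VP8SimpleVFilter16 = SimpleVFilter16MySSE2;
    }

    #endif  // WEBP_USE_SSE2
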
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h
new file mode 100644
index 00000000..c4061aa2
--- /dev/null
+++ b/src/dsp/dsp.h
@@ -0,0 +1,210 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Speed-critical functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_DSP_H_
+#define WEBP_DSP_DSP_H_
+
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// CPU detection
+
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
+#endif
+
+#if defined(__SSE2__) || defined(WEBP_MSC_SSE2)
+#define WEBP_USE_SSE2
+#endif
+
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__)
+#define WEBP_ANDROID_NEON // Android targets that might support NEON
+#endif
+
+#if defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON)
+#define WEBP_USE_NEON
+#endif
+
+typedef enum {
+ kSSE2,
+ kSSE3,
+ kNEON
+} CPUFeature;
+// returns true if the CPU supports the feature.
+typedef int (*VP8CPUInfo)(CPUFeature feature);
+extern VP8CPUInfo VP8GetCPUInfo;
+
+//------------------------------------------------------------------------------
+// Encoding
+
+int VP8GetAlpha(const int histo[]);
+
+// Transforms
+// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
+// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
+typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+ int do_two);
+typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
+typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
+extern VP8Idct VP8ITransform;
+extern VP8Fdct VP8FTransform;
+extern VP8WHT VP8ITransformWHT;
+extern VP8WHT VP8FTransformWHT;
+// Predictions
+// *dst is the destination block. *top and *left can be NULL.
+typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
+ const uint8_t* top);
+typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
+extern VP8Intra4Preds VP8EncPredLuma4;
+extern VP8IntraPreds VP8EncPredLuma16;
+extern VP8IntraPreds VP8EncPredChroma8;
+
+typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
+extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
+typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
+ const uint16_t* const weights);
+extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
+
+typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
+extern VP8BlockCopy VP8Copy4x4;
+// Quantization
+struct VP8Matrix; // forward declaration
+typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
+ int n, const struct VP8Matrix* const mtx);
+extern VP8QuantizeBlock VP8EncQuantizeBlock;
+
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
+ int start_block, int end_block);
+extern const int VP8DspScan[16 + 4 + 4];
+extern VP8CHisto VP8CollectHistogram;
+
+void VP8EncDspInit(void); // must be called before using any of the above
+
+//------------------------------------------------------------------------------
+// Decoding
+
+typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
+// when doing two transforms, coeffs is actually int16_t[2][16].
+typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
+extern VP8DecIdct2 VP8Transform;
+extern VP8DecIdct VP8TransformUV;
+extern VP8DecIdct VP8TransformDC;
+extern VP8DecIdct VP8TransformDCUV;
+extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
+
+// *dst is the destination block, with stride BPS. Boundary samples are
+// assumed accessible when needed.
+typedef void (*VP8PredFunc)(uint8_t* dst);
+extern const VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
+extern const VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
+extern const VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
+
+// simple filter (only for luma)
+typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
+extern VP8SimpleFilterFunc VP8SimpleVFilter16;
+extern VP8SimpleFilterFunc VP8SimpleHFilter16;
+extern VP8SimpleFilterFunc VP8SimpleVFilter16i; // filter 3 inner edges
+extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
+
+// regular filter (on both macroblock edges and inner edges)
+typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
+ int thresh, int ithresh, int hev_t);
+typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
+ int thresh, int ithresh, int hev_t);
+// on outer edge
+extern VP8LumaFilterFunc VP8VFilter16;
+extern VP8LumaFilterFunc VP8HFilter16;
+extern VP8ChromaFilterFunc VP8VFilter8;
+extern VP8ChromaFilterFunc VP8HFilter8;
+
+// on inner edge
+extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether
+extern VP8LumaFilterFunc VP8HFilter16i;
+extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether
+extern VP8ChromaFilterFunc VP8HFilter8i;
+
+// must be called before anything using the above
+void VP8DspInit(void);
+
+//------------------------------------------------------------------------------
+// WebP I/O
+
+#define FANCY_UPSAMPLING // undefined to remove fancy upsampling support
+
+typedef void (*WebPUpsampleLinePairFunc)(
+ const uint8_t* top_y, const uint8_t* bottom_y,
+ const uint8_t* top_u, const uint8_t* top_v,
+ const uint8_t* cur_u, const uint8_t* cur_v,
+ uint8_t* top_dst, uint8_t* bottom_dst, int len);
+
+#ifdef FANCY_UPSAMPLING
+
+// Fancy upsampling functions to convert YUV to RGB(A) modes
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+// Initializes SSE2 version of the fancy upsamplers.
+void WebPInitUpsamplersSSE2(void);
+
+#endif // FANCY_UPSAMPLING
+
+// Point-sampling methods.
+typedef void (*WebPSampleLinePairFunc)(
+ const uint8_t* top_y, const uint8_t* bottom_y,
+ const uint8_t* u, const uint8_t* v,
+ uint8_t* top_dst, uint8_t* bottom_dst, int len);
+
+extern const WebPSampleLinePairFunc WebPSamplers[/* MODE_LAST */];
+
+// General function for converting two lines of ARGB or RGBA.
+// 'alpha_is_last' should be true if 0xff000000 is stored in memory as
+// 0x00, 0x00, 0x00, 0xff (little endian).
+WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);
+
+// YUV444->RGB converters
+typedef void (*WebPYUV444Converter)(const uint8_t* y,
+ const uint8_t* u, const uint8_t* v,
+ uint8_t* dst, int len);
+
+extern const WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
+
+// Main function to be called
+void WebPInitUpsamplers(void);
+
+//------------------------------------------------------------------------------
+// Pre-multiply planes with alpha values
+
+// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
+// alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
+extern void (*WebPApplyAlphaMultiply)(
+ uint8_t* rgba, int alpha_first, int w, int h, int stride);
+
+// Same, but specifically for RGBA4444 format
+extern void (*WebPApplyAlphaMultiply4444)(
+ uint8_t* rgba4444, int w, int h, int stride);
+
+// To be called first before using the above.
+void WebPInitPremultiply(void);
+
+void WebPInitPremultiplySSE2(void); // should not be called directly.
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_DSP_DSP_H_ */
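
Besides the decoder and encoder pointers, dsp.h now owns the YUV->RGB
conversion helpers that previously lived in webpi.h. A sketch of how the io
layer is expected to use them for two rows of YUV420 output (MODE_RGBA is
assumed to come from the public WEBP_CSP_MODE enum in webp/decode.h; buffer
setup is omitted):

    #include "webp/decode.h"   // for MODE_RGBA
    #include "./dsp.h"

    // Sketch only: emit two RGBA rows from YUV420 planes, 'len' pixels wide.
    static void EmitTwoRows(const uint8_t* top_y, const uint8_t* bottom_y,
                            const uint8_t* top_u, const uint8_t* top_v,
                            const uint8_t* cur_u, const uint8_t* cur_v,
                            uint8_t* top_dst, uint8_t* bottom_dst, int len) {
    #ifdef FANCY_UPSAMPLING
      WebPUpsamplers[MODE_RGBA](top_y, bottom_y, top_u, top_v,
                                cur_u, cur_v, top_dst, bottom_dst, len);
    #else
      WebPSamplers[MODE_RGBA](top_y, bottom_y, cur_u, cur_v,
                              top_dst, bottom_dst, len);
    #endif
    }

    // WebPInitUpsamplers() must have been called once beforehand to bind the C
    // or SSE2 implementations into WebPUpsamplers[].
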
diff --git a/src/enc/dsp.c b/src/dsp/enc.c
index 25a9bf9c..02234564 100644
--- a/src/enc/dsp.c
+++ b/src/dsp/enc.c
@@ -1,22 +1,23 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
-// speed-critical functions.
+// Speed-critical encoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
-#include <assert.h>
-#include "vp8enci.h"
+#include <stdlib.h> // for abs()
+#include "./dsp.h"
+#include "../enc/vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
@@ -41,13 +42,24 @@ int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {
return ClipAlpha(alpha);
}
+const int VP8DspScan[16 + 4 + 4] = {
+ // Luma
+ 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
+ 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
+ 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
+ 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+
+ 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
+ 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
+};
+
static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block) {
int histo[MAX_COEFF_THRESH + 1] = { 0 };
int16_t out[16];
int j, k;
for (j = start_block; j < end_block; ++j) {
- VP8FTransform(ref + VP8Scan[j], pred + VP8Scan[j], out);
+ VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Convert coefficients to bin (within out[]).
for (k = 0; k < 16; ++k) {
@@ -64,7 +76,7 @@ static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,
return VP8GetAlpha(histo);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// run-time tables (~4k)
static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]
@@ -83,11 +95,11 @@ static void InitTables(void) {
}
}
-static inline uint8_t clip_8b(int v) {
+static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
#define STORE(x, y, v) \
@@ -97,8 +109,8 @@ static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
#define MUL(a, b) (((a) * (b)) >> 16)
-static inline void ITransformOne(const uint8_t* ref, const int16_t* in,
- uint8_t* dst) {
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+ uint8_t* dst) {
int C[4 * 4], *tmp;
int i;
tmp = C;
@@ -226,19 +238,20 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
#undef MUL
#undef STORE
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Intra predictions
-#define OUT(x, y) dst[(x) + (y) * BPS]
+#define DST(x, y) dst[(x) + (y) * BPS]
-static inline void Fill(uint8_t* dst, int value, int size) {
+static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
int j;
for (j = 0; j < size; ++j) {
memset(dst + j * BPS, value, size);
}
}
-static inline void VerticalPred(uint8_t* dst, const uint8_t* top, int size) {
+static WEBP_INLINE void VerticalPred(uint8_t* dst,
+ const uint8_t* top, int size) {
int j;
if (top) {
for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
@@ -247,7 +260,8 @@ static inline void VerticalPred(uint8_t* dst, const uint8_t* top, int size) {
}
}
-static inline void HorizontalPred(uint8_t* dst, const uint8_t* left, int size) {
+static WEBP_INLINE void HorizontalPred(uint8_t* dst,
+ const uint8_t* left, int size) {
if (left) {
int j;
for (j = 0; j < size; ++j) {
@@ -258,8 +272,8 @@ static inline void HorizontalPred(uint8_t* dst, const uint8_t* left, int size) {
}
}
-static inline void TrueMotion(uint8_t* dst, const uint8_t* left,
- const uint8_t* top, int size) {
+static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
+ const uint8_t* top, int size) {
int y;
if (left) {
if (top) {
@@ -288,9 +302,9 @@ static inline void TrueMotion(uint8_t* dst, const uint8_t* left,
}
}
-static inline void DCMode(uint8_t* dst, const uint8_t* left,
- const uint8_t* top,
- int size, int round, int shift) {
+static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
+ const uint8_t* top,
+ int size, int round, int shift) {
int DC = 0;
int j;
if (top) {
@@ -311,7 +325,7 @@ static inline void DCMode(uint8_t* dst, const uint8_t* left,
Fill(dst, DC, size);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
@@ -331,7 +345,7 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
TrueMotion(C8TM8 + dst, left, top, 8);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)
static void Intra16Preds(uint8_t* dst,
@@ -342,7 +356,7 @@ static void Intra16Preds(uint8_t* dst,
TrueMotion(I16TM16 + dst, left, top, 16);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// luma 4x4 prediction
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
@@ -390,13 +404,13 @@ static void RD4(uint8_t* dst, const uint8_t* top) {
const int B = top[1];
const int C = top[2];
const int D = top[3];
- OUT(0, 3) = AVG3(J, K, L);
- OUT(0, 2) = OUT(1, 3) = AVG3(I, J, K);
- OUT(0, 1) = OUT(1, 2) = OUT(2, 3) = AVG3(X, I, J);
- OUT(0, 0) = OUT(1, 1) = OUT(2, 2) = OUT(3, 3) = AVG3(A, X, I);
- OUT(1, 0) = OUT(2, 1) = OUT(3, 2) = AVG3(B, A, X);
- OUT(2, 0) = OUT(3, 1) = AVG3(C, B, A);
- OUT(3, 0) = AVG3(D, C, B);
+ DST(0, 3) = AVG3(J, K, L);
+ DST(0, 2) = DST(1, 3) = AVG3(I, J, K);
+ DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J);
+ DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
+ DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X);
+ DST(2, 0) = DST(3, 1) = AVG3(C, B, A);
+ DST(3, 0) = AVG3(D, C, B);
}
static void LD4(uint8_t* dst, const uint8_t* top) {
@@ -408,13 +422,13 @@ static void LD4(uint8_t* dst, const uint8_t* top) {
const int F = top[5];
const int G = top[6];
const int H = top[7];
- OUT(0, 0) = AVG3(A, B, C);
- OUT(1, 0) = OUT(0, 1) = AVG3(B, C, D);
- OUT(2, 0) = OUT(1, 1) = OUT(0, 2) = AVG3(C, D, E);
- OUT(3, 0) = OUT(2, 1) = OUT(1, 2) = OUT(0, 3) = AVG3(D, E, F);
- OUT(3, 1) = OUT(2, 2) = OUT(1, 3) = AVG3(E, F, G);
- OUT(3, 2) = OUT(2, 3) = AVG3(F, G, H);
- OUT(3, 3) = AVG3(G, H, H);
+ DST(0, 0) = AVG3(A, B, C);
+ DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+ DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
+ DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+ DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+ DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+ DST(3, 3) = AVG3(G, H, H);
}
static void VR4(uint8_t* dst, const uint8_t* top) {
@@ -426,17 +440,17 @@ static void VR4(uint8_t* dst, const uint8_t* top) {
const int B = top[1];
const int C = top[2];
const int D = top[3];
- OUT(0, 0) = OUT(1, 2) = AVG2(X, A);
- OUT(1, 0) = OUT(2, 2) = AVG2(A, B);
- OUT(2, 0) = OUT(3, 2) = AVG2(B, C);
- OUT(3, 0) = AVG2(C, D);
+ DST(0, 0) = DST(1, 2) = AVG2(X, A);
+ DST(1, 0) = DST(2, 2) = AVG2(A, B);
+ DST(2, 0) = DST(3, 2) = AVG2(B, C);
+ DST(3, 0) = AVG2(C, D);
- OUT(0, 3) = AVG3(K, J, I);
- OUT(0, 2) = AVG3(J, I, X);
- OUT(0, 1) = OUT(1, 3) = AVG3(I, X, A);
- OUT(1, 1) = OUT(2, 3) = AVG3(X, A, B);
- OUT(2, 1) = OUT(3, 3) = AVG3(A, B, C);
- OUT(3, 1) = AVG3(B, C, D);
+ DST(0, 3) = AVG3(K, J, I);
+ DST(0, 2) = AVG3(J, I, X);
+ DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+ DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+ DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+ DST(3, 1) = AVG3(B, C, D);
}
static void VL4(uint8_t* dst, const uint8_t* top) {
@@ -448,17 +462,17 @@ static void VL4(uint8_t* dst, const uint8_t* top) {
const int F = top[5];
const int G = top[6];
const int H = top[7];
- OUT(0, 0) = AVG2(A, B);
- OUT(1, 0) = OUT(0, 2) = AVG2(B, C);
- OUT(2, 0) = OUT(1, 2) = AVG2(C, D);
- OUT(3, 0) = OUT(2, 2) = AVG2(D, E);
+ DST(0, 0) = AVG2(A, B);
+ DST(1, 0) = DST(0, 2) = AVG2(B, C);
+ DST(2, 0) = DST(1, 2) = AVG2(C, D);
+ DST(3, 0) = DST(2, 2) = AVG2(D, E);
- OUT(0, 1) = AVG3(A, B, C);
- OUT(1, 1) = OUT(0, 3) = AVG3(B, C, D);
- OUT(2, 1) = OUT(1, 3) = AVG3(C, D, E);
- OUT(3, 1) = OUT(2, 3) = AVG3(D, E, F);
- OUT(3, 2) = AVG3(E, F, G);
- OUT(3, 3) = AVG3(F, G, H);
+ DST(0, 1) = AVG3(A, B, C);
+ DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+ DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+ DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+ DST(3, 2) = AVG3(E, F, G);
+ DST(3, 3) = AVG3(F, G, H);
}
static void HU4(uint8_t* dst, const uint8_t* top) {
@@ -466,14 +480,14 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
const int J = top[-3];
const int K = top[-4];
const int L = top[-5];
- OUT(0, 0) = AVG2(I, J);
- OUT(2, 0) = OUT(0, 1) = AVG2(J, K);
- OUT(2, 1) = OUT(0, 2) = AVG2(K, L);
- OUT(1, 0) = AVG3(I, J, K);
- OUT(3, 0) = OUT(1, 1) = AVG3(J, K, L);
- OUT(3, 1) = OUT(1, 2) = AVG3(K, L, L);
- OUT(3, 2) = OUT(2, 2) =
- OUT(0, 3) = OUT(1, 3) = OUT(2, 3) = OUT(3, 3) = L;
+ DST(0, 0) = AVG2(I, J);
+ DST(2, 0) = DST(0, 1) = AVG2(J, K);
+ DST(2, 1) = DST(0, 2) = AVG2(K, L);
+ DST(1, 0) = AVG3(I, J, K);
+ DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+ DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+ DST(3, 2) = DST(2, 2) =
+ DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static void HD4(uint8_t* dst, const uint8_t* top) {
@@ -486,17 +500,17 @@ static void HD4(uint8_t* dst, const uint8_t* top) {
const int B = top[1];
const int C = top[2];
- OUT(0, 0) = OUT(2, 1) = AVG2(I, X);
- OUT(0, 1) = OUT(2, 2) = AVG2(J, I);
- OUT(0, 2) = OUT(2, 3) = AVG2(K, J);
- OUT(0, 3) = AVG2(L, K);
+ DST(0, 0) = DST(2, 1) = AVG2(I, X);
+ DST(0, 1) = DST(2, 2) = AVG2(J, I);
+ DST(0, 2) = DST(2, 3) = AVG2(K, J);
+ DST(0, 3) = AVG2(L, K);
- OUT(3, 0) = AVG3(A, B, C);
- OUT(2, 0) = AVG3(X, A, B);
- OUT(1, 0) = OUT(3, 1) = AVG3(I, X, A);
- OUT(1, 1) = OUT(3, 2) = AVG3(J, I, X);
- OUT(1, 2) = OUT(3, 3) = AVG3(K, J, I);
- OUT(1, 3) = AVG3(L, K, J);
+ DST(3, 0) = AVG3(A, B, C);
+ DST(2, 0) = AVG3(X, A, B);
+ DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+ DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+ DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+ DST(1, 3) = AVG3(L, K, J);
}
static void TM4(uint8_t* dst, const uint8_t* top) {
@@ -511,6 +525,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
}
}
+#undef DST
#undef AVG3
#undef AVG2
@@ -529,10 +544,11 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
HU4(I4HU4 + dst, top);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Metric
-static inline int GetSSE(const uint8_t* a, const uint8_t* b, int w, int h) {
+static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
+ int w, int h) {
int count = 0;
int y, x;
for (y = 0; y < h; ++y) {
@@ -559,7 +575,7 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 4, 4);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Texture distortion
//
// We try to match the spectral content (weighted) between source and
@@ -620,16 +636,20 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
return D;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Quantization
//
+static const uint8_t kZigzag[16] = {
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
// Simple quantization
static int QuantizeBlock(int16_t in[16], int16_t out[16],
int n, const VP8Matrix* const mtx) {
int last = -1;
for (; n < 16; ++n) {
- const int j = VP8Zigzag[n];
+ const int j = kZigzag[n];
const int sign = (in[j] < 0);
int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
if (coeff > 2047) coeff = 2047;
@@ -649,10 +669,10 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return (last >= 0);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Block copy
-static inline void Copy(const uint8_t* src, uint8_t* dst, int size) {
+static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) {
int y;
for (y = 0; y < size; ++y) {
memcpy(dst, src, size);
@@ -662,49 +682,9 @@ static inline void Copy(const uint8_t* src, uint8_t* dst, int size) {
}
static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); }
-static void Copy8x8(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 8); }
-static void Copy16x16(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 16); }
-
-//-----------------------------------------------------------------------------
-// SSE2 detection.
-//
-
-#if defined(__pic__) && defined(__i386__)
-static inline void GetCPUInfo(int cpu_info[4], int info_type) {
- __asm__ volatile (
- "mov %%ebx, %%edi\n"
- "cpuid\n"
- "xchg %%edi, %%ebx\n"
- : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
- : "a"(info_type));
-}
-#elif defined(__i386__) || defined(__x86_64__)
-static inline void GetCPUInfo(int cpu_info[4], int info_type) {
- __asm__ volatile (
- "cpuid\n"
- : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
- : "a"(info_type));
-}
-#elif defined(_MSC_VER) // Visual C++
-#define GetCPUInfo __cpuid
-#endif
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
-static int x86CPUInfo(CPUFeature feature) {
- int cpu_info[4];
- GetCPUInfo(cpu_info, 1);
- if (feature == kSSE2) {
- return 0 != (cpu_info[3] & 0x04000000);
- }
- if (feature == kSSE3) {
- return 0 != (cpu_info[2] & 0x00000001);
- }
- return 0;
-}
-VP8CPUInfo VP8EncGetCPUInfo = x86CPUInfo;
-#else
-VP8CPUInfo VP8EncGetCPUInfo = NULL;
-#endif
+//------------------------------------------------------------------------------
+// Initialization
// Speed-critical function pointers. We have to initialize them to the default
// implementations within VP8EncDspInit().
@@ -724,8 +704,6 @@ VP8WMetric VP8TDisto4x4;
VP8WMetric VP8TDisto16x16;
VP8QuantizeBlock VP8EncQuantizeBlock;
VP8BlockCopy VP8Copy4x4;
-VP8BlockCopy VP8Copy8x8;
-VP8BlockCopy VP8Copy16x16;
extern void VP8EncDspInitSSE2(void);
@@ -749,19 +727,14 @@ void VP8EncDspInit(void) {
VP8TDisto16x16 = Disto16x16;
VP8EncQuantizeBlock = QuantizeBlock;
VP8Copy4x4 = Copy4x4;
- VP8Copy8x8 = Copy8x8;
- VP8Copy16x16 = Copy16x16;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
- if (VP8EncGetCPUInfo) {
- if (VP8EncGetCPUInfo(kSSE2)) {
-#if defined(__SSE2__) || defined(_MSC_VER)
+ if (VP8GetCPUInfo) {
+#if defined(WEBP_USE_SSE2)
+ if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspInitSSE2();
-#endif
- }
- if (VP8EncGetCPUInfo(kSSE3)) {
- // later we'll plug some SSE3 variant here
}
+#endif
}
}
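The hunk above replaces the encoder's private cpuid probe with the shared VP8GetCPUInfo hook: every speed-critical pointer is first set to its portable C implementation inside VP8EncDspInit(), and selected entries are then overwritten only when the SSE2 unit was compiled in (WEBP_USE_SSE2) and the CPU reports the feature at run time. A minimal standalone sketch of that dispatch pattern, using illustrative names rather than libwebp symbols:

#include <stdio.h>

typedef int (*AddFunc)(int a, int b);

static int AddC(int a, int b) { return a + b; }     // portable default

#if defined(__SSE2__)
static int AddSSE2(int a, int b) { return a + b; }  // stand-in for an intrinsic version
#endif

static AddFunc Add;                                 // speed-critical pointer

static int HasSSE2(void) {                          // stand-in for a VP8GetCPUInfo(kSSE2)-style probe
#if defined(__SSE2__)
  return 1;
#else
  return 0;
#endif
}

static void AddInit(void) {
  Add = AddC;                      // always start from the safe default
#if defined(__SSE2__)
  if (HasSSE2()) Add = AddSSE2;    // override only if compiled *and* supported
#endif
}

int main(void) {
  AddInit();
  printf("%d\n", Add(2, 3));       // prints 5 whichever path was selected
  return 0;
}

The probe result drives every pointer during init, so the cost of the CPU check is paid once per process rather than once per call.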
diff --git a/src/enc/dsp_sse2.c b/src/dsp/enc_sse2.c
index db20e648..b046761d 100644
--- a/src/enc/dsp_sse2.c
+++ b/src/dsp/enc_sse2.c
@@ -1,24 +1,27 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
-// SSE2 version of speed-critical functions.
+// SSE2 version of speed-critical encoding functions.
//
// Author: Christian Duvivier (cduvivier@google.com)
-#if defined(__SSE2__) || defined(_MSC_VER)
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <stdlib.h> // for abs()
#include <emmintrin.h>
-#include "vp8enci.h"
+#include "../enc/vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
@@ -29,7 +32,7 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
int j, k;
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
for (j = start_block; j < end_block; ++j) {
- VP8FTransform(ref + VP8Scan[j], pred + VP8Scan[j], out);
+ VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Convert coefficients to bin (within out[]).
{
@@ -64,7 +67,7 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
return VP8GetAlpha(histo);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
// Does one or two inverse transforms.
@@ -436,7 +439,7 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Metric
static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
@@ -485,7 +488,7 @@ static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Texture distortion
//
// We try to match the spectral content (weighted) between source and
@@ -679,7 +682,7 @@ static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Quantization
//
@@ -831,4 +834,4 @@ void VP8EncDspInitSSE2(void) {
} // extern "C"
#endif
-#endif //__SSE2__
+#endif // WEBP_USE_SSE2
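As in enc.c above, this SSE2 unit now includes ./dsp.h first and keys its whole body off the single WEBP_USE_SSE2 symbol instead of repeating the __SSE2__/_MSC_VER check in every file. A hedged sketch of the idea, with hypothetical names since the exact dsp.h definition lives elsewhere in this patch and may differ from this approximation:

// Illustrative approximation of a centralized SIMD guard:
#if defined(__SSE2__) || defined(_MSC_VER)
#define DEMO_USE_SSE2
#endif

// The whole SIMD body compiles away on builds without SSE2 support.
#if defined(DEMO_USE_SSE2)
#include <emmintrin.h>

int SumFourInts(const int in[4]) {
  int tmp[4];
  const __m128i v = _mm_loadu_si128((const __m128i*)in);  // unaligned 128-bit load
  _mm_storeu_si128((__m128i*)tmp, v);                      // store back to memory
  return tmp[0] + tmp[1] + tmp[2] + tmp[3];
}
#endif  // DEMO_USE_SSE2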
diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
new file mode 100644
index 00000000..6d3094fd
--- /dev/null
+++ b/src/dsp/lossless.c
@@ -0,0 +1,1150 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+// Jyrki Alakuijala (jyrki@google.com)
+// Urvang Joshi (urvang@google.com)
+
+#define ANDROID_WEBP_RGB
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include <math.h>
+#include <stdlib.h>
+#include "./lossless.h"
+#include "../dec/vp8li.h"
+#include "../dsp/yuv.h"
+#include "../dsp/dsp.h"
+#include "../enc/histogram.h"
+
+#define MAX_DIFF_COST (1e30f)
+
+// lookup table for small values of log2(int)
+#define APPROX_LOG_MAX 4096
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
+#define LOG_LOOKUP_IDX_MAX 256
+static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
+ 0.0000000000000000f, 0.0000000000000000f,
+ 1.0000000000000000f, 1.5849625007211560f,
+ 2.0000000000000000f, 2.3219280948873621f,
+ 2.5849625007211560f, 2.8073549220576041f,
+ 3.0000000000000000f, 3.1699250014423121f,
+ 3.3219280948873621f, 3.4594316186372973f,
+ 3.5849625007211560f, 3.7004397181410921f,
+ 3.8073549220576041f, 3.9068905956085187f,
+ 4.0000000000000000f, 4.0874628412503390f,
+ 4.1699250014423121f, 4.2479275134435852f,
+ 4.3219280948873626f, 4.3923174227787606f,
+ 4.4594316186372973f, 4.5235619560570130f,
+ 4.5849625007211560f, 4.6438561897747243f,
+ 4.7004397181410917f, 4.7548875021634682f,
+ 4.8073549220576037f, 4.8579809951275718f,
+ 4.9068905956085187f, 4.9541963103868749f,
+ 5.0000000000000000f, 5.0443941193584533f,
+ 5.0874628412503390f, 5.1292830169449663f,
+ 5.1699250014423121f, 5.2094533656289501f,
+ 5.2479275134435852f, 5.2854022188622487f,
+ 5.3219280948873626f, 5.3575520046180837f,
+ 5.3923174227787606f, 5.4262647547020979f,
+ 5.4594316186372973f, 5.4918530963296747f,
+ 5.5235619560570130f, 5.5545888516776376f,
+ 5.5849625007211560f, 5.6147098441152083f,
+ 5.6438561897747243f, 5.6724253419714951f,
+ 5.7004397181410917f, 5.7279204545631987f,
+ 5.7548875021634682f, 5.7813597135246599f,
+ 5.8073549220576037f, 5.8328900141647412f,
+ 5.8579809951275718f, 5.8826430493618415f,
+ 5.9068905956085187f, 5.9307373375628866f,
+ 5.9541963103868749f, 5.9772799234999167f,
+ 6.0000000000000000f, 6.0223678130284543f,
+ 6.0443941193584533f, 6.0660891904577720f,
+ 6.0874628412503390f, 6.1085244567781691f,
+ 6.1292830169449663f, 6.1497471195046822f,
+ 6.1699250014423121f, 6.1898245588800175f,
+ 6.2094533656289501f, 6.2288186904958804f,
+ 6.2479275134435852f, 6.2667865406949010f,
+ 6.2854022188622487f, 6.3037807481771030f,
+ 6.3219280948873626f, 6.3398500028846243f,
+ 6.3575520046180837f, 6.3750394313469245f,
+ 6.3923174227787606f, 6.4093909361377017f,
+ 6.4262647547020979f, 6.4429434958487279f,
+ 6.4594316186372973f, 6.4757334309663976f,
+ 6.4918530963296747f, 6.5077946401986963f,
+ 6.5235619560570130f, 6.5391588111080309f,
+ 6.5545888516776376f, 6.5698556083309478f,
+ 6.5849625007211560f, 6.5999128421871278f,
+ 6.6147098441152083f, 6.6293566200796094f,
+ 6.6438561897747243f, 6.6582114827517946f,
+ 6.6724253419714951f, 6.6865005271832185f,
+ 6.7004397181410917f, 6.7142455176661224f,
+ 6.7279204545631987f, 6.7414669864011464f,
+ 6.7548875021634682f, 6.7681843247769259f,
+ 6.7813597135246599f, 6.7944158663501061f,
+ 6.8073549220576037f, 6.8201789624151878f,
+ 6.8328900141647412f, 6.8454900509443747f,
+ 6.8579809951275718f, 6.8703647195834047f,
+ 6.8826430493618415f, 6.8948177633079437f,
+ 6.9068905956085187f, 6.9188632372745946f,
+ 6.9307373375628866f, 6.9425145053392398f,
+ 6.9541963103868749f, 6.9657842846620869f,
+ 6.9772799234999167f, 6.9886846867721654f,
+ 7.0000000000000000f, 7.0112272554232539f,
+ 7.0223678130284543f, 7.0334230015374501f,
+ 7.0443941193584533f, 7.0552824355011898f,
+ 7.0660891904577720f, 7.0768155970508308f,
+ 7.0874628412503390f, 7.0980320829605263f,
+ 7.1085244567781691f, 7.1189410727235076f,
+ 7.1292830169449663f, 7.1395513523987936f,
+ 7.1497471195046822f, 7.1598713367783890f,
+ 7.1699250014423121f, 7.1799090900149344f,
+ 7.1898245588800175f, 7.1996723448363644f,
+ 7.2094533656289501f, 7.2191685204621611f,
+ 7.2288186904958804f, 7.2384047393250785f,
+ 7.2479275134435852f, 7.2573878426926521f,
+ 7.2667865406949010f, 7.2761244052742375f,
+ 7.2854022188622487f, 7.2946207488916270f,
+ 7.3037807481771030f, 7.3128829552843557f,
+ 7.3219280948873626f, 7.3309168781146167f,
+ 7.3398500028846243f, 7.3487281542310771f,
+ 7.3575520046180837f, 7.3663222142458160f,
+ 7.3750394313469245f, 7.3837042924740519f,
+ 7.3923174227787606f, 7.4008794362821843f,
+ 7.4093909361377017f, 7.4178525148858982f,
+ 7.4262647547020979f, 7.4346282276367245f,
+ 7.4429434958487279f, 7.4512111118323289f,
+ 7.4594316186372973f, 7.4676055500829976f,
+ 7.4757334309663976f, 7.4838157772642563f,
+ 7.4918530963296747f, 7.4998458870832056f,
+ 7.5077946401986963f, 7.5156998382840427f,
+ 7.5235619560570130f, 7.5313814605163118f,
+ 7.5391588111080309f, 7.5468944598876364f,
+ 7.5545888516776376f, 7.5622424242210728f,
+ 7.5698556083309478f, 7.5774288280357486f,
+ 7.5849625007211560f, 7.5924570372680806f,
+ 7.5999128421871278f, 7.6073303137496104f,
+ 7.6147098441152083f, 7.6220518194563764f,
+ 7.6293566200796094f, 7.6366246205436487f,
+ 7.6438561897747243f, 7.6510516911789281f,
+ 7.6582114827517946f, 7.6653359171851764f,
+ 7.6724253419714951f, 7.6794800995054464f,
+ 7.6865005271832185f, 7.6934869574993252f,
+ 7.7004397181410917f, 7.7073591320808825f,
+ 7.7142455176661224f, 7.7210991887071855f,
+ 7.7279204545631987f, 7.7347096202258383f,
+ 7.7414669864011464f, 7.7481928495894605f,
+ 7.7548875021634682f, 7.7615512324444795f,
+ 7.7681843247769259f, 7.7747870596011736f,
+ 7.7813597135246599f, 7.7879025593914317f,
+ 7.7944158663501061f, 7.8008998999203047f,
+ 7.8073549220576037f, 7.8137811912170374f,
+ 7.8201789624151878f, 7.8265484872909150f,
+ 7.8328900141647412f, 7.8392037880969436f,
+ 7.8454900509443747f, 7.8517490414160571f,
+ 7.8579809951275718f, 7.8641861446542797f,
+ 7.8703647195834047f, 7.8765169465649993f,
+ 7.8826430493618415f, 7.8887432488982591f,
+ 7.8948177633079437f, 7.9008668079807486f,
+ 7.9068905956085187f, 7.9128893362299619f,
+ 7.9188632372745946f, 7.9248125036057812f,
+ 7.9307373375628866f, 7.9366379390025709f,
+ 7.9425145053392398f, 7.9483672315846778f,
+ 7.9541963103868749f, 7.9600019320680805f,
+ 7.9657842846620869f, 7.9715435539507719f,
+ 7.9772799234999167f, 7.9829935746943103f,
+ 7.9886846867721654f, 7.9943534368588577f
+};
+
+float VP8LFastLog2(int v) {
+ if (v < LOG_LOOKUP_IDX_MAX) {
+ return kLog2Table[v];
+ } else if (v < APPROX_LOG_MAX) {
+ int log_cnt = 0;
+ while (v >= LOG_LOOKUP_IDX_MAX) {
+ ++log_cnt;
+ v = v >> 1;
+ }
+ return kLog2Table[v] + (float)log_cnt;
+ } else {
+ return (float)(LOG_2_RECIPROCAL * log((double)v));
+ }
+}
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+// In-place sum of each component with mod 256.
+static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
+ const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
+ const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
+ *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
+}
+
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
+ return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
+}
+
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
+ return Average2(Average2(a0, a2), a1);
+}
+
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
+ uint32_t a2, uint32_t a3) {
+ return Average2(Average2(a0, a1), Average2(a2, a3));
+}
+
+static WEBP_INLINE uint32_t Clip255(uint32_t a) {
+ if (a < 256) {
+ return a;
+ }
+ // return 0 when a is a negative integer.
+ // return 255 when a is positive (this branch is only reached when a > 255).

+ return ~a >> 24;
+}
+
+static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
+ return Clip255(a + b - c);
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
+ uint32_t c2) {
+ const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
+ const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
+ (c1 >> 16) & 0xff,
+ (c2 >> 16) & 0xff);
+ const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
+ (c1 >> 8) & 0xff,
+ (c2 >> 8) & 0xff);
+ const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
+ return (a << 24) | (r << 16) | (g << 8) | b;
+}
+
+static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
+ return Clip255(a + (a - b) / 2);
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
+ uint32_t c2) {
+ const uint32_t ave = Average2(c0, c1);
+ const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
+ const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
+ const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
+ const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
+ return (a << 24) | (r << 16) | (g << 8) | b;
+}
+
+static WEBP_INLINE int Sub3(int a, int b, int c) {
+ const int pa = b - c;
+ const int pb = a - c;
+ return abs(pa) - abs(pb);
+}
+
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
+ const int pa_minus_pb =
+ Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
+ Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
+ Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
+ Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
+
+ return (pa_minus_pb <= 0) ? a : b;
+}
+
+//------------------------------------------------------------------------------
+// Predictors
+
+static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
+ (void)top;
+ (void)left;
+ return ARGB_BLACK;
+}
+static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
+ (void)top;
+ return left;
+}
+static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
+ (void)left;
+ return top[0];
+}
+static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
+ (void)left;
+ return top[1];
+}
+static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
+ (void)left;
+ return top[-1];
+}
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = Average3(left, top[0], top[1]);
+ return pred;
+}
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = Average2(left, top[-1]);
+ return pred;
+}
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = Average2(left, top[0]);
+ return pred;
+}
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = Average2(top[-1], top[0]);
+ (void)left;
+ return pred;
+}
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = Average2(top[0], top[1]);
+ (void)left;
+ return pred;
+}
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
+ return pred;
+}
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = Select(top[0], left, top[-1]);
+ return pred;
+}
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
+ return pred;
+}
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+ const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
+ return pred;
+}
+
+typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
+static const PredictorFunc kPredictors[16] = {
+ Predictor0, Predictor1, Predictor2, Predictor3,
+ Predictor4, Predictor5, Predictor6, Predictor7,
+ Predictor8, Predictor9, Predictor10, Predictor11,
+ Predictor12, Predictor13,
+ Predictor0, Predictor0 // <- padding security sentinels
+};
+
+// TODO(vikasa): Replace 256 etc with defines.
+static float PredictionCostSpatial(const int* counts,
+ int weight_0, double exp_val) {
+ const int significant_symbols = 16;
+ const double exp_decay_factor = 0.6;
+ double bits = weight_0 * counts[0];
+ int i;
+ for (i = 1; i < significant_symbols; ++i) {
+ bits += exp_val * (counts[i] + counts[256 - i]);
+ exp_val *= exp_decay_factor;
+ }
+ return (float)(-0.1 * bits);
+}
+
+// Compute Shannon's entropy: -Sum(p * log2(p)), scaled by the total count.
+static float ShannonEntropy(const int* const array, int n) {
+ int i;
+ float retval = 0.f;
+ int sum = 0;
+ for (i = 0; i < n; ++i) {
+ if (array[i] != 0) {
+ sum += array[i];
+ retval -= VP8LFastSLog2(array[i]);
+ }
+ }
+ retval += VP8LFastSLog2(sum);
+ return retval;
+}
+
+static float PredictionCostSpatialHistogram(int accumulated[4][256],
+ int tile[4][256]) {
+ int i;
+ int k;
+ int combo[256];
+ double retval = 0;
+ for (i = 0; i < 4; ++i) {
+ const double exp_val = 0.94;
+ retval += PredictionCostSpatial(&tile[i][0], 1, exp_val);
+ retval += ShannonEntropy(&tile[i][0], 256);
+ for (k = 0; k < 256; ++k) {
+ combo[k] = accumulated[i][k] + tile[i][k];
+ }
+ retval += ShannonEntropy(&combo[0], 256);
+ }
+ return (float)retval;
+}
+
+static int GetBestPredictorForTile(int width, int height,
+ int tile_x, int tile_y, int bits,
+ int accumulated[4][256],
+ const uint32_t* const argb_scratch) {
+ const int kNumPredModes = 14;
+ const int col_start = tile_x << bits;
+ const int row_start = tile_y << bits;
+ const int tile_size = 1 << bits;
+ const int ymax = (tile_size <= height - row_start) ?
+ tile_size : height - row_start;
+ const int xmax = (tile_size <= width - col_start) ?
+ tile_size : width - col_start;
+ int histo[4][256];
+ float best_diff = MAX_DIFF_COST;
+ int best_mode = 0;
+
+ int mode;
+ for (mode = 0; mode < kNumPredModes; ++mode) {
+ const uint32_t* current_row = argb_scratch;
+ const PredictorFunc pred_func = kPredictors[mode];
+ float cur_diff;
+ int y;
+ memset(&histo[0][0], 0, sizeof(histo));
+ for (y = 0; y < ymax; ++y) {
+ int x;
+ const int row = row_start + y;
+ const uint32_t* const upper_row = current_row;
+ current_row = upper_row + width;
+ for (x = 0; x < xmax; ++x) {
+ const int col = col_start + x;
+ uint32_t predict;
+ uint32_t predict_diff;
+ if (row == 0) {
+ predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
+ } else if (col == 0) {
+ predict = upper_row[col]; // Top.
+ } else {
+ predict = pred_func(current_row[col - 1], upper_row + col);
+ }
+ predict_diff = VP8LSubPixels(current_row[col], predict);
+ ++histo[0][predict_diff >> 24];
+ ++histo[1][((predict_diff >> 16) & 0xff)];
+ ++histo[2][((predict_diff >> 8) & 0xff)];
+ ++histo[3][(predict_diff & 0xff)];
+ }
+ }
+ cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
+ if (cur_diff < best_diff) {
+ best_diff = cur_diff;
+ best_mode = mode;
+ }
+ }
+
+ return best_mode;
+}
+
+static void CopyTileWithPrediction(int width, int height,
+ int tile_x, int tile_y, int bits, int mode,
+ const uint32_t* const argb_scratch,
+ uint32_t* const argb) {
+ const int col_start = tile_x << bits;
+ const int row_start = tile_y << bits;
+ const int tile_size = 1 << bits;
+ const int ymax = (tile_size <= height - row_start) ?
+ tile_size : height - row_start;
+ const int xmax = (tile_size <= width - col_start) ?
+ tile_size : width - col_start;
+ const PredictorFunc pred_func = kPredictors[mode];
+ const uint32_t* current_row = argb_scratch;
+
+ int y;
+ for (y = 0; y < ymax; ++y) {
+ int x;
+ const int row = row_start + y;
+ const uint32_t* const upper_row = current_row;
+ current_row = upper_row + width;
+ for (x = 0; x < xmax; ++x) {
+ const int col = col_start + x;
+ const int pix = row * width + col;
+ uint32_t predict;
+ if (row == 0) {
+ predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
+ } else if (col == 0) {
+ predict = upper_row[col]; // Top.
+ } else {
+ predict = pred_func(current_row[col - 1], upper_row + col);
+ }
+ argb[pix] = VP8LSubPixels(current_row[col], predict);
+ }
+ }
+}
+
+void VP8LResidualImage(int width, int height, int bits,
+ uint32_t* const argb, uint32_t* const argb_scratch,
+ uint32_t* const image) {
+ const int max_tile_size = 1 << bits;
+ const int tiles_per_row = VP8LSubSampleSize(width, bits);
+ const int tiles_per_col = VP8LSubSampleSize(height, bits);
+ uint32_t* const upper_row = argb_scratch;
+ uint32_t* const current_tile_rows = argb_scratch + width;
+ int tile_y;
+ int histo[4][256];
+ memset(histo, 0, sizeof(histo));
+ for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+ const int tile_y_offset = tile_y * max_tile_size;
+ const int this_tile_height =
+ (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
+ int tile_x;
+ if (tile_y > 0) {
+ memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
+ width * sizeof(*upper_row));
+ }
+ memcpy(current_tile_rows, &argb[tile_y_offset * width],
+ this_tile_height * width * sizeof(*current_tile_rows));
+ for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+ int pred;
+ int y;
+ const int tile_x_offset = tile_x * max_tile_size;
+ int all_x_max = tile_x_offset + max_tile_size;
+ if (all_x_max > width) {
+ all_x_max = width;
+ }
+ pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
+ argb_scratch);
+ image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
+ CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
+ argb_scratch, argb);
+ for (y = 0; y < max_tile_size; ++y) {
+ int ix;
+ int all_x;
+ int all_y = tile_y_offset + y;
+ if (all_y >= height) {
+ break;
+ }
+ ix = all_y * width + tile_x_offset;
+ for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+ const uint32_t a = argb[ix];
+ ++histo[0][a >> 24];
+ ++histo[1][((a >> 16) & 0xff)];
+ ++histo[2][((a >> 8) & 0xff)];
+ ++histo[3][(a & 0xff)];
+ }
+ }
+ }
+ }
+}
+
+// Inverse prediction.
+static void PredictorInverseTransform(const VP8LTransform* const transform,
+ int y_start, int y_end, uint32_t* data) {
+ const int width = transform->xsize_;
+ if (y_start == 0) { // First Row follows the L (mode=1) mode.
+ int x;
+ const uint32_t pred0 = Predictor0(data[-1], NULL);
+ AddPixelsEq(data, pred0);
+ for (x = 1; x < width; ++x) {
+ const uint32_t pred1 = Predictor1(data[x - 1], NULL);
+ AddPixelsEq(data + x, pred1);
+ }
+ data += width;
+ ++y_start;
+ }
+
+ {
+ int y = y_start;
+ const int mask = (1 << transform->bits_) - 1;
+ const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+ const uint32_t* pred_mode_base =
+ transform->data_ + (y >> transform->bits_) * tiles_per_row;
+
+ while (y < y_end) {
+ int x;
+ const uint32_t pred2 = Predictor2(data[-1], data - width);
+ const uint32_t* pred_mode_src = pred_mode_base;
+ PredictorFunc pred_func;
+
+ // First pixel follows the T (mode=2) mode.
+ AddPixelsEq(data, pred2);
+
+ // .. the rest:
+ pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
+ for (x = 1; x < width; ++x) {
+ uint32_t pred;
+ if ((x & mask) == 0) { // start of tile. Read predictor function.
+ pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
+ }
+ pred = pred_func(data[x - 1], data + x - width);
+ AddPixelsEq(data + x, pred);
+ }
+ data += width;
+ ++y;
+ if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
+ pred_mode_base += tiles_per_row;
+ }
+ }
+ }
+}
+
+void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
+ int i;
+ for (i = 0; i < num_pixs; ++i) {
+ const uint32_t argb = argb_data[i];
+ const uint32_t green = (argb >> 8) & 0xff;
+ const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
+ const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
+ argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
+ }
+}
+
+// Add green to blue and red channels (i.e. perform the inverse transform of
+// 'subtract green').
+static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
+ int y_start, int y_end, uint32_t* data) {
+ const int width = transform->xsize_;
+ const uint32_t* const data_end = data + (y_end - y_start) * width;
+ while (data < data_end) {
+ const uint32_t argb = *data;
+ // "* 0001001u" is equivalent to "(green << 16) + green)"
+ const uint32_t green = ((argb >> 8) & 0xff);
+ uint32_t red_blue = (argb & 0x00ff00ffu);
+ red_blue += (green << 16) | green;
+ red_blue &= 0x00ff00ffu;
+ *data++ = (argb & 0xff00ff00u) | red_blue;
+ }
+}
+
+typedef struct {
+ // Note: the members are uint8_t, so that any negative values are
+ // automatically converted to "mod 256" values.
+ uint8_t green_to_red_;
+ uint8_t green_to_blue_;
+ uint8_t red_to_blue_;
+} Multipliers;
+
+static WEBP_INLINE void MultipliersClear(Multipliers* m) {
+ m->green_to_red_ = 0;
+ m->green_to_blue_ = 0;
+ m->red_to_blue_ = 0;
+}
+
+static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
+ int8_t color) {
+ return (uint32_t)((int)(color_pred) * color) >> 5;
+}
+
+static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
+ Multipliers* const m) {
+ m->green_to_red_ = (color_code >> 0) & 0xff;
+ m->green_to_blue_ = (color_code >> 8) & 0xff;
+ m->red_to_blue_ = (color_code >> 16) & 0xff;
+}
+
+static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
+ return 0xff000000u |
+ ((uint32_t)(m->red_to_blue_) << 16) |
+ ((uint32_t)(m->green_to_blue_) << 8) |
+ m->green_to_red_;
+}
+
+static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
+ uint32_t argb, int inverse) {
+ const uint32_t green = argb >> 8;
+ const uint32_t red = argb >> 16;
+ uint32_t new_red = red;
+ uint32_t new_blue = argb;
+
+ if (inverse) {
+ new_red += ColorTransformDelta(m->green_to_red_, green);
+ new_red &= 0xff;
+ new_blue += ColorTransformDelta(m->green_to_blue_, green);
+ new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
+ new_blue &= 0xff;
+ } else {
+ new_red -= ColorTransformDelta(m->green_to_red_, green);
+ new_red &= 0xff;
+ new_blue -= ColorTransformDelta(m->green_to_blue_, green);
+ new_blue -= ColorTransformDelta(m->red_to_blue_, red);
+ new_blue &= 0xff;
+ }
+ return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
+}
+
+static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
+ int ix, int xsize) {
+ const uint32_t v = argb[ix];
+ if (ix >= xsize + 3) {
+ if (v == argb[ix - xsize] &&
+ argb[ix - 1] == argb[ix - xsize - 1] &&
+ argb[ix - 2] == argb[ix - xsize - 2] &&
+ argb[ix - 3] == argb[ix - xsize - 3]) {
+ return 1;
+ }
+ return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
+ } else if (ix >= 3) {
+ return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
+ }
+ return 0;
+}
+
+static float PredictionCostCrossColor(const int accumulated[256],
+ const int counts[256]) {
+ // Favor low entropy, locally and globally.
+ int i;
+ int combo[256];
+ for (i = 0; i < 256; ++i) {
+ combo[i] = accumulated[i] + counts[i];
+ }
+ return ShannonEntropy(combo, 256) +
+ ShannonEntropy(counts, 256) +
+ PredictionCostSpatial(counts, 3, 2.4); // Favor small absolute values.
+}
+
+static Multipliers GetBestColorTransformForTile(
+ int tile_x, int tile_y, int bits,
+ Multipliers prevX,
+ Multipliers prevY,
+ int step, int xsize, int ysize,
+ int* accumulated_red_histo,
+ int* accumulated_blue_histo,
+ const uint32_t* const argb) {
+ float best_diff = MAX_DIFF_COST;
+ float cur_diff;
+ const int halfstep = step / 2;
+ const int max_tile_size = 1 << bits;
+ const int tile_y_offset = tile_y * max_tile_size;
+ const int tile_x_offset = tile_x * max_tile_size;
+ int green_to_red;
+ int green_to_blue;
+ int red_to_blue;
+ int all_x_max = tile_x_offset + max_tile_size;
+ int all_y_max = tile_y_offset + max_tile_size;
+ Multipliers best_tx;
+ MultipliersClear(&best_tx);
+ if (all_x_max > xsize) {
+ all_x_max = xsize;
+ }
+ if (all_y_max > ysize) {
+ all_y_max = ysize;
+ }
+ for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
+ int histo[256] = { 0 };
+ int all_y;
+ Multipliers tx;
+ MultipliersClear(&tx);
+ tx.green_to_red_ = green_to_red & 0xff;
+
+ for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+ uint32_t predict;
+ int ix = all_y * xsize + tile_x_offset;
+ int all_x;
+ for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+ if (SkipRepeatedPixels(argb, ix, xsize)) {
+ continue;
+ }
+ predict = TransformColor(&tx, argb[ix], 0);
+ ++histo[(predict >> 16) & 0xff]; // red.
+ }
+ }
+ cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
+ if (tx.green_to_red_ == prevX.green_to_red_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if (tx.green_to_red_ == prevY.green_to_red_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if (tx.green_to_red_ == 0) {
+ cur_diff -= 3;
+ }
+ if (cur_diff < best_diff) {
+ best_diff = cur_diff;
+ best_tx = tx;
+ }
+ }
+ best_diff = MAX_DIFF_COST;
+ green_to_red = best_tx.green_to_red_;
+ for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
+ for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
+ int all_y;
+ int histo[256] = { 0 };
+ Multipliers tx;
+ tx.green_to_red_ = green_to_red;
+ tx.green_to_blue_ = green_to_blue;
+ tx.red_to_blue_ = red_to_blue;
+ for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+ uint32_t predict;
+ int all_x;
+ int ix = all_y * xsize + tile_x_offset;
+ for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+ if (SkipRepeatedPixels(argb, ix, xsize)) {
+ continue;
+ }
+ predict = TransformColor(&tx, argb[ix], 0);
+ ++histo[predict & 0xff]; // blue.
+ }
+ }
+ cur_diff =
+ PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
+ if (tx.green_to_blue_ == prevX.green_to_blue_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if (tx.green_to_blue_ == prevY.green_to_blue_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if (tx.red_to_blue_ == prevX.red_to_blue_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if (tx.red_to_blue_ == prevY.red_to_blue_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if (tx.green_to_blue_ == 0) {
+ cur_diff -= 3;
+ }
+ if (tx.red_to_blue_ == 0) {
+ cur_diff -= 3;
+ }
+ if (cur_diff < best_diff) {
+ best_diff = cur_diff;
+ best_tx = tx;
+ }
+ }
+ }
+ return best_tx;
+}
+
+static void CopyTileWithColorTransform(int xsize, int ysize,
+ int tile_x, int tile_y, int bits,
+ Multipliers color_transform,
+ uint32_t* const argb) {
+ int y;
+ int xscan = 1 << bits;
+ int yscan = 1 << bits;
+ tile_x <<= bits;
+ tile_y <<= bits;
+ if (xscan > xsize - tile_x) {
+ xscan = xsize - tile_x;
+ }
+ if (yscan > ysize - tile_y) {
+ yscan = ysize - tile_y;
+ }
+ yscan += tile_y;
+ for (y = tile_y; y < yscan; ++y) {
+ int ix = y * xsize + tile_x;
+ const int end_ix = ix + xscan;
+ for (; ix < end_ix; ++ix) {
+ argb[ix] = TransformColor(&color_transform, argb[ix], 0);
+ }
+ }
+}
+
+void VP8LColorSpaceTransform(int width, int height, int bits, int step,
+ uint32_t* const argb, uint32_t* image) {
+ const int max_tile_size = 1 << bits;
+ int tile_xsize = VP8LSubSampleSize(width, bits);
+ int tile_ysize = VP8LSubSampleSize(height, bits);
+ int accumulated_red_histo[256] = { 0 };
+ int accumulated_blue_histo[256] = { 0 };
+ int tile_y;
+ int tile_x;
+ Multipliers prevX;
+ Multipliers prevY;
+ MultipliersClear(&prevY);
+ MultipliersClear(&prevX);
+ for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
+ for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
+ Multipliers color_transform;
+ int all_x_max;
+ int y;
+ const int tile_y_offset = tile_y * max_tile_size;
+ const int tile_x_offset = tile_x * max_tile_size;
+ if (tile_y != 0) {
+ ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
+ ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
+ &prevY);
+ } else if (tile_x != 0) {
+ ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
+ }
+ color_transform =
+ GetBestColorTransformForTile(tile_x, tile_y, bits,
+ prevX, prevY,
+ step, width, height,
+ &accumulated_red_histo[0],
+ &accumulated_blue_histo[0],
+ argb);
+ image[tile_y * tile_xsize + tile_x] =
+ MultipliersToColorCode(&color_transform);
+ CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
+ color_transform, argb);
+
+ // Gather accumulated histogram data.
+ all_x_max = tile_x_offset + max_tile_size;
+ if (all_x_max > width) {
+ all_x_max = width;
+ }
+ for (y = 0; y < max_tile_size; ++y) {
+ int ix;
+ int all_x;
+ int all_y = tile_y_offset + y;
+ if (all_y >= height) {
+ break;
+ }
+ ix = all_y * width + tile_x_offset;
+ for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+ if (ix >= 2 &&
+ argb[ix] == argb[ix - 2] &&
+ argb[ix] == argb[ix - 1]) {
+ continue; // repeated pixels are handled by backward references
+ }
+ if (ix >= width + 2 &&
+ argb[ix - 2] == argb[ix - width - 2] &&
+ argb[ix - 1] == argb[ix - width - 1] &&
+ argb[ix] == argb[ix - width]) {
+ continue; // repeated pixels are handled by backward references
+ }
+ ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
+ ++accumulated_blue_histo[argb[ix] & 0xff];
+ }
+ }
+ }
+ }
+}
+
+// Color space inverse transform.
+static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
+ int y_start, int y_end, uint32_t* data) {
+ const int width = transform->xsize_;
+ const int mask = (1 << transform->bits_) - 1;
+ const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+ int y = y_start;
+ const uint32_t* pred_row =
+ transform->data_ + (y >> transform->bits_) * tiles_per_row;
+
+ while (y < y_end) {
+ const uint32_t* pred = pred_row;
+ Multipliers m = { 0, 0, 0 };
+ int x;
+
+ for (x = 0; x < width; ++x) {
+ if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
+ data[x] = TransformColor(&m, data[x], 1);
+ }
+ data += width;
+ ++y;
+ if ((y & mask) == 0) pred_row += tiles_per_row;;
+ }
+}
+
+// Separate out pixels packed together using pixel-bundling.
+static void ColorIndexInverseTransform(
+ const VP8LTransform* const transform,
+ int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
+ int y;
+ const int bits_per_pixel = 8 >> transform->bits_;
+ const int width = transform->xsize_;
+ const uint32_t* const color_map = transform->data_;
+ if (bits_per_pixel < 8) {
+ const int pixels_per_byte = 1 << transform->bits_;
+ const int count_mask = pixels_per_byte - 1;
+ const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
+ for (y = y_start; y < y_end; ++y) {
+ uint32_t packed_pixels = 0;
+ int x;
+ for (x = 0; x < width; ++x) {
+ // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
+ // increments of x. Fortunately, pixels_per_byte is a power of 2, so we
+ // can just use a mask for that instead of decrementing a counter.
+ if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
+ *dst++ = color_map[packed_pixels & bit_mask];
+ packed_pixels >>= bits_per_pixel;
+ }
+ }
+ } else {
+ for (y = y_start; y < y_end; ++y) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ *dst++ = color_map[((*src++) >> 8) & 0xff];
+ }
+ }
+ }
+}
+
+void VP8LInverseTransform(const VP8LTransform* const transform,
+ int row_start, int row_end,
+ const uint32_t* const in, uint32_t* const out) {
+ assert(row_start < row_end);
+ assert(row_end <= transform->ysize_);
+ switch (transform->type_) {
+ case SUBTRACT_GREEN:
+ AddGreenToBlueAndRed(transform, row_start, row_end, out);
+ break;
+ case PREDICTOR_TRANSFORM:
+ PredictorInverseTransform(transform, row_start, row_end, out);
+ if (row_end != transform->ysize_) {
+ // The last predicted row in this iteration will be the top-pred row
+ // for the first row in next iteration.
+ const int width = transform->xsize_;
+ memcpy(out - width, out + (row_end - row_start - 1) * width,
+ width * sizeof(*out));
+ }
+ break;
+ case CROSS_COLOR_TRANSFORM:
+ ColorSpaceInverseTransform(transform, row_start, row_end, out);
+ break;
+ case COLOR_INDEXING_TRANSFORM:
+ if (in == out && transform->bits_ > 0) {
+ // Move packed pixels to the end of unpacked region, so that unpacking
+ // can occur seamlessly.
+ // Also, note that this is the only transform that applies on
+ // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
+ // transforms work on effective width of xsize_.
+ const int out_stride = (row_end - row_start) * transform->xsize_;
+ const int in_stride = (row_end - row_start) *
+ VP8LSubSampleSize(transform->xsize_, transform->bits_);
+ uint32_t* const src = out + out_stride - in_stride;
+ memmove(src, out, in_stride * sizeof(*src));
+ ColorIndexInverseTransform(transform, row_start, row_end, src, out);
+ } else {
+ ColorIndexInverseTransform(transform, row_start, row_end, in, out);
+ }
+ break;
+ }
+}
+
+//------------------------------------------------------------------------------
+// Color space conversion.
+
+static int is_big_endian(void) {
+ static const union {
+ uint16_t w;
+ uint8_t b[2];
+ } tmp = { 1 };
+ return (tmp.b[0] != 1);
+}
+
+static void ConvertBGRAToRGB(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
+ const uint32_t* const src_end = src + num_pixels;
+ while (src < src_end) {
+ const uint32_t argb = *src++;
+ *dst++ = (argb >> 16) & 0xff;
+ *dst++ = (argb >> 8) & 0xff;
+ *dst++ = (argb >> 0) & 0xff;
+ }
+}
+
+static void ConvertBGRAToRGBA(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
+ const uint32_t* const src_end = src + num_pixels;
+ while (src < src_end) {
+ const uint32_t argb = *src++;
+ *dst++ = (argb >> 16) & 0xff;
+ *dst++ = (argb >> 8) & 0xff;
+ *dst++ = (argb >> 0) & 0xff;
+ *dst++ = (argb >> 24) & 0xff;
+ }
+}
+
+static void ConvertBGRAToRGBA4444(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
+ const uint32_t* const src_end = src + num_pixels;
+ while (src < src_end) {
+ const uint32_t argb = *src++;
+#ifdef ANDROID_WEBP_RGB
+ *dst++ = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
+ *dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
+#else
+ *dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
+ *dst++ = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
+#endif
+ }
+}
+
+static void ConvertBGRAToRGB565(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
+ const uint32_t* const src_end = src + num_pixels;
+ while (src < src_end) {
+ const uint32_t argb = *src++;
+#ifdef ANDROID_WEBP_RGB
+ *dst++ = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
+ *dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
+#else
+ *dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
+ *dst++ = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
+#endif
+ }
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
+ const uint32_t* const src_end = src + num_pixels;
+ while (src < src_end) {
+ const uint32_t argb = *src++;
+ *dst++ = (argb >> 0) & 0xff;
+ *dst++ = (argb >> 8) & 0xff;
+ *dst++ = (argb >> 16) & 0xff;
+ }
+}
+
+static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
+ int swap_on_big_endian) {
+ if (is_big_endian() == swap_on_big_endian) {
+ const uint32_t* const src_end = src + num_pixels;
+ while (src < src_end) {
+ uint32_t argb = *src++;
+#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
+ __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
+ *(uint32_t*)dst = argb;
+ dst += sizeof(argb);
+#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
+ argb = _byteswap_ulong(argb);
+ *(uint32_t*)dst = argb;
+ dst += sizeof(argb);
+#else
+ *dst++ = (argb >> 24) & 0xff;
+ *dst++ = (argb >> 16) & 0xff;
+ *dst++ = (argb >> 8) & 0xff;
+ *dst++ = (argb >> 0) & 0xff;
+#endif
+ }
+ } else {
+ memcpy(dst, src, num_pixels * sizeof(*src));
+ }
+}
+
+void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
+ WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
+ switch (out_colorspace) {
+ case MODE_RGB:
+ ConvertBGRAToRGB(in_data, num_pixels, rgba);
+ break;
+ case MODE_RGBA:
+ ConvertBGRAToRGBA(in_data, num_pixels, rgba);
+ break;
+ case MODE_rgbA:
+ ConvertBGRAToRGBA(in_data, num_pixels, rgba);
+ WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
+ break;
+ case MODE_BGR:
+ ConvertBGRAToBGR(in_data, num_pixels, rgba);
+ break;
+ case MODE_BGRA:
+ CopyOrSwap(in_data, num_pixels, rgba, 1);
+ break;
+ case MODE_bgrA:
+ CopyOrSwap(in_data, num_pixels, rgba, 1);
+ WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
+ break;
+ case MODE_ARGB:
+ CopyOrSwap(in_data, num_pixels, rgba, 0);
+ break;
+ case MODE_Argb:
+ CopyOrSwap(in_data, num_pixels, rgba, 0);
+ WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
+ break;
+ case MODE_RGBA_4444:
+ ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+ break;
+ case MODE_rgbA_4444:
+ ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+ WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
+ break;
+ case MODE_RGB_565:
+ ConvertBGRAToRGB565(in_data, num_pixels, rgba);
+ break;
+ default:
+ assert(0); // Code flow should not reach here.
+ }
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
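lossless.c above implements both halves of the 'subtract green' transform: VP8LSubtractGreenFromBlueAndRed() on the encoder side and AddGreenToBlueAndRed() for decoding. Since both directions adjust red and blue mod 256, the pair must round-trip exactly; a self-contained sketch of that check, using local copies of the two per-pixel steps rather than the libwebp entry points:

#include <assert.h>
#include <stdint.h>

static uint32_t SubtractGreen(uint32_t argb) {
  const uint32_t green = (argb >> 8) & 0xff;
  const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
  const uint32_t new_b = (((argb >> 0) & 0xff) - green) & 0xff;
  return (argb & 0xff00ff00u) | (new_r << 16) | new_b;
}

static uint32_t AddGreen(uint32_t argb) {
  const uint32_t green = (argb >> 8) & 0xff;
  uint32_t red_blue = argb & 0x00ff00ffu;
  red_blue += (green << 16) | green;   // add green to both channels at once...
  red_blue &= 0x00ff00ffu;             // ...keeping each channel mod 256
  return (argb & 0xff00ff00u) | red_blue;
}

int main(void) {
  const uint32_t pix = 0x80ff40c0u;    // A=0x80 R=0xff G=0x40 B=0xc0
  assert(AddGreen(SubtractGreen(pix)) == pix);
  return 0;
}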
diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h
new file mode 100644
index 00000000..22a91cbf
--- /dev/null
+++ b/src/dsp/lossless.h
@@ -0,0 +1,82 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+// Jyrki Alakuijala (jyrki@google.com)
+
+#ifndef WEBP_DSP_LOSSLESS_H_
+#define WEBP_DSP_LOSSLESS_H_
+
+#include "webp/types.h"
+#include "webp/decode.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+struct VP8LTransform; // Defined in dec/vp8li.h.
+
+// Performs the inverse transform of the data, given the transform information,
+// over the half-open row range [row_start, row_end).
+// The *in and *out pointers refer to the source and destination data
+// respectively, both corresponding to the intermediate row (row_start).
+void VP8LInverseTransform(const struct VP8LTransform* const transform,
+ int row_start, int row_end,
+ const uint32_t* const in, uint32_t* const out);
+
+// Subtracts green from blue and red channels.
+void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs);
+
+void VP8LResidualImage(int width, int height, int bits,
+ uint32_t* const argb, uint32_t* const argb_scratch,
+ uint32_t* const image);
+
+void VP8LColorSpaceTransform(int width, int height, int bits, int step,
+ uint32_t* const argb, uint32_t* image);
+
+//------------------------------------------------------------------------------
+// Color space conversion.
+
+// Converts from BGRA to other color spaces.
+void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
+ WEBP_CSP_MODE out_colorspace, uint8_t* const rgba);
+
+//------------------------------------------------------------------------------
+// Misc methods.
+
+// Computes the sampled size of 'size' when subsampling by 'sampling_bits'.
+static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
+ uint32_t sampling_bits) {
+ return (size + (1 << sampling_bits) - 1) >> sampling_bits;
+}
+
+// Faster logarithm for integers, with the property of log2(0) == 0.
+float VP8LFastLog2(int v);
+// Fast calculation of v * log2(v) for integer input.
+static WEBP_INLINE float VP8LFastSLog2(int v) { return VP8LFastLog2(v) * v; }
+
+// In-place difference of each component with mod 256.
+static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
+ const uint32_t alpha_and_green =
+ 0x00ff00ffu + (a & 0xff00ff00u) - (b & 0xff00ff00u);
+ const uint32_t red_and_blue =
+ 0xff00ff00u + (a & 0x00ff00ffu) - (b & 0x00ff00ffu);
+ return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // WEBP_DSP_LOSSLESS_H_
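VP8LSubSampleSize() above is a ceiling division by the tile size 1 << sampling_bits: it returns how many transform tiles are needed to cover 'size' pixels. A small standalone check of that behaviour, using a local copy of the formula:

#include <assert.h>
#include <stdint.h>

static uint32_t SubSampleSize(uint32_t size, uint32_t sampling_bits) {
  return (size + (1u << sampling_bits) - 1) >> sampling_bits;  // ceil(size / 2^bits)
}

int main(void) {
  assert(SubSampleSize(100, 4) == 7);  // 100 = 6 * 16 + 4 -> needs a 7th tile
  assert(SubSampleSize(96, 4) == 6);   // exact multiple of the 16-pixel tile
  assert(SubSampleSize(1, 4) == 1);    // even a single pixel occupies one tile
  return 0;
}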
diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c
new file mode 100644
index 00000000..4855eb14
--- /dev/null
+++ b/src/dsp/upsampling.c
@@ -0,0 +1,357 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// YUV to RGB upsampling functions.
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+#include "./yuv.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Fancy upsampler
+
+#ifdef FANCY_UPSAMPLING
+
+// Fancy upsampling functions to convert YUV to RGB
+WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
+
+// Given samples laid out in a square as:
+// [a b]
+// [c d]
+// we interpolate u/v as:
+// ([9*a + 3*b + 3*c + d 3*a + 9*b + 3*c + d] + [8 8]) / 16
+// ([3*a + b + 9*c + 3*d    a + 3*b + 3*c + 9*d] + [8 8]) / 16
+
+// We process u and v together, stashed into one 32-bit value (16 bits each).
+#define LOAD_UV(u,v) ((u) | ((v) << 16))
+
+#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
+ const uint8_t* top_u, const uint8_t* top_v, \
+ const uint8_t* cur_u, const uint8_t* cur_v, \
+ uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
+ int x; \
+ const int last_pixel_pair = (len - 1) >> 1; \
+ uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \
+ uint32_t l_uv = LOAD_UV(cur_u[0], cur_v[0]); /* left-sample */ \
+ if (top_y) { \
+ const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \
+ FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \
+ } \
+ if (bottom_y) { \
+ const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \
+ FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst); \
+ } \
+ for (x = 1; x <= last_pixel_pair; ++x) { \
+ const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]); /* top sample */ \
+ const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]); /* sample */ \
+ /* precompute invariant values associated with first and second diagonals*/\
+ const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u; \
+ const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3; \
+ const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3; \
+ if (top_y) { \
+ const uint32_t uv0 = (diag_12 + tl_uv) >> 1; \
+ const uint32_t uv1 = (diag_03 + t_uv) >> 1; \
+ FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \
+ top_dst + (2 * x - 1) * XSTEP); \
+ FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), \
+ top_dst + (2 * x - 0) * XSTEP); \
+ } \
+ if (bottom_y) { \
+ const uint32_t uv0 = (diag_03 + l_uv) >> 1; \
+ const uint32_t uv1 = (diag_12 + uv) >> 1; \
+ FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \
+ bottom_dst + (2 * x - 1) * XSTEP); \
+ FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), \
+ bottom_dst + (2 * x + 0) * XSTEP); \
+ } \
+ tl_uv = t_uv; \
+ l_uv = uv; \
+ } \
+ if (!(len & 1)) { \
+ if (top_y) { \
+ const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \
+ FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), \
+ top_dst + (len - 1) * XSTEP); \
+ } \
+ if (bottom_y) { \
+ const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \
+ FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), \
+ bottom_dst + (len - 1) * XSTEP); \
+ } \
+ } \
+}
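
Illustrative aside (not part of the diff): the UPSAMPLE_FUNC macro above applies the (9, 3, 3, 1)/16 weighting described in the comment before it; a scalar sketch of that weighting for one chroma sample, without the packing of u and v into a single word:

#include <assert.h>
#include <stdint.h>

// Interpolated chroma for the output pixel nearest to sample 'a' in the square
//   [a b]
//   [c d]
// i.e. (9*a + 3*b + 3*c + d + 8) / 16, with the +8 providing rounding.
static uint8_t FancyInterp(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
  return (uint8_t)((9 * a + 3 * b + 3 * c + d + 8) >> 4);
}

int main(void) {
  assert(FancyInterp(100, 100, 100, 100) == 100);  // flat areas are preserved
  assert(FancyInterp(0, 16, 16, 16) == 7);         // (0 + 48 + 48 + 16 + 8) / 16
  return 0;
}
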
+
+// All variants implemented.
+UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3)
+UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
+UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
+UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
+UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
+UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
+
+#undef LOAD_UV
+#undef UPSAMPLE_FUNC
+
+#endif // FANCY_UPSAMPLING
+
+//------------------------------------------------------------------------------
+// simple point-sampling
+
+#define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
+ const uint8_t* u, const uint8_t* v, \
+ uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
+ int i; \
+ for (i = 0; i < len - 1; i += 2) { \
+ FUNC(top_y[0], u[0], v[0], top_dst); \
+ FUNC(top_y[1], u[0], v[0], top_dst + XSTEP); \
+ FUNC(bottom_y[0], u[0], v[0], bottom_dst); \
+ FUNC(bottom_y[1], u[0], v[0], bottom_dst + XSTEP); \
+ top_y += 2; \
+ bottom_y += 2; \
+ u++; \
+ v++; \
+ top_dst += 2 * XSTEP; \
+ bottom_dst += 2 * XSTEP; \
+ } \
+ if (i == len - 1) { /* last one */ \
+ FUNC(top_y[0], u[0], v[0], top_dst); \
+ FUNC(bottom_y[0], u[0], v[0], bottom_dst); \
+ } \
+}
+
+// All variants implemented.
+SAMPLE_FUNC(SampleRgbLinePair, VP8YuvToRgb, 3)
+SAMPLE_FUNC(SampleBgrLinePair, VP8YuvToBgr, 3)
+SAMPLE_FUNC(SampleRgbaLinePair, VP8YuvToRgba, 4)
+SAMPLE_FUNC(SampleBgraLinePair, VP8YuvToBgra, 4)
+SAMPLE_FUNC(SampleArgbLinePair, VP8YuvToArgb, 4)
+SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2)
+SAMPLE_FUNC(SampleRgb565LinePair, VP8YuvToRgb565, 2)
+
+#undef SAMPLE_FUNC
+
+const WebPSampleLinePairFunc WebPSamplers[MODE_LAST] = {
+ SampleRgbLinePair, // MODE_RGB
+ SampleRgbaLinePair, // MODE_RGBA
+ SampleBgrLinePair, // MODE_BGR
+ SampleBgraLinePair, // MODE_BGRA
+ SampleArgbLinePair, // MODE_ARGB
+ SampleRgba4444LinePair, // MODE_RGBA_4444
+ SampleRgb565LinePair, // MODE_RGB_565
+ SampleRgbaLinePair, // MODE_rgbA
+ SampleBgraLinePair, // MODE_bgrA
+ SampleArgbLinePair, // MODE_Argb
+ SampleRgba4444LinePair // MODE_rgbA_4444
+};
+
+//------------------------------------------------------------------------------
+
+#if !defined(FANCY_UPSAMPLING)
+#define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC) \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
+ const uint8_t* top_u, const uint8_t* top_v, \
+ const uint8_t* bot_u, const uint8_t* bot_v, \
+ uint8_t* top_dst, uint8_t* bot_dst, int len) { \
+ const int half_len = len >> 1; \
+ int x; \
+ if (top_dst != NULL) { \
+ for (x = 0; x < half_len; ++x) { \
+ FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x + 0); \
+ FUNC(top_y[2 * x + 1], top_u[x], top_v[x], top_dst + 8 * x + 4); \
+ } \
+ if (len & 1) FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x); \
+ } \
+ if (bot_dst != NULL) { \
+ for (x = 0; x < half_len; ++x) { \
+ FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x + 0); \
+ FUNC(bot_y[2 * x + 1], bot_u[x], bot_v[x], bot_dst + 8 * x + 4); \
+ } \
+ if (len & 1) FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x); \
+ } \
+}
+
+DUAL_SAMPLE_FUNC(DualLineSamplerBGRA, VP8YuvToBgra)
+DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb)
+#undef DUAL_SAMPLE_FUNC
+
+#endif // !FANCY_UPSAMPLING
+
+WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
+ WebPInitUpsamplers();
+ VP8YUVInit();
+#ifdef FANCY_UPSAMPLING
+ return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB];
+#else
+ return (alpha_is_last ? DualLineSamplerBGRA : DualLineSamplerARGB);
+#endif
+}
+
+//------------------------------------------------------------------------------
+// YUV444 converter
+
+#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
+static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
+ uint8_t* dst, int len) { \
+ int i; \
+ for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \
+}
+
+YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb, 3)
+YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr, 3)
+YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba, 4)
+YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra, 4)
+YUV444_FUNC(Yuv444ToArgb, VP8YuvToArgb, 4)
+YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2)
+YUV444_FUNC(Yuv444ToRgb565, VP8YuvToRgb565, 2)
+
+#undef YUV444_FUNC
+
+const WebPYUV444Converter WebPYUV444Converters[MODE_LAST] = {
+ Yuv444ToRgb, // MODE_RGB
+ Yuv444ToRgba, // MODE_RGBA
+ Yuv444ToBgr, // MODE_BGR
+ Yuv444ToBgra, // MODE_BGRA
+ Yuv444ToArgb, // MODE_ARGB
+ Yuv444ToRgba4444, // MODE_RGBA_4444
+ Yuv444ToRgb565, // MODE_RGB_565
+ Yuv444ToRgba, // MODE_rgbA
+ Yuv444ToBgra, // MODE_bgrA
+ Yuv444ToArgb, // MODE_Argb
+ Yuv444ToRgba4444 // MODE_rgbA_4444
+};
+
+//------------------------------------------------------------------------------
+// Premultiplied modes
+
+// non-dithered modes
+
+// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
+// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
+// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
+#if 1 // (int)(x * a / 255.)
+#define MULTIPLIER(a) ((a) * 32897UL)
+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
+#else // (int)(x * a / 255. + .5)
+#define MULTIPLIER(a) ((a) * 65793UL)
+#define PREMULTIPLY(x, m) (((x) * (m) + (1UL << 23)) >> 24)
+#endif
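
Illustrative aside (not part of the diff): the bit-trick claimed in the comment above can be verified exhaustively over all 8-bit inputs; a minimal check:

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint32_t x, a;
  for (a = 0; a < 256; ++a) {
    const uint32_t mult = a * 32897u;            // MULTIPLIER(a)
    for (x = 0; x < 256; ++x) {
      const uint32_t fast = (x * mult) >> 23;    // PREMULTIPLY(x, mult)
      const uint32_t exact = (uint32_t)(x * a / 255.);
      assert(fast == exact);
    }
  }
  return 0;
}
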
+
+static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
+ int w, int h, int stride) {
+ while (h-- > 0) {
+ uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
+ const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
+ int i;
+ for (i = 0; i < w; ++i) {
+ const uint32_t a = alpha[4 * i];
+ if (a != 0xff) {
+ const uint32_t mult = MULTIPLIER(a);
+ rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
+ rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
+ rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
+ }
+ }
+ rgba += stride;
+ }
+}
+#undef MULTIPLIER
+#undef PREMULTIPLY
+
+// rgbA4444
+
+#define MULTIPLIER(a) ((a) * 0x1111) // 0x1111 ~= (1 << 16) / 15
+
+static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
+ return (x & 0xf0) | (x >> 4);
+}
+
+static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
+ return (x & 0x0f) | (x << 4);
+}
+
+static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
+ return (x * m) >> 16;
+}
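
Illustrative aside (not part of the diff): dither_hi()/dither_lo() expand one 4-bit channel to 8 bits by replicating the nibble, and multiply() with MULTIPLIER(a) approximates x * a / 15 in fixed point; a small standalone sketch of both properties:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static uint8_t DitherHi(uint8_t x) { return (x & 0xf0) | (x >> 4); }  // 0xA7 -> 0xAA
static uint8_t DitherLo(uint8_t x) { return (x & 0x0f) | (x << 4); }  // 0xA7 -> 0x77

int main(void) {
  int x, a;
  assert(DitherHi(0xA7) == 0xAA);
  assert(DitherLo(0xA7) == 0x77);
  // (x * a * 0x1111) >> 16 stays within one unit of the exact x * a / 15.
  for (a = 0; a <= 15; ++a) {
    for (x = 0; x < 256; ++x) {
      const int fast = (x * a * 0x1111) >> 16;
      const int exact = x * a / 15;
      assert(abs(fast - exact) <= 1);
    }
  }
  return 0;
}
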
+
+static void ApplyAlphaMultiply4444(uint8_t* rgba4444,
+ int w, int h, int stride) {
+ while (h-- > 0) {
+ int i;
+ for (i = 0; i < w; ++i) {
+ const uint8_t a = (rgba4444[2 * i + 1] & 0x0f);
+ const uint32_t mult = MULTIPLIER(a);
+ const uint8_t r = multiply(dither_hi(rgba4444[2 * i + 0]), mult);
+ const uint8_t g = multiply(dither_lo(rgba4444[2 * i + 0]), mult);
+ const uint8_t b = multiply(dither_hi(rgba4444[2 * i + 1]), mult);
+ rgba4444[2 * i + 0] = (r & 0xf0) | ((g >> 4) & 0x0f);
+ rgba4444[2 * i + 1] = (b & 0xf0) | a;
+ }
+ rgba4444 += stride;
+ }
+}
+#undef MULTIPLIER
+
+void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int)
+ = ApplyAlphaMultiply;
+void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int)
+ = ApplyAlphaMultiply4444;
+
+//------------------------------------------------------------------------------
+// Main call
+
+void WebPInitUpsamplers(void) {
+#ifdef FANCY_UPSAMPLING
+ WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
+ WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
+ WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
+ WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
+ WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
+ WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
+ WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
+
+ // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+ if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+ if (VP8GetCPUInfo(kSSE2)) {
+ WebPInitUpsamplersSSE2();
+ }
+#endif
+ }
+#endif // FANCY_UPSAMPLING
+}
+
+void WebPInitPremultiply(void) {
+ WebPApplyAlphaMultiply = ApplyAlphaMultiply;
+ WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply4444;
+
+#ifdef FANCY_UPSAMPLING
+ WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
+ WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
+ WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
+ WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+
+ if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+ if (VP8GetCPUInfo(kSSE2)) {
+ WebPInitPremultiplySSE2();
+ }
+#endif
+ }
+#endif // FANCY_UPSAMPLING
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/dec/io_sse2.c b/src/dsp/upsampling_sse2.c
index 0f42350b..8cb275a0 100644
--- a/src/dec/io_sse2.c
+++ b/src/dsp/upsampling_sse2.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,18 +9,21 @@
//
// Author: somnath@google.com (Somnath Banerjee)
-#if defined(__SSE2__) || defined(_MSC_VER)
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <emmintrin.h>
#include <string.h>
-#include "webpi.h"
-#include "yuv.h"
+#include "./yuv.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
+#ifdef FANCY_UPSAMPLING
+
// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
// u = (9*a + 3*b + 3*c + d + 8) / 16
// = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2
@@ -173,9 +176,6 @@ SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePairSSE2, VP8YuvToRgb, 3)
SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePairSSE2, VP8YuvToBgr, 3)
SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePairSSE2, VP8YuvToRgba, 4)
SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePairSSE2, VP8YuvToBgra, 4)
-// These two don't erase the alpha value
-SSE2_UPSAMPLE_FUNC(UpsampleRgbKeepAlphaLinePairSSE2, VP8YuvToRgb, 4)
-SSE2_UPSAMPLE_FUNC(UpsampleBgrKeepAlphaLinePairSSE2, VP8YuvToBgr, 4)
#undef GET_M
#undef PACK_AND_STORE
@@ -184,22 +184,26 @@ SSE2_UPSAMPLE_FUNC(UpsampleBgrKeepAlphaLinePairSSE2, VP8YuvToBgr, 4)
#undef CONVERT2RGB
#undef SSE2_UPSAMPLE_FUNC
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
void WebPInitUpsamplersSSE2(void) {
- WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairSSE2;
+ WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairSSE2;
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairSSE2;
- WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairSSE2;
- WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairSSE2;
+ WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairSSE2;
+ WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairSSE2;
+}
- WebPUpsamplersKeepAlpha[MODE_RGB] = UpsampleRgbLinePairSSE2;
- WebPUpsamplersKeepAlpha[MODE_RGBA] = UpsampleRgbKeepAlphaLinePairSSE2;
- WebPUpsamplersKeepAlpha[MODE_BGR] = UpsampleBgrLinePairSSE2;
- WebPUpsamplersKeepAlpha[MODE_BGRA] = UpsampleBgrKeepAlphaLinePairSSE2;
+void WebPInitPremultiplySSE2(void) {
+ WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairSSE2;
+ WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairSSE2;
}
+#endif // FANCY_UPSAMPLING
+
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
-#endif //__SSE2__ || _MSC_VER
+#endif // WEBP_USE_SSE2
diff --git a/src/dec/yuv.c b/src/dsp/yuv.c
index 2b203f7b..7f05f9a3 100644
--- a/src/dec/yuv.c
+++ b/src/dsp/yuv.c
@@ -1,4 +1,4 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,7 +9,7 @@
//
// Author: Skal (pascal.massimino@gmail.com)
-#include "yuv.h"
+#include "./yuv.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -24,7 +24,7 @@ uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
static int done = 0;
-static inline uint8_t clip(int v, int max_value) {
+static WEBP_INLINE uint8_t clip(int v, int max_value) {
return v < 0 ? 0 : v > max_value ? max_value : v;
}
diff --git a/src/dec/yuv.h b/src/dsp/yuv.h
index 5f16ee63..ee3587e3 100644
--- a/src/dec/yuv.h
+++ b/src/dsp/yuv.h
@@ -1,18 +1,18 @@
-// Copyright 2010 Google Inc.
+// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
-// inline YUV->RGB conversion function
+// inline YUV<->RGB conversion function
//
// Author: Skal (pascal.massimino@gmail.com)
-#ifndef WEBP_DEC_YUV_H_
-#define WEBP_DEC_YUV_H_
+#ifndef WEBP_DSP_YUV_H_
+#define WEBP_DSP_YUV_H_
-#include "webp/decode_vp8.h"
+#include "../dec/decode_vp8.h"
/*
* Define ANDROID_WEBP_RGB to enable specific optimizations for Android
@@ -22,6 +22,9 @@
#define ANDROID_WEBP_RGB
+//------------------------------------------------------------------------------
+// YUV -> RGB conversion
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@@ -35,8 +38,8 @@ extern int32_t VP8kVToG[256], VP8kUToG[256];
extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
extern uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
-static inline void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
- uint8_t* const rgb) {
+static WEBP_INLINE void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
+ uint8_t* const rgb) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
@@ -45,8 +48,8 @@ static inline void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
rgb[2] = VP8kClip[y + b_off - YUV_RANGE_MIN];
}
-static inline void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
- uint8_t* const rgb) {
+static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
+ uint8_t* const rgb) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
@@ -63,19 +66,13 @@ static inline void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
#endif
}
-static inline void VP8YuvToArgbKeepA(uint8_t y, uint8_t u, uint8_t v,
+static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const argb) {
- // Don't update Aplha (argb[0])
- VP8YuvToRgb(y, u, v, argb + 1);
-}
-
-static inline void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
- uint8_t* const argb) {
argb[0] = 0xff;
- VP8YuvToArgbKeepA(y, u, v, argb);
+ VP8YuvToRgb(y, u, v, argb + 1);
}
-static inline void VP8YuvToRgba4444KeepA(uint8_t y, uint8_t u, uint8_t v,
+static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const argb) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
@@ -83,26 +80,16 @@ static inline void VP8YuvToRgba4444KeepA(uint8_t y, uint8_t u, uint8_t v,
#ifdef ANDROID_WEBP_RGB
argb[1] = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
- argb[0] = (argb[0] & 0x0f) | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
+ argb[0] = 0x0f | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
#else
argb[0] = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
- argb[1] = (argb[1] & 0x0f) | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
-#endif
-}
-
-static inline void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
- uint8_t* const argb) {
-#ifdef ANDROID_WEBP_RGB
- argb[0] = 0x0f;
-#else
- argb[1] = 0x0f;
+ argb[1] = 0x0f | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
#endif
- VP8YuvToRgba4444KeepA(y, u, v, argb);
}
-static inline void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
- uint8_t* const bgr) {
+static WEBP_INLINE void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
+ uint8_t* const bgr) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
@@ -111,14 +98,14 @@ static inline void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN];
}
-static inline void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v,
- uint8_t* const bgra) {
+static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v,
+ uint8_t* const bgra) {
VP8YuvToBgr(y, u, v, bgra);
bgra[3] = 0xff;
}
-static inline void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
- uint8_t* const rgba) {
+static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
+ uint8_t* const rgba) {
VP8YuvToRgb(y, u, v, rgba);
rgba[3] = 0xff;
}
@@ -126,8 +113,36 @@ static inline void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
// Must be called before everything, to initialize the tables.
void VP8YUVInit(void);
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
+// More information at: http://en.wikipedia.org/wiki/YCbCr
+// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
+// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
+// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
+// We use 16-bit fixed-point operations.
+
+static WEBP_INLINE int VP8ClipUV(int v) {
+ v = (v + (257 << (YUV_FIX + 2 - 1))) >> (YUV_FIX + 2);
+ return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
+}
+
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
+ const int kRound = (1 << (YUV_FIX - 1)) + (16 << YUV_FIX);
+ const int luma = 16839 * r + 33059 * g + 6420 * b;
+ return (luma + kRound) >> YUV_FIX; // no need to clip
+}
+
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b) {
+ return VP8ClipUV(-9719 * r - 19081 * g + 28800 * b);
+}
+
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b) {
+ return VP8ClipUV(+28800 * r - 24116 * g - 4684 * b);
+}
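
Illustrative aside (not part of the diff): the luma coefficients above appear to be the weights from the comment scaled by 2^YUV_FIX; a standalone sketch comparing the same arithmetic with the floating-point formula. YUV_FIX is assumed to be 16 here, and the U/V helpers are not compared because the extra >> 2 in VP8ClipUV suggests they expect 2x2-accumulated inputs:

#include <assert.h>
#include <stdlib.h>

#define YUV_FIX 16   // assumed fixed-point precision, as in the comment above

static int RGBToY(int r, int g, int b) {   // same arithmetic as VP8RGBToY
  const int kRound = (1 << (YUV_FIX - 1)) + (16 << YUV_FIX);
  const int luma = 16839 * r + 33059 * g + 6420 * b;
  return (luma + kRound) >> YUV_FIX;
}

int main(void) {
  int r, g, b;
  for (r = 0; r < 256; r += 5) {
    for (g = 0; g < 256; g += 5) {
      for (b = 0; b < 256; b += 5) {
        const double y_float = 0.2569 * r + 0.5044 * g + 0.0979 * b + 16.;
        assert(abs(RGBToY(r, g, b) - (int)(y_float + .5)) <= 1);
      }
    }
  }
  return 0;
}
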
+
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
-#endif // WEBP_DEC_YUV_H_
+#endif /* WEBP_DSP_YUV_H_ */
diff --git a/src/enc/Android.mk b/src/enc/Android.mk
index 64f67439..7f38d40d 100644
--- a/src/enc/Android.mk
+++ b/src/enc/Android.mk
@@ -16,28 +16,48 @@ LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
- alpha.c \
- analysis.c \
- bit_writer.c \
- config.c \
- cost.c \
- dsp.c \
- dsp_sse2.c \
- filter.c \
- frame.c \
- iterator.c \
- layer.c \
- picture.c \
- quant.c \
- syntax.c \
- tree.c \
- webpenc.c
+ alpha.c \
+ analysis.c \
+ backward_references.c \
+ config.c \
+ cost.c \
+ filter.c \
+ frame.c\
+ histogram.c \
+ iterator.c \
+ layer.c \
+ picture.c \
+ quant.c \
+ syntax.c \
+ tree.c \
+ vp8l.c \
+ webpenc.c \
+ ../dsp/cpu.c \
+ ../dsp/dec.c \
+ ../dsp/dec_neon.c \
+ ../dsp/dec_sse2.c \
+ ../dsp/enc.c \
+ ../dsp/enc_sse2.c \
+ ../dsp/lossless.c \
+ ../dsp/upsampling.c \
+ ../dsp/upsampling_sse2.c \
+ ../dsp/yuv.c \
+ ../utils/bit_reader.c \
+ ../utils/bit_writer.c \
+ ../utils/color_cache.c \
+ ../utils/filters.c \
+ ../utils/huffman.c \
+ ../utils/huffman_encode.c \
+ ../utils/quant_levels.c \
+ ../utils/rescaler.c \
+ ../utils/thread.c \
+ ../utils/utils.c
LOCAL_CFLAGS := -DANDROID
LOCAL_C_INCLUDES += \
- $(LOCAL_PATH) \
- $(LOCAL_PATH)/../../include
+ $(LOCAL_PATH) \
+ $(LOCAL_PATH)/../../include
LOCAL_MODULE:= libwebp-encode
diff --git a/src/enc/alpha.c b/src/enc/alpha.c
index d5d4f882..376f786c 100644
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -11,98 +11,314 @@
#include <assert.h>
#include <stdlib.h>
-#include "vp8enci.h"
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-#include "zlib.h"
-#endif
+#include "./vp8enci.h"
+#include "../utils/filters.h"
+#include "../utils/quant_levels.h"
+#include "webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-#ifdef WEBP_EXPERIMENTAL_FEATURES
+// -----------------------------------------------------------------------------
+// Encodes the given alpha data via specified compression method 'method'.
+// The pre-processing (quantization) is performed if 'quality' is less than 100.
+// For such cases, the encoding is lossy. The valid range is [0, 100] for
+// 'quality' and [0, 1] for 'method':
+// 'method = 0' - No compression;
+// 'method = 1' - Use lossless coder on the alpha plane only
+// 'filter' values 0 to 3 correspond to prediction modes none, horizontal,
+// vertical & gradient, respectively. Filter value 4 will try all of the
+// prediction modes 0 to 3 and pick the best one.
+// 'effort_level': specifies how much effort must be spent to try and reduce
+// the compressed output size. In range 0 (quick) to 6 (slow).
+//
+// 'output' corresponds to the buffer containing compressed alpha data.
+// This buffer is allocated by this method and caller should call
+// free(*output) when done.
+// 'output_size' corresponds to size of this compressed alpha buffer.
+//
+// Returns 1 on successfully encoding the alpha, or
+//         0 if either:
+//           the quality or method is invalid, or
+//           memory allocation for the compressed data fails.
+
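
Illustrative aside (not part of the diff): EncodeAlphaInternal() further below packs the compression method, filter and pre-processing flag into the single ALPHA_HEADER_LEN byte; a hedged sketch of that packing. The constant values below are assumptions made for illustration only; the real definitions live in webp/format_constants.h:

#include <assert.h>
#include <stdint.h>

// Assumed values, for illustration only (see webp/format_constants.h).
#define ALPHA_NO_COMPRESSION        0
#define ALPHA_LOSSLESS_COMPRESSION  1
#define ALPHA_PREPROCESSED_LEVELS   1

// Mirrors "header = method | (filter << 2)" plus the pre-processing flag.
static uint8_t MakeAlphaHeader(int method, int filter, int reduce_levels) {
  uint8_t header = (uint8_t)(method | (filter << 2));
  if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
  return header;
}

int main(void) {
  // Lossless coding, gradient filter (3), with level quantization:
  assert(MakeAlphaHeader(ALPHA_LOSSLESS_COMPRESSION, 3, 1) == 0x1d);
  // Uncompressed, no filter, no pre-processing:
  assert(MakeAlphaHeader(ALPHA_NO_COMPRESSION, 0, 0) == 0x00);
  return 0;
}
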
+#include "../enc/vp8li.h"
+
+static int EncodeLossless(const uint8_t* const data, int width, int height,
+ int effort_level, // in [0..6] range
+ VP8BitWriter* const bw,
+ WebPAuxStats* const stats) {
+ int ok = 0;
+ WebPConfig config;
+ WebPPicture picture;
+ VP8LBitWriter tmp_bw;
+
+ WebPPictureInit(&picture);
+ picture.width = width;
+ picture.height = height;
+ picture.use_argb = 1;
+ picture.stats = stats;
+ if (!WebPPictureAlloc(&picture)) return 0;
+
+ // Transfer the alpha values to the green channel.
+ {
+ int i, j;
+ uint32_t* dst = picture.argb;
+ const uint8_t* src = data;
+ for (j = 0; j < picture.height; ++j) {
+ for (i = 0; i < picture.width; ++i) {
+ dst[i] = (src[i] << 8) | 0xff000000u;
+ }
+ src += width;
+ dst += picture.argb_stride;
+ }
+ }
+
+ WebPConfigInit(&config);
+ config.lossless = 1;
+ config.method = effort_level; // impact is very small
+ // Set moderate default quality setting for alpha. Higher qualities (80 and
+ // above) could be very slow.
+ config.quality = 10.f + 15.f * effort_level;
+ if (config.quality > 100.f) config.quality = 100.f;
+
+ ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
+ ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
+ WebPPictureFree(&picture);
+ if (ok) {
+ const uint8_t* const data = VP8LBitWriterFinish(&tmp_bw);
+ const size_t data_size = VP8LBitWriterNumBytes(&tmp_bw);
+ VP8BitWriterAppend(bw, data, data_size);
+ }
+ VP8LBitWriterDestroy(&tmp_bw);
+ return ok && !bw->error_;
+}
+
+// -----------------------------------------------------------------------------
+
+static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
+ int method, int filter, int reduce_levels,
+ int effort_level, // in [0..6] range
+ uint8_t* const tmp_alpha,
+ VP8BitWriter* const bw,
+ WebPAuxStats* const stats) {
+ int ok = 0;
+ const uint8_t* alpha_src;
+ WebPFilterFunc filter_func;
+ uint8_t header;
+ size_t expected_size;
+ const size_t data_size = width * height;
+
+ assert((uint64_t)data_size == (uint64_t)width * height); // as per spec
+ assert(filter >= 0 && filter < WEBP_FILTER_LAST);
+ assert(method >= ALPHA_NO_COMPRESSION);
+ assert(method <= ALPHA_LOSSLESS_COMPRESSION);
+ assert(sizeof(header) == ALPHA_HEADER_LEN);
+ // TODO(skal): have a common function and #define's to validate alpha params.
+
+ expected_size =
+ (method == ALPHA_NO_COMPRESSION) ? (ALPHA_HEADER_LEN + data_size)
+ : (data_size >> 5);
+ header = method | (filter << 2);
+ if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
+
+ VP8BitWriterInit(bw, expected_size);
+ VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN);
+
+ filter_func = WebPFilters[filter];
+ if (filter_func) {
+ filter_func(data, width, height, 1, width, tmp_alpha);
+ alpha_src = tmp_alpha;
+ } else {
+ alpha_src = data;
+ }
+
+ if (method == ALPHA_NO_COMPRESSION) {
+ ok = VP8BitWriterAppend(bw, alpha_src, width * height);
+ ok = ok && !bw->error_;
+ } else {
+ ok = EncodeLossless(alpha_src, width, height, effort_level, bw, stats);
+ VP8BitWriterFinish(bw);
+ }
+ return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+// TODO(skal): move to dsp/ ?
+static void CopyPlane(const uint8_t* src, int src_stride,
+ uint8_t* dst, int dst_stride, int width, int height) {
+ while (height-- > 0) {
+ memcpy(dst, src, width);
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+static int EncodeAlpha(VP8Encoder* const enc,
+ int quality, int method, int filter,
+ int effort_level,
+ uint8_t** const output, size_t* const output_size) {
+ const WebPPicture* const pic = enc->pic_;
+ const int width = pic->width;
+ const int height = pic->height;
-#define CHUNK_SIZE 8192
+ uint8_t* quant_alpha = NULL;
+ const size_t data_size = width * height;
+ uint64_t sse = 0;
+ int ok = 1;
+ const int reduce_levels = (quality < 100);
-//-----------------------------------------------------------------------------
+ // quick sanity checks
+ assert((uint64_t)data_size == (uint64_t)width * height); // as per spec
+ assert(enc != NULL && pic != NULL && pic->a != NULL);
+ assert(output != NULL && output_size != NULL);
+ assert(width > 0 && height > 0);
+ assert(pic->a_stride >= width);
+ assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST);
+
+ if (quality < 0 || quality > 100) {
+ return 0;
+ }
-static int CompressAlpha(const uint8_t* data, size_t data_size,
- uint8_t** output, size_t* output_size,
- int algo) {
- int ret = Z_OK;
- z_stream strm;
- unsigned char chunk[CHUNK_SIZE];
+ if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) {
+ return 0;
+ }
- *output = NULL;
- *output_size = 0;
- memset(&strm, 0, sizeof(strm));
- if (deflateInit(&strm, algo ? Z_BEST_SPEED : Z_BEST_COMPRESSION) != Z_OK) {
+ quant_alpha = (uint8_t*)malloc(data_size);
+ if (quant_alpha == NULL) {
return 0;
}
- strm.next_in = (unsigned char*)data;
- strm.avail_in = data_size;
- do {
- size_t size_out;
-
- strm.next_out = chunk;
- strm.avail_out = CHUNK_SIZE;
- ret = deflate(&strm, Z_FINISH);
- if (ret == Z_STREAM_ERROR) {
- break;
+
+ // Extract alpha data (width x height) from raw_data (stride x height).
+ CopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
+
+ if (reduce_levels) { // No Quantization required for 'quality = 100'.
+ // 16 alpha levels give quite a low MSE w.r.t. the original alpha plane and are
+ // therefore mapped to the moderate quality 70. Hence Quality:[0, 70] ->
+ // Levels:[2, 16] and Quality:(70, 100] -> Levels:(16, 256].
+ const int alpha_levels = (quality <= 70) ? (2 + quality / 5)
+ : (16 + (quality - 70) * 8);
+ ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, &sse);
+ }
+
+ if (ok) {
+ VP8BitWriter bw;
+ int test_filter;
+ uint8_t* filtered_alpha = NULL;
+
+ // We always test WEBP_FILTER_NONE first.
+ ok = EncodeAlphaInternal(quant_alpha, width, height,
+ method, WEBP_FILTER_NONE, reduce_levels,
+ effort_level, NULL, &bw, pic->stats);
+ if (!ok) {
+ VP8BitWriterWipeOut(&bw);
+ goto End;
}
- size_out = CHUNK_SIZE - strm.avail_out;
- if (size_out) {
- size_t new_size = *output_size + size_out;
- uint8_t* new_output = realloc(*output, new_size);
- if (new_output == NULL) {
- ret = Z_MEM_ERROR;
- break;
- }
- memcpy(new_output + *output_size, chunk, size_out);
- *output_size = new_size;
- *output = new_output;
+
+ if (filter == WEBP_FILTER_FAST) { // Quick estimate of a second candidate?
+ filter = EstimateBestFilter(quant_alpha, width, height, width);
+ }
+ // Stop?
+ if (filter == WEBP_FILTER_NONE) {
+ goto Ok;
}
- } while (ret != Z_STREAM_END || strm.avail_out == 0);
- deflateEnd(&strm);
- if (ret != Z_STREAM_END) {
- free(*output);
- output_size = 0;
- return 0;
+ filtered_alpha = (uint8_t*)malloc(data_size);
+ ok = (filtered_alpha != NULL);
+ if (!ok) {
+ goto End;
+ }
+
+ // Try the other mode(s).
+ {
+ WebPAuxStats best_stats;
+ size_t best_score = VP8BitWriterSize(&bw);
+
+ memset(&best_stats, 0, sizeof(best_stats)); // prevent spurious warning
+ if (pic->stats != NULL) best_stats = *pic->stats;
+ for (test_filter = WEBP_FILTER_HORIZONTAL;
+ ok && (test_filter <= WEBP_FILTER_GRADIENT);
+ ++test_filter) {
+ VP8BitWriter tmp_bw;
+ if (filter != WEBP_FILTER_BEST && test_filter != filter) {
+ continue;
+ }
+ ok = EncodeAlphaInternal(quant_alpha, width, height,
+ method, test_filter, reduce_levels,
+ effort_level, filtered_alpha, &tmp_bw,
+ pic->stats);
+ if (ok) {
+ const size_t score = VP8BitWriterSize(&tmp_bw);
+ if (score < best_score) {
+ // swap bitwriter objects.
+ VP8BitWriter tmp = tmp_bw;
+ tmp_bw = bw;
+ bw = tmp;
+ best_score = score;
+ if (pic->stats != NULL) best_stats = *pic->stats;
+ }
+ } else {
+ VP8BitWriterWipeOut(&bw);
+ }
+ VP8BitWriterWipeOut(&tmp_bw);
+ }
+ if (pic->stats != NULL) *pic->stats = best_stats;
+ }
+ Ok:
+ if (ok) {
+ *output_size = VP8BitWriterSize(&bw);
+ *output = VP8BitWriterBuf(&bw);
+ if (pic->stats != NULL) { // need stats?
+ pic->stats->coded_size += (int)(*output_size);
+ enc->sse_[3] = sse;
+ }
+ }
+ free(filtered_alpha);
}
- return 1;
+ End:
+ free(quant_alpha);
+ return ok;
}
-#endif /* WEBP_EXPERIMENTAL_FEATURES */
-void VP8EncInitAlpha(VP8Encoder* enc) {
- enc->has_alpha_ = (enc->pic_->a != NULL);
+//------------------------------------------------------------------------------
+// Main calls
+
+void VP8EncInitAlpha(VP8Encoder* const enc) {
+ enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0;
}
-void VP8EncCodeAlphaBlock(VP8EncIterator* it) {
- (void)it;
- // Nothing for now. We just ZLIB-compress in the end.
-}
-
-int VP8EncFinishAlpha(VP8Encoder* enc) {
+int VP8EncFinishAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) {
-#ifdef WEBP_EXPERIMENTAL_FEATURES
- const WebPPicture* pic = enc->pic_;
- assert(pic->a);
- if (!CompressAlpha(pic->a, pic->width * pic->height,
- &enc->alpha_data_, &enc->alpha_data_size_,
- enc->config_->alpha_compression)) {
+ const WebPConfig* config = enc->config_;
+ uint8_t* tmp_data = NULL;
+ size_t tmp_size = 0;
+ const int effort_level = config->method; // maps to [0..6]
+ const WEBP_FILTER_TYPE filter =
+ (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
+ (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
+ WEBP_FILTER_BEST;
+
+ if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
+ filter, effort_level, &tmp_data, &tmp_size)) {
return 0;
}
-#endif
+ if (tmp_size != (uint32_t)tmp_size) { // Sanity check.
+ free(tmp_data);
+ return 0;
+ }
+ enc->alpha_data_size_ = (uint32_t)tmp_size;
+ enc->alpha_data_ = tmp_data;
}
- return 1;
+ return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
}
-void VP8EncDeleteAlpha(VP8Encoder* enc) {
+void VP8EncDeleteAlpha(VP8Encoder* const enc) {
free(enc->alpha_data_);
enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0;
diff --git a/src/enc/analysis.c b/src/enc/analysis.c
index 8f84bd58..22cfb492 100644
--- a/src/enc/analysis.c
+++ b/src/enc/analysis.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -13,8 +13,9 @@
#include <string.h>
#include <assert.h>
-#include "vp8enci.h"
-#include "cost.h"
+#include "./vp8enci.h"
+#include "./cost.h"
+#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -26,7 +27,7 @@ static int ClipAlpha(int alpha) {
return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Smooth the segment map by replacing isolated block by the majority of its
// neighbours.
@@ -35,7 +36,8 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
const int w = enc->mb_w_;
const int h = enc->mb_h_;
const int majority_cnt_3_x_3_grid = 5;
- uint8_t* tmp = (uint8_t*)malloc(w * h * sizeof(uint8_t));
+ uint8_t* const tmp = (uint8_t*)WebPSafeMalloc((uint64_t)w * h, sizeof(*tmp));
+ assert((uint64_t)(w * h) == (uint64_t)w * h); // no overflow, as per spec
if (tmp == NULL) return;
for (y = 1; y < h - 1; ++y) {
@@ -69,7 +71,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
free(tmp);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Finalize Segment probability based on the coding tree
static int GetProba(int a, int b) {
@@ -112,7 +114,7 @@ static void SetSegmentProbas(VP8Encoder* const enc) {
}
}
-static inline int clip(int v, int m, int M) {
+static WEBP_INLINE int clip(int v, int m, int M) {
return v < m ? m : v > M ? M : v;
}
@@ -139,13 +141,13 @@ static void SetSegmentAlphas(VP8Encoder* const enc,
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Simplified k-Means, to assign Nb segments based on alpha-histogram
static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
const int nb = enc->segment_hdr_.num_segments_;
int centers[NUM_MB_SEGMENTS];
- int weighted_average;
+ int weighted_average = 0;
int map[256];
int a, n, k;
int min_a = 0, max_a = 255, range_a;
@@ -206,9 +208,9 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
// Map each original value to the closest centroid
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
VP8MBInfo* const mb = &enc->mb_info_[n];
- const int a = mb->alpha_;
- mb->segment_ = map[a];
- mb->alpha_ = centers[map[a]]; // just for the record.
+ const int alpha = mb->alpha_;
+ mb->segment_ = map[alpha];
+ mb->alpha_ = centers[map[alpha]]; // just for the record.
}
if (nb > 1) {
@@ -220,7 +222,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas.
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Macroblock analysis: collect histogram for each mode, deduce the maximal
// susceptibility and set best modes for this macroblock.
// Segment assignment is done later.
@@ -253,7 +255,7 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
int best_alpha) {
- int modes[16];
+ uint8_t modes[16];
const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES;
int i4_alpha = 0;
VP8IteratorStartI4(it);
@@ -328,7 +330,7 @@ static void MBAnalyze(VP8EncIterator* const it,
it->mb_->alpha_ = best_alpha; // Informative only.
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Main analysis loop:
// Collect all susceptibilities for each macroblock and record their
// distribution in alphas[]. Segments is assigned a-posteriori, based on
@@ -339,6 +341,7 @@ static void MBAnalyze(VP8EncIterator* const it,
// this stage.
int VP8EncAnalyze(VP8Encoder* const enc) {
+ int ok = 1;
int alphas[256] = { 0 };
VP8EncIterator it;
@@ -347,12 +350,13 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
do {
VP8IteratorImport(&it);
MBAnalyze(&it, alphas, &enc->uv_alpha_);
+ ok = VP8IteratorProgress(&it, 20);
// Let's pretend we have perfect lossless reconstruction.
- } while (VP8IteratorNext(&it, it.yuv_in_));
+ } while (ok && VP8IteratorNext(&it, it.yuv_in_));
enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_;
- AssignSegments(enc, alphas);
+ if (ok) AssignSegments(enc, alphas);
- return 1;
+ return ok;
}
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/src/enc/backward_references.c b/src/enc/backward_references.c
new file mode 100644
index 00000000..b8c8ece8
--- /dev/null
+++ b/src/enc/backward_references.c
@@ -0,0 +1,874 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../dsp/lossless.h"
+#include "../utils/color_cache.h"
+#include "../utils/utils.h"
+
+#define VALUES_IN_BYTE 256
+
+#define HASH_BITS 18
+#define HASH_SIZE (1 << HASH_BITS)
+#define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL)
+
+// 1M window (4M bytes) minus 120 special codes for short distances.
+#define WINDOW_SIZE ((1 << 20) - 120)
+
+// Bounds for the match length.
+#define MIN_LENGTH 2
+#define MAX_LENGTH 4096
+
+typedef struct {
+ // Stores the most recently added position with the given hash value.
+ int32_t hash_to_first_index_[HASH_SIZE];
+ // chain_[pos] stores the previous position with the same hash value
+ // for every pixel in the image.
+ int32_t* chain_;
+} HashChain;
+
+// -----------------------------------------------------------------------------
+
+static const uint8_t plane_to_code_lut[128] = {
+ 96, 73, 55, 39, 23, 13, 5, 1, 255, 255, 255, 255, 255, 255, 255, 255,
+ 101, 78, 58, 42, 26, 16, 8, 2, 0, 3, 9, 17, 27, 43, 59, 79,
+ 102, 86, 62, 46, 32, 20, 10, 6, 4, 7, 11, 21, 33, 47, 63, 87,
+ 105, 90, 70, 52, 37, 28, 18, 14, 12, 15, 19, 29, 38, 53, 71, 91,
+ 110, 99, 82, 66, 48, 35, 30, 24, 22, 25, 31, 36, 49, 67, 83, 100,
+ 115, 108, 94, 76, 64, 50, 44, 40, 34, 41, 45, 51, 65, 77, 95, 109,
+ 118, 113, 103, 92, 80, 68, 60, 56, 54, 57, 61, 69, 81, 93, 104, 114,
+ 119, 116, 111, 106, 97, 88, 84, 74, 72, 75, 85, 89, 98, 107, 112, 117
+};
+
+static int DistanceToPlaneCode(int xsize, int dist) {
+ const int yoffset = dist / xsize;
+ const int xoffset = dist - yoffset * xsize;
+ if (xoffset <= 8 && yoffset < 8) {
+ return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
+ } else if (xoffset > xsize - 8 && yoffset < 7) {
+ return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
+ }
+ return dist + 120;
+}
+
+static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
+ const uint32_t* const array2,
+ const int max_limit) {
+ int match_len = 0;
+ while (match_len < max_limit && array1[match_len] == array2[match_len]) {
+ ++match_len;
+ }
+ return match_len;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LBackwardRefs
+
+void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs) {
+ if (refs != NULL) {
+ refs->refs = NULL;
+ refs->size = 0;
+ refs->max_size = 0;
+ }
+}
+
+void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {
+ if (refs != NULL) {
+ free(refs->refs);
+ VP8LInitBackwardRefs(refs);
+ }
+}
+
+int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size) {
+ assert(refs != NULL);
+ refs->size = 0;
+ refs->max_size = 0;
+ refs->refs = (PixOrCopy*)WebPSafeMalloc((uint64_t)max_size,
+ sizeof(*refs->refs));
+ if (refs->refs == NULL) return 0;
+ refs->max_size = max_size;
+ return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Hash chains
+
+static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) {
+ uint64_t key = ((uint64_t)(argb[1]) << 32) | argb[0];
+ key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS);
+ return key;
+}
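
Illustrative aside (not part of the diff): GetPixPairHash64() above hashes two adjacent ARGB pixels at once by packing them into 64 bits, multiplying by a large odd constant and keeping the top HASH_BITS bits; a standalone sketch of the same arithmetic:

#include <assert.h>
#include <stdint.h>

#define MY_HASH_BITS 18                           // same as HASH_BITS above
#define MY_HASH_SIZE (1 << MY_HASH_BITS)
#define MY_HASH_MULTIPLIER 0xc6a4a7935bd1e995ULL  // same as HASH_MULTIPLIER above

static uint64_t PixPairHash(uint32_t pix0, uint32_t pix1) {
  uint64_t key = ((uint64_t)pix1 << 32) | pix0;   // two pixels -> one 64-bit key
  key = (key * MY_HASH_MULTIPLIER) >> (64 - MY_HASH_BITS);
  return key;                                     // always < MY_HASH_SIZE
}

int main(void) {
  const uint64_t h = PixPairHash(0xff0000ffu, 0xff00ff00u);
  assert(h < MY_HASH_SIZE);
  // The same pixel pair always hashes to the same bucket.
  assert(h == PixPairHash(0xff0000ffu, 0xff00ff00u));
  return 0;
}
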
+
+static int HashChainInit(HashChain* const p, int size) {
+ int i;
+ p->chain_ = (int*)WebPSafeMalloc((uint64_t)size, sizeof(*p->chain_));
+ if (p->chain_ == NULL) {
+ return 0;
+ }
+ for (i = 0; i < size; ++i) {
+ p->chain_[i] = -1;
+ }
+ for (i = 0; i < HASH_SIZE; ++i) {
+ p->hash_to_first_index_[i] = -1;
+ }
+ return 1;
+}
+
+static void HashChainDelete(HashChain* const p) {
+ if (p != NULL) {
+ free(p->chain_);
+ free(p);
+ }
+}
+
+// Insertion of two pixels at a time.
+static void HashChainInsert(HashChain* const p,
+ const uint32_t* const argb, int pos) {
+ const uint64_t hash_code = GetPixPairHash64(argb);
+ p->chain_[pos] = p->hash_to_first_index_[hash_code];
+ p->hash_to_first_index_[hash_code] = pos;
+}
+
+static int HashChainFindCopy(const HashChain* const p,
+ int quality, int index, int xsize,
+ const uint32_t* const argb, int maxlen,
+ int* const distance_ptr,
+ int* const length_ptr) {
+ const uint64_t hash_code = GetPixPairHash64(&argb[index]);
+ int prev_length = 0;
+ int64_t best_val = 0;
+ int best_length = 0;
+ int best_distance = 0;
+ const uint32_t* const argb_start = argb + index;
+ const int iter_min_mult = (quality < 50) ? 2 : (quality < 75) ? 4 : 8;
+ const int iter_min = -quality * iter_min_mult;
+ int iter_cnt = 10 + (quality >> 1);
+ const int min_pos = (index > WINDOW_SIZE) ? index - WINDOW_SIZE : 0;
+ int pos;
+
+ assert(xsize > 0);
+ for (pos = p->hash_to_first_index_[hash_code];
+ pos >= min_pos;
+ pos = p->chain_[pos]) {
+ int64_t val;
+ int curr_length;
+ if (iter_cnt < 0) {
+ if (iter_cnt < iter_min || best_val >= 0xff0000) {
+ break;
+ }
+ }
+ --iter_cnt;
+ if (best_length != 0 &&
+ argb[pos + best_length - 1] != argb_start[best_length - 1]) {
+ continue;
+ }
+ curr_length = FindMatchLength(argb + pos, argb_start, maxlen);
+ if (curr_length < prev_length) {
+ continue;
+ }
+ val = 65536 * curr_length;
+ // Favoring 2d locality here gives savings for certain images.
+ if (index - pos < 9 * xsize) {
+ const int y = (index - pos) / xsize;
+ int x = (index - pos) % xsize;
+ if (x > xsize / 2) {
+ x = xsize - x;
+ }
+ if (x <= 7 && x >= -8) {
+ val -= y * y + x * x;
+ } else {
+ val -= 9 * 9 + 9 * 9;
+ }
+ } else {
+ val -= 9 * 9 + 9 * 9;
+ }
+ if (best_val < val) {
+ prev_length = curr_length;
+ best_val = val;
+ best_length = curr_length;
+ best_distance = index - pos;
+ if (curr_length >= MAX_LENGTH) {
+ break;
+ }
+ if ((best_distance == 1 || best_distance == xsize) &&
+ best_length >= 128) {
+ break;
+ }
+ }
+ }
+ *distance_ptr = best_distance;
+ *length_ptr = best_length;
+ return (best_length >= MIN_LENGTH);
+}
+
+static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) {
+ int size = refs->size;
+ while (length >= MAX_LENGTH) {
+ refs->refs[size++] = PixOrCopyCreateCopy(1, MAX_LENGTH);
+ length -= MAX_LENGTH;
+ }
+ if (length > 0) {
+ refs->refs[size++] = PixOrCopyCreateCopy(1, length);
+ }
+ refs->size = size;
+}
+
+static void BackwardReferencesRle(int xsize, int ysize,
+ const uint32_t* const argb,
+ VP8LBackwardRefs* const refs) {
+ const int pix_count = xsize * ysize;
+ int match_len = 0;
+ int i;
+ refs->size = 0;
+ PushBackCopy(refs, match_len); // i=0 case
+ refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[0]);
+ for (i = 1; i < pix_count; ++i) {
+ if (argb[i] == argb[i - 1]) {
+ ++match_len;
+ } else {
+ PushBackCopy(refs, match_len);
+ match_len = 0;
+ refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[i]);
+ }
+ }
+ PushBackCopy(refs, match_len);
+}
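
Illustrative aside (not part of the diff): BackwardReferencesRle() above turns every run of identical pixels into a literal followed by distance-1 copies; a simplified standalone model of that pass, using a plain struct in place of PixOrCopy and ignoring the MAX_LENGTH splitting:

#include <assert.h>
#include <stdint.h>

typedef struct { int is_copy; uint32_t argb; int len; } Token;  // simplified PixOrCopy

// Returns the number of tokens written to 'out' (sized >= 2 * num_pixels).
static int RleTokens(const uint32_t* argb, int num_pixels, Token* out) {
  int i, n = 0, run = 0;
  out[n].is_copy = 0; out[n].argb = argb[0]; ++n;        // first pixel is a literal
  for (i = 1; i < num_pixels; ++i) {
    if (argb[i] == argb[i - 1]) {
      ++run;                                             // extend the distance-1 run
    } else {
      if (run > 0) { out[n].is_copy = 1; out[n].len = run; ++n; run = 0; }
      out[n].is_copy = 0; out[n].argb = argb[i]; ++n;    // new color -> literal
    }
  }
  if (run > 0) { out[n].is_copy = 1; out[n].len = run; ++n; }
  return n;
}

int main(void) {
  const uint32_t pix[6] = { 1, 1, 1, 2, 2, 3 };
  Token t[12];
  const int n = RleTokens(pix, 6, t);
  // literal(1), copy(len=2), literal(2), copy(len=1), literal(3)
  assert(n == 5 && !t[0].is_copy && t[1].is_copy && t[1].len == 2 &&
         !t[2].is_copy && t[3].is_copy && t[3].len == 1 && !t[4].is_copy);
  return 0;
}
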
+
+static int BackwardReferencesHashChain(int xsize, int ysize,
+ const uint32_t* const argb,
+ int cache_bits, int quality,
+ VP8LBackwardRefs* const refs) {
+ int i;
+ int ok = 0;
+ int cc_init = 0;
+ const int use_color_cache = (cache_bits > 0);
+ const int pix_count = xsize * ysize;
+ HashChain* const hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
+ VP8LColorCache hashers;
+
+ if (hash_chain == NULL) return 0;
+ if (use_color_cache) {
+ cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+ if (!cc_init) goto Error;
+ }
+
+ if (!HashChainInit(hash_chain, pix_count)) goto Error;
+
+ refs->size = 0;
+ for (i = 0; i < pix_count; ) {
+ // Alternative#1: Code the pixels starting at 'i' using backward reference.
+ int offset = 0;
+ int len = 0;
+ if (i < pix_count - 1) { // FindCopy(i,..) reads pixels at [i] and [i + 1].
+ int maxlen = pix_count - i;
+ if (maxlen > MAX_LENGTH) {
+ maxlen = MAX_LENGTH;
+ }
+ HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
+ &offset, &len);
+ }
+ if (len >= MIN_LENGTH) {
+ // Alternative#2: Insert the pixel at 'i' as literal, and code the
+ // pixels starting at 'i + 1' using backward reference.
+ int offset2 = 0;
+ int len2 = 0;
+ int k;
+ HashChainInsert(hash_chain, &argb[i], i);
+ if (i < pix_count - 2) { // FindCopy(i+1,..) reads [i + 1] and [i + 2].
+ int maxlen = pix_count - (i + 1);
+ if (maxlen > MAX_LENGTH) {
+ maxlen = MAX_LENGTH;
+ }
+ HashChainFindCopy(hash_chain, quality,
+ i + 1, xsize, argb, maxlen, &offset2, &len2);
+ if (len2 > len + 1) {
+ const uint32_t pixel = argb[i];
+ // Alternative#2 is a better match. So push pixel at 'i' as literal.
+ if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
+ const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
+ refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix);
+ } else {
+ refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel);
+ }
+ ++refs->size;
+ if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
+ i++; // Backward reference to be done for next pixel.
+ len = len2;
+ offset = offset2;
+ }
+ }
+ if (len >= MAX_LENGTH) {
+ len = MAX_LENGTH - 1;
+ }
+ refs->refs[refs->size++] = PixOrCopyCreateCopy(offset, len);
+ if (use_color_cache) {
+ for (k = 0; k < len; ++k) {
+ VP8LColorCacheInsert(&hashers, argb[i + k]);
+ }
+ }
+ // Add to the hash_chain (but cannot add the last pixel).
+ {
+ const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+ for (k = 1; k < last; ++k) {
+ HashChainInsert(hash_chain, &argb[i + k], i + k);
+ }
+ }
+ i += len;
+ } else {
+ const uint32_t pixel = argb[i];
+ if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
+ // push pixel as a PixOrCopyCreateCacheIdx pixel
+ const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
+ refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix);
+ } else {
+ refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel);
+ }
+ ++refs->size;
+ if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
+ if (i + 1 < pix_count) {
+ HashChainInsert(hash_chain, &argb[i], i);
+ }
+ ++i;
+ }
+ }
+ ok = 1;
+Error:
+ if (cc_init) VP8LColorCacheClear(&hashers);
+ HashChainDelete(hash_chain);
+ return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ double alpha_[VALUES_IN_BYTE];
+ double red_[VALUES_IN_BYTE];
+ double literal_[PIX_OR_COPY_CODES_MAX];
+ double blue_[VALUES_IN_BYTE];
+ double distance_[NUM_DISTANCE_CODES];
+} CostModel;
+
+static int BackwardReferencesTraceBackwards(
+ int xsize, int ysize, int recursive_cost_model,
+ const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs);
+
+static void ConvertPopulationCountTableToBitEstimates(
+ int num_symbols, const int population_counts[], double output[]) {
+ int sum = 0;
+ int nonzeros = 0;
+ int i;
+ for (i = 0; i < num_symbols; ++i) {
+ sum += population_counts[i];
+ if (population_counts[i] > 0) {
+ ++nonzeros;
+ }
+ }
+ if (nonzeros <= 1) {
+ memset(output, 0, num_symbols * sizeof(*output));
+ } else {
+ const double logsum = VP8LFastLog2(sum);
+ for (i = 0; i < num_symbols; ++i) {
+ output[i] = logsum - VP8LFastLog2(population_counts[i]);
+ }
+ }
+}
+
+static int CostModelBuild(CostModel* const m, int xsize, int ysize,
+ int recursion_level, const uint32_t* const argb,
+ int cache_bits) {
+ int ok = 0;
+ VP8LHistogram histo;
+ VP8LBackwardRefs refs;
+ const int quality = 100;
+
+ if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize)) goto Error;
+
+ if (recursion_level > 0) {
+ if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
+ argb, cache_bits, &refs)) {
+ goto Error;
+ }
+ } else {
+ if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality,
+ &refs)) {
+ goto Error;
+ }
+ }
+ VP8LHistogramCreate(&histo, &refs, cache_bits);
+ ConvertPopulationCountTableToBitEstimates(
+ VP8LHistogramNumCodes(&histo), histo.literal_, m->literal_);
+ ConvertPopulationCountTableToBitEstimates(
+ VALUES_IN_BYTE, histo.red_, m->red_);
+ ConvertPopulationCountTableToBitEstimates(
+ VALUES_IN_BYTE, histo.blue_, m->blue_);
+ ConvertPopulationCountTableToBitEstimates(
+ VALUES_IN_BYTE, histo.alpha_, m->alpha_);
+ ConvertPopulationCountTableToBitEstimates(
+ NUM_DISTANCE_CODES, histo.distance_, m->distance_);
+ ok = 1;
+
+ Error:
+ VP8LClearBackwardRefs(&refs);
+ return ok;
+}
+
+static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
+ return m->alpha_[v >> 24] +
+ m->red_[(v >> 16) & 0xff] +
+ m->literal_[(v >> 8) & 0xff] +
+ m->blue_[v & 0xff];
+}
+
+static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
+ const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
+ return m->literal_[literal_idx];
+}
+
+static WEBP_INLINE double GetLengthCost(const CostModel* const m,
+ uint32_t length) {
+ int code, extra_bits_count, extra_bits_value;
+ PrefixEncode(length, &code, &extra_bits_count, &extra_bits_value);
+ return m->literal_[VALUES_IN_BYTE + code] + extra_bits_count;
+}
+
+static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
+ uint32_t distance) {
+ int code, extra_bits_count, extra_bits_value;
+ PrefixEncode(distance, &code, &extra_bits_count, &extra_bits_value);
+ return m->distance_[code] + extra_bits_count;
+}
+
+static int BackwardReferencesHashChainDistanceOnly(
+ int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
+ int cache_bits, uint32_t* const dist_array) {
+ int i;
+ int ok = 0;
+ int cc_init = 0;
+ const int quality = 100;
+ const int pix_count = xsize * ysize;
+ const int use_color_cache = (cache_bits > 0);
+ double* const cost =
+ (double*)WebPSafeMalloc((uint64_t)pix_count, sizeof(*cost));
+ CostModel* cost_model = (CostModel*)malloc(sizeof(*cost_model));
+ HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
+ VP8LColorCache hashers;
+ const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
+ const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
+
+ if (cost == NULL || cost_model == NULL || hash_chain == NULL) goto Error;
+
+ if (!HashChainInit(hash_chain, pix_count)) goto Error;
+
+ if (use_color_cache) {
+ cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+ if (!cc_init) goto Error;
+ }
+
+ if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
+ cache_bits)) {
+ goto Error;
+ }
+
+ for (i = 0; i < pix_count; ++i) cost[i] = 1e100;
+
+ // We loop one pixel at a time, but store all currently best points to
+ // non-processed locations from this point.
+ dist_array[0] = 0;
+ for (i = 0; i < pix_count; ++i) {
+ double prev_cost = 0.0;
+ int shortmax;
+ if (i > 0) {
+ prev_cost = cost[i - 1];
+ }
+ for (shortmax = 0; shortmax < 2; ++shortmax) {
+ int offset = 0;
+ int len = 0;
+ if (i < pix_count - 1) { // FindCopy reads pixels at [i] and [i + 1].
+ int maxlen = shortmax ? 2 : MAX_LENGTH;
+ if (maxlen > pix_count - i) {
+ maxlen = pix_count - i;
+ }
+ HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
+ &offset, &len);
+ }
+ if (len >= MIN_LENGTH) {
+ const int code = DistanceToPlaneCode(xsize, offset);
+ const double distance_cost =
+ prev_cost + GetDistanceCost(cost_model, code);
+ int k;
+ for (k = 1; k < len; ++k) {
+ const double cost_val =
+ distance_cost + GetLengthCost(cost_model, k);
+ if (cost[i + k] > cost_val) {
+ cost[i + k] = cost_val;
+ dist_array[i + k] = k + 1;
+ }
+ }
+ // This 'if' is for speed-up only. It roughly doubles the speed, and
+ // degrades compression by about 0.1%.
+ if (len >= 128 && code < 2) {
+ // Long copy for short distances, let's skip the middle
+ // lookups for better copies.
+ // 1) insert the hashes.
+ if (use_color_cache) {
+ for (k = 0; k < len; ++k) {
+ VP8LColorCacheInsert(&hashers, argb[i + k]);
+ }
+ }
+ // 2) Add to the hash_chain (but cannot add the last pixel)
+ {
+ const int last = (len < pix_count - 1 - i) ? len
+ : pix_count - 1 - i;
+ for (k = 0; k < last; ++k) {
+ HashChainInsert(hash_chain, &argb[i + k], i + k);
+ }
+ }
+ // 3) jump.
+ i += len - 1; // for loop does ++i, thus -1 here.
+ goto next_symbol;
+ }
+ }
+ }
+ if (i < pix_count - 1) {
+ HashChainInsert(hash_chain, &argb[i], i);
+ }
+ {
+ // inserting a literal pixel
+ double cost_val = prev_cost;
+ if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
+ const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
+ cost_val += GetCacheCost(cost_model, ix) * mul0;
+ } else {
+ cost_val += GetLiteralCost(cost_model, argb[i]) * mul1;
+ }
+ if (cost[i] > cost_val) {
+ cost[i] = cost_val;
+ dist_array[i] = 1; // only one is inserted.
+ }
+ if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+ }
+ next_symbol: ;
+ }
+ // Last pixel still to do, it can only be a single step if not reached
+ // through cheaper means already.
+ ok = 1;
+Error:
+ if (cc_init) VP8LColorCacheClear(&hashers);
+ HashChainDelete(hash_chain);
+ free(cost_model);
+ free(cost);
+ return ok;
+}
+
+static int TraceBackwards(const uint32_t* const dist_array,
+ int dist_array_size,
+ uint32_t** const chosen_path,
+ int* const chosen_path_size) {
+ int i;
+ // Count how many.
+ int count = 0;
+ for (i = dist_array_size - 1; i >= 0; ) {
+ int k = dist_array[i];
+ assert(k >= 1);
+ ++count;
+ i -= k;
+ }
+ // Allocate.
+ *chosen_path_size = count;
+ *chosen_path =
+ (uint32_t*)WebPSafeMalloc((uint64_t)count, sizeof(**chosen_path));
+ if (*chosen_path == NULL) return 0;
+
+ // Write in reverse order.
+ for (i = dist_array_size - 1; i >= 0; ) {
+ int k = dist_array[i];
+ assert(k >= 1);
+ (*chosen_path)[--count] = k;
+ i -= k;
+ }
+ return 1;
+}
+
+static int BackwardReferencesHashChainFollowChosenPath(
+ int xsize, int ysize, const uint32_t* const argb, int cache_bits,
+ const uint32_t* const chosen_path, int chosen_path_size,
+ VP8LBackwardRefs* const refs) {
+ const int quality = 100;
+ const int pix_count = xsize * ysize;
+ const int use_color_cache = (cache_bits > 0);
+ int size = 0;
+ int i = 0;
+ int k;
+ int ix;
+ int ok = 0;
+ int cc_init = 0;
+ HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
+ VP8LColorCache hashers;
+
+ if (hash_chain == NULL || !HashChainInit(hash_chain, pix_count)) {
+ goto Error;
+ }
+ if (use_color_cache) {
+ cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+ if (!cc_init) goto Error;
+ }
+
+ refs->size = 0;
+ for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
+ int offset = 0;
+ int len = 0;
+ int maxlen = chosen_path[ix];
+ if (maxlen != 1) {
+ HashChainFindCopy(hash_chain, quality,
+ i, xsize, argb, maxlen, &offset, &len);
+ assert(len == maxlen);
+ refs->refs[size] = PixOrCopyCreateCopy(offset, len);
+ if (use_color_cache) {
+ for (k = 0; k < len; ++k) {
+ VP8LColorCacheInsert(&hashers, argb[i + k]);
+ }
+ }
+ {
+ const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+ for (k = 0; k < last; ++k) {
+ HashChainInsert(hash_chain, &argb[i + k], i + k);
+ }
+ }
+ i += len;
+ } else {
+ if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
+ // push pixel as a color cache index
+ const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]);
+ refs->refs[size] = PixOrCopyCreateCacheIdx(idx);
+ } else {
+ refs->refs[size] = PixOrCopyCreateLiteral(argb[i]);
+ }
+ if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+ if (i + 1 < pix_count) {
+ HashChainInsert(hash_chain, &argb[i], i);
+ }
+ ++i;
+ }
+ }
+ assert(size <= refs->max_size);
+ refs->size = size;
+ ok = 1;
+Error:
+ if (cc_init) VP8LColorCacheClear(&hashers);
+ HashChainDelete(hash_chain);
+ return ok;
+}
+
+// Returns 1 on success.
+static int BackwardReferencesTraceBackwards(int xsize, int ysize,
+ int recursive_cost_model,
+ const uint32_t* const argb,
+ int cache_bits,
+ VP8LBackwardRefs* const refs) {
+ int ok = 0;
+ const int dist_array_size = xsize * ysize;
+ uint32_t* chosen_path = NULL;
+ int chosen_path_size = 0;
+ uint32_t* dist_array =
+ (uint32_t*)WebPSafeMalloc((uint64_t)dist_array_size, sizeof(*dist_array));
+
+ if (dist_array == NULL) goto Error;
+
+ if (!BackwardReferencesHashChainDistanceOnly(
+ xsize, ysize, recursive_cost_model, argb, cache_bits, dist_array)) {
+ goto Error;
+ }
+ if (!TraceBackwards(dist_array, dist_array_size,
+ &chosen_path, &chosen_path_size)) {
+ goto Error;
+ }
+ free(dist_array); // no need to retain this memory any longer
+ dist_array = NULL;
+ if (!BackwardReferencesHashChainFollowChosenPath(
+ xsize, ysize, argb, cache_bits, chosen_path, chosen_path_size, refs)) {
+ goto Error;
+ }
+ ok = 1;
+ Error:
+ free(chosen_path);
+ free(dist_array);
+ return ok;
+}
+
+static void BackwardReferences2DLocality(int xsize,
+ VP8LBackwardRefs* const refs) {
+ int i;
+ for (i = 0; i < refs->size; ++i) {
+ if (PixOrCopyIsCopy(&refs->refs[i])) {
+ const int dist = refs->refs[i].argb_or_distance;
+ const int transformed_dist = DistanceToPlaneCode(xsize, dist);
+ refs->refs[i].argb_or_distance = transformed_dist;
+ }
+ }
+}
+
+int VP8LGetBackwardReferences(int width, int height,
+ const uint32_t* const argb,
+ int quality, int cache_bits, int use_2d_locality,
+ VP8LBackwardRefs* const best) {
+ int ok = 0;
+ int lz77_is_useful;
+ VP8LBackwardRefs refs_rle, refs_lz77;
+ const int num_pix = width * height;
+
+ VP8LBackwardRefsAlloc(&refs_rle, num_pix);
+ VP8LBackwardRefsAlloc(&refs_lz77, num_pix);
+ VP8LInitBackwardRefs(best);
+ if (refs_rle.refs == NULL || refs_lz77.refs == NULL) {
+ Error1:
+ VP8LClearBackwardRefs(&refs_rle);
+ VP8LClearBackwardRefs(&refs_lz77);
+ goto End;
+ }
+
+ if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality,
+ &refs_lz77)) {
+ goto End;
+ }
+ // Backward Reference using RLE only.
+ BackwardReferencesRle(width, height, argb, &refs_rle);
+
+ {
+ double bit_cost_lz77, bit_cost_rle;
+ VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo));
+ if (histo == NULL) goto Error1;
+ // Evaluate lz77 coding
+ VP8LHistogramCreate(histo, &refs_lz77, cache_bits);
+ bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
+ // Evaluate RLE coding
+ VP8LHistogramCreate(histo, &refs_rle, cache_bits);
+ bit_cost_rle = VP8LHistogramEstimateBits(histo);
+ // Decide if LZ77 is useful.
+ lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
+ free(histo);
+ }
+
+ // Choose appropriate backward reference.
+ if (lz77_is_useful) {
+ // TraceBackwards is costly. Run it for higher qualities.
+ const int try_lz77_trace_backwards = (quality >= 75);
+ *best = refs_lz77; // default guess: lz77 is better
+ VP8LClearBackwardRefs(&refs_rle);
+ if (try_lz77_trace_backwards) {
+ const int recursion_level = (num_pix < 320 * 200) ? 1 : 0;
+ VP8LBackwardRefs refs_trace;
+ if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) {
+ goto End;
+ }
+ if (BackwardReferencesTraceBackwards(
+ width, height, recursion_level, argb, cache_bits, &refs_trace)) {
+ VP8LClearBackwardRefs(&refs_lz77);
+ *best = refs_trace;
+ }
+ }
+ } else {
+ VP8LClearBackwardRefs(&refs_lz77);
+ *best = refs_rle;
+ }
+
+ if (use_2d_locality) BackwardReferences2DLocality(width, best);
+
+ ok = 1;
+
+ End:
+ if (!ok) {
+ VP8LClearBackwardRefs(best);
+ }
+ return ok;
+}
+
+// Returns 1 on success.
+static int ComputeCacheHistogram(const uint32_t* const argb,
+ int xsize, int ysize,
+ const VP8LBackwardRefs* const refs,
+ int cache_bits,
+ VP8LHistogram* const histo) {
+ int pixel_index = 0;
+ int i;
+ uint32_t k;
+ VP8LColorCache hashers;
+ const int use_color_cache = (cache_bits > 0);
+ int cc_init = 0;
+
+ if (use_color_cache) {
+ cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+ if (!cc_init) return 0;
+ }
+
+ for (i = 0; i < refs->size; ++i) {
+ const PixOrCopy* const v = &refs->refs[i];
+ if (PixOrCopyIsLiteral(v)) {
+ if (use_color_cache &&
+ VP8LColorCacheContains(&hashers, argb[pixel_index])) {
+ // push pixel as a cache index
+ const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]);
+ const PixOrCopy token = PixOrCopyCreateCacheIdx(ix);
+ VP8LHistogramAddSinglePixOrCopy(histo, &token);
+ } else {
+ VP8LHistogramAddSinglePixOrCopy(histo, v);
+ }
+ } else {
+ VP8LHistogramAddSinglePixOrCopy(histo, v);
+ }
+ if (use_color_cache) {
+ for (k = 0; k < PixOrCopyLength(v); ++k) {
+ VP8LColorCacheInsert(&hashers, argb[pixel_index + k]);
+ }
+ }
+ pixel_index += PixOrCopyLength(v);
+ }
+ assert(pixel_index == xsize * ysize);
+  (void)xsize; // xsize is otherwise unused in non-debug compilations.
+  (void)ysize; // ysize is otherwise unused in non-debug compilations.
+ if (cc_init) VP8LColorCacheClear(&hashers);
+ return 1;
+}
+
+// Estimates how many bits to use for the color cache; returns 1 on success.
+int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
+ int xsize, int ysize,
+ int* const best_cache_bits) {
+ int ok = 0;
+ int cache_bits;
+ double lowest_entropy = 1e99;
+ VP8LBackwardRefs refs;
+ static const double kSmallPenaltyForLargeCache = 4.0;
+ static const int quality = 30;
+ if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize) ||
+ !BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, &refs)) {
+ goto Error;
+ }
+ for (cache_bits = 0; cache_bits <= MAX_COLOR_CACHE_BITS; ++cache_bits) {
+ double cur_entropy;
+ VP8LHistogram histo;
+ VP8LHistogramInit(&histo, cache_bits);
+ ComputeCacheHistogram(argb, xsize, ysize, &refs, cache_bits, &histo);
+ cur_entropy = VP8LHistogramEstimateBits(&histo) +
+ kSmallPenaltyForLargeCache * cache_bits;
+ if (cache_bits == 0 || cur_entropy < lowest_entropy) {
+ *best_cache_bits = cache_bits;
+ lowest_entropy = cur_entropy;
+ }
+ }
+ ok = 1;
+ Error:
+ VP8LClearBackwardRefs(&refs);
+ return ok;
+}
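
A minimal sketch of the backtracking idea behind TraceBackwards() above, assuming
dist_array[] holds, for each pixel, the length of the step chosen to reach it
(1 for a literal, k for a copy of length k). The image size and array contents
below are made up purely for illustration.

#include <stdio.h>

int main(void) {
  // Hypothetical result of the distance-only pass for a 10-pixel image:
  // pixels 0..3 are literals, pixels 4..9 are covered by one copy of length 6.
  const unsigned int dist_array[10] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 6 };
  int i;
  // Walk backwards from the last pixel, jumping by the stored step length,
  // just like the counting and writing loops in TraceBackwards().
  for (i = 10 - 1; i >= 0; i -= (int)dist_array[i]) {
    printf("step of length %u ends at pixel %d\n", dist_array[i], i);
  }
  return 0;
}
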
diff --git a/src/enc/backward_references.h b/src/enc/backward_references.h
new file mode 100644
index 00000000..cda7c2b1
--- /dev/null
+++ b/src/enc/backward_references.h
@@ -0,0 +1,212 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#ifndef WEBP_ENC_BACKWARD_REFERENCES_H_
+#define WEBP_ENC_BACKWARD_REFERENCES_H_
+
+#include <assert.h>
+#include <stdlib.h>
+#include "webp/types.h"
+#include "webp/format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// The spec allows 11 bits, but we use 9 to reduce memory consumption during
+// encoding. Using 9 instead of 11 only costs about 0.25 % of compression density.
+#define MAX_COLOR_CACHE_BITS 9
+
+// Maximum number of codes we will ever use:
+#define PIX_OR_COPY_CODES_MAX \
+ (NUM_LITERAL_CODES + NUM_LENGTH_CODES + (1 << MAX_COLOR_CACHE_BITS))
+
+// -----------------------------------------------------------------------------
+// PrefixEncode()
+
+// use GNU builtins where available.
+#if defined(__GNUC__) && \
+ ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+ return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+}
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+ unsigned long first_set_bit;
+ return _BitScanReverse(&first_set_bit, n) ? first_set_bit : -1;
+}
+#else
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+ int log = 0;
+ uint32_t value = n;
+ int i;
+
+ if (value == 0) return -1;
+ for (i = 4; i >= 0; --i) {
+ const int shift = (1 << i);
+ const uint32_t x = value >> shift;
+ if (x != 0) {
+ value = x;
+ log += shift;
+ }
+ }
+ return log;
+}
+#endif
+
+static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
+ const int floor = BitsLog2Floor(n);
+ if (n == (n & ~(n - 1))) // zero or a power of two.
+ return floor;
+ else
+ return floor + 1;
+}
+
+// Splitting of distance and length codes into prefixes and
+// extra bits. The prefixes are encoded with an entropy code
+// while the extra bits are stored just as normal bits.
+static WEBP_INLINE void PrefixEncode(int distance, int* const code,
+ int* const extra_bits_count,
+ int* const extra_bits_value) {
+ // Collect the two most significant bits where the highest bit is 1.
+ const int highest_bit = BitsLog2Floor(--distance);
+  // The '& 0x3f' keeps the shift well defined when highest_bit is -1
+  // (no bit set) or is the least significant bit.
+ const int second_highest_bit =
+ (distance >> ((highest_bit - 1) & 0x3f)) & 1;
+ *extra_bits_count = (highest_bit > 0) ? (highest_bit - 1) : 0;
+ *extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
+ *code = (highest_bit > 0) ? (2 * highest_bit + second_highest_bit)
+ : (highest_bit == 0) ? 1 : 0;
+}
+
+// -----------------------------------------------------------------------------
+// PixOrCopy
+
+enum Mode {
+ kLiteral,
+ kCacheIdx,
+ kCopy,
+ kNone
+};
+
+typedef struct {
+  // mode is stored as uint8_t so that the memory layout is exactly 8 bytes.
+ uint8_t mode;
+ uint16_t len;
+ uint32_t argb_or_distance;
+} PixOrCopy;
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateCopy(uint32_t distance,
+ uint16_t len) {
+ PixOrCopy retval;
+ retval.mode = kCopy;
+ retval.argb_or_distance = distance;
+ retval.len = len;
+ return retval;
+}
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateCacheIdx(int idx) {
+ PixOrCopy retval;
+ assert(idx >= 0);
+ assert(idx < (1 << MAX_COLOR_CACHE_BITS));
+ retval.mode = kCacheIdx;
+ retval.argb_or_distance = idx;
+ retval.len = 1;
+ return retval;
+}
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateLiteral(uint32_t argb) {
+ PixOrCopy retval;
+ retval.mode = kLiteral;
+ retval.argb_or_distance = argb;
+ retval.len = 1;
+ return retval;
+}
+
+static WEBP_INLINE int PixOrCopyIsLiteral(const PixOrCopy* const p) {
+ return (p->mode == kLiteral);
+}
+
+static WEBP_INLINE int PixOrCopyIsCacheIdx(const PixOrCopy* const p) {
+ return (p->mode == kCacheIdx);
+}
+
+static WEBP_INLINE int PixOrCopyIsCopy(const PixOrCopy* const p) {
+ return (p->mode == kCopy);
+}
+
+static WEBP_INLINE uint32_t PixOrCopyLiteral(const PixOrCopy* const p,
+ int component) {
+ assert(p->mode == kLiteral);
+ return (p->argb_or_distance >> (component * 8)) & 0xff;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyLength(const PixOrCopy* const p) {
+ return p->len;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyArgb(const PixOrCopy* const p) {
+ assert(p->mode == kLiteral);
+ return p->argb_or_distance;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyCacheIdx(const PixOrCopy* const p) {
+ assert(p->mode == kCacheIdx);
+ assert(p->argb_or_distance < (1U << MAX_COLOR_CACHE_BITS));
+ return p->argb_or_distance;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) {
+ assert(p->mode == kCopy);
+ return p->argb_or_distance;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LBackwardRefs
+
+typedef struct {
+ PixOrCopy* refs;
+ int size; // currently used
+ int max_size; // maximum capacity
+} VP8LBackwardRefs;
+
+// Initialize the object. Must be called first. 'refs' can be NULL.
+void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs);
+
+// Release memory and re-initialize the object. 'refs' can be NULL.
+void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
+
+// Allocate 'max_size' references. Returns false in case of memory error.
+int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size);
+
+// -----------------------------------------------------------------------------
+// Main entry points
+
+// Evaluates the best possible backward references for the specified quality.
+// Further optimizes for 2D locality if the use_2d_locality flag is set.
+int VP8LGetBackwardReferences(int width, int height,
+ const uint32_t* const argb,
+ int quality, int cache_bits, int use_2d_locality,
+ VP8LBackwardRefs* const best);
+
+// Produces an estimate of a good color cache size for the image.
+int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
+ int xsize, int ysize,
+ int* const best_cache_bits);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif // WEBP_ENC_BACKWARD_REFERENCES_H_
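
As a reading aid for PrefixEncode() above, a small stand-alone example that works
through one arbitrary distance value (78); it mirrors the inline arithmetic, but
none of these local names are part of the library.

#include <stdio.h>

int main(void) {
  const int distance = 78;       // arbitrary example value
  const int d = distance - 1;    // 77 = 0b1001101
  const int highest_bit = 6;     // BitsLog2Floor(77)
  const int second_highest_bit = (d >> (highest_bit - 1)) & 1;     // bit 5 of 77 -> 0
  const int extra_bits_count = highest_bit - 1;                    // 5
  const int extra_bits_value = d & ((1 << extra_bits_count) - 1);  // 77 & 0x1f = 13
  const int code = 2 * highest_bit + second_highest_bit;           // 2 * 6 + 0 = 12
  printf("distance %d -> code %d, %d extra bits with value %d\n",
         distance, code, extra_bits_count, extra_bits_value);
  return 0;
}
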
diff --git a/src/enc/bit_writer.h b/src/enc/bit_writer.h
deleted file mode 100644
index 69e247a1..00000000
--- a/src/enc/bit_writer.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2011 Google Inc.
-//
-// This code is licensed under the same terms as WebM:
-// Software License Agreement: http://www.webmproject.org/license/software/
-// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
-// -----------------------------------------------------------------------------
-//
-// Bit writing and boolean coder
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#ifndef WEBP_ENC_BIT_WRITER_H_
-#define WEBP_ENC_BIT_WRITER_H_
-
-#include "vp8enci.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-//-----------------------------------------------------------------------------
-// Bit-writing
-
-typedef struct VP8BitWriter VP8BitWriter;
-struct VP8BitWriter {
- int32_t range_; // range-1
- int32_t value_;
- int run_; // number of outstanding bits
- int nb_bits_; // number of pending bits
- uint8_t* buf_;
- size_t pos_;
- size_t max_pos_;
- int error_; // true in case of error
-};
-
-int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size);
-uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw);
-int VP8PutBit(VP8BitWriter* const bw, int bit, int prob);
-int VP8PutBitUniform(VP8BitWriter* const bw, int bit);
-void VP8PutValue(VP8BitWriter* const bw, int value, int nb_bits);
-void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits);
-int VP8BitWriterAppend(VP8BitWriter* const bw,
- const uint8_t* data, size_t size);
-
-// return approximate write position (in bits)
-static inline uint64_t VP8BitWriterPos(const VP8BitWriter* const bw) {
- return (uint64_t)(bw->pos_ + bw->run_) * 8 + 8 + bw->nb_bits_;
-}
-
-static inline uint8_t* VP8BitWriterBuf(const VP8BitWriter* const bw) {
- return bw->buf_;
-}
-static inline size_t VP8BitWriterSize(const VP8BitWriter* const bw) {
- return bw->pos_;
-}
-
-//-----------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-} // extern "C"
-#endif
-
-#endif // WEBP_ENC_BIT_WRITER_H_
diff --git a/src/enc/config.c b/src/enc/config.c
index 0a1ccbbe..b05328d1 100644
--- a/src/enc/config.c
+++ b/src/enc/config.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,20 +9,19 @@
//
// Author: Skal (pascal.massimino@gmail.com)
-#include <assert.h>
#include "webp/encode.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// WebPConfig
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
-int WebPConfigInitInternal(WebPConfig* const config,
+int WebPConfigInitInternal(WebPConfig* config,
WebPPreset preset, float quality, int version) {
- if (version != WEBP_ENCODER_ABI_VERSION) {
+ if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
return 0; // caller/system version mismatch!
}
if (config == NULL) return 0;
@@ -41,7 +40,12 @@ int WebPConfigInitInternal(WebPConfig* const config,
config->show_compressed = 0;
config->preprocessing = 0;
config->autofilter = 0;
- config->alpha_compression = 0;
+ config->partition_limit = 0;
+ config->alpha_compression = 1;
+ config->alpha_filtering = 1;
+ config->alpha_quality = 100;
+ config->lossless = 0;
+ config->image_hint = WEBP_HINT_DEFAULT;
// TODO(skal): tune.
switch (preset) {
@@ -76,7 +80,7 @@ int WebPConfigInitInternal(WebPConfig* const config,
return WebPValidateConfig(config);
}
-int WebPValidateConfig(const WebPConfig* const config) {
+int WebPValidateConfig(const WebPConfig* config) {
if (config == NULL) return 0;
if (config->quality < 0 || config->quality > 100)
return 0;
@@ -106,12 +110,22 @@ int WebPValidateConfig(const WebPConfig* const config) {
return 0;
if (config->partitions < 0 || config->partitions > 3)
return 0;
+ if (config->partition_limit < 0 || config->partition_limit > 100)
+ return 0;
if (config->alpha_compression < 0)
return 0;
+ if (config->alpha_filtering < 0)
+ return 0;
+ if (config->alpha_quality < 0 || config->alpha_quality > 100)
+ return 0;
+ if (config->lossless < 0 || config->lossless > 1)
+ return 0;
+ if (config->image_hint >= WEBP_HINT_LAST)
+ return 0;
return 1;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
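
A minimal usage sketch for the configuration fields validated above, assuming the
public WebPConfigInit()/WebPValidateConfig() entry points from webp/encode.h; the
helper name and the chosen values are illustrative only.

#include "webp/encode.h"

// Hypothetical helper: enable lossless coding on a freshly initialized config.
static int SetupLosslessConfig(WebPConfig* const config) {
  if (!WebPConfigInit(config)) return 0;  // fails on caller/library ABI mismatch
  config->lossless = 1;                   // must be 0 or 1 (see WebPValidateConfig)
  config->alpha_quality = 90;             // must stay within [0, 100]
  return WebPValidateConfig(config);      // 1 if the combination is acceptable
}
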
diff --git a/src/enc/cost.c b/src/enc/cost.c
index 0f7ee722..92e0cc71 100644
--- a/src/enc/cost.c
+++ b/src/enc/cost.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,15 +9,13 @@
//
// Author: Skal (pascal.massimino@gmail.com)
-#include <assert.h>
-
-#include "cost.h"
+#include "./cost.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Boolean-cost cost table
const uint16_t VP8EntropyCost[256] = {
@@ -49,12 +47,12 @@ const uint16_t VP8EntropyCost[256] = {
10, 9, 7, 6, 4, 3
};
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Level cost tables
-// For each given level, the following table given the pattern of contexts
-// to use for coding it (in [][0]) as well as the bit value to use for
-// each context (in [][1]).
+// For each given level, the following table gives the pattern of contexts to
+// use for coding it (in [][0]) as well as the bit value to use for each
+// context (in [][1]).
const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
{0x001, 0x000}, {0x007, 0x001}, {0x00f, 0x005},
{0x00f, 0x00d}, {0x033, 0x003}, {0x033, 0x003}, {0x033, 0x023},
@@ -351,11 +349,14 @@ static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
return cost;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Pre-calc level costs once for all
void VP8CalculateLevelCosts(VP8Proba* const proba) {
int ctype, band, ctx;
+
+ if (!proba->dirty_) return; // nothing to do.
+
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
for (band = 0; band < NUM_BANDS; ++band) {
for(ctx = 0; ctx < NUM_CTX; ++ctx) {
@@ -372,14 +373,16 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
}
}
}
+ proba->dirty_ = 0;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Mode cost tables.
// These are the fixed probabilities (in the coding trees) turned into bit-cost
// by calling VP8BitCost().
const uint16_t VP8FixedCostsUV[4] = { 302, 984, 439, 642 };
+// note: these values include the fixed VP8BitCost(1, 145) mode selection cost.
const uint16_t VP8FixedCostsI16[4] = { 663, 919, 872, 919 };
const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
{ { 251, 1362, 1934, 2085, 2314, 2230, 1839, 1988, 2437, 2348 },
@@ -484,7 +487,7 @@ const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
{ 516, 1378, 1569, 1110, 1798, 1798, 1198, 2199, 1543, 712 } },
};
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
diff --git a/src/enc/cost.h b/src/enc/cost.h
index 6b83c832..09b75b69 100644
--- a/src/enc/cost.h
+++ b/src/enc/cost.h
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -12,7 +12,7 @@
#ifndef WEBP_ENC_COST_H_
#define WEBP_ENC_COST_H_
-#include "vp8enci.h"
+#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -22,22 +22,16 @@ extern const uint16_t VP8LevelFixedCosts[2048]; // approximate cost per level
extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
// Cost of coding one event with probability 'proba'.
-static inline int VP8BitCost(int bit, uint8_t proba) {
+static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba];
}
-// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability.
-static inline uint64_t VP8BranchCost(uint64_t nb, uint64_t total,
- uint8_t proba) {
- return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
-}
-
// Level cost calculations
extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
void VP8CalculateLevelCosts(VP8Proba* const proba);
-static inline int VP8LevelCost(const uint16_t* const table, int level) {
+static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
return VP8LevelFixedCosts[level]
- + table[level > MAX_VARIABLE_LEVEL ? MAX_VARIABLE_LEVEL : level];
+ + table[(level > MAX_VARIABLE_LEVEL) ? MAX_VARIABLE_LEVEL : level];
}
// Mode costs
@@ -45,10 +39,10 @@ extern const uint16_t VP8FixedCostsUV[4];
extern const uint16_t VP8FixedCostsI16[4];
extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES];
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
-#endif // WEBP_ENC_COST_H_
+#endif /* WEBP_ENC_COST_H_ */
diff --git a/src/enc/filter.c b/src/enc/filter.c
index a0a42b07..7fb78a39 100644
--- a/src/enc/filter.c
+++ b/src/enc/filter.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,8 +9,7 @@
//
// Author: somnath@google.com (Somnath Banerjee)
-#include <math.h>
-#include "vp8enci.h"
+#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -45,11 +44,11 @@ static void InitTables(void) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Edge filtering functions
// 4 pixels in, 2 pixels out
-static inline void do_filter2(uint8_t* p, int step) {
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1];
const int a1 = sclip2[112 + ((a + 4) >> 3)];
@@ -59,7 +58,7 @@ static inline void do_filter2(uint8_t* p, int step) {
}
// 4 pixels in, 4 pixels out
-static inline void do_filter4(uint8_t* p, int step) {
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0);
const int a1 = sclip2[112 + ((a + 4) >> 3)];
@@ -72,17 +71,18 @@ static inline void do_filter4(uint8_t* p, int step) {
}
// high edge-variance
-static inline int hev(const uint8_t* p, int step, int thresh) {
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh);
}
-static inline int needs_filter(const uint8_t* p, int step, int thresh) {
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh;
}
-static inline int needs_filter2(const uint8_t* p, int step, int t, int it) {
+static WEBP_INLINE int needs_filter2(const uint8_t* p,
+ int step, int t, int it) {
const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step];
if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t)
@@ -92,7 +92,7 @@ static inline int needs_filter2(const uint8_t* p, int step, int t, int it) {
abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
@@ -129,11 +129,12 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Complex In-loop filtering (Paragraph 15.3)
-static inline void FilterLoop24(uint8_t* p, int hstride, int vstride, int size,
- int thresh, int ithresh, int hev_thresh) {
+static WEBP_INLINE void FilterLoop24(uint8_t* p,
+ int hstride, int vstride, int size,
+ int thresh, int ithresh, int hev_thresh) {
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
@@ -177,7 +178,7 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
void (*VP8EncVFilter16i)(uint8_t*, int, int, int, int) = VFilter16i;
void (*VP8EncHFilter16i)(uint8_t*, int, int, int, int) = HFilter16i;
@@ -187,7 +188,7 @@ void (*VP8EncHFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8i;
void (*VP8EncSimpleVFilter16i)(uint8_t*, int, int) = SimpleVFilter16i;
void (*VP8EncSimpleHFilter16i)(uint8_t*, int, int) = SimpleHFilter16i;
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Paragraph 15.4: compute the inner-edge filtering strength
static int GetILevel(int sharpness, int level) {
@@ -229,18 +230,25 @@ static void DoFilter(const VP8EncIterator* const it, int level) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// SSIM metric
enum { KERNEL = 3 };
-typedef struct {
- double w, xm, ym, xxm, xym, yym;
-} SSIMStats;
-
-static void Accumulate(const uint8_t* src1, int stride1,
- const uint8_t* src2, int stride2,
- int xo, int yo, int W, int H,
- SSIMStats* const stats) {
+static const double kMinValue = 1.e-10; // minimal threshold
+
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) {
+ dst->w += src->w;
+ dst->xm += src->xm;
+ dst->ym += src->ym;
+ dst->xxm += src->xxm;
+ dst->xym += src->xym;
+ dst->yym += src->yym;
+}
+
+static void VP8SSIMAccumulate(const uint8_t* src1, int stride1,
+ const uint8_t* src2, int stride2,
+ int xo, int yo, int W, int H,
+ DistoStats* const stats) {
const int ymin = (yo - KERNEL < 0) ? 0 : yo - KERNEL;
const int ymax = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL;
const int xmin = (xo - KERNEL < 0) ? 0 : xo - KERNEL;
@@ -262,7 +270,7 @@ static void Accumulate(const uint8_t* src1, int stride1,
}
}
-static double GetSSIM(const SSIMStats* const stats) {
+double VP8SSIMGet(const DistoStats* const stats) {
const double xmxm = stats->xm * stats->xm;
const double ymym = stats->ym * stats->ym;
const double xmym = stats->xm * stats->ym;
@@ -280,29 +288,52 @@ static double GetSSIM(const SSIMStats* const stats) {
C2 = 58.5225 * w2;
fnum = (2 * xmym + C1) * (2 * sxy + C2);
fden = (xmxm + ymym + C1) * (sxx + syy + C2);
- return (fden != 0) ? fnum / fden : 0.;
+ return (fden != 0.) ? fnum / fden : kMinValue;
+}
+
+double VP8SSIMGetSquaredError(const DistoStats* const s) {
+ if (s->w > 0.) {
+ const double iw2 = 1. / (s->w * s->w);
+ const double sxx = s->xxm * s->w - s->xm * s->xm;
+ const double syy = s->yym * s->w - s->ym * s->ym;
+ const double sxy = s->xym * s->w - s->xm * s->ym;
+ const double SSE = iw2 * (sxx + syy - 2. * sxy);
+ if (SSE > kMinValue) return SSE;
+ }
+ return kMinValue;
+}
+
+void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
+ const uint8_t* src2, int stride2,
+ int W, int H, DistoStats* const stats) {
+ int x, y;
+ for (y = 0; y < H; ++y) {
+ for (x = 0; x < W; ++x) {
+ VP8SSIMAccumulate(src1, stride1, src2, stride2, x, y, W, H, stats);
+ }
+ }
}
static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
int x, y;
- SSIMStats s = { .0, .0, .0, .0, .0, .0 };
+ DistoStats s = { .0, .0, .0, .0, .0, .0 };
// compute SSIM in a 10 x 10 window
for (x = 3; x < 13; x++) {
for (y = 3; y < 13; y++) {
- Accumulate(yuv1 + Y_OFF, BPS, yuv2 + Y_OFF, BPS, x, y, 16, 16, &s);
+ VP8SSIMAccumulate(yuv1 + Y_OFF, BPS, yuv2 + Y_OFF, BPS, x, y, 16, 16, &s);
}
}
for (x = 1; x < 7; x++) {
for (y = 1; y < 7; y++) {
- Accumulate(yuv1 + U_OFF, BPS, yuv2 + U_OFF, BPS, x, y, 8, 8, &s);
- Accumulate(yuv1 + V_OFF, BPS, yuv2 + V_OFF, BPS, x, y, 8, 8, &s);
+ VP8SSIMAccumulate(yuv1 + U_OFF, BPS, yuv2 + U_OFF, BPS, x, y, 8, 8, &s);
+ VP8SSIMAccumulate(yuv1 + V_OFF, BPS, yuv2 + V_OFF, BPS, x, y, 8, 8, &s);
}
}
- return GetSSIM(&s);
+ return VP8SSIMGet(&s);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Exposed APIs: Encoder should call the following 3 functions to adjust
// loop filter strength
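
For reference, the value returned by VP8SSIMGet() above is the usual SSIM
expression written in terms of the accumulated sums (w samples, sums xm, ym,
second moments xxm, xym, yym, and sxx = w * xxm - xm * xm, similarly for syy
and sxy):

  SSIM = ((2 * xm * ym + C1) * (2 * sxy + C2)) /
         ((xm * xm + ym * ym + C1) * (sxx + syy + C2))

with C1 = 6.5025 * w * w and C2 = 58.5225 * w * w, and kMinValue returned when
the denominator is zero.
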
diff --git a/src/enc/frame.c b/src/enc/frame.c
index d0270d7b..bdd36006 100644
--- a/src/enc/frame.c
+++ b/src/enc/frame.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,13 +9,13 @@
//
// Author: Skal (pascal.massimino@gmail.com)
+#include <assert.h>
#include <stdlib.h>
#include <string.h>
-#include <assert.h>
#include <math.h>
-#include "vp8enci.h"
-#include "cost.h"
+#include "./vp8enci.h"
+#include "./cost.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -37,7 +37,7 @@ typedef struct {
CostArray* cost;
} VP8Residual;
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Tables for level coding
const uint8_t VP8EncBands[16 + 1] = {
@@ -51,18 +51,20 @@ static const uint8_t kCat5[] = { 180, 157, 141, 134, 130 };
static const uint8_t kCat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Reset the statistics about: number of skips, token proba, level cost,...
-static void ResetStats(VP8Encoder* const enc, int precalc_cost) {
+static void ResetStats(VP8Encoder* const enc) {
VP8Proba* const proba = &enc->proba_;
- if (precalc_cost) VP8CalculateLevelCosts(proba);
+ VP8CalculateLevelCosts(proba);
proba->nb_skip_ = 0;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Skip decision probability
+#define SKIP_PROBA_THRESHOLD 250 // value below which using skip_proba is OK.
+
static int CalcSkipProba(uint64_t nb, uint64_t total) {
return (int)(total ? (total - nb) * 255 / total : 255);
}
@@ -74,7 +76,7 @@ static int FinalizeSkipProba(VP8Encoder* const enc) {
const int nb_events = proba->nb_skip_;
int size;
proba->skip_proba_ = CalcSkipProba(nb_events, nb_mbs);
- proba->use_skip_proba_ = (proba->skip_proba_ < 250);
+ proba->use_skip_proba_ = (proba->skip_proba_ < SKIP_PROBA_THRESHOLD);
size = 256; // 'use_skip_proba' bit
if (proba->use_skip_proba_) {
size += nb_events * VP8BitCost(1, proba->skip_proba_)
@@ -84,7 +86,7 @@ static int FinalizeSkipProba(VP8Encoder* const enc) {
return size;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Recording of token probabilities.
static void ResetTokenStats(VP8Encoder* const enc) {
@@ -93,9 +95,14 @@ static void ResetTokenStats(VP8Encoder* const enc) {
}
// Record proba context used
-static int Record(int bit, uint64_t* const stats) {
- stats[0] += bit;
- stats[1] += 1;
+static int Record(int bit, proba_t* const stats) {
+ proba_t p = *stats;
+  if (p >= 0xffff0000u) { // an overflow is imminent.
+ p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2.
+ }
+ // record bit count (lower 16 bits) and increment total count (upper 16 bits).
+ p += 0x00010000u + bit;
+ *stats = p;
return bit;
}
@@ -104,33 +111,35 @@ static int Record(int bit, uint64_t* const stats) {
// Simulate block coding, but only record statistics.
// Note: no need to record the fixed probas.
-static int RecordCoeffs(int ctx, VP8Residual* res) {
+static int RecordCoeffs(int ctx, const VP8Residual* const res) {
int n = res->first;
- uint64_t (*s)[2] = res->stats[VP8EncBands[n]][ctx];
- if (!Record(res->last >= 0, s[0])) {
+ proba_t* s = res->stats[VP8EncBands[n]][ctx];
+ if (res->last < 0) {
+ Record(0, s + 0);
return 0;
}
-
- while (1) {
- int v = res->coeffs[n++];
- if (!Record(v != 0, s[1])) {
+ while (n <= res->last) {
+ int v;
+ Record(1, s + 0);
+ while ((v = res->coeffs[n++]) == 0) {
+ Record(0, s + 1);
s = res->stats[VP8EncBands[n]][0];
- continue;
}
- if (!Record(2u < (unsigned int)(v + 1), s[2])) { // v = -1 or 1
+ Record(1, s + 1);
+ if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1
s = res->stats[VP8EncBands[n]][1];
} else {
v = abs(v);
#if !defined(USE_LEVEL_CODE_TABLE)
- if (!Record(v > 4, s[3])) {
- if (Record(v != 2, s[4]))
- Record(v == 4, s[5]);
- } else if (!Record(v > 10, s[6])) {
- Record(v > 6, s[7]);
- } else if (!Record((v >= 3 + (8 << 2)), s[8])) {
- Record((v >= 3 + (8 << 1)), s[9]);
+ if (!Record(v > 4, s + 3)) {
+ if (Record(v != 2, s + 4))
+ Record(v == 4, s + 5);
+ } else if (!Record(v > 10, s + 6)) {
+ Record(v > 6, s + 7);
+ } else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
+ Record((v >= 3 + (8 << 1)), s + 9);
} else {
- Record((v >= 3 + (8 << 3)), s[10]);
+ Record((v >= 3 + (8 << 3)), s + 10);
}
#else
if (v > MAX_VARIABLE_LEVEL)
@@ -142,44 +151,54 @@ static int RecordCoeffs(int ctx, VP8Residual* res) {
int i;
for (i = 0; (pattern >>= 1) != 0; ++i) {
const int mask = 2 << i;
- if (pattern & 1) Record(!!(bits & mask), s[3 + i]);
+ if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
}
}
#endif
s = res->stats[VP8EncBands[n]][2];
}
- if (n == 16 || !Record(n <= res->last, s[0])) {
- return 1;
- }
}
+ if (n < 16) Record(0, s + 0);
+ return 1;
}
// Collect statistics and deduce probabilities for next coding pass.
// Return the total bit-cost for coding the probability updates.
-static int CalcTokenProba(uint64_t nb, uint64_t total) {
- return (int)(nb ? ((total - nb) * 255 + total / 2) / total : 255);
+static int CalcTokenProba(int nb, int total) {
+ assert(nb <= total);
+ return nb ? (255 - nb * 255 / total) : 255;
+}
+
+// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability.
+static int BranchCost(int nb, int total, int proba) {
+ return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
}
static int FinalizeTokenProbas(VP8Encoder* const enc) {
VP8Proba* const proba = &enc->proba_;
+ int has_changed = 0;
int size = 0;
int t, b, c, p;
for (t = 0; t < NUM_TYPES; ++t) {
for (b = 0; b < NUM_BANDS; ++b) {
for (c = 0; c < NUM_CTX; ++c) {
for (p = 0; p < NUM_PROBAS; ++p) {
- const uint64_t* const cnt = proba->stats_[t][b][c][p];
+ const proba_t stats = proba->stats_[t][b][c][p];
+ const int nb = (stats >> 0) & 0xffff;
+ const int total = (stats >> 16) & 0xffff;
const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
const int old_p = VP8CoeffsProba0[t][b][c][p];
- const int new_p = CalcTokenProba(cnt[0], cnt[1]);
- const uint64_t old_cost = VP8BranchCost(cnt[0], cnt[1], old_p)
- + VP8BitCost(0, update_proba);
- const uint64_t new_cost = VP8BranchCost(cnt[0], cnt[1], new_p)
- + VP8BitCost(1, update_proba) + 8 * 256;
+ const int new_p = CalcTokenProba(nb, total);
+ const int old_cost = BranchCost(nb, total, old_p)
+ + VP8BitCost(0, update_proba);
+ const int new_cost = BranchCost(nb, total, new_p)
+ + VP8BitCost(1, update_proba)
+ + 8 * 256;
const int use_new_p = (old_cost > new_cost);
size += VP8BitCost(use_new_p, update_proba);
if (use_new_p) { // only use proba that seem meaningful enough.
proba->coeffs_[t][b][c][p] = new_p;
+ has_changed |= (new_p != old_p);
size += 8 * 256;
} else {
proba->coeffs_[t][b][c][p] = old_p;
@@ -188,10 +207,11 @@ static int FinalizeTokenProbas(VP8Encoder* const enc) {
}
}
}
+ proba->dirty_ = has_changed;
return size;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// helper functions for residuals struct VP8Residual.
static void InitResidual(int first, int coeff_type,
@@ -216,49 +236,53 @@ static void SetResidualCoeffs(const int16_t* const coeffs,
res->coeffs = coeffs;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Mode costs
static int GetResidualCost(int ctx, const VP8Residual* const res) {
int n = res->first;
- const uint8_t* p = res->prob[VP8EncBands[n]][ctx];
- const uint16_t *t = res->cost[VP8EncBands[n]][ctx];
+ int p0 = res->prob[VP8EncBands[n]][ctx][0];
+ const uint16_t* t = res->cost[VP8EncBands[n]][ctx];
int cost;
- cost = VP8BitCost(res->last >= 0, p[0]);
if (res->last < 0) {
- return cost;
+ return VP8BitCost(0, p0);
}
+ cost = 0;
while (n <= res->last) {
- const int v = res->coeffs[n++];
+ const int v = res->coeffs[n];
+ const int b = VP8EncBands[n + 1];
+ ++n;
if (v == 0) {
- cost += VP8LevelCost(t, 0);
- p = res->prob[VP8EncBands[n]][0];
- t = res->cost[VP8EncBands[n]][0];
+ // short-case for VP8LevelCost(t, 0) (note: VP8LevelFixedCosts[0] == 0):
+ cost += t[0];
+ t = res->cost[b][0];
continue;
- } else if (2u >= (unsigned int)(v + 1)) { // v = -1 or 1
- cost += VP8LevelCost(t, 1);
- p = res->prob[VP8EncBands[n]][1];
- t = res->cost[VP8EncBands[n]][1];
+ }
+ cost += VP8BitCost(1, p0);
+ if (2u >= (unsigned int)(v + 1)) { // v = -1 or 1
+ // short-case for "VP8LevelCost(t, 1)" (256 is VP8LevelFixedCosts[1]):
+ cost += 256 + t[1];
+ p0 = res->prob[b][1][0];
+ t = res->cost[b][1];
} else {
cost += VP8LevelCost(t, abs(v));
- p = res->prob[VP8EncBands[n]][2];
- t = res->cost[VP8EncBands[n]][2];
- }
- if (n < 16) {
- cost += VP8BitCost(n <= res->last, p[0]);
+ p0 = res->prob[b][2][0];
+ t = res->cost[b][2];
}
}
+ if (n < 16) cost += VP8BitCost(0, p0);
return cost;
}
int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
const int x = (it->i4_ & 3), y = (it->i4_ >> 2);
VP8Residual res;
+ VP8Encoder* const enc = it->enc_;
int R = 0;
int ctx;
- InitResidual(0, 3, it->enc_, &res);
+ InitResidual(0, 3, enc, &res);
ctx = it->top_nz_[x] + it->left_nz_[y];
SetResidualCoeffs(levels, &res);
R += GetResidualCost(ctx, &res);
@@ -267,18 +291,19 @@ int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
VP8Residual res;
+ VP8Encoder* const enc = it->enc_;
int x, y;
int R = 0;
VP8IteratorNzToBytes(it); // re-import the non-zero context
// DC
- InitResidual(0, 1, it->enc_, &res);
+ InitResidual(0, 1, enc, &res);
SetResidualCoeffs(rd->y_dc_levels, &res);
R += GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
// AC
- InitResidual(1, 0, it->enc_, &res);
+ InitResidual(1, 0, enc, &res);
for (y = 0; y < 4; ++y) {
for (x = 0; x < 4; ++x) {
const int ctx = it->top_nz_[x] + it->left_nz_[y];
@@ -292,12 +317,13 @@ int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
VP8Residual res;
+ VP8Encoder* const enc = it->enc_;
int ch, x, y;
int R = 0;
VP8IteratorNzToBytes(it); // re-import the non-zero context
- InitResidual(0, 2, it->enc_, &res);
+ InitResidual(0, 2, enc, &res);
for (ch = 0; ch <= 2; ch += 2) {
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x) {
@@ -311,7 +337,7 @@ int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
return R;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Coefficient coding
static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
@@ -393,18 +419,19 @@ static void CodeResiduals(VP8BitWriter* const bw,
uint64_t pos1, pos2, pos3;
const int i16 = (it->mb_->type_ == 1);
const int segment = it->mb_->segment_;
+ VP8Encoder* const enc = it->enc_;
VP8IteratorNzToBytes(it);
pos1 = VP8BitWriterPos(bw);
if (i16) {
- InitResidual(0, 1, it->enc_, &res);
+ InitResidual(0, 1, enc, &res);
SetResidualCoeffs(rd->y_dc_levels, &res);
it->top_nz_[8] = it->left_nz_[8] =
PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res);
- InitResidual(1, 0, it->enc_, &res);
+ InitResidual(1, 0, enc, &res);
} else {
- InitResidual(0, 3, it->enc_, &res);
+ InitResidual(0, 3, enc, &res);
}
// luma-AC
@@ -418,7 +445,7 @@ static void CodeResiduals(VP8BitWriter* const bw,
pos2 = VP8BitWriterPos(bw);
// U/V
- InitResidual(0, 2, it->enc_, &res);
+ InitResidual(0, 2, enc, &res);
for (ch = 0; ch <= 2; ch += 2) {
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x) {
@@ -443,17 +470,18 @@ static void RecordResiduals(VP8EncIterator* const it,
const VP8ModeScore* const rd) {
int x, y, ch;
VP8Residual res;
+ VP8Encoder* const enc = it->enc_;
VP8IteratorNzToBytes(it);
if (it->mb_->type_ == 1) { // i16x16
- InitResidual(0, 1, it->enc_, &res);
+ InitResidual(0, 1, enc, &res);
SetResidualCoeffs(rd->y_dc_levels, &res);
it->top_nz_[8] = it->left_nz_[8] =
RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
- InitResidual(1, 0, it->enc_, &res);
+ InitResidual(1, 0, enc, &res);
} else {
- InitResidual(0, 3, it->enc_, &res);
+ InitResidual(0, 3, enc, &res);
}
// luma-AC
@@ -466,7 +494,7 @@ static void RecordResiduals(VP8EncIterator* const it,
}
// U/V
- InitResidual(0, 2, it->enc_, &res);
+ InitResidual(0, 2, enc, &res);
for (ch = 0; ch <= 2; ch += 2) {
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x) {
@@ -481,7 +509,181 @@ static void RecordResiduals(VP8EncIterator* const it,
VP8IteratorBytesToNz(it);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+// Token buffer
+
+#ifdef USE_TOKEN_BUFFER
+
+void VP8TBufferInit(VP8TBuffer* const b) {
+ b->rows_ = NULL;
+ b->tokens_ = NULL;
+ b->last_ = &b->rows_;
+ b->left_ = 0;
+ b->error_ = 0;
+}
+
+int VP8TBufferNewPage(VP8TBuffer* const b) {
+ VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page));
+ if (page == NULL) {
+ b->error_ = 1;
+ return 0;
+ }
+ *b->last_ = page;
+ b->last_ = &page->next_;
+ b->left_ = MAX_NUM_TOKEN;
+ b->tokens_ = page->tokens_;
+ return 1;
+}
+
+void VP8TBufferClear(VP8TBuffer* const b) {
+ if (b != NULL) {
+ const VP8Tokens* p = b->rows_;
+ while (p != NULL) {
+ const VP8Tokens* const next = p->next_;
+ free((void*)p);
+ p = next;
+ }
+ VP8TBufferInit(b);
+ }
+}
+
+int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
+ const uint8_t* const probas) {
+ VP8Tokens* p = b->rows_;
+ if (b->error_) return 0;
+ while (p != NULL) {
+ const int N = (p->next_ == NULL) ? b->left_ : 0;
+ int n = MAX_NUM_TOKEN;
+ while (n-- > N) {
+ VP8PutBit(bw, (p->tokens_[n] >> 15) & 1, probas[p->tokens_[n] & 0x7fff]);
+ }
+ p = p->next_;
+ }
+ return 1;
+}
+
+#define TOKEN_ID(b, ctx, p) ((p) + NUM_PROBAS * ((ctx) + (b) * NUM_CTX))
+
+static int RecordCoeffTokens(int ctx, const VP8Residual* const res,
+ VP8TBuffer* tokens) {
+ int n = res->first;
+ int b = VP8EncBands[n];
+ if (!VP8AddToken(tokens, res->last >= 0, TOKEN_ID(b, ctx, 0))) {
+ return 0;
+ }
+
+ while (n < 16) {
+ const int c = res->coeffs[n++];
+ const int sign = c < 0;
+ int v = sign ? -c : c;
+ const int base_id = TOKEN_ID(b, ctx, 0);
+ if (!VP8AddToken(tokens, v != 0, base_id + 1)) {
+ b = VP8EncBands[n];
+ ctx = 0;
+ continue;
+ }
+ if (!VP8AddToken(tokens, v > 1, base_id + 2)) {
+ b = VP8EncBands[n];
+ ctx = 1;
+ } else {
+ if (!VP8AddToken(tokens, v > 4, base_id + 3)) {
+ if (VP8AddToken(tokens, v != 2, base_id + 4))
+ VP8AddToken(tokens, v == 4, base_id + 5);
+ } else if (!VP8AddToken(tokens, v > 10, base_id + 6)) {
+ if (!VP8AddToken(tokens, v > 6, base_id + 7)) {
+// VP8AddToken(tokens, v == 6, 159);
+ } else {
+// VP8AddToken(tokens, v >= 9, 165);
+// VP8AddToken(tokens, !(v & 1), 145);
+ }
+ } else {
+ int mask;
+ const uint8_t* tab;
+ if (v < 3 + (8 << 1)) { // kCat3 (3b)
+ VP8AddToken(tokens, 0, base_id + 8);
+ VP8AddToken(tokens, 0, base_id + 9);
+ v -= 3 + (8 << 0);
+ mask = 1 << 2;
+ tab = kCat3;
+ } else if (v < 3 + (8 << 2)) { // kCat4 (4b)
+ VP8AddToken(tokens, 0, base_id + 8);
+ VP8AddToken(tokens, 1, base_id + 9);
+ v -= 3 + (8 << 1);
+ mask = 1 << 3;
+ tab = kCat4;
+ } else if (v < 3 + (8 << 3)) { // kCat5 (5b)
+ VP8AddToken(tokens, 1, base_id + 8);
+ VP8AddToken(tokens, 0, base_id + 10);
+ v -= 3 + (8 << 2);
+ mask = 1 << 4;
+ tab = kCat5;
+ } else { // kCat6 (11b)
+ VP8AddToken(tokens, 1, base_id + 8);
+ VP8AddToken(tokens, 1, base_id + 10);
+ v -= 3 + (8 << 3);
+ mask = 1 << 10;
+ tab = kCat6;
+ }
+ while (mask) {
+ // VP8AddToken(tokens, !!(v & mask), *tab++);
+ mask >>= 1;
+ }
+ }
+ ctx = 2;
+ }
+ b = VP8EncBands[n];
+ // VP8PutBitUniform(bw, sign);
+ if (n == 16 || !VP8AddToken(tokens, n <= res->last, TOKEN_ID(b, ctx, 0))) {
+ return 1; // EOB
+ }
+ }
+ return 1;
+}
+
+static void RecordTokens(VP8EncIterator* const it,
+ const VP8ModeScore* const rd, VP8TBuffer tokens[2]) {
+ int x, y, ch;
+ VP8Residual res;
+ VP8Encoder* const enc = it->enc_;
+
+ VP8IteratorNzToBytes(it);
+ if (it->mb_->type_ == 1) { // i16x16
+ InitResidual(0, 1, enc, &res);
+ SetResidualCoeffs(rd->y_dc_levels, &res);
+// TODO(skal): FIX -> it->top_nz_[8] = it->left_nz_[8] =
+ RecordCoeffTokens(it->top_nz_[8] + it->left_nz_[8], &res, &tokens[0]);
+ InitResidual(1, 0, enc, &res);
+ } else {
+ InitResidual(0, 3, enc, &res);
+ }
+
+ // luma-AC
+ for (y = 0; y < 4; ++y) {
+ for (x = 0; x < 4; ++x) {
+ const int ctx = it->top_nz_[x] + it->left_nz_[y];
+ SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+ it->top_nz_[x] = it->left_nz_[y] =
+ RecordCoeffTokens(ctx, &res, &tokens[0]);
+ }
+ }
+
+ // U/V
+ InitResidual(0, 2, enc, &res);
+ for (ch = 0; ch <= 2; ch += 2) {
+ for (y = 0; y < 2; ++y) {
+ for (x = 0; x < 2; ++x) {
+ const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+ SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+ it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+ RecordCoeffTokens(ctx, &res, &tokens[1]);
+ }
+ }
+ }
+}
+
+#endif // USE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
// ExtraInfo map / Debug function
#if SEGMENT_VISU
@@ -515,16 +717,16 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
const VP8MBInfo* const mb = it->mb_;
WebPPicture* const pic = enc->pic_;
- if (pic->stats) {
+ if (pic->stats != NULL) {
StoreSSE(it);
enc->block_count_[0] += (mb->type_ == 0);
enc->block_count_[1] += (mb->type_ == 1);
enc->block_count_[2] += (mb->skip_ != 0);
}
- if (pic->extra_info) {
+ if (pic->extra_info != NULL) {
uint8_t* const info = &pic->extra_info[it->x_ + it->y_ * enc->mb_w_];
- switch(pic->extra_info_type) {
+ switch (pic->extra_info_type) {
case 1: *info = mb->type_; break;
case 2: *info = mb->segment_; break;
case 3: *info = enc->dqm_[mb->segment_].quant_; break;
@@ -544,7 +746,7 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
#endif
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Main loops
//
// VP8EncLoop(): does the final bitstream coding.
@@ -560,6 +762,7 @@ static void ResetAfterSkip(VP8EncIterator* const it) {
int VP8EncLoop(VP8Encoder* const enc) {
int i, s, p;
+ int ok = 1;
VP8EncIterator it;
VP8ModeScore info;
const int dont_use_skip = !enc->proba_.use_skip_proba_;
@@ -573,7 +776,7 @@ int VP8EncLoop(VP8Encoder* const enc) {
VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
}
- ResetStats(enc, rd_opt != 0);
+ ResetStats(enc);
ResetSSE(enc);
VP8IteratorInit(enc, &it);
@@ -588,9 +791,6 @@ int VP8EncLoop(VP8Encoder* const enc) {
ResetAfterSkip(&it);
}
#ifdef WEBP_EXPERIMENTAL_FEATURES
- if (enc->has_alpha_) {
- VP8EncCodeAlphaBlock(&it);
- }
if (enc->use_layer_) {
VP8EncCodeLayerBlock(&it);
}
@@ -598,25 +798,34 @@ int VP8EncLoop(VP8Encoder* const enc) {
StoreSideInfo(&it);
VP8StoreFilterStats(&it);
VP8IteratorExport(&it);
- } while (VP8IteratorNext(&it, it.yuv_out_));
- VP8AdjustFilterStrength(&it);
+ ok = VP8IteratorProgress(&it, 20);
+ } while (ok && VP8IteratorNext(&it, it.yuv_out_));
- // Finalize the partitions
- for (p = 0; p < enc->num_parts_; ++p) {
- VP8BitWriterFinish(enc->parts_ + p);
+ if (ok) { // Finalize the partitions, check for extra errors.
+ for (p = 0; p < enc->num_parts_; ++p) {
+ VP8BitWriterFinish(enc->parts_ + p);
+ ok &= !enc->parts_[p].error_;
+ }
}
- // and byte counters
- if (enc->pic_->stats) {
- for (i = 0; i <= 2; ++i) {
- for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
- enc->residual_bytes_[i][s] = (int)((it.bit_count_[s][i] + 7) >> 3);
+
+ if (ok) { // All good. Finish up.
+ if (enc->pic_->stats) { // finalize byte counters...
+ for (i = 0; i <= 2; ++i) {
+ for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+ enc->residual_bytes_[i][s] = (int)((it.bit_count_[s][i] + 7) >> 3);
+ }
}
}
+ VP8AdjustFilterStrength(&it); // ...and store filter stats.
+ } else {
+ // Something bad happened -> need to do some memory cleanup.
+ VP8EncFreeBitWriters(enc);
}
- return 1;
+
+ return ok;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// VP8StatLoop(): only collect statistics (number of skips, token usage, ...)
// This is used for deciding optimal probabilities. It also
// modifies the quantizer value if some target (size, PNSR)
@@ -625,7 +834,7 @@ int VP8EncLoop(VP8Encoder* const enc) {
#define kHeaderSizeEstimate (15 + 20 + 10) // TODO: fix better
static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
- float* const PSNR) {
+ float* const PSNR, int percent_delta) {
VP8EncIterator it;
uint64_t size = 0;
uint64_t distortion = 0;
@@ -640,7 +849,7 @@ static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
VP8SetSegmentParams(enc, q); // setup segment quantizations and filters
- ResetStats(enc, rd_opt != 0);
+ ResetStats(enc);
ResetTokenStats(enc);
VP8IteratorInit(enc, &it);
@@ -654,6 +863,8 @@ static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
RecordResiduals(&it, &info);
size += info.R;
distortion += info.D;
+ if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
+ return 0;
} while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0);
size += FinalizeSkipProba(enc);
size += FinalizeTokenProbas(enc);
@@ -674,6 +885,10 @@ int VP8StatLoop(VP8Encoder* const enc) {
(enc->config_->target_size > 0 || enc->config_->target_PSNR > 0);
const int fast_probe = (enc->method_ < 2 && !do_search);
float q = enc->config_->quality;
+ const int max_passes = enc->config_->pass;
+ const int task_percent = 20;
+ const int percent_per_pass = (task_percent + max_passes / 2) / max_passes;
+ const int final_percent = enc->percent_ + task_percent;
int pass;
int nb_mbs;
@@ -683,39 +898,41 @@ int VP8StatLoop(VP8Encoder* const enc) {
// No target size: just do several pass without changing 'q'
if (!do_search) {
- for (pass = 0; pass < enc->config_->pass; ++pass) {
+ for (pass = 0; pass < max_passes; ++pass) {
const int rd_opt = (enc->method_ > 2);
- OneStatPass(enc, q, rd_opt, nb_mbs, NULL);
+ if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) {
+ return 0;
+ }
}
- return 1;
- }
-
- // binary search for a size close to target
- for (pass = 0; pass < enc->config_->pass && (dqs[pass] > 0); ++pass) {
- const int rd_opt = 1;
- float PSNR;
- int criterion;
- const int size = OneStatPass(enc, q, rd_opt, nb_mbs, &PSNR);
+ } else {
+ // binary search for a size close to target
+ for (pass = 0; pass < max_passes && (dqs[pass] > 0); ++pass) {
+ const int rd_opt = 1;
+ float PSNR;
+ int criterion;
+ const int size = OneStatPass(enc, q, rd_opt, nb_mbs, &PSNR,
+ percent_per_pass);
#if DEBUG_SEARCH
- printf("#%d size=%d PSNR=%.2f q=%.2f\n", pass, size, PSNR, q);
+ printf("#%d size=%d PSNR=%.2f q=%.2f\n", pass, size, PSNR, q);
#endif
-
- if (enc->config_->target_PSNR > 0) {
- criterion = (PSNR < enc->config_->target_PSNR);
- } else {
- criterion = (size < enc->config_->target_size);
- }
- // dichotomize
- if (criterion) {
- q += dqs[pass];
- } else {
- q -= dqs[pass];
+ if (!size) return 0;
+ if (enc->config_->target_PSNR > 0) {
+ criterion = (PSNR < enc->config_->target_PSNR);
+ } else {
+ criterion = (size < enc->config_->target_size);
+ }
+ // dichotomize
+ if (criterion) {
+ q += dqs[pass];
+ } else {
+ q -= dqs[pass];
+ }
}
}
- return 1;
+ return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
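
A minimal stand-alone sketch of the proba_t statistics packing introduced in
Record() and consumed by FinalizeTokenProbas() above: the low 16 bits accumulate
the number of recorded 1-bits, the high 16 bits the total number of recorded
events. The RecordBit() helper and the sample bit pattern are made up for
illustration.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t proba_t;   // same 16+16 bit packing as in frame.c

static void RecordBit(int bit, proba_t* const stats) {
  proba_t p = *stats;
  if (p >= 0xffff0000u) {               // an overflow is imminent
    p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> halve both counters
  }
  *stats = p + 0x00010000u + bit;       // +1 event (high half), +bit (low half)
}

int main(void) {
  proba_t stats = 0;
  int i;
  for (i = 0; i < 100; ++i) RecordBit((i % 4) == 0, &stats);   // 25 ones out of 100
  {
    const int nb = stats & 0xffff;             // number of recorded 1-bits
    const int total = (stats >> 16) & 0xffff;  // total number of recorded events
    const int new_p = nb ? (255 - nb * 255 / total) : 255;     // as in CalcTokenProba()
    printf("nb=%d total=%d new_p=%d\n", nb, total, new_p);     // nb=25 total=100 new_p=192
  }
  return 0;
}
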
diff --git a/src/enc/histogram.c b/src/enc/histogram.c
new file mode 100644
index 00000000..ca838e06
--- /dev/null
+++ b/src/enc/histogram.c
@@ -0,0 +1,406 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../dsp/lossless.h"
+#include "../utils/utils.h"
+
+static void HistogramClear(VP8LHistogram* const p) {
+ memset(p->literal_, 0, sizeof(p->literal_));
+ memset(p->red_, 0, sizeof(p->red_));
+ memset(p->blue_, 0, sizeof(p->blue_));
+ memset(p->alpha_, 0, sizeof(p->alpha_));
+ memset(p->distance_, 0, sizeof(p->distance_));
+ p->bit_cost_ = 0;
+}
+
+void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
+ VP8LHistogram* const histo) {
+ int i;
+ for (i = 0; i < refs->size; ++i) {
+ VP8LHistogramAddSinglePixOrCopy(histo, &refs->refs[i]);
+ }
+}
+
+void VP8LHistogramCreate(VP8LHistogram* const p,
+ const VP8LBackwardRefs* const refs,
+ int palette_code_bits) {
+ if (palette_code_bits >= 0) {
+ p->palette_code_bits_ = palette_code_bits;
+ }
+ HistogramClear(p);
+ VP8LHistogramStoreRefs(refs, p);
+}
+
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
+ p->palette_code_bits_ = palette_code_bits;
+ HistogramClear(p);
+}
+
+VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
+ int i;
+ VP8LHistogramSet* set;
+ VP8LHistogram* bulk;
+ const uint64_t total_size = (uint64_t)sizeof(*set)
+ + size * sizeof(*set->histograms)
+ + size * sizeof(**set->histograms);
+ uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
+ if (memory == NULL) return NULL;
+
+ set = (VP8LHistogramSet*)memory;
+ memory += sizeof(*set);
+ set->histograms = (VP8LHistogram**)memory;
+ memory += size * sizeof(*set->histograms);
+ bulk = (VP8LHistogram*)memory;
+ set->max_size = size;
+ set->size = size;
+ for (i = 0; i < size; ++i) {
+ set->histograms[i] = bulk + i;
+ VP8LHistogramInit(set->histograms[i], cache_bits);
+ }
+ return set;
+}
+
+// -----------------------------------------------------------------------------
+
+void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
+ const PixOrCopy* const v) {
+ if (PixOrCopyIsLiteral(v)) {
+ ++histo->alpha_[PixOrCopyLiteral(v, 3)];
+ ++histo->red_[PixOrCopyLiteral(v, 2)];
+ ++histo->literal_[PixOrCopyLiteral(v, 1)];
+ ++histo->blue_[PixOrCopyLiteral(v, 0)];
+ } else if (PixOrCopyIsCacheIdx(v)) {
+ int literal_ix = 256 + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
+ ++histo->literal_[literal_ix];
+ } else {
+ int code, extra_bits_count, extra_bits_value;
+ PrefixEncode(PixOrCopyLength(v),
+ &code, &extra_bits_count, &extra_bits_value);
+ ++histo->literal_[256 + code];
+ PrefixEncode(PixOrCopyDistance(v),
+ &code, &extra_bits_count, &extra_bits_value);
+ ++histo->distance_[code];
+ }
+}
+
+
+
+static double BitsEntropy(const int* const array, int n) {
+ double retval = 0.;
+ int sum = 0;
+ int nonzeros = 0;
+ int max_val = 0;
+ int i;
+ double mix;
+ for (i = 0; i < n; ++i) {
+ if (array[i] != 0) {
+ sum += array[i];
+ ++nonzeros;
+ retval -= VP8LFastSLog2(array[i]);
+ if (max_val < array[i]) {
+ max_val = array[i];
+ }
+ }
+ }
+ retval += VP8LFastSLog2(sum);
+
+ if (nonzeros < 5) {
+ if (nonzeros <= 1) {
+ return 0;
+ }
+ // Two symbols, they will be 0 and 1 in a Huffman code.
+ // Let's mix in a bit of entropy to favor good clustering when
+ // distributions of these are combined.
+ if (nonzeros == 2) {
+ return 0.99 * sum + 0.01 * retval;
+ }
+ // No matter what the entropy says, we cannot be better than min_limit
+ // with Huffman coding. I am mixing a bit of entropy into the
+ // min_limit since it produces much better (~0.5 %) compression results
+ // perhaps because of better entropy clustering.
+ if (nonzeros == 3) {
+ mix = 0.95;
+ } else {
+ mix = 0.7; // nonzeros == 4.
+ }
+ } else {
+ mix = 0.627;
+ }
+
+ {
+ double min_limit = 2 * sum - max_val;
+ min_limit = mix * min_limit + (1.0 - mix) * retval;
+ return (retval < min_limit) ? min_limit : retval;
+ }
+}
+
+double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
+ double retval = BitsEntropy(&p->literal_[0], VP8LHistogramNumCodes(p))
+ + BitsEntropy(&p->red_[0], 256)
+ + BitsEntropy(&p->blue_[0], 256)
+ + BitsEntropy(&p->alpha_[0], 256)
+ + BitsEntropy(&p->distance_[0], NUM_DISTANCE_CODES);
+ // Compute the extra bits cost.
+ int i;
+ for (i = 2; i < NUM_LENGTH_CODES - 2; ++i) {
+ retval +=
+ (i >> 1) * p->literal_[256 + i + 2];
+ }
+ for (i = 2; i < NUM_DISTANCE_CODES - 2; ++i) {
+ retval += (i >> 1) * p->distance_[i + 2];
+ }
+ return retval;
+}
+
+
+// Returns the cost to encode the RLE-encoded entropy code.
+// The constants in this function are experimental.
+static double HuffmanCost(const int* const population, int length) {
+ // Small bias because Huffman code length is typically not stored in
+ // full length.
+ static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
+ static const double kSmallBias = 9.1;
+ double retval = kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
+ int streak = 0;
+ int i = 0;
+ for (; i < length - 1; ++i) {
+ ++streak;
+ if (population[i] == population[i + 1]) {
+ continue;
+ }
+ last_streak_hack:
+    // population[i] now points to the symbol in the streak of same values.
+ if (streak > 3) {
+ if (population[i] == 0) {
+ retval += 1.5625 + 0.234375 * streak;
+ } else {
+ retval += 2.578125 + 0.703125 * streak;
+ }
+ } else {
+ if (population[i] == 0) {
+ retval += 1.796875 * streak;
+ } else {
+ retval += 3.28125 * streak;
+ }
+ }
+ streak = 0;
+ }
+ if (i == length - 1) {
+ ++streak;
+ goto last_streak_hack;
+ }
+ return retval;
+}
+
+// Estimates the Huffman dictionary + other block overhead size.
+static double HistogramEstimateBitsHeader(const VP8LHistogram* const p) {
+ return HuffmanCost(&p->alpha_[0], 256) +
+ HuffmanCost(&p->red_[0], 256) +
+ HuffmanCost(&p->literal_[0], VP8LHistogramNumCodes(p)) +
+ HuffmanCost(&p->blue_[0], 256) +
+ HuffmanCost(&p->distance_[0], NUM_DISTANCE_CODES);
+}
+
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
+ return HistogramEstimateBitsHeader(p) + VP8LHistogramEstimateBitsBulk(p);
+}
+
+static void HistogramBuildImage(int xsize, int histo_bits,
+ const VP8LBackwardRefs* const backward_refs,
+ VP8LHistogramSet* const image) {
+ int i;
+ int x = 0, y = 0;
+ const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);
+ VP8LHistogram** const histograms = image->histograms;
+ assert(histo_bits > 0);
+ for (i = 0; i < backward_refs->size; ++i) {
+ const PixOrCopy* const v = &backward_refs->refs[i];
+ const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
+ VP8LHistogramAddSinglePixOrCopy(histograms[ix], v);
+ x += PixOrCopyLength(v);
+ while (x >= xsize) {
+ x -= xsize;
+ ++y;
+ }
+ }
+}
+
+static uint32_t MyRand(uint32_t *seed) {
+ *seed *= 16807U;
+ if (*seed == 0) {
+ *seed = 1;
+ }
+ return *seed;
+}
+
+static int HistogramCombine(const VP8LHistogramSet* const in,
+ VP8LHistogramSet* const out, int num_pairs) {
+ int ok = 0;
+ int i, iter;
+ uint32_t seed = 0;
+ int tries_with_no_success = 0;
+ const int min_cluster_size = 2;
+ int out_size = in->size;
+ const int outer_iters = in->size * 3;
+ VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
+ VP8LHistogram* cur_combo = histos + 0; // trial merged histogram
+ VP8LHistogram* best_combo = histos + 1; // best merged histogram so far
+ if (histos == NULL) goto End;
+
+ // Copy histograms from in[] to out[].
+ assert(in->size <= out->size);
+ for (i = 0; i < in->size; ++i) {
+ in->histograms[i]->bit_cost_ = VP8LHistogramEstimateBits(in->histograms[i]);
+ *out->histograms[i] = *in->histograms[i];
+ }
+
+ // Collapse similar histograms in 'out'.
+ for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) {
+ // We pick the best pair to be combined out of 'inner_iters' pairs.
+ double best_cost_diff = 0.;
+ int best_idx1 = 0, best_idx2 = 1;
+ int j;
+ seed += iter;
+ for (j = 0; j < num_pairs; ++j) {
+ double curr_cost_diff;
+ // Choose two histograms at random and try to combine them.
+ const uint32_t idx1 = MyRand(&seed) % out_size;
+ const uint32_t tmp = ((j & 7) + 1) % (out_size - 1);
+ const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1);
+ const uint32_t idx2 = (idx1 + diff + 1) % out_size;
+ if (idx1 == idx2) {
+ continue;
+ }
+ *cur_combo = *out->histograms[idx1];
+ VP8LHistogramAdd(cur_combo, out->histograms[idx2]);
+ cur_combo->bit_cost_ = VP8LHistogramEstimateBits(cur_combo);
+ // Calculate cost reduction on combining.
+ curr_cost_diff = cur_combo->bit_cost_
+ - out->histograms[idx1]->bit_cost_
+ - out->histograms[idx2]->bit_cost_;
+ if (best_cost_diff > curr_cost_diff) { // found a better pair?
+ { // swap cur/best combo histograms
+ VP8LHistogram* const tmp_histo = cur_combo;
+ cur_combo = best_combo;
+ best_combo = tmp_histo;
+ }
+ best_cost_diff = curr_cost_diff;
+ best_idx1 = idx1;
+ best_idx2 = idx2;
+ }
+ }
+
+ if (best_cost_diff < 0.0) {
+ *out->histograms[best_idx1] = *best_combo;
+ // swap best_idx2 slot with last one (which is now unused)
+ --out_size;
+ if (best_idx2 != out_size) {
+ out->histograms[best_idx2] = out->histograms[out_size];
+ out->histograms[out_size] = NULL; // just for sanity check.
+ }
+ tries_with_no_success = 0;
+ }
+ if (++tries_with_no_success >= 50) {
+ break;
+ }
+ }
+ out->size = out_size;
+ ok = 1;
+
+ End:
+ free(histos);
+ return ok;
+}
+
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// Returns the bit cost of moving square_histogram from
+// cur_symbol to candidate_symbol.
+// TODO(skal): we don't really need to copy the histogram and Add(). Instead,
+// a VP8LDualHistogramEstimateBits(A, B) estimation function would suffice.
+static double HistogramDistance(const VP8LHistogram* const square_histogram,
+ const VP8LHistogram* const candidate) {
+ const double previous_bit_cost = candidate->bit_cost_;
+ double new_bit_cost;
+ VP8LHistogram modified_histo;
+ modified_histo = *candidate;
+ VP8LHistogramAdd(&modified_histo, square_histogram);
+ new_bit_cost = VP8LHistogramEstimateBits(&modified_histo);
+
+ return new_bit_cost - previous_bit_cost;
+}
+
+// Find the best 'out' histogram for each of the 'in' histograms.
+// Note: we assume that out[]->bit_cost_ is already up-to-date.
+static void HistogramRemap(const VP8LHistogramSet* const in,
+ const VP8LHistogramSet* const out,
+ uint16_t* const symbols) {
+ int i;
+ for (i = 0; i < in->size; ++i) {
+ int best_out = 0;
+ double best_bits = HistogramDistance(in->histograms[i], out->histograms[0]);
+ int k;
+ for (k = 1; k < out->size; ++k) {
+ const double cur_bits =
+ HistogramDistance(in->histograms[i], out->histograms[k]);
+ if (cur_bits < best_bits) {
+ best_bits = cur_bits;
+ best_out = k;
+ }
+ }
+ symbols[i] = best_out;
+ }
+
+ // Recompute each out based on raw and symbols.
+ for (i = 0; i < out->size; ++i) {
+ HistogramClear(out->histograms[i]);
+ }
+ for (i = 0; i < in->size; ++i) {
+ VP8LHistogramAdd(out->histograms[symbols[i]], in->histograms[i]);
+ }
+}
+
+int VP8LGetHistoImageSymbols(int xsize, int ysize,
+ const VP8LBackwardRefs* const refs,
+ int quality, int histo_bits, int cache_bits,
+ VP8LHistogramSet* const image_in,
+ uint16_t* const histogram_symbols) {
+ int ok = 0;
+ const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
+ const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
+ const int num_histo_pairs = 10 + quality / 2; // For HistogramCombine().
+ const int histo_image_raw_size = histo_xsize * histo_ysize;
+ VP8LHistogramSet* const image_out =
+ VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits);
+ if (image_out == NULL) return 0;
+
+ // Build histogram image.
+ HistogramBuildImage(xsize, histo_bits, refs, image_out);
+ // Collapse similar histograms.
+ if (!HistogramCombine(image_out, image_in, num_histo_pairs)) {
+ goto Error;
+ }
+ // Find the optimal map from original histograms to the final ones.
+ HistogramRemap(image_out, image_in, histogram_symbols);
+ ok = 1;
+
+Error:
+ free(image_out);
+ return ok;
+}
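In BitsEntropy() above, VP8LFastSLog2(x) is a fast approximation of x*log2(x), so the accumulated value is sum*log2(sum) - sum_i count_i*log2(count_i): the ideal entropy-coder cost of the symbol counts, which the 'mix' heuristic then clamps with a Huffman-oriented lower bound. A minimal sketch of just the Shannon part, using plain libm (link with -lm) instead of the lookup-table helper:

// Sketch only: ideal entropy-coder cost of a set of symbol counts,
// i.e. sum*log2(sum) - sum_i c_i*log2(c_i), without libwebp's fast tables
// or the 'mix' clamping applied by BitsEntropy().
#include <math.h>
#include <stdio.h>

static double ShannonBits(const int* counts, int n) {
  double bits = 0.;
  int sum = 0, i;
  for (i = 0; i < n; ++i) {
    if (counts[i] > 0) {
      sum += counts[i];
      bits -= counts[i] * log2((double)counts[i]);
    }
  }
  if (sum > 0) bits += sum * log2((double)sum);
  return bits;
}

int main(void) {
  const int counts[4] = { 10, 10, 5, 1 };
  printf("estimated bits: %.2f\n", ShannonBits(counts, 4));
  return 0;
}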
diff --git a/src/enc/histogram.h b/src/enc/histogram.h
new file mode 100644
index 00000000..b99b7588
--- /dev/null
+++ b/src/enc/histogram.h
@@ -0,0 +1,115 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Models the histograms of literal and distance codes.
+
+#ifndef WEBP_ENC_HISTOGRAM_H_
+#define WEBP_ENC_HISTOGRAM_H_
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "./backward_references.h"
+#include "webp/format_constants.h"
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// A simple container for histograms of data.
+typedef struct {
+ // literal_ contains green literal, palette-code and
+ // copy-length-prefix histogram
+ int literal_[PIX_OR_COPY_CODES_MAX];
+ int red_[256];
+ int blue_[256];
+ int alpha_[256];
+ // Backward reference prefix-code histogram.
+ int distance_[NUM_DISTANCE_CODES];
+ int palette_code_bits_;
+ double bit_cost_; // cached value of VP8LHistogramEstimateBits(this)
+} VP8LHistogram;
+
+// Collection of histograms with fixed capacity, allocated as one
+// big memory chunk. Can be destroyed by simply calling 'free()'.
+typedef struct {
+ int size; // number of slots currently in use
+ int max_size; // maximum capacity
+ VP8LHistogram** histograms;
+} VP8LHistogramSet;
+
+// Create the histogram.
+//
+// The input data is the PixOrCopy data, which models the literals, stop
+// codes and backward references (both distances and lengths). Also: if
+// palette_code_bits is >= 0, initialize the histogram with this value.
+void VP8LHistogramCreate(VP8LHistogram* const p,
+ const VP8LBackwardRefs* const refs,
+ int palette_code_bits);
+
+// Set the palette_code_bits and reset the stats.
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
+
+// Collect all the references into a histogram (without reset)
+void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
+ VP8LHistogram* const histo);
+
+// Allocate an array of pointers to histograms, each allocated and
+// initialized using 'cache_bits'. Returns NULL in case of memory error.
+VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits);
+
+// Accumulate a token 'v' into a histogram.
+void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
+ const PixOrCopy* const v);
+
+// Estimate the approximate number of bits needed to encode the combined
+// entropy of the literal and distance codes.
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
+
+// This function estimates the cost in bits excluding the bits needed to
+// represent the entropy code itself.
+double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
+
+static WEBP_INLINE void VP8LHistogramAdd(VP8LHistogram* const p,
+ const VP8LHistogram* const a) {
+ int i;
+ for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
+ p->literal_[i] += a->literal_[i];
+ }
+ for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+ p->distance_[i] += a->distance_[i];
+ }
+ for (i = 0; i < 256; ++i) {
+ p->red_[i] += a->red_[i];
+ p->blue_[i] += a->blue_[i];
+ p->alpha_[i] += a->alpha_[i];
+ }
+}
+
+static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) {
+ return 256 + NUM_LENGTH_CODES +
+ ((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0);
+}
+
+// Builds the histogram image.
+int VP8LGetHistoImageSymbols(int xsize, int ysize,
+ const VP8LBackwardRefs* const refs,
+ int quality, int histogram_bits, int cache_bits,
+ VP8LHistogramSet* const image_in,
+ uint16_t* const histogram_symbols);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif // WEBP_ENC_HISTOGRAM_H_
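The header above exposes the histogram containers used by the lossless encoder; everything lives in one malloc'ed chunk, so a whole set is released with a single free(). A usage sketch follows. These are internal headers, not part of the public libwebp API, so the include path is an assumption and the program only links when built against the encoder objects (histogram.c and its dsp/utils dependencies).

// Sketch only: exercising the VP8LHistogramSet API declared above from
// inside the source tree; the include path below is an assumption.
#include <stdio.h>
#include <stdlib.h>
#include "src/enc/histogram.h"

int main(void) {
  const int cache_bits = 0;   // no color-cache codes in this example
  VP8LHistogramSet* const set = VP8LAllocateHistogramSet(2, cache_bits);
  if (set == NULL) return 1;

  // Fake a few literal occurrences directly in the count arrays.
  ++set->histograms[0]->literal_[12];
  ++set->histograms[0]->red_[200];
  ++set->histograms[1]->literal_[12];

  // Merge histogram #1 into #0 and estimate its coding cost.
  VP8LHistogramAdd(set->histograms[0], set->histograms[1]);
  printf("estimated bits: %.2f\n",
         VP8LHistogramEstimateBits(set->histograms[0]));

  free(set);   // the set and all its histograms are one memory chunk
  return 0;
}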
diff --git a/src/enc/iterator.c b/src/enc/iterator.c
index 3a8ad048..86e473bc 100644
--- a/src/enc/iterator.c
+++ b/src/enc/iterator.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,22 +9,22 @@
//
// Author: Skal (pascal.massimino@gmail.com)
-#include <stdlib.h>
#include <string.h>
-#include "vp8enci.h"
+
+#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// VP8Iterator
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
static void InitLeft(VP8EncIterator* const it) {
const VP8Encoder* const enc = it->enc_;
enc->y_left_[-1] = enc->u_left_[-1] = enc->v_left_[-1] =
- (it->y_) > 0 ? 129 : 127;
+ (it->y_ > 0) ? 129 : 127;
memset(enc->y_left_, 129, 16);
memset(enc->u_left_, 129, 8);
memset(enc->v_left_, 129, 8);
@@ -33,7 +33,7 @@ static void InitLeft(VP8EncIterator* const it) {
static void InitTop(VP8EncIterator* const it) {
const VP8Encoder* const enc = it->enc_;
- const int top_size = enc->mb_w_ * 16;
+ const size_t top_size = enc->mb_w_ * 16;
memset(enc->y_top_, 127, 2 * top_size);
memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
}
@@ -65,66 +65,81 @@ void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
it->yuv_out2_ = enc->yuv_out2_;
it->yuv_p_ = enc->yuv_p_;
it->lf_stats_ = enc->lf_stats_;
+ it->percent0_ = enc->percent_;
VP8IteratorReset(it);
}
-//-----------------------------------------------------------------------------
+int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
+ VP8Encoder* const enc = it->enc_;
+ if (delta && enc->pic_->progress_hook) {
+ const int percent = (enc->mb_h_ <= 1)
+ ? it->percent0_
+ : it->percent0_ + delta * it->y_ / (enc->mb_h_ - 1);
+ return WebPReportProgress(enc->pic_, percent, &enc->percent_);
+ }
+ return 1;
+}
+
+//------------------------------------------------------------------------------
// Import the source samples into the cache. Takes care of replicating
// boundary pixels if necessary.
+static void ImportBlock(const uint8_t* src, int src_stride,
+ uint8_t* dst, int w, int h, int size) {
+ int i;
+ for (i = 0; i < h; ++i) {
+ memcpy(dst, src, w);
+ if (w < size) {
+ memset(dst + w, dst[w - 1], size - w);
+ }
+ dst += BPS;
+ src += src_stride;
+ }
+ for (i = h; i < size; ++i) {
+ memcpy(dst, dst - BPS, size);
+ dst += BPS;
+ }
+}
+
void VP8IteratorImport(const VP8EncIterator* const it) {
const VP8Encoder* const enc = it->enc_;
const int x = it->x_, y = it->y_;
const WebPPicture* const pic = enc->pic_;
- const uint8_t* ysrc = pic->y + (y * pic->y_stride + x) * 16;
- const uint8_t* usrc = pic->u + (y * pic->uv_stride + x) * 8;
- const uint8_t* vsrc = pic->v + (y * pic->uv_stride + x) * 8;
- uint8_t* ydst = it->yuv_in_ + Y_OFF;
- uint8_t* udst = it->yuv_in_ + U_OFF;
- uint8_t* vdst = it->yuv_in_ + V_OFF;
+ const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16;
+ const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
+ const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
+ uint8_t* const ydst = it->yuv_in_ + Y_OFF;
+ uint8_t* const udst = it->yuv_in_ + U_OFF;
+ uint8_t* const vdst = it->yuv_in_ + V_OFF;
int w = (pic->width - x * 16);
int h = (pic->height - y * 16);
- int i;
if (w > 16) w = 16;
if (h > 16) h = 16;
+
// Luma plane
- for (i = 0; i < h; ++i) {
- memcpy(ydst, ysrc, w);
- if (w < 16) memset(ydst + w, ydst[w - 1], 16 - w);
- ydst += BPS;
- ysrc += pic->y_stride;
- }
- for (i = h; i < 16; ++i) {
- memcpy(ydst, ydst - BPS, 16);
- ydst += BPS;
- }
- // U/V plane
- w = (w + 1) / 2;
- h = (h + 1) / 2;
- for (i = 0; i < h; ++i) {
- memcpy(udst, usrc, w);
- memcpy(vdst, vsrc, w);
- if (w < 8) {
- memset(udst + w, udst[w - 1], 8 - w);
- memset(vdst + w, vdst[w - 1], 8 - w);
- }
- udst += BPS;
- vdst += BPS;
- usrc += pic->uv_stride;
- vsrc += pic->uv_stride;
- }
- for (i = h; i < 8; ++i) {
- memcpy(udst, udst - BPS, 8);
- memcpy(vdst, vdst - BPS, 8);
- udst += BPS;
- vdst += BPS;
+ ImportBlock(ysrc, pic->y_stride, ydst, w, h, 16);
+
+ { // U/V planes
+ const int uv_w = (w + 1) >> 1;
+ const int uv_h = (h + 1) >> 1;
+ ImportBlock(usrc, pic->uv_stride, udst, uv_w, uv_h, 8);
+ ImportBlock(vsrc, pic->uv_stride, vdst, uv_w, uv_h, 8);
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Copy back the compressed samples into user space if requested.
+static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride,
+ int w, int h) {
+ while (h-- > 0) {
+ memcpy(dst, src, w);
+ dst += dst_stride;
+ src += BPS;
+ }
+}
+
void VP8IteratorExport(const VP8EncIterator* const it) {
const VP8Encoder* const enc = it->enc_;
if (enc->config_->show_compressed) {
@@ -133,33 +148,28 @@ void VP8IteratorExport(const VP8EncIterator* const it) {
const uint8_t* const usrc = it->yuv_out_ + U_OFF;
const uint8_t* const vsrc = it->yuv_out_ + V_OFF;
const WebPPicture* const pic = enc->pic_;
- uint8_t* ydst = pic->y + (y * pic->y_stride + x) * 16;
- uint8_t* udst = pic->u + (y * pic->uv_stride + x) * 8;
- uint8_t* vdst = pic->v + (y * pic->uv_stride + x) * 8;
+ uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
+ uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
+ uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;
int w = (pic->width - x * 16);
int h = (pic->height - y * 16);
- int i;
if (w > 16) w = 16;
if (h > 16) h = 16;
// Luma plane
- for (i = 0; i < h; ++i) {
- memcpy(ydst + i * pic->y_stride, ysrc + i * BPS, w);
- }
- // U/V plane
- {
- const int uv_w = (w + 1) / 2;
- const int uv_h = (h + 1) / 2;
- for (i = 0; i < uv_h; ++i) {
- memcpy(udst + i * pic->uv_stride, usrc + i * BPS, uv_w);
- memcpy(vdst + i * pic->uv_stride, vsrc + i * BPS, uv_w);
- }
+ ExportBlock(ysrc, ydst, pic->y_stride, w, h);
+
+ { // U/V planes
+ const int uv_w = (w + 1) >> 1;
+ const int uv_h = (h + 1) >> 1;
+ ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h);
+ ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h);
}
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Non-zero contexts setup/teardown
// Nz bits:
@@ -178,54 +188,58 @@ void VP8IteratorExport(const VP8EncIterator* const it) {
void VP8IteratorNzToBytes(VP8EncIterator* const it) {
const int tnz = it->nz_[0], lnz = it->nz_[-1];
+ int* const top_nz = it->top_nz_;
+ int* const left_nz = it->left_nz_;
// Top-Y
- it->top_nz_[0] = BIT(tnz, 12);
- it->top_nz_[1] = BIT(tnz, 13);
- it->top_nz_[2] = BIT(tnz, 14);
- it->top_nz_[3] = BIT(tnz, 15);
+ top_nz[0] = BIT(tnz, 12);
+ top_nz[1] = BIT(tnz, 13);
+ top_nz[2] = BIT(tnz, 14);
+ top_nz[3] = BIT(tnz, 15);
// Top-U
- it->top_nz_[4] = BIT(tnz, 18);
- it->top_nz_[5] = BIT(tnz, 19);
+ top_nz[4] = BIT(tnz, 18);
+ top_nz[5] = BIT(tnz, 19);
// Top-V
- it->top_nz_[6] = BIT(tnz, 22);
- it->top_nz_[7] = BIT(tnz, 23);
+ top_nz[6] = BIT(tnz, 22);
+ top_nz[7] = BIT(tnz, 23);
// DC
- it->top_nz_[8] = BIT(tnz, 24);
+ top_nz[8] = BIT(tnz, 24);
// left-Y
- it->left_nz_[0] = BIT(lnz, 3);
- it->left_nz_[1] = BIT(lnz, 7);
- it->left_nz_[2] = BIT(lnz, 11);
- it->left_nz_[3] = BIT(lnz, 15);
+ left_nz[0] = BIT(lnz, 3);
+ left_nz[1] = BIT(lnz, 7);
+ left_nz[2] = BIT(lnz, 11);
+ left_nz[3] = BIT(lnz, 15);
// left-U
- it->left_nz_[4] = BIT(lnz, 17);
- it->left_nz_[5] = BIT(lnz, 19);
+ left_nz[4] = BIT(lnz, 17);
+ left_nz[5] = BIT(lnz, 19);
// left-V
- it->left_nz_[6] = BIT(lnz, 21);
- it->left_nz_[7] = BIT(lnz, 23);
+ left_nz[6] = BIT(lnz, 21);
+ left_nz[7] = BIT(lnz, 23);
// left-DC is special, iterated separately
}
void VP8IteratorBytesToNz(VP8EncIterator* const it) {
uint32_t nz = 0;
+ const int* const top_nz = it->top_nz_;
+ const int* const left_nz = it->left_nz_;
// top
- nz |= (it->top_nz_[0] << 12) | (it->top_nz_[1] << 13);
- nz |= (it->top_nz_[2] << 14) | (it->top_nz_[3] << 15);
- nz |= (it->top_nz_[4] << 18) | (it->top_nz_[5] << 19);
- nz |= (it->top_nz_[6] << 22) | (it->top_nz_[7] << 23);
- nz |= (it->top_nz_[8] << 24); // we propagate the _top_ bit, esp. for intra4
+ nz |= (top_nz[0] << 12) | (top_nz[1] << 13);
+ nz |= (top_nz[2] << 14) | (top_nz[3] << 15);
+ nz |= (top_nz[4] << 18) | (top_nz[5] << 19);
+ nz |= (top_nz[6] << 22) | (top_nz[7] << 23);
+ nz |= (top_nz[8] << 24); // we propagate the _top_ bit, esp. for intra4
// left
- nz |= (it->left_nz_[0] << 3) | (it->left_nz_[1] << 7);
- nz |= (it->left_nz_[2] << 11);
- nz |= (it->left_nz_[4] << 17) | (it->left_nz_[6] << 21);
+ nz |= (left_nz[0] << 3) | (left_nz[1] << 7);
+ nz |= (left_nz[2] << 11);
+ nz |= (left_nz[4] << 17) | (left_nz[6] << 21);
*it->nz_ = nz;
}
#undef BIT
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Advance to the next position, doing the bookkeeping.
int VP8IteratorNext(VP8EncIterator* const it,
@@ -270,12 +284,12 @@ int VP8IteratorNext(VP8EncIterator* const it,
return (0 < --it->done_);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Helper function to set mode properties
void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
- int y;
uint8_t* preds = it->preds_;
+ int y;
for (y = 0; y < 4; ++y) {
memset(preds, mode, 4);
preds += it->enc_->preds_w_;
@@ -283,14 +297,13 @@ void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
it->mb_->type_ = 1;
}
-void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]) {
- int x, y;
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {
uint8_t* preds = it->preds_;
- for (y = 0; y < 4; ++y) {
- for (x = 0; x < 4; ++x) {
- preds[x] = modes[x + y * 4];
- }
+ int y;
+ for (y = 4; y > 0; --y) {
+ memcpy(preds, modes, 4 * sizeof(*modes));
preds += it->enc_->preds_w_;
+ modes += 4;
}
it->mb_->type_ = 0;
}
@@ -307,7 +320,7 @@ void VP8SetSegment(const VP8EncIterator* const it, int segment) {
it->mb_->segment_ = segment;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Intra4x4 sub-blocks iteration
//
// We store and update the boundary samples into an array of 37 pixels. They
@@ -347,7 +360,7 @@ static const uint8_t VP8TopLeftI4[16] = {
};
void VP8IteratorStartI4(VP8EncIterator* const it) {
- VP8Encoder* const enc = it->enc_;
+ const VP8Encoder* const enc = it->enc_;
int i;
it->i4_ = 0; // first 4x4 sub-block
@@ -393,7 +406,7 @@ int VP8IteratorRotateI4(VP8EncIterator* const it,
}
}
// move pointers to next sub-block
- it->i4_++;
+ ++it->i4_;
if (it->i4_ == 16) { // we're done
return 0;
}
@@ -402,7 +415,7 @@ int VP8IteratorRotateI4(VP8EncIterator* const it,
return 1;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
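ImportBlock()/ExportBlock() above factor the per-plane copy loops out of VP8IteratorImport()/Export(); the only subtle part is replicating the last column and row when a 16x16 luma (or 8x8 chroma) block extends past the picture border. A self-contained sketch of that edge replication, with a caller-supplied destination stride standing in for the encoder's fixed BPS:

// Sketch only: copy a w x h block into a size x size buffer, replicating the
// rightmost column and bottom row to fill the padding (as ImportBlock() does).
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void ImportBlockSketch(const uint8_t* src, int src_stride,
                              uint8_t* dst, int dst_stride,
                              int w, int h, int size) {
  int i;
  for (i = 0; i < h; ++i) {        // copy the rows that exist...
    memcpy(dst, src, w);
    if (w < size) {                // ...replicating the last column
      memset(dst + w, dst[w - 1], size - w);
    }
    dst += dst_stride;
    src += src_stride;
  }
  for (i = h; i < size; ++i) {     // then replicate the last padded row
    memcpy(dst, dst - dst_stride, size);
    dst += dst_stride;
  }
}

int main(void) {
  const uint8_t src[2 * 3] = { 1, 2, 3, 4, 5, 6 };   // a 3x2 source block
  uint8_t dst[4 * 4];
  int x, y;
  ImportBlockSketch(src, 3, dst, 4, 3, 2, 4);        // pad it to 4x4
  for (y = 0; y < 4; ++y) {
    for (x = 0; x < 4; ++x) printf("%d ", dst[y * 4 + x]);
    printf("\n");
  }
  return 0;
}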
diff --git a/src/enc/layer.c b/src/enc/layer.c
index ec4dc87c..423127df 100644
--- a/src/enc/layer.c
+++ b/src/enc/layer.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,19 +9,15 @@
//
// Author: Skal (pascal.massimino@gmail.com)
-#include <assert.h>
#include <stdlib.h>
-#include "vp8enci.h"
+
+#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-
-#endif /* WEBP_EXPERIMENTAL_FEATURES */
-
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
void VP8EncInitLayer(VP8Encoder* const enc) {
enc->use_layer_ = (enc->pic_->u0 != NULL);
@@ -34,8 +30,6 @@ void VP8EncInitLayer(VP8Encoder* const enc) {
void VP8EncCodeLayerBlock(VP8EncIterator* it) {
(void)it; // remove a warning
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-#endif /* WEBP_EXPERIMENTAL_FEATURES */
}
int VP8EncFinishLayer(VP8Encoder* const enc) {
diff --git a/src/enc/picture.c b/src/enc/picture.c
index b6446622..44eed060 100644
--- a/src/enc/picture.c
+++ b/src/enc/picture.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -11,302 +11,384 @@
#include <assert.h>
#include <stdlib.h>
-#include "vp8enci.h"
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "../utils/rescaler.h"
+#include "../utils/utils.h"
+#include "../dsp/dsp.h"
+#include "../dsp/yuv.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+#define HALVE(x) (((x) + 1) >> 1)
+#define IS_YUV_CSP(csp, YUV_CSP) (((csp) & WEBP_CSP_UV_MASK) == (YUV_CSP))
+
+static const union {
+ uint32_t argb;
+ uint8_t bytes[4];
+} test_endian = { 0xff000000u };
+#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
+
+//------------------------------------------------------------------------------
// WebPPicture
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
-int WebPPictureAlloc(WebPPicture* const picture) {
- if (picture) {
+int WebPPictureAlloc(WebPPicture* picture) {
+ if (picture != NULL) {
const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
const int width = picture->width;
const int height = picture->height;
- const int y_stride = width;
- const int uv_width = (width + 1) / 2;
- const int uv_height = (height + 1) / 2;
- const int uv_stride = uv_width;
- int uv0_stride = 0;
- int a_width, a_stride;
- uint64_t y_size, uv_size, uv0_size, a_size, total_size;
- uint8_t* mem;
-
- // U/V
- switch (uv_csp) {
- case WEBP_YUV420:
- break;
+
+ if (!picture->use_argb) {
+ const int y_stride = width;
+ const int uv_width = HALVE(width);
+ const int uv_height = HALVE(height);
+ const int uv_stride = uv_width;
+ int uv0_stride = 0;
+ int a_width, a_stride;
+ uint64_t y_size, uv_size, uv0_size, a_size, total_size;
+ uint8_t* mem;
+
+ // U/V
+ switch (uv_csp) {
+ case WEBP_YUV420:
+ break;
#ifdef WEBP_EXPERIMENTAL_FEATURES
- case WEBP_YUV400: // for now, we'll just reset the U/V samples
- break;
- case WEBP_YUV422:
- uv0_stride = uv_width;
- break;
- case WEBP_YUV444:
- uv0_stride = width;
- break;
+ case WEBP_YUV400: // for now, we'll just reset the U/V samples
+ break;
+ case WEBP_YUV422:
+ uv0_stride = uv_width;
+ break;
+ case WEBP_YUV444:
+ uv0_stride = width;
+ break;
#endif
- default:
+ default:
+ return 0;
+ }
+ uv0_size = height * uv0_stride;
+
+ // alpha
+ a_width = has_alpha ? width : 0;
+ a_stride = a_width;
+ y_size = (uint64_t)y_stride * height;
+ uv_size = (uint64_t)uv_stride * uv_height;
+ a_size = (uint64_t)a_stride * height;
+
+ total_size = y_size + a_size + 2 * uv_size + 2 * uv0_size;
+
+ // Security and validation checks
+ if (width <= 0 || height <= 0 || // luma/alpha param error
+ uv_width < 0 || uv_height < 0) { // u/v param error
return 0;
- }
- uv0_size = height * uv0_stride;
-
- // alpha
- a_width = has_alpha ? width : 0;
- a_stride = a_width;
- y_size = (uint64_t)y_stride * height;
- uv_size = (uint64_t)uv_stride * uv_height;
- a_size = (uint64_t)a_stride * height;
-
- total_size = y_size + a_size + 2 * uv_size + 2 * uv0_size;
-
- // Security and validation checks
- if (width <= 0 || height <= 0 || // check for luma/alpha param error
- uv_width < 0 || uv_height < 0 || // check for u/v param error
- y_size >= (1ULL << 40) || // check for reasonable global size
- (size_t)total_size != total_size) { // check for overflow on 32bit
- return 0;
- }
- picture->y_stride = y_stride;
- picture->uv_stride = uv_stride;
- picture->a_stride = a_stride;
- picture->uv0_stride = uv0_stride;
- WebPPictureFree(picture); // erase previous buffer
- mem = (uint8_t*)malloc((size_t)total_size);
- if (mem == NULL) return 0;
-
- picture->y = mem;
- mem += y_size;
-
- picture->u = mem;
- mem += uv_size;
- picture->v = mem;
- mem += uv_size;
-
- if (a_size) {
- picture->a = mem;
- mem += a_size;
- }
- if (uv0_size) {
- picture->u0 = mem;
- mem += uv0_size;
- picture->v0 = mem;
- mem += uv0_size;
+ }
+ // Clear previous buffer and allocate a new one.
+ WebPPictureFree(picture); // erase previous buffer
+ mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+ if (mem == NULL) return 0;
+
+ // From now on, we're in the clear, we can no longer fail...
+ picture->memory_ = (void*)mem;
+ picture->y_stride = y_stride;
+ picture->uv_stride = uv_stride;
+ picture->a_stride = a_stride;
+ picture->uv0_stride = uv0_stride;
+ // TODO(skal): we could align the y/u/v planes and adjust stride.
+ picture->y = mem;
+ mem += y_size;
+
+ picture->u = mem;
+ mem += uv_size;
+ picture->v = mem;
+ mem += uv_size;
+
+ if (a_size) {
+ picture->a = mem;
+ mem += a_size;
+ }
+ if (uv0_size) {
+ picture->u0 = mem;
+ mem += uv0_size;
+ picture->v0 = mem;
+ mem += uv0_size;
+ }
+ } else {
+ void* memory;
+ const uint64_t argb_size = (uint64_t)width * height;
+ if (width <= 0 || height <= 0) {
+ return 0;
+ }
+ // Clear previous buffer and allocate a new one.
+ WebPPictureFree(picture); // erase previous buffer
+ memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb));
+ if (memory == NULL) return 0;
+
+ // TODO(skal): align plane to cache line?
+ picture->memory_argb_ = memory;
+ picture->argb = (uint32_t*)memory;
+ picture->argb_stride = width;
}
}
return 1;
}
+// Remove reference to the ARGB buffer (doesn't free anything).
+static void PictureResetARGB(WebPPicture* const picture) {
+ picture->memory_argb_ = NULL;
+ picture->argb = NULL;
+ picture->argb_stride = 0;
+}
+
+// Remove reference to the YUVA buffer (doesn't free anything).
+static void PictureResetYUVA(WebPPicture* const picture) {
+ picture->memory_ = NULL;
+ picture->y = picture->u = picture->v = picture->a = NULL;
+ picture->u0 = picture->v0 = NULL;
+ picture->y_stride = picture->uv_stride = 0;
+ picture->a_stride = 0;
+ picture->uv0_stride = 0;
+}
+
// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them
-// into 'dst'. Mark 'dst' as not owning any memory. 'src' can be NULL.
+// into 'dst'. Mark 'dst' as not owning any memory.
static void WebPPictureGrabSpecs(const WebPPicture* const src,
WebPPicture* const dst) {
- if (src) *dst = *src;
- dst->y = dst->u = dst->v = NULL;
- dst->u0 = dst->v0 = NULL;
- dst->a = NULL;
+ assert(src != NULL && dst != NULL);
+ *dst = *src;
+ PictureResetYUVA(dst);
+ PictureResetARGB(dst);
}
-// Release memory owned by 'picture'.
-void WebPPictureFree(WebPPicture* const picture) {
- if (picture) {
- free(picture->y);
- WebPPictureGrabSpecs(NULL, picture);
+// Allocate a new argb buffer, discarding any existing one and preserving
+// the other YUV(A) buffer.
+static int PictureAllocARGB(WebPPicture* const picture) {
+ WebPPicture tmp;
+ free(picture->memory_argb_);
+ PictureResetARGB(picture);
+ picture->use_argb = 1;
+ WebPPictureGrabSpecs(picture, &tmp);
+ if (!WebPPictureAlloc(&tmp)) {
+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
+ picture->memory_argb_ = tmp.memory_argb_;
+ picture->argb = tmp.argb;
+ picture->argb_stride = tmp.argb_stride;
+ return 1;
}
-//-----------------------------------------------------------------------------
+// Release memory owned by 'picture' (both YUV and ARGB buffers).
+void WebPPictureFree(WebPPicture* picture) {
+ if (picture != NULL) {
+ free(picture->memory_);
+ free(picture->memory_argb_);
+ PictureResetYUVA(picture);
+ PictureResetARGB(picture);
+ }
+}
+
+//------------------------------------------------------------------------------
// Picture copying
-int WebPPictureCopy(const WebPPicture* const src, WebPPicture* const dst) {
- int y;
+// Not worth moving to dsp/enc.c (only used here).
+static void CopyPlane(const uint8_t* src, int src_stride,
+ uint8_t* dst, int dst_stride, int width, int height) {
+ while (height-- > 0) {
+ memcpy(dst, src, width);
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+// Adjust top-left corner to chroma sample position.
+static void SnapTopLeftPosition(const WebPPicture* const pic,
+ int* const left, int* const top) {
+ if (!pic->use_argb) {
+ const int is_yuv422 = IS_YUV_CSP(pic->colorspace, WEBP_YUV422);
+ if (IS_YUV_CSP(pic->colorspace, WEBP_YUV420) || is_yuv422) {
+ *left &= ~1;
+ if (!is_yuv422) *top &= ~1;
+ }
+ }
+}
+
+// Adjust top-left corner and verify that the sub-rectangle is valid.
+static int AdjustAndCheckRectangle(const WebPPicture* const pic,
+ int* const left, int* const top,
+ int width, int height) {
+ SnapTopLeftPosition(pic, left, top);
+ if ((*left) < 0 || (*top) < 0) return 0;
+ if (width <= 0 || height <= 0) return 0;
+ if ((*left) + width > pic->width) return 0;
+ if ((*top) + height > pic->height) return 0;
+ return 1;
+}
+
+int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
if (src == NULL || dst == NULL) return 0;
if (src == dst) return 1;
WebPPictureGrabSpecs(src, dst);
if (!WebPPictureAlloc(dst)) return 0;
- for (y = 0; y < dst->height; ++y) {
- memcpy(dst->y + y * dst->y_stride,
- src->y + y * src->y_stride, src->width);
- }
- for (y = 0; y < (dst->height + 1) / 2; ++y) {
- memcpy(dst->u + y * dst->uv_stride,
- src->u + y * src->uv_stride, (src->width + 1) / 2);
- memcpy(dst->v + y * dst->uv_stride,
- src->v + y * src->uv_stride, (src->width + 1) / 2);
- }
+ if (!src->use_argb) {
+ CopyPlane(src->y, src->y_stride,
+ dst->y, dst->y_stride, dst->width, dst->height);
+ CopyPlane(src->u, src->uv_stride,
+ dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
+ CopyPlane(src->v, src->uv_stride,
+ dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
+ if (dst->a != NULL) {
+ CopyPlane(src->a, src->a_stride,
+ dst->a, dst->a_stride, dst->width, dst->height);
+ }
#ifdef WEBP_EXPERIMENTAL_FEATURES
- if (dst->a != NULL) {
- for (y = 0; y < dst->height; ++y) {
- memcpy(dst->a + y * dst->a_stride,
- src->a + y * src->a_stride, src->width);
+ if (dst->u0 != NULL) {
+ int uv0_width = src->width;
+ if (IS_YUV_CSP(dst->colorspace, WEBP_YUV422)) {
+ uv0_width = HALVE(uv0_width);
+ }
+ CopyPlane(src->u0, src->uv0_stride,
+ dst->u0, dst->uv0_stride, uv0_width, dst->height);
+ CopyPlane(src->v0, src->uv0_stride,
+ dst->v0, dst->uv0_stride, uv0_width, dst->height);
}
+#endif
+ } else {
+ CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride,
+ (uint8_t*)dst->argb, 4 * dst->argb_stride,
+ 4 * dst->width, dst->height);
}
- if (dst->u0 != NULL) {
- int uv0_width = src->width;
- if ((dst->colorspace & WEBP_CSP_UV_MASK) == WEBP_YUV422) {
- uv0_width = (uv0_width + 1) / 2;
+ return 1;
+}
+
+int WebPPictureIsView(const WebPPicture* picture) {
+ if (picture == NULL) return 0;
+ if (picture->use_argb) {
+ return (picture->memory_argb_ == NULL);
+ }
+ return (picture->memory_ == NULL);
+}
+
+int WebPPictureView(const WebPPicture* src,
+ int left, int top, int width, int height,
+ WebPPicture* dst) {
+ if (src == NULL || dst == NULL) return 0;
+
+ // verify rectangle position.
+ if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0;
+
+ if (src != dst) { // beware of aliasing! We don't want to leak 'memory_'.
+ WebPPictureGrabSpecs(src, dst);
+ }
+ dst->width = width;
+ dst->height = height;
+ if (!src->use_argb) {
+ dst->y = src->y + top * src->y_stride + left;
+ dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1);
+ dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1);
+ if (src->a != NULL) {
+ dst->a = src->a + top * src->a_stride + left;
}
- for (y = 0; y < dst->height; ++y) {
- memcpy(dst->u0 + y * dst->uv0_stride,
- src->u0 + y * src->uv0_stride, uv0_width);
- memcpy(dst->v0 + y * dst->uv0_stride,
- src->v0 + y * src->uv0_stride, uv0_width);
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+ if (src->u0 != NULL) {
+ const int left_pos =
+ IS_YUV_CSP(dst->colorspace, WEBP_YUV422) ? (left >> 1) : left;
+ dst->u0 = src->u0 + top * src->uv0_stride + left_pos;
+ dst->v0 = src->v0 + top * src->uv0_stride + left_pos;
}
- }
#endif
+ } else {
+ dst->argb = src->argb + top * src->argb_stride + left;
+ }
return 1;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Picture cropping
-int WebPPictureCrop(WebPPicture* const pic,
+int WebPPictureCrop(WebPPicture* pic,
int left, int top, int width, int height) {
WebPPicture tmp;
- int y;
if (pic == NULL) return 0;
- if (width <= 0 || height <= 0) return 0;
- if (left < 0 || ((left + width + 1) & ~1) > pic->width) return 0;
- if (top < 0 || ((top + height + 1) & ~1) > pic->height) return 0;
+ if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0;
WebPPictureGrabSpecs(pic, &tmp);
tmp.width = width;
tmp.height = height;
if (!WebPPictureAlloc(&tmp)) return 0;
- for (y = 0; y < height; ++y) {
- memcpy(tmp.y + y * tmp.y_stride,
- pic->y + (top + y) * pic->y_stride + left, width);
- }
- for (y = 0; y < (height + 1) / 2; ++y) {
- const int offset = (y + top / 2) * pic->uv_stride + left / 2;
- memcpy(tmp.u + y * tmp.uv_stride, pic->u + offset, (width + 1) / 2);
- memcpy(tmp.v + y * tmp.uv_stride, pic->v + offset, (width + 1) / 2);
- }
-
-#ifdef WEBP_EXPERIMENTAL_FEATURES
- if (tmp.a) {
- for (y = 0; y < height; ++y) {
- memcpy(tmp.a + y * tmp.a_stride,
- pic->a + (top + y) * pic->a_stride + left, width);
+ if (!pic->use_argb) {
+ const int y_offset = top * pic->y_stride + left;
+ const int uv_offset = (top / 2) * pic->uv_stride + left / 2;
+ CopyPlane(pic->y + y_offset, pic->y_stride,
+ tmp.y, tmp.y_stride, width, height);
+ CopyPlane(pic->u + uv_offset, pic->uv_stride,
+ tmp.u, tmp.uv_stride, HALVE(width), HALVE(height));
+ CopyPlane(pic->v + uv_offset, pic->uv_stride,
+ tmp.v, tmp.uv_stride, HALVE(width), HALVE(height));
+
+ if (tmp.a != NULL) {
+ const int a_offset = top * pic->a_stride + left;
+ CopyPlane(pic->a + a_offset, pic->a_stride,
+ tmp.a, tmp.a_stride, width, height);
}
- }
- if (tmp.u0) {
- int w = width;
- int l = left;
- if (tmp.colorspace == WEBP_YUV422) {
- w = (w + 1) / 2;
- l = (l + 1) / 2;
- }
- for (y = 0; y < height; ++y) {
- memcpy(tmp.u0 + y * tmp.uv0_stride,
- pic->u0 + (top + y) * pic->uv0_stride + l, w);
- memcpy(tmp.v0 + y * tmp.uv0_stride,
- pic->v0 + (top + y) * pic->uv0_stride + l, w);
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+ if (tmp.u0 != NULL) {
+ int w = width;
+ int left_pos = left;
+ if (IS_YUV_CSP(tmp.colorspace, WEBP_YUV422)) {
+ w = HALVE(w);
+ left_pos = HALVE(left_pos);
+ }
+ CopyPlane(pic->u0 + top * pic->uv0_stride + left_pos, pic->uv0_stride,
+ tmp.u0, tmp.uv0_stride, w, height);
+ CopyPlane(pic->v0 + top * pic->uv0_stride + left_pos, pic->uv0_stride,
+ tmp.v0, tmp.uv0_stride, w, height);
}
- }
#endif
-
+ } else {
+ const uint8_t* const src =
+ (const uint8_t*)(pic->argb + top * pic->argb_stride + left);
+ CopyPlane(src, pic->argb_stride * 4,
+ (uint8_t*)tmp.argb, tmp.argb_stride * 4,
+ width * 4, height);
+ }
WebPPictureFree(pic);
*pic = tmp;
return 1;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Simple picture rescaler
-#define RFIX 30
-#define MULT(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
-static inline void ImportRow(const uint8_t* src, int src_width,
- int32_t* frow, int32_t* irow, int dst_width) {
- const int x_expand = (src_width < dst_width);
- const int fx_scale = (1 << RFIX) / dst_width;
- int x_in = 0;
- int x_out;
- int x_accum = 0;
- if (!x_expand) {
- int sum = 0;
- for (x_out = 0; x_out < dst_width; ++x_out) {
- x_accum += src_width - dst_width;
- for (; x_accum > 0; x_accum -= dst_width) {
- sum += src[x_in++];
- }
- { // Emit next horizontal pixel.
- const int32_t base = src[x_in++];
- const int32_t frac = base * (-x_accum);
- frow[x_out] = (sum + base) * dst_width - frac;
- sum = MULT(frac, fx_scale); // fresh fractional start for next pixel
- }
- }
- } else { // simple bilinear interpolation
- int left = src[0], right = src[0];
- for (x_out = 0; x_out < dst_width; ++x_out) {
- if (x_accum < 0) {
- left = right;
- right = src[++x_in];
- x_accum += dst_width - 1;
- }
- frow[x_out] = right * (dst_width - 1) + (left - right) * x_accum;
- x_accum -= src_width - 1;
- }
- }
- // Accumulate the new row's contribution
- for (x_out = 0; x_out < dst_width; ++x_out) {
- irow[x_out] += frow[x_out];
- }
-}
-
-static void ExportRow(int32_t* frow, int32_t* irow, uint8_t* dst, int dst_width,
- const int yscale, const int64_t fxy_scale) {
- int x_out;
- for (x_out = 0; x_out < dst_width; ++x_out) {
- const int frac = MULT(frow[x_out], yscale);
- const int v = MULT(irow[x_out] - frac, fxy_scale);
- dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
- irow[x_out] = frac; // new fractional start
- }
-}
-
static void RescalePlane(const uint8_t* src,
int src_width, int src_height, int src_stride,
uint8_t* dst,
int dst_width, int dst_height, int dst_stride,
- int32_t* const work) {
- const int x_expand = (src_width < dst_width);
- const int fy_scale = (1 << RFIX) / dst_height;
- const int64_t fxy_scale = x_expand ?
- ((int64_t)dst_height << RFIX) / (dst_width * src_height) :
- ((int64_t)dst_height << RFIX) / (src_width * src_height);
- int y_accum = src_height;
- int y;
- int32_t* irow = work; // integral contribution
- int32_t* frow = work + dst_width; // fractional contribution
-
- memset(work, 0, 2 * dst_width * sizeof(*work));
- for (y = 0; y < src_height; ++y) {
- // import new contribution of one source row.
- ImportRow(src, src_width, frow, irow, dst_width);
- src += src_stride;
- // emit output row(s)
- y_accum -= dst_height;
- for (; y_accum <= 0; y_accum += src_height) {
- const int yscale = fy_scale * (-y_accum);
- ExportRow(frow, irow, dst, dst_width, yscale, fxy_scale);
- dst += dst_stride;
- }
+ int32_t* const work,
+ int num_channels) {
+ WebPRescaler rescaler;
+ int y = 0;
+ WebPRescalerInit(&rescaler, src_width, src_height,
+ dst, dst_width, dst_height, dst_stride,
+ num_channels,
+ src_width, dst_width,
+ src_height, dst_height,
+ work);
+ memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
+ while (y < src_height) {
+ y += WebPRescalerImport(&rescaler, src_height - y,
+ src + y * src_stride, src_stride);
+ WebPRescalerExport(&rescaler);
}
}
-#undef MULT
-#undef RFIX
-int WebPPictureRescale(WebPPicture* const pic, int width, int height) {
+int WebPPictureRescale(WebPPicture* pic, int width, int height) {
WebPPicture tmp;
int prev_width, prev_height;
int32_t* work;
@@ -330,123 +412,139 @@ int WebPPictureRescale(WebPPicture* const pic, int width, int height) {
tmp.height = height;
if (!WebPPictureAlloc(&tmp)) return 0;
- work = malloc(2 * width * sizeof(int32_t));
- if (work == NULL) {
- WebPPictureFree(&tmp);
- return 0;
- }
-
- RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
- tmp.y, width, height, tmp.y_stride, work);
- RescalePlane(pic->u,
- (prev_width + 1) / 2, (prev_height + 1) / 2, pic->uv_stride,
- tmp.u,
- (width + 1) / 2, (height + 1) / 2, tmp.uv_stride, work);
- RescalePlane(pic->v,
- (prev_width + 1) / 2, (prev_height + 1) / 2, pic->uv_stride,
- tmp.v,
- (width + 1) / 2, (height + 1) / 2, tmp.uv_stride, work);
+ if (!pic->use_argb) {
+ work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
+ if (work == NULL) {
+ WebPPictureFree(&tmp);
+ return 0;
+ }
+ RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
+ tmp.y, width, height, tmp.y_stride, work, 1);
+ RescalePlane(pic->u,
+ HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
+ tmp.u,
+ HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+ RescalePlane(pic->v,
+ HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
+ tmp.v,
+ HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+
+ if (tmp.a != NULL) {
+ RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
+ tmp.a, width, height, tmp.a_stride, work, 1);
+ }
#ifdef WEBP_EXPERIMENTAL_FEATURES
- if (tmp.a) {
- RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
- tmp.a, width, height, tmp.a_stride, work);
- }
- if (tmp.u0) {
- int s = 1;
- if ((tmp.colorspace & WEBP_CSP_UV_MASK) == WEBP_YUV422) {
- s = 2;
+ if (tmp.u0 != NULL) {
+ const int s = IS_YUV_CSP(tmp.colorspace, WEBP_YUV422) ? 2 : 1;
+ RescalePlane(
+ pic->u0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride,
+ tmp.u0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1);
+ RescalePlane(
+ pic->v0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride,
+ tmp.v0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1);
}
- RescalePlane(
- pic->u0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride,
- tmp.u0, (width + s / 2) / s, height, tmp.uv0_stride, work);
- RescalePlane(
- pic->v0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride,
- tmp.v0, (width + s / 2) / s, height, tmp.uv0_stride, work);
- }
#endif
+ } else {
+ work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
+ if (work == NULL) {
+ WebPPictureFree(&tmp);
+ return 0;
+ }
+
+ RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
+ pic->argb_stride * 4,
+ (uint8_t*)tmp.argb, width, height,
+ tmp.argb_stride * 4,
+ work, 4);
+ }
WebPPictureFree(pic);
free(work);
*pic = tmp;
return 1;
}
-//-----------------------------------------------------------------------------
-// Write-to-memory
-
-typedef struct {
- uint8_t** mem;
- size_t max_size;
- size_t* size;
-} WebPMemoryWriter;
+//------------------------------------------------------------------------------
+// WebPMemoryWriter: Write-to-memory
-static void InitMemoryWriter(WebPMemoryWriter* const writer) {
- *writer->mem = NULL;
- *writer->size = 0;
+void WebPMemoryWriterInit(WebPMemoryWriter* writer) {
+ writer->mem = NULL;
+ writer->size = 0;
writer->max_size = 0;
}
-static int WebPMemoryWrite(const uint8_t* data, size_t data_size,
- const WebPPicture* const picture) {
+int WebPMemoryWrite(const uint8_t* data, size_t data_size,
+ const WebPPicture* picture) {
WebPMemoryWriter* const w = (WebPMemoryWriter*)picture->custom_ptr;
- size_t next_size;
+ uint64_t next_size;
if (w == NULL) {
return 1;
}
- next_size = (*w->size) + data_size;
+ next_size = (uint64_t)w->size + data_size;
if (next_size > w->max_size) {
uint8_t* new_mem;
- size_t next_max_size = w->max_size * 2;
+ uint64_t next_max_size = 2ULL * w->max_size;
if (next_max_size < next_size) next_max_size = next_size;
- if (next_max_size < 8192) next_max_size = 8192;
- new_mem = (uint8_t*)malloc(next_max_size);
+ if (next_max_size < 8192ULL) next_max_size = 8192ULL;
+ new_mem = (uint8_t*)WebPSafeMalloc(next_max_size, 1);
if (new_mem == NULL) {
return 0;
}
- if ((*w->size) > 0) {
- memcpy(new_mem, *w->mem, *w->size);
+ if (w->size > 0) {
+ memcpy(new_mem, w->mem, w->size);
}
- free(*w->mem);
- *w->mem = new_mem;
- w->max_size = next_max_size;
+ free(w->mem);
+ w->mem = new_mem;
+ // down-cast is ok, thanks to WebPSafeMalloc
+ w->max_size = (size_t)next_max_size;
}
- if (data_size) {
- memcpy((*w->mem) + (*w->size), data, data_size);
- *w->size += data_size;
+ if (data_size > 0) {
+ memcpy(w->mem + w->size, data, data_size);
+ w->size += data_size;
}
return 1;
}
-//-----------------------------------------------------------------------------
-// RGB -> YUV conversion
-// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
-// More information at: http://en.wikipedia.org/wiki/YCbCr
-// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
-// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
-// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
-// We use 16bit fixed point operations.
-
-enum { YUV_FRAC = 16 };
-
-static inline int clip_uv(int v) {
- v = (v + (257 << (YUV_FRAC + 2 - 1))) >> (YUV_FRAC + 2);
- return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
-}
-
-static inline int rgb_to_y(int r, int g, int b) {
- const int kRound = (1 << (YUV_FRAC - 1)) + (16 << YUV_FRAC);
- const int luma = 16839 * r + 33059 * g + 6420 * b;
- return (luma + kRound) >> YUV_FRAC; // no need to clip
+//------------------------------------------------------------------------------
+// Detection of non-trivial transparency
+
+// Returns true if alpha[] has non-0xff values.
+static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
+ int x_step, int y_step) {
+ if (alpha == NULL) return 0;
+ while (height-- > 0) {
+ int x;
+ for (x = 0; x < width * x_step; x += x_step) {
+ if (alpha[x] != 0xff) return 1; // TODO(skal): check 4/8 bytes at a time.
+ }
+ alpha += y_step;
+ }
+ return 0;
}
-static inline int rgb_to_u(int r, int g, int b) {
- return clip_uv(-9719 * r - 19081 * g + 28800 * b);
+// Checking for the presence of non-opaque alpha.
+int WebPPictureHasTransparency(const WebPPicture* picture) {
+ if (picture == NULL) return 0;
+ if (!picture->use_argb) {
+ return CheckNonOpaque(picture->a, picture->width, picture->height,
+ 1, picture->a_stride);
+ } else {
+ int x, y;
+ const uint32_t* argb = picture->argb;
+ if (argb == NULL) return 0;
+ for (y = 0; y < picture->height; ++y) {
+ for (x = 0; x < picture->width; ++x) {
+ if (argb[x] < 0xff000000u) return 1; // test any alpha values != 0xff
+ }
+ argb += picture->argb_stride;
+ }
+ }
+ return 0;
}
-static inline int rgb_to_v(int r, int g, int b) {
- return clip_uv(+28800 * r - 24116 * g - 4684 * b);
-}
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
// TODO: we can do better than simply 2x2 averaging on U/V samples.
#define SUM4(ptr) ((ptr)[0] + (ptr)[step] + \
@@ -460,8 +558,8 @@ static inline int rgb_to_v(int r, int g, int b) {
const int r = SUM(r_ptr + src); \
const int g = SUM(g_ptr + src); \
const int b = SUM(b_ptr + src); \
- picture->u[dst] = rgb_to_u(r, g, b); \
- picture->v[dst] = rgb_to_v(r, g, b); \
+ picture->u[dst] = VP8RGBToU(r, g, b); \
+ picture->v[dst] = VP8RGBToV(r, g, b); \
}
#define RGB_TO_UV0(x_in, x_out, y, SUM) { \
@@ -470,36 +568,46 @@ static inline int rgb_to_v(int r, int g, int b) {
const int r = SUM(r_ptr + src); \
const int g = SUM(g_ptr + src); \
const int b = SUM(b_ptr + src); \
- picture->u0[dst] = rgb_to_u(r, g, b); \
- picture->v0[dst] = rgb_to_v(r, g, b); \
+ picture->u0[dst] = VP8RGBToU(r, g, b); \
+ picture->v0[dst] = VP8RGBToV(r, g, b); \
}
static void MakeGray(WebPPicture* const picture) {
int y;
- const int uv_width = (picture->width + 1) >> 1;
- for (y = 0; y < ((picture->height + 1) >> 1); ++y) {
+ const int uv_width = HALVE(picture->width);
+ const int uv_height = HALVE(picture->height);
+ for (y = 0; y < uv_height; ++y) {
memset(picture->u + y * picture->uv_stride, 128, uv_width);
memset(picture->v + y * picture->uv_stride, 128, uv_width);
}
}
-static int Import(WebPPicture* const picture,
- const uint8_t* const rgb, int rgb_stride,
- int step, int swap_rb, int import_alpha) {
+static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
+ const uint8_t* const g_ptr,
+ const uint8_t* const b_ptr,
+ const uint8_t* const a_ptr,
+ int step, // bytes per pixel
+ int rgb_stride, // bytes per scanline
+ WebPPicture* const picture) {
const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
int x, y;
- const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0);
- const uint8_t* const g_ptr = rgb + 1;
- const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2);
const int width = picture->width;
const int height = picture->height;
+ const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
+
+ picture->colorspace = uv_csp;
+ picture->use_argb = 0;
+ if (has_alpha) {
+ picture->colorspace |= WEBP_CSP_ALPHA_BIT;
+ }
+ if (!WebPPictureAlloc(picture)) return 0;
// Import luma plane
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const int offset = step * x + y * rgb_stride;
picture->y[x + y * picture->y_stride] =
- rgb_to_y(r_ptr[offset], g_ptr[offset], b_ptr[offset]);
+ VP8RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset]);
}
}
@@ -509,7 +617,7 @@ static int Import(WebPPicture* const picture,
for (x = 0; x < (width >> 1); ++x) {
RGB_TO_UV(x, y, SUM4);
}
- if (picture->width & 1) {
+ if (width & 1) {
RGB_TO_UV(x, y, SUM2V);
}
}
@@ -545,17 +653,65 @@ static int Import(WebPPicture* const picture,
MakeGray(picture);
}
- if (import_alpha) {
-#ifdef WEBP_EXPERIMENTAL_FEATURES
- const uint8_t* const a_ptr = rgb + 3;
+ if (has_alpha) {
assert(step >= 4);
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
picture->a[x + y * picture->a_stride] =
- a_ptr[step * x + y * rgb_stride];
+ a_ptr[step * x + y * rgb_stride];
+ }
+ }
+ }
+ return 1;
+}
+
+static int Import(WebPPicture* const picture,
+ const uint8_t* const rgb, int rgb_stride,
+ int step, int swap_rb, int import_alpha) {
+ const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0);
+ const uint8_t* const g_ptr = rgb + 1;
+ const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2);
+ const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL;
+ const int width = picture->width;
+ const int height = picture->height;
+
+ if (!picture->use_argb) {
+ return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
+ picture);
+ }
+ if (import_alpha) {
+ picture->colorspace |= WEBP_CSP_ALPHA_BIT;
+ } else {
+ picture->colorspace &= ~WEBP_CSP_ALPHA_BIT;
+ }
+ if (!WebPPictureAlloc(picture)) return 0;
+
+ if (!import_alpha) {
+ int x, y;
+ for (y = 0; y < height; ++y) {
+ for (x = 0; x < width; ++x) {
+ const int offset = step * x + y * rgb_stride;
+ const uint32_t argb =
+ 0xff000000u |
+ (r_ptr[offset] << 16) |
+ (g_ptr[offset] << 8) |
+ (b_ptr[offset]);
+ picture->argb[x + y * picture->argb_stride] = argb;
+ }
+ }
+ } else {
+ int x, y;
+ assert(step >= 4);
+ for (y = 0; y < height; ++y) {
+ for (x = 0; x < width; ++x) {
+ const int offset = step * x + y * rgb_stride;
+ const uint32_t argb = (a_ptr[offset] << 24) |
+ (r_ptr[offset] << 16) |
+ (g_ptr[offset] << 8) |
+ (b_ptr[offset]);
+ picture->argb[x + y * picture->argb_stride] = argb;
}
}
-#endif
}
return 1;
}
@@ -565,42 +721,264 @@ static int Import(WebPPicture* const picture,
#undef SUM1
#undef RGB_TO_UV
-int WebPPictureImportRGB(WebPPicture* const picture,
- const uint8_t* const rgb, int rgb_stride) {
- picture->colorspace &= ~WEBP_CSP_ALPHA_BIT;
- if (!WebPPictureAlloc(picture)) return 0;
+int WebPPictureImportRGB(WebPPicture* picture,
+ const uint8_t* rgb, int rgb_stride) {
return Import(picture, rgb, rgb_stride, 3, 0, 0);
}
-int WebPPictureImportBGR(WebPPicture* const picture,
- const uint8_t* const rgb, int rgb_stride) {
- picture->colorspace &= ~WEBP_CSP_ALPHA_BIT;
- if (!WebPPictureAlloc(picture)) return 0;
+int WebPPictureImportBGR(WebPPicture* picture,
+ const uint8_t* rgb, int rgb_stride) {
return Import(picture, rgb, rgb_stride, 3, 1, 0);
}
-int WebPPictureImportRGBA(WebPPicture* const picture,
- const uint8_t* const rgba, int rgba_stride) {
- picture->colorspace |= WEBP_CSP_ALPHA_BIT;
- if (!WebPPictureAlloc(picture)) return 0;
+int WebPPictureImportRGBA(WebPPicture* picture,
+ const uint8_t* rgba, int rgba_stride) {
return Import(picture, rgba, rgba_stride, 4, 0, 1);
}
-int WebPPictureImportBGRA(WebPPicture* const picture,
- const uint8_t* const rgba, int rgba_stride) {
- picture->colorspace |= WEBP_CSP_ALPHA_BIT;
- if (!WebPPictureAlloc(picture)) return 0;
+int WebPPictureImportBGRA(WebPPicture* picture,
+ const uint8_t* rgba, int rgba_stride) {
return Import(picture, rgba, rgba_stride, 4, 1, 1);
}
-//-----------------------------------------------------------------------------
-// Simplest call:
+int WebPPictureImportRGBX(WebPPicture* picture,
+ const uint8_t* rgba, int rgba_stride) {
+ return Import(picture, rgba, rgba_stride, 4, 0, 0);
+}
+
+int WebPPictureImportBGRX(WebPPicture* picture,
+ const uint8_t* rgba, int rgba_stride) {
+ return Import(picture, rgba, rgba_stride, 4, 1, 0);
+}
+
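For reference, a minimal usage sketch for the importers above, assuming the caller already holds an interleaved RGBA buffer; the wrapper name InitPictureFromRGBA is hypothetical:

#include "webp/encode.h"

// Wrap an interleaved RGBA buffer ('stride' bytes per row) into 'pic'.
// Returns 0 on failure (version mismatch, bad parameters or out of memory).
static int InitPictureFromRGBA(WebPPicture* const pic,
                               const uint8_t* rgba, int width, int height,
                               int stride) {
  if (!WebPPictureInit(pic)) return 0;   // checks the ABI version
  pic->width = width;
  pic->height = height;
  pic->use_argb = 1;     // keep the ARGB form (needed for lossless encoding)
  return WebPPictureImportRGBA(pic, rgba, stride);
}

With use_argb left at 0, the same call goes through ImportYUVAFromRGBA() instead and fills the YUVA planes directly.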
+//------------------------------------------------------------------------------
+// Automatic YUV <-> ARGB conversions.
+
+int WebPPictureYUVAToARGB(WebPPicture* picture) {
+ if (picture == NULL) return 0;
+ if (picture->memory_ == NULL || picture->y == NULL ||
+ picture->u == NULL || picture->v == NULL) {
+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+ }
+ if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+ }
+ if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+ }
+ // Allocate a new argb buffer (discarding the previous one).
+ if (!PictureAllocARGB(picture)) return 0;
+
+ // Convert
+ {
+ int y;
+ const int width = picture->width;
+ const int height = picture->height;
+ const int argb_stride = 4 * picture->argb_stride;
+ uint8_t* dst = (uint8_t*)picture->argb;
+ const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
+ WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
+
+ // First row, with replicated top samples.
+ upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, width);
+ cur_y += picture->y_stride;
+ dst += argb_stride;
+ // Center rows.
+ for (y = 1; y + 1 < height; y += 2) {
+ const uint8_t* const top_u = cur_u;
+ const uint8_t* const top_v = cur_v;
+ cur_u += picture->uv_stride;
+ cur_v += picture->uv_stride;
+ upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v,
+ dst, dst + argb_stride, width);
+ cur_y += 2 * picture->y_stride;
+ dst += 2 * argb_stride;
+ }
+ // Last row (if needed), with replicated bottom samples.
+ if (height > 1 && !(height & 1)) {
+ upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
+ }
+    // Insert alpha values if needed, replacing the default 0xff ones.
+ if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
+ for (y = 0; y < height; ++y) {
+ uint32_t* const dst = picture->argb + y * picture->argb_stride;
+ const uint8_t* const src = picture->a + y * picture->a_stride;
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst[x] = (dst[x] & 0x00ffffffu) | (src[x] << 24);
+ }
+ }
+ }
+ }
+ return 1;
+}
+
+int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
+ if (picture == NULL) return 0;
+ if (picture->argb == NULL) {
+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+ } else {
+ const uint8_t* const argb = (const uint8_t*)picture->argb;
+ const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1;
+ const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2;
+ const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3;
+ const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0;
+ // We work on a tmp copy of 'picture', because ImportYUVAFromRGBA()
+ // would be calling WebPPictureFree(picture) otherwise.
+ WebPPicture tmp = *picture;
+ PictureResetARGB(&tmp); // reset ARGB buffer so that it's not free()'d.
+ tmp.use_argb = 0;
+ tmp.colorspace = colorspace & WEBP_CSP_UV_MASK;
+ if (!ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, &tmp)) {
+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+ }
+ // Copy back the YUV specs into 'picture'.
+ tmp.argb = picture->argb;
+ tmp.argb_stride = picture->argb_stride;
+ tmp.memory_argb_ = picture->memory_argb_;
+ *picture = tmp;
+ }
+ return 1;
+}
+
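A short round-trip sketch for the two conversion entry points, e.g. to preview the loss introduced by the RGB->YUV step; the helper name RoundTripARGB is hypothetical and error handling is reduced to return codes:

#include "webp/encode.h"

// Convert an ARGB picture to YUV420(+alpha) and back. Both calls allocate
// their target buffers internally and return 0 on error.
static int RoundTripARGB(WebPPicture* const pic) {
  if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
  return WebPPictureYUVAToARGB(pic);
}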
+//------------------------------------------------------------------------------
+// Helper: clean up fully transparent area to help compressibility.
+
+#define SIZE 8
+#define SIZE2 (SIZE / 2)
+static int is_transparent_area(const uint8_t* ptr, int stride, int size) {
+ int y, x;
+ for (y = 0; y < size; ++y) {
+ for (x = 0; x < size; ++x) {
+ if (ptr[x]) {
+ return 0;
+ }
+ }
+ ptr += stride;
+ }
+ return 1;
+}
+
+static WEBP_INLINE void flatten(uint8_t* ptr, int v, int stride, int size) {
+ int y;
+ for (y = 0; y < size; ++y) {
+ memset(ptr, v, size);
+ ptr += stride;
+ }
+}
+
+void WebPCleanupTransparentArea(WebPPicture* pic) {
+ int x, y, w, h;
+ const uint8_t* a_ptr;
+ int values[3] = { 0 };
+
+ if (pic == NULL) return;
+
+ a_ptr = pic->a;
+ if (a_ptr == NULL) return; // nothing to do
+
+ w = pic->width / SIZE;
+ h = pic->height / SIZE;
+ for (y = 0; y < h; ++y) {
+ int need_reset = 1;
+ for (x = 0; x < w; ++x) {
+ const int off_a = (y * pic->a_stride + x) * SIZE;
+ const int off_y = (y * pic->y_stride + x) * SIZE;
+ const int off_uv = (y * pic->uv_stride + x) * SIZE2;
+ if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) {
+ if (need_reset) {
+ values[0] = pic->y[off_y];
+ values[1] = pic->u[off_uv];
+ values[2] = pic->v[off_uv];
+ need_reset = 0;
+ }
+ flatten(pic->y + off_y, values[0], pic->y_stride, SIZE);
+ flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2);
+ flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2);
+ } else {
+ need_reset = 1;
+ }
+ }
+ // ignore the left-overs on right/bottom
+ }
+}
+
+#undef SIZE
+#undef SIZE2
+
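Fully transparent 8x8 blocks carry arbitrary YUV samples that are invisible after decoding but still cost bits to encode; the helper above overwrites them with a locally constant color. A usage sketch, assuming WebPCleanupTransparentArea() is exported through webp/encode.h as in this release; the wrapper name EncodeWithCleanup is hypothetical:

#include "webp/encode.h"

// Flatten the YUV samples under fully-transparent blocks right before
// encoding. The alpha plane itself is untouched, so decoded pixels with
// alpha == 0 look the same; the encoder just stops spending bits on the
// invisible noise underneath them.
static int EncodeWithCleanup(WebPConfig* const config, WebPPicture* const pic) {
  WebPCleanupTransparentArea(pic);   // no-op if pic->a == NULL
  return WebPEncode(config, pic);
}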
+
+//------------------------------------------------------------------------------
+// Distortion
+
+// Max value returned in case of exact similarity.
+static const double kMinDistortion_dB = 99.;
+
+int WebPPictureDistortion(const WebPPicture* pic1, const WebPPicture* pic2,
+ int type, float result[5]) {
+ int c;
+ DistoStats stats[5];
+ int has_alpha;
+
+ if (pic1 == NULL || pic2 == NULL ||
+ pic1->width != pic2->width || pic1->height != pic2->height ||
+ pic1->y == NULL || pic2->y == NULL ||
+ pic1->u == NULL || pic2->u == NULL ||
+ pic1->v == NULL || pic2->v == NULL ||
+ result == NULL) {
+ return 0;
+ }
+ // TODO(skal): provide distortion for ARGB too.
+ if (pic1->use_argb == 1 || pic1->use_argb != pic2->use_argb) {
+ return 0;
+ }
+
+ has_alpha = !!(pic1->colorspace & WEBP_CSP_ALPHA_BIT);
+ if (has_alpha != !!(pic2->colorspace & WEBP_CSP_ALPHA_BIT) ||
+ (has_alpha && (pic1->a == NULL || pic2->a == NULL))) {
+ return 0;
+ }
+
+ memset(stats, 0, sizeof(stats));
+ VP8SSIMAccumulatePlane(pic1->y, pic1->y_stride,
+ pic2->y, pic2->y_stride,
+ pic1->width, pic1->height, &stats[0]);
+ VP8SSIMAccumulatePlane(pic1->u, pic1->uv_stride,
+ pic2->u, pic2->uv_stride,
+ (pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
+ &stats[1]);
+ VP8SSIMAccumulatePlane(pic1->v, pic1->uv_stride,
+ pic2->v, pic2->uv_stride,
+ (pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
+ &stats[2]);
+ if (has_alpha) {
+ VP8SSIMAccumulatePlane(pic1->a, pic1->a_stride,
+ pic2->a, pic2->a_stride,
+ pic1->width, pic1->height, &stats[3]);
+ }
+ for (c = 0; c <= 4; ++c) {
+ if (type == 1) {
+ const double v = VP8SSIMGet(&stats[c]);
+ result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
+ : kMinDistortion_dB);
+ } else {
+ const double v = VP8SSIMGetSquaredError(&stats[c]);
+ result[c] = (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
+ : kMinDistortion_dB);
+ }
+ // Accumulate forward
+ if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
+ }
+ return 1;
+}
+
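Two details worth spelling out: result[0..3] are the per-plane values (Y, U, V, A) and result[4] the combined one, and the magic constant -4.3429448 is just -10/ln(10), so the type != 1 branch reduces to 10*log10(255^2 / squared_error), i.e. the usual PSNR; type == 1 maps SSIM to dB via -10*log10(1 - ssim). Both are capped at 99 dB. A small usage sketch, with the helper name PrintDistortion being hypothetical:

#include <stdio.h>
#include "webp/encode.h"

// Print the distortion between a source picture and its decoded version.
// type: 0 = PSNR, 1 = SSIM (both reported in dB).
static void PrintDistortion(const WebPPicture* const src,
                            const WebPPicture* const dst, int type) {
  float d[5];
  if (WebPPictureDistortion(src, dst, type, d)) {
    printf("%s  Y:%.2f U:%.2f V:%.2f A:%.2f  All:%.2f\n",
           (type == 1) ? "SSIM" : "PSNR", d[0], d[1], d[2], d[3], d[4]);
  }
}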
+//------------------------------------------------------------------------------
+// Simplest high-level calls:
typedef int (*Importer)(WebPPicture* const, const uint8_t* const, int);
static size_t Encode(const uint8_t* rgba, int width, int height, int stride,
- Importer import, float quality_factor, uint8_t** output) {
- size_t output_size = 0;
+ Importer import, float quality_factor, int lossless,
+ uint8_t** output) {
WebPPicture pic;
WebPConfig config;
WebPMemoryWriter wrt;
@@ -611,29 +989,29 @@ static size_t Encode(const uint8_t* rgba, int width, int height, int stride,
return 0; // shouldn't happen, except if system installation is broken
}
+ config.lossless = !!lossless;
+ pic.use_argb = !!lossless;
pic.width = width;
pic.height = height;
pic.writer = WebPMemoryWrite;
pic.custom_ptr = &wrt;
-
- wrt.mem = output;
- wrt.size = &output_size;
- InitMemoryWriter(&wrt);
+ WebPMemoryWriterInit(&wrt);
ok = import(&pic, rgba, stride) && WebPEncode(&config, &pic);
WebPPictureFree(&pic);
if (!ok) {
- free(*output);
+ free(wrt.mem);
*output = NULL;
return 0;
}
- return output_size;
+ *output = wrt.mem;
+ return wrt.size;
}
-#define ENCODE_FUNC(NAME, IMPORTER) \
-size_t NAME(const uint8_t* in, int w, int h, int bps, float q, \
- uint8_t** out) { \
- return Encode(in, w, h, bps, IMPORTER, q, out); \
+#define ENCODE_FUNC(NAME, IMPORTER) \
+size_t NAME(const uint8_t* in, int w, int h, int bps, float q, \
+ uint8_t** out) { \
+ return Encode(in, w, h, bps, IMPORTER, q, 0, out); \
}
ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB);
@@ -643,7 +1021,20 @@ ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA);
#undef ENCODE_FUNC
-//-----------------------------------------------------------------------------
+#define LOSSLESS_DEFAULT_QUALITY 70.
+#define LOSSLESS_ENCODE_FUNC(NAME, IMPORTER) \
+size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) { \
+ return Encode(in, w, h, bps, IMPORTER, LOSSLESS_DEFAULT_QUALITY, 1, out); \
+}
+
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB);
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR);
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA);
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA);
+
+#undef LOSSLESS_ENCODE_FUNC
+
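The one-call helpers above wrap the whole Import + WebPEncode pipeline; the lossy variants take a quality factor while the lossless ones use the fixed LOSSLESS_DEFAULT_QUALITY internally. A minimal usage sketch (the wrapper name CompressRGBA is hypothetical); the returned buffer is malloc()'ed by the memory writer and must be free()'d by the caller:

#include "webp/encode.h"

// Compress an interleaved RGBA buffer with the one-call API.
// Returns the compressed size (0 on error) and stores the output in *out.
// 'stride' is in bytes, 'quality' in [0..100] and only used when lossy.
static size_t CompressRGBA(const uint8_t* rgba, int w, int h, int stride,
                           float quality, int lossless, uint8_t** out) {
  return lossless ? WebPEncodeLosslessRGBA(rgba, w, h, stride, out)
                  : WebPEncodeRGBA(rgba, w, h, stride, quality, out);
}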
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
diff --git a/src/enc/quant.c b/src/enc/quant.c
index 31ec8144..ea153849 100644
--- a/src/enc/quant.c
+++ b/src/enc/quant.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -12,8 +12,8 @@
#include <assert.h>
#include <math.h>
-#include "vp8enci.h"
-#include "cost.h"
+#include "./vp8enci.h"
+#include "./cost.h"
#define DO_TRELLIS_I4 1
#define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.
@@ -33,13 +33,13 @@
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
-static inline int clip(int v, int m, int M) {
+static WEBP_INLINE int clip(int v, int m, int M) {
return v < m ? m : v > M ? M : v;
}
-const uint8_t VP8Zigzag[16] = {
+static const uint8_t kZigzag[16] = {
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
@@ -132,7 +132,7 @@ static const uint8_t kFreqSharpening[16] = {
90, 90, 90, 90
};
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Initialize quantization parameters in VP8Matrix
// Returns the average quantizer
@@ -143,7 +143,7 @@ static int ExpandMatrix(VP8Matrix* const m, int type) {
m->q_[i] = m->q_[1];
}
for (i = 0; i < 16; ++i) {
- const int j = VP8Zigzag[i];
+ const int j = kZigzag[i];
const int bias = kBiasMatrices[type][j];
m->iq_[j] = (1 << QFIX) / m->q_[j];
m->bias_[j] = BIAS(bias);
@@ -192,7 +192,7 @@ static void SetupMatrices(VP8Encoder* enc) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Initialize filtering parameters
// Very small filter-strength values have close to no visual effect. So we can
@@ -214,7 +214,7 @@ static void SetupFilterStrength(VP8Encoder* const enc) {
enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Note: if you change the values below, remember that the max range
// allowed by the syntax for DQ_UV is [-16,16].
@@ -286,7 +286,7 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
SetupFilterStrength(enc); // initialize segments' filtering, eventually
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Form the predictions in cache
// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
@@ -299,16 +299,16 @@ const int VP8I4ModeOffsets[NUM_BMODES] = {
};
void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
- VP8Encoder* const enc = it->enc_;
- const uint8_t* left = it->x_ ? enc->y_left_ : NULL;
- const uint8_t* top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;
+ const VP8Encoder* const enc = it->enc_;
+ const uint8_t* const left = it->x_ ? enc->y_left_ : NULL;
+ const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;
VP8EncPredLuma16(it->yuv_p_, left, top);
}
void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
- VP8Encoder* const enc = it->enc_;
- const uint8_t* left = it->x_ ? enc->u_left_ : NULL;
- const uint8_t* top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;
+ const VP8Encoder* const enc = it->enc_;
+ const uint8_t* const left = it->x_ ? enc->u_left_ : NULL;
+ const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;
VP8EncPredChroma8(it->yuv_p_, left, top);
}
@@ -316,7 +316,7 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Quantize
// Layout:
@@ -341,7 +341,7 @@ const int VP8Scan[16 + 4 + 4] = {
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
};
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Distortion measurement
static const uint16_t kWeightY[16] = {
@@ -384,7 +384,7 @@ static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
dst->score += src->score;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Performs trellis-optimized quantization.
// Trellis
@@ -406,13 +406,13 @@ typedef struct {
#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
#define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])
-static inline void SetRDScore(int lambda, VP8ModeScore* const rd) {
+static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
// TODO: incorporate the "* 256" in the tables?
rd->score = rd->R * lambda + 256 * (rd->D + rd->SD);
}
-static inline score_t RDScoreTrellis(int lambda, score_t rate,
- score_t distortion) {
+static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
+ score_t distortion) {
return rate * lambda + 256 * distortion;
}
@@ -440,7 +440,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
// compute maximal distortion.
max_error = 0;
for (n = first; n < 16; ++n) {
- const int j = VP8Zigzag[n];
+ const int j = kZigzag[n];
const int err = in[j] * in[j];
max_error += kWeightTrellis[j] * err;
if (err > thresh) last = n;
@@ -464,7 +464,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
// traverse trellis.
for (n = first; n <= last; ++n) {
- const int j = VP8Zigzag[n];
+ const int j = kZigzag[n];
const int Q = mtx->q_[j];
const int iQ = mtx->iq_[j];
const int B = BIAS(0x00); // neutral bias
@@ -560,7 +560,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
for (; n >= first; --n) {
const Node* const node = &NODE(n, best_node);
- const int j = VP8Zigzag[n];
+ const int j = kZigzag[n];
out[n] = node->sign ? -node->level : node->level;
nz |= (node->level != 0);
in[j] = out[n] * mtx->q_[j];
@@ -571,7 +571,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
#undef NODE
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Performs: difference, transform, quantize, back-transform, add
// all at once. Output is the reconstructed block in *yuv_out, and the
// quantized levels in *levels.
@@ -685,7 +685,7 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
return (nz << 16);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
// Pick the mode with the lowest RD-cost = Rate + lambda * Distortion.
@@ -700,7 +700,7 @@ static void SwapOut(VP8EncIterator* const it) {
}
static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
- VP8Encoder* const enc = it->enc_;
+ const VP8Encoder* const enc = it->enc_;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i16_;
const int tlambda = dqm->tlambda_;
@@ -738,11 +738,11 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
VP8SetIntra16Mode(it, rd->mode_i16);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// return the cost array corresponding to the surrounding prediction modes.
static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
- const int modes[16]) {
+ const uint8_t modes[16]) {
const int preds_w = it->enc_->preds_w_;
const int x = (it->i4_ & 3), y = it->i4_ >> 2;
const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
@@ -751,16 +751,21 @@ static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
}
static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
- VP8Encoder* const enc = it->enc_;
+ const VP8Encoder* const enc = it->enc_;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i4_;
const int tlambda = dqm->tlambda_;
const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
+ int total_header_bits = 0;
VP8ModeScore rd_best;
+ if (enc->max_i4_header_bits_ == 0) {
+ return 0;
+ }
+
InitScore(&rd_best);
- rd_best.score = 0;
+ rd_best.score = 211; // '211' is the value of VP8BitCost(0, 145)
VP8IteratorStartI4(it);
do {
VP8ModeScore rd_i4;
@@ -799,7 +804,9 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
}
SetRDScore(dqm->lambda_mode_, &rd_i4);
AddScore(&rd_best, &rd_i4);
- if (rd_best.score >= rd->score) {
+ total_header_bits += mode_costs[best_mode];
+ if (rd_best.score >= rd->score ||
+ total_header_bits > enc->max_i4_header_bits_) {
return 0;
}
// Copy selected samples if not in the right place already.
@@ -817,10 +824,10 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
return 1; // select intra4x4 over intra16x16
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
- VP8Encoder* const enc = it->enc_;
+ const VP8Encoder* const enc = it->enc_;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_uv_;
const uint8_t* const src = it->yuv_in_ + U_OFF;
@@ -855,7 +862,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
AddScore(rd, &rd_best);
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Final reconstruction and quantization.
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
@@ -882,7 +889,7 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd->nz = nz;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Entry point
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
diff --git a/src/enc/syntax.c b/src/enc/syntax.c
index 77df727b..99c21fec 100644
--- a/src/enc/syntax.c
+++ b/src/enc/syntax.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -10,70 +10,190 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
-#include <math.h>
-#include "vp8enci.h"
+#include "./vp8enci.h"
+#include "webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-#define KSIGNATURE 0x9d012a
-#define KHEADER_SIZE 10
-#define KRIFF_SIZE 20
-#define KSIZE_OFFSET (KRIFF_SIZE - 8)
+//------------------------------------------------------------------------------
+// Helper functions
-#define MAX_PARTITION0_SIZE (1 << 19) // max size of mode partition
-#define MAX_PARTITION_SIZE (1 << 24) // max size for token partition
-
-//-----------------------------------------------------------------------------
-// Writers for header's various pieces (in order of appearance)
-
-// Main keyframe header
-
-static void PutLE32(uint8_t* const data, uint32_t val) {
+// TODO(later): Move to webp/format_constants.h?
+static void PutLE24(uint8_t* const data, uint32_t val) {
data[0] = (val >> 0) & 0xff;
data[1] = (val >> 8) & 0xff;
data[2] = (val >> 16) & 0xff;
+}
+
+static void PutLE32(uint8_t* const data, uint32_t val) {
+ PutLE24(data, val);
data[3] = (val >> 24) & 0xff;
}
-static int PutHeader(int profile, size_t size0, size_t total_size,
- WebPPicture* const pic) {
- uint8_t buf[KHEADER_SIZE];
- uint8_t RIFF[KRIFF_SIZE] = {
- 'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P', 'V', 'P', '8', ' '
+static int IsVP8XNeeded(const VP8Encoder* const enc) {
+ return !!enc->has_alpha_; // Currently the only case when VP8X is needed.
+ // This could change in the future.
+}
+
+static int PutPaddingByte(const WebPPicture* const pic) {
+ const uint8_t pad_byte[1] = { 0 };
+ return !!pic->writer(pad_byte, 1, pic);
+}
+
+//------------------------------------------------------------------------------
+// Writers for header's various pieces (in order of appearance)
+
+static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc,
+ size_t riff_size) {
+ const WebPPicture* const pic = enc->pic_;
+ uint8_t riff[RIFF_HEADER_SIZE] = {
+ 'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P'
};
- uint32_t bits;
+ assert(riff_size == (uint32_t)riff_size);
+ PutLE32(riff + TAG_SIZE, (uint32_t)riff_size);
+ if (!pic->writer(riff, sizeof(riff), pic)) {
+ return VP8_ENC_ERROR_BAD_WRITE;
+ }
+ return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
+ const WebPPicture* const pic = enc->pic_;
+ uint8_t vp8x[CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE] = {
+ 'V', 'P', '8', 'X'
+ };
+ uint32_t flags = 0;
- if (size0 >= MAX_PARTITION0_SIZE) { // partition #0 is too big to fit
- return WebPEncodingSetError(pic, VP8_ENC_ERROR_PARTITION0_OVERFLOW);
+ assert(IsVP8XNeeded(enc));
+ assert(pic->width >= 1 && pic->height >= 1);
+ assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
+
+ if (enc->has_alpha_) {
+ flags |= ALPHA_FLAG_BIT;
}
- PutLE32(RIFF + 4, total_size + KSIZE_OFFSET);
- PutLE32(RIFF + 16, total_size);
- if (!pic->writer(RIFF, sizeof(RIFF), pic)) {
- return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
+ PutLE32(vp8x + TAG_SIZE, VP8X_CHUNK_SIZE);
+ PutLE32(vp8x + CHUNK_HEADER_SIZE, flags);
+ PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1);
+ PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1);
+  if (!pic->writer(vp8x, sizeof(vp8x), pic)) {
+ return VP8_ENC_ERROR_BAD_WRITE;
}
+ return VP8_ENC_OK;
+}
- bits = 0 // keyframe (1b)
- | (profile << 1) // profile (3b)
- | (1 << 4) // visible (1b)
- | (size0 << 5); // partition length (19b)
- buf[0] = bits & 0xff;
- buf[1] = (bits >> 8) & 0xff;
- buf[2] = (bits >> 16) & 0xff;
+static WebPEncodingError PutAlphaChunk(const VP8Encoder* const enc) {
+ const WebPPicture* const pic = enc->pic_;
+ uint8_t alpha_chunk_hdr[CHUNK_HEADER_SIZE] = {
+ 'A', 'L', 'P', 'H'
+ };
+
+ assert(enc->has_alpha_);
+
+ // Alpha chunk header.
+ PutLE32(alpha_chunk_hdr + TAG_SIZE, enc->alpha_data_size_);
+ if (!pic->writer(alpha_chunk_hdr, sizeof(alpha_chunk_hdr), pic)) {
+ return VP8_ENC_ERROR_BAD_WRITE;
+ }
+
+ // Alpha chunk data.
+ if (!pic->writer(enc->alpha_data_, enc->alpha_data_size_, pic)) {
+ return VP8_ENC_ERROR_BAD_WRITE;
+ }
+
+ // Padding.
+ if ((enc->alpha_data_size_ & 1) && !PutPaddingByte(pic)) {
+ return VP8_ENC_ERROR_BAD_WRITE;
+ }
+ return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8Header(const WebPPicture* const pic,
+ size_t vp8_size) {
+ uint8_t vp8_chunk_hdr[CHUNK_HEADER_SIZE] = {
+ 'V', 'P', '8', ' '
+ };
+ assert(vp8_size == (uint32_t)vp8_size);
+ PutLE32(vp8_chunk_hdr + TAG_SIZE, (uint32_t)vp8_size);
+ if (!pic->writer(vp8_chunk_hdr, sizeof(vp8_chunk_hdr), pic)) {
+ return VP8_ENC_ERROR_BAD_WRITE;
+ }
+ return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8FrameHeader(const WebPPicture* const pic,
+ int profile, size_t size0) {
+ uint8_t vp8_frm_hdr[VP8_FRAME_HEADER_SIZE];
+ uint32_t bits;
+
+ if (size0 >= VP8_MAX_PARTITION0_SIZE) { // partition #0 is too big to fit
+ return VP8_ENC_ERROR_PARTITION0_OVERFLOW;
+ }
+
+ // Paragraph 9.1.
+ bits = 0 // keyframe (1b)
+ | (profile << 1) // profile (3b)
+ | (1 << 4) // visible (1b)
+ | ((uint32_t)size0 << 5); // partition length (19b)
+ vp8_frm_hdr[0] = (bits >> 0) & 0xff;
+ vp8_frm_hdr[1] = (bits >> 8) & 0xff;
+ vp8_frm_hdr[2] = (bits >> 16) & 0xff;
// signature
- buf[3] = (KSIGNATURE >> 16) & 0xff;
- buf[4] = (KSIGNATURE >> 8) & 0xff;
- buf[5] = (KSIGNATURE >> 0) & 0xff;
+ vp8_frm_hdr[3] = (VP8_SIGNATURE >> 16) & 0xff;
+ vp8_frm_hdr[4] = (VP8_SIGNATURE >> 8) & 0xff;
+ vp8_frm_hdr[5] = (VP8_SIGNATURE >> 0) & 0xff;
// dimensions
- buf[6] = pic->width & 0xff;
- buf[7] = pic->width >> 8;
- buf[8] = pic->height & 0xff;
- buf[9] = pic->height >> 8;
+ vp8_frm_hdr[6] = pic->width & 0xff;
+ vp8_frm_hdr[7] = pic->width >> 8;
+ vp8_frm_hdr[8] = pic->height & 0xff;
+ vp8_frm_hdr[9] = pic->height >> 8;
- return pic->writer(buf, sizeof(buf), pic);
+ if (!pic->writer(vp8_frm_hdr, sizeof(vp8_frm_hdr), pic)) {
+ return VP8_ENC_ERROR_BAD_WRITE;
+ }
+ return VP8_ENC_OK;
+}
+
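The three bytes written above pack the VP8 frame tag described in paragraph 9.1 of the spec: bit 0 is the frame type (0 = keyframe), bits 1..3 the profile, bit 4 the show_frame flag and bits 5..23 the 19-bit size of partition #0. A sketch of the matching read side (ParseVP8FrameTag is a hypothetical helper, not a library function):

#include <stdint.h>

// Unpack the 3-byte frame tag produced by PutVP8FrameHeader().
static void ParseVP8FrameTag(const uint8_t tag[3], int* const is_keyframe,
                             int* const profile, int* const show_frame,
                             uint32_t* const size0) {
  const uint32_t bits = tag[0] | (tag[1] << 8) | ((uint32_t)tag[2] << 16);
  *is_keyframe = !(bits & 1);      // 0 means keyframe
  *profile     = (bits >> 1) & 7;
  *show_frame  = (bits >> 4) & 1;
  *size0       = bits >> 5;        // partition #0 length, 19 bits
}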
+// WebP Headers.
+static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
+ size_t vp8_size, size_t riff_size) {
+ WebPPicture* const pic = enc->pic_;
+ WebPEncodingError err = VP8_ENC_OK;
+
+ // RIFF header.
+ err = PutRIFFHeader(enc, riff_size);
+ if (err != VP8_ENC_OK) goto Error;
+
+ // VP8X.
+ if (IsVP8XNeeded(enc)) {
+ err = PutVP8XHeader(enc);
+ if (err != VP8_ENC_OK) goto Error;
+ }
+
+ // Alpha.
+ if (enc->has_alpha_) {
+ err = PutAlphaChunk(enc);
+ if (err != VP8_ENC_OK) goto Error;
+ }
+
+ // VP8 header.
+ err = PutVP8Header(pic, vp8_size);
+ if (err != VP8_ENC_OK) goto Error;
+
+ // VP8 frame header.
+ err = PutVP8FrameHeader(pic, enc->profile_, size0);
+ if (err != VP8_ENC_OK) goto Error;
+
+ // All OK.
+ return 1;
+
+ // Error.
+ Error:
+ return WebPEncodingSetError(pic, err);
}
// Segmentation header
@@ -144,7 +264,7 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
int p;
for (p = 0; p < enc->num_parts_ - 1; ++p) {
const size_t part_size = VP8BitWriterSize(enc->parts_ + p);
- if (part_size >= MAX_PARTITION_SIZE) {
+ if (part_size >= VP8_MAX_PARTITION_SIZE) {
return WebPEncodingSetError(pic, VP8_ENC_ERROR_PARTITION_OVERFLOW);
}
buf[3 * p + 0] = (part_size >> 0) & 0xff;
@@ -154,18 +274,12 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
return p ? pic->writer(buf, 3 * p, pic) : 1;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#ifdef WEBP_EXPERIMENTAL_FEATURES
#define KTRAILER_SIZE 8
-static void PutLE24(uint8_t* buf, size_t value) {
- buf[0] = (value >> 0) & 0xff;
- buf[1] = (value >> 8) & 0xff;
- buf[2] = (value >> 16) & 0xff;
-}
-
static int WriteExtensions(VP8Encoder* const enc) {
uint8_t buffer[KTRAILER_SIZE];
VP8BitWriter* const bw = &enc->bw_;
@@ -182,14 +296,6 @@ static int WriteExtensions(VP8Encoder* const enc) {
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY);
}
}
- // Alpha (bytes 4..6)
- PutLE24(buffer + 4, enc->alpha_data_size_);
- if (enc->alpha_data_size_ > 0) {
- assert(enc->has_alpha_);
- if (!VP8BitWriterAppend(bw, enc->alpha_data_, enc->alpha_data_size_)) {
- return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY);
- }
- }
buffer[KTRAILER_SIZE - 1] = 0x01; // marker
if (!VP8BitWriterAppend(bw, buffer, KTRAILER_SIZE)) {
@@ -200,14 +306,14 @@ static int WriteExtensions(VP8Encoder* const enc) {
#endif /* WEBP_EXPERIMENTAL_FEATURES */
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
static size_t GeneratePartition0(VP8Encoder* const enc) {
VP8BitWriter* const bw = &enc->bw_;
const int mb_size = enc->mb_w_ * enc->mb_h_;
uint64_t pos1, pos2, pos3;
#ifdef WEBP_EXPERIMENTAL_FEATURES
- const int need_extensions = enc->has_alpha_ || enc->use_layer_;
+ const int need_extensions = enc->use_layer_;
#endif
pos1 = VP8BitWriterPos(bw);
@@ -240,38 +346,67 @@ static size_t GeneratePartition0(VP8Encoder* const enc) {
if (enc->pic_->stats) {
enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
- enc->pic_->stats->alpha_data_size = enc->alpha_data_size_;
- enc->pic_->stats->layer_data_size = enc->layer_data_size_;
+ enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_;
+ enc->pic_->stats->layer_data_size = (int)enc->layer_data_size_;
}
return !bw->error_;
}
+void VP8EncFreeBitWriters(VP8Encoder* const enc) {
+ int p;
+ VP8BitWriterWipeOut(&enc->bw_);
+ for (p = 0; p < enc->num_parts_; ++p) {
+ VP8BitWriterWipeOut(enc->parts_ + p);
+ }
+}
+
int VP8EncWrite(VP8Encoder* const enc) {
WebPPicture* const pic = enc->pic_;
VP8BitWriter* const bw = &enc->bw_;
+ const int task_percent = 19;
+ const int percent_per_part = task_percent / enc->num_parts_;
+ const int final_percent = enc->percent_ + task_percent;
int ok = 0;
- size_t coded_size, pad;
+ size_t vp8_size, pad, riff_size;
int p;
// Partition #0 with header and partition sizes
- ok = GeneratePartition0(enc);
+ ok = !!GeneratePartition0(enc);
- // Compute total size (for the RIFF header)
- coded_size = KHEADER_SIZE + VP8BitWriterSize(bw) + 3 * (enc->num_parts_ - 1);
+ // Compute VP8 size
+ vp8_size = VP8_FRAME_HEADER_SIZE +
+ VP8BitWriterSize(bw) +
+ 3 * (enc->num_parts_ - 1);
for (p = 0; p < enc->num_parts_; ++p) {
- coded_size += VP8BitWriterSize(enc->parts_ + p);
+ vp8_size += VP8BitWriterSize(enc->parts_ + p);
+ }
+ pad = vp8_size & 1;
+ vp8_size += pad;
+
+ // Compute RIFF size
+ // At the minimum it is: "WEBPVP8 nnnn" + VP8 data size.
+ riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8_size;
+ if (IsVP8XNeeded(enc)) { // Add size for: VP8X header + data.
+ riff_size += CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+ }
+ if (enc->has_alpha_) { // Add size for: ALPH header + data.
+ const uint32_t padded_alpha_size = enc->alpha_data_size_ +
+ (enc->alpha_data_size_ & 1);
+ riff_size += CHUNK_HEADER_SIZE + padded_alpha_size;
+ }
+ // Sanity check.
+ if (riff_size > 0xfffffffeU) {
+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG);
}
- pad = coded_size & 1;
- coded_size += pad;
// Emit headers and partition #0
{
const uint8_t* const part0 = VP8BitWriterBuf(bw);
const size_t size0 = VP8BitWriterSize(bw);
- ok = ok && PutHeader(enc->profile_, size0, coded_size, pic)
+ ok = ok && PutWebPHeaders(enc, size0, vp8_size, riff_size)
&& pic->writer(part0, size0, pic)
&& EmitPartitionsSize(enc, pic);
- free((void*)part0);
+ VP8BitWriterWipeOut(bw); // will free the internal buffer.
}
// Token partitions
@@ -280,20 +415,22 @@ int VP8EncWrite(VP8Encoder* const enc) {
const size_t size = VP8BitWriterSize(enc->parts_ + p);
if (size)
ok = ok && pic->writer(buf, size, pic);
- free((void*)buf);
+ VP8BitWriterWipeOut(enc->parts_ + p); // will free the internal buffer.
+ ok = ok && WebPReportProgress(pic, enc->percent_ + percent_per_part,
+ &enc->percent_);
}
// Padding byte
if (ok && pad) {
- const uint8_t pad_byte[1] = { 0 };
- ok = pic->writer(pad_byte, 1, pic);
+ ok = PutPaddingByte(pic);
}
- enc->coded_size_ = coded_size + KRIFF_SIZE;
+ enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size);
+ ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
return ok;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
diff --git a/src/enc/tree.c b/src/enc/tree.c
index b1a9aa40..8b25e5e4 100644
--- a/src/enc/tree.c
+++ b/src/enc/tree.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -9,13 +9,13 @@
//
// Author: Skal (pascal.massimino@gmail.com)
-#include "vp8enci.h"
+#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Default probabilities
// Paragraph 13.5
@@ -158,9 +158,12 @@ const uint8_t
void VP8DefaultProbas(VP8Encoder* const enc) {
VP8Proba* const probas = &enc->proba_;
+ probas->use_skip_proba_ = 0;
memset(probas->segments_, 255u, sizeof(probas->segments_));
memcpy(probas->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0));
- probas->use_skip_proba_ = 0;
+ // Note: we could hard-code the level_costs_ corresponding to VP8CoeffsProba0,
+ // but that's ~11k of static data. Better call VP8CalculateLevelCosts() later.
+ probas->dirty_ = 1;
}
// Paragraph 11.5. 900bytes.
@@ -343,7 +346,7 @@ void VP8CodeIntraModes(VP8Encoder* const enc) {
} while (VP8IteratorNext(&it, 0));
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Paragraph 13
const uint8_t
diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h
index 2be079e7..a0d9001f 100644
--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -12,21 +12,22 @@
#ifndef WEBP_ENC_VP8ENCI_H_
#define WEBP_ENC_VP8ENCI_H_
-#include "string.h" // for memcpy()
+#include <string.h> // for memcpy()
+#include "../dsp/dsp.h"
+#include "../utils/bit_writer.h"
#include "webp/encode.h"
-#include "bit_writer.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Various defines and enums
// version numbers
#define ENC_MAJ_VERSION 0
-#define ENC_MIN_VERSION 1
-#define ENC_REV_VERSION 2
+#define ENC_MIN_VERSION 2
+#define ENC_REV_VERSION 0
// size of histogram used by CollectHistogram.
#define MAX_COEFF_THRESH 64
@@ -156,16 +157,17 @@ typedef int64_t score_t; // type used for scores, rate, distortion
#define BIAS(b) ((b) << (QFIX - 8))
// Fun fact: this is the _only_ line where we're actually being lossy and
// discarding bits.
-static inline int QUANTDIV(int n, int iQ, int B) {
+static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
return (n * iQ + B) >> QFIX;
}
extern const uint8_t VP8Zigzag[16];
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Headers
+typedef uint32_t proba_t; // 16b + 16b
typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
-typedef uint64_t StatsArray[NUM_CTX][NUM_PROBAS][2];
+typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
@@ -184,8 +186,9 @@ typedef struct {
uint8_t segments_[3]; // probabilities for segment tree
uint8_t skip_proba_; // final probability of being skipped.
ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes
- StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 7.4k
+ StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k
+ int dirty_; // if true, need to call VP8CalculateLevelCosts()
int use_skip_proba_; // Note: we always use skip_proba for now.
int nb_skip_; // number of skipped blocks
} VP8Proba;
@@ -199,19 +202,19 @@ typedef struct {
int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16
} VP8FilterHeader;
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// Information about the macroblocks.
typedef struct {
// block type
- uint8_t type_:2; // 0=i4x4, 1=i16x16
- uint8_t uv_mode_:2;
- uint8_t skip_:1;
- uint8_t segment_:2;
+ unsigned int type_:2; // 0=i4x4, 1=i16x16
+ unsigned int uv_mode_:2;
+ unsigned int skip_:1;
+ unsigned int segment_:2;
uint8_t alpha_; // quantization-susceptibility
} VP8MBInfo;
-typedef struct {
+typedef struct VP8Matrix {
uint16_t q_[16]; // quantizer steps
uint16_t iq_[16]; // reciprocals, fixed point.
uint16_t bias_[16]; // rounding bias
@@ -240,7 +243,7 @@ typedef struct {
int16_t y_ac_levels[16][16];
int16_t uv_levels[4 + 4][16];
int mode_i16; // mode number for intra16 prediction
- int modes_i4[16]; // mode numbers for intra4 predictions
+ uint8_t modes_i4[16]; // mode numbers for intra4 predictions
int mode_uv; // mode number of chroma prediction
uint32_t nz; // non-zero blocks
} VP8ModeScore;
@@ -271,6 +274,7 @@ typedef struct {
LFStats* lf_stats_; // filter stats (borrowed from enc_)
int do_trellis_; // if true, perform extra level optimisation
int done_; // true when scan is finished
+ int percent0_; // saved initial progress percent
} VP8EncIterator;
// in iterator.c
@@ -287,6 +291,9 @@ void VP8IteratorExport(const VP8EncIterator* const it);
// it->yuv_out_ or it->yuv_in_.
int VP8IteratorNext(VP8EncIterator* const it,
const uint8_t* const block_to_save);
+// Report progress based on macroblock rows. Return 0 on user-abort request.
+int VP8IteratorProgress(const VP8EncIterator* const it,
+ int final_delta_percent);
// Intra4x4 iterations
void VP8IteratorStartI4(VP8EncIterator* const it);
// returns true if not done.
@@ -299,13 +306,54 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it);
// Helper functions to set mode properties
void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
-void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]);
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
void VP8SetSkip(const VP8EncIterator* const it, int skip);
void VP8SetSegment(const VP8EncIterator* const it, int segment);
-void VP8IteratorResetCosts(VP8EncIterator* const it);
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+// Paginated token buffer
+
+// WIP: #define USE_TOKEN_BUFFER
+
+#ifdef USE_TOKEN_BUFFER
+
+#define MAX_NUM_TOKEN 2048
+
+typedef struct VP8Tokens VP8Tokens;
+struct VP8Tokens {
+ uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit, bits 0..14: slot
+ int left_;
+ VP8Tokens* next_;
+};
+
+typedef struct {
+ VP8Tokens* rows_;
+ uint16_t* tokens_; // set to (*last_)->tokens_
+ VP8Tokens** last_;
+ int left_;
+ int error_; // true in case of malloc error
+} VP8TBuffer;
+
+void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer
+int VP8TBufferNewPage(VP8TBuffer* const b); // allocate a new page
+void VP8TBufferClear(VP8TBuffer* const b); // de-allocate memory
+
+int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
+ const uint8_t* const probas);
+
+static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b,
+ int bit, int proba_idx) {
+ if (b->left_ > 0 || VP8TBufferNewPage(b)) {
+ const int slot = --b->left_;
+ b->tokens_[slot] = (bit << 15) | proba_idx;
+ }
+ return bit;
+}
+
+#endif // USE_TOKEN_BUFFER
+
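Each queued entry is a single 16-bit word: bit 15 holds the coded bit and bits 0..14 the index of the probability to use once the tokens are replayed into the bit writer. Slots are filled from the end of a page downwards (left_ is pre-decremented), so presumably the emitter walks each page from MAX_NUM_TOKEN - 1 down to left_ to preserve insertion order. A trivial unpacking sketch (UnpackToken is hypothetical):

#include <stdint.h>

// Split a token word produced by VP8AddToken() back into its parts.
static void UnpackToken(uint16_t token, int* const bit, int* const proba_idx) {
  *bit = (token >> 15) & 1;
  *proba_idx = (int)(token & 0x7fff);
}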
+//------------------------------------------------------------------------------
// VP8Encoder
struct VP8Encoder {
@@ -329,10 +377,12 @@ struct VP8Encoder {
VP8BitWriter bw_; // part0
VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
+ int percent_; // for progress
+
// transparency blob
int has_alpha_;
uint8_t* alpha_data_; // non-NULL if transparency is present
- size_t alpha_data_size_;
+ uint32_t alpha_data_size_;
// enhancement layer
int use_layer_;
@@ -352,15 +402,16 @@ struct VP8Encoder {
// probabilities and statistics
VP8Proba proba_;
- uint64_t sse_[3]; // sum of Y/U/V squared errors for all macroblocks
+ uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks
uint64_t sse_count_; // pixel count for the sse_[] stats
int coded_size_;
int residual_bytes_[3][4];
int block_count_[3];
// quality/speed settings
- int method_; // 0=fastest, 6=best/slowest.
- int rd_opt_level_; // Deduced from method_.
+ int method_; // 0=fastest, 6=best/slowest.
+ int rd_opt_level_; // Deduced from method_.
+ int max_i4_header_bits_; // partition #0 safeness factor
// Memory
VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
@@ -380,7 +431,7 @@ struct VP8Encoder {
LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off)
};
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// internal functions. Not public.
// in tree.c
@@ -399,6 +450,8 @@ void VP8CodeIntraModes(VP8Encoder* const enc);
// and appending an assembly of all the pre-coded token partitions.
// Return true if everything is ok.
int VP8EncWrite(VP8Encoder* const enc);
+// Release memory allocated for bit-writing in VP8EncLoop & seq.
+void VP8EncFreeBitWriters(VP8Encoder* const enc);
// in frame.c
extern const uint8_t VP8EncBands[16 + 1];
@@ -419,13 +472,11 @@ int VP8StatLoop(VP8Encoder* const enc);
// in webpenc.c
// Assign an error code to a picture. Return false for convenience.
-int WebPEncodingSetError(WebPPicture* const pic, WebPEncodingError error);
+int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
+int WebPReportProgress(const WebPPicture* const pic,
+ int percent, int* const percent_store);
+
// in analysis.c
-// Compute susceptibility based on DCT-coeff histograms:
-// the higher, the "easier" the macroblock is to compress.
-typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
- int start_block, int end_block);
-extern VP8CHisto VP8CollectHistogram;
// Main analysis loop. Decides the segmentations and complexity.
// Assigns a first guess for Intra16 and uvmode_ prediction modes.
int VP8EncAnalyze(VP8Encoder* const enc);
@@ -437,10 +488,9 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
// in alpha.c
-void VP8EncInitAlpha(VP8Encoder* enc); // initialize alpha compression
-void VP8EncCodeAlphaBlock(VP8EncIterator* it); // analyze or code a macroblock
-int VP8EncFinishAlpha(VP8Encoder* enc); // finalize compressed data
-void VP8EncDeleteAlpha(VP8Encoder* enc); // delete compressed data
+void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression
+int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
+void VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
// in layer.c
void VP8EncInitLayer(VP8Encoder* const enc); // init everything
@@ -448,63 +498,28 @@ void VP8EncCodeLayerBlock(VP8EncIterator* it); // code one more macroblock
int VP8EncFinishLayer(VP8Encoder* const enc); // finalize coding
void VP8EncDeleteLayer(VP8Encoder* enc); // reclaim memory
- // in dsp.c
-int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]);
-
-// Transforms
-// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
-// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
-typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
- int do_two);
-typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
-typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
-extern VP8Idct VP8ITransform;
-extern VP8Fdct VP8FTransform;
-extern VP8WHT VP8ITransformWHT;
-extern VP8WHT VP8FTransformWHT;
-// Predictions
-// *dst is the destination block. *top, *top_right and *left can be NULL.
-typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
- const uint8_t* top);
-typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
-extern VP8Intra4Preds VP8EncPredLuma4;
-extern VP8IntraPreds VP8EncPredLuma16;
-extern VP8IntraPreds VP8EncPredChroma8;
-
-typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
-extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
-typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
- const uint16_t* const weights);
-extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
-
-typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
-extern VP8BlockCopy VP8Copy4x4;
-extern VP8BlockCopy VP8Copy8x8;
-extern VP8BlockCopy VP8Copy16x16;
-// Quantization
-typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
- int n, const VP8Matrix* const mtx);
-extern VP8QuantizeBlock VP8EncQuantizeBlock;
-
-typedef enum {
- kSSE2,
- kSSE3
-} CPUFeature;
-// returns true if the CPU supports the feature.
-typedef int (*VP8CPUInfo)(CPUFeature feature);
-extern VP8CPUInfo VP8EncGetCPUInfo;
-
-void VP8EncDspInit(void); // must be called before using any of the above
-
// in filter.c
-extern void VP8InitFilter(VP8EncIterator* const it);
-extern void VP8StoreFilterStats(VP8EncIterator* const it);
-extern void VP8AdjustFilterStrength(VP8EncIterator* const it);
-//-----------------------------------------------------------------------------
+// SSIM utils
+typedef struct {
+ double w, xm, ym, xxm, xym, yym;
+} DistoStats;
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst);
+void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
+ const uint8_t* src2, int stride2,
+ int W, int H, DistoStats* const stats);
+double VP8SSIMGet(const DistoStats* const stats);
+double VP8SSIMGetSquaredError(const DistoStats* const stats);
+
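DistoStats presumably accumulates raw moments over w samples: sums of x, y, x*x, x*y and y*y. A hedged sketch of turning those into the standard SSIM value with the usual constants C1 = (0.01*255)^2 and C2 = (0.03*255)^2; the exact scaling and constants inside VP8SSIMGet() may differ, and the struct is re-declared only so the sketch is self-contained:

typedef struct { double w, xm, ym, xxm, xym, yym; } DistoStats;

// Standard SSIM from accumulated moments, assuming xm/ym are sums of
// samples and xxm/xym/yym sums of products over w samples.
static double SSIMFromStats(const DistoStats* const s) {
  const double C1 = 6.5025, C2 = 58.5225;        // (0.01*255)^2, (0.03*255)^2
  const double mx = s->xm / s->w, my = s->ym / s->w;
  const double sxx = s->xxm / s->w - mx * mx;    // variance of x
  const double syy = s->yym / s->w - my * my;    // variance of y
  const double sxy = s->xym / s->w - mx * my;    // covariance
  return ((2. * mx * my + C1) * (2. * sxy + C2)) /
         ((mx * mx + my * my + C1) * (sxx + syy + C2));
}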
+// autofilter
+void VP8InitFilter(VP8EncIterator* const it);
+void VP8StoreFilterStats(VP8EncIterator* const it);
+void VP8AdjustFilterStrength(VP8EncIterator* const it);
+
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
-#endif // WEBP_ENC_VP8ENCI_H_
+#endif /* WEBP_ENC_VP8ENCI_H_ */
diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c
new file mode 100644
index 00000000..41aa62b7
--- /dev/null
+++ b/src/enc/vp8l.c
@@ -0,0 +1,1150 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// main entry for the lossless encoder.
+//
+// Author: Vikas Arora (vikaas.arora@gmail.com)
+//
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./backward_references.h"
+#include "./vp8enci.h"
+#include "./vp8li.h"
+#include "../dsp/lossless.h"
+#include "../utils/bit_writer.h"
+#include "../utils/huffman_encode.h"
+#include "../utils/utils.h"
+#include "webp/format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define PALETTE_KEY_RIGHT_SHIFT 22 // Key for 1K buffer.
+#define MAX_HUFF_IMAGE_SIZE (16 * 1024 * 1024)
+#define MAX_COLORS_FOR_GRAPH 64
+
+// -----------------------------------------------------------------------------
+// Palette
+
+static int CompareColors(const void* p1, const void* p2) {
+ const uint32_t a = *(const uint32_t*)p1;
+ const uint32_t b = *(const uint32_t*)p2;
+ return (a < b) ? -1 : (a > b) ? 1 : 0;
+}
+
+// If the number of colors in the image is less than or equal to
+// MAX_PALETTE_SIZE, creates a palette and returns true, else returns false.
+static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
+ uint32_t palette[MAX_PALETTE_SIZE],
+ int* const palette_size) {
+ int i, x, y, key;
+ int num_colors = 0;
+ uint8_t in_use[MAX_PALETTE_SIZE * 4] = { 0 };
+ uint32_t colors[MAX_PALETTE_SIZE * 4];
+ static const uint32_t kHashMul = 0x1e35a7bd;
+ const uint32_t* argb = pic->argb;
+ const int width = pic->width;
+ const int height = pic->height;
+ uint32_t last_pix = ~argb[0]; // so we're sure that last_pix != argb[0]
+
+ for (y = 0; y < height; ++y) {
+ for (x = 0; x < width; ++x) {
+ if (argb[x] == last_pix) {
+ continue;
+ }
+ last_pix = argb[x];
+ key = (kHashMul * last_pix) >> PALETTE_KEY_RIGHT_SHIFT;
+ while (1) {
+ if (!in_use[key]) {
+ colors[key] = last_pix;
+ in_use[key] = 1;
+ ++num_colors;
+ if (num_colors > MAX_PALETTE_SIZE) {
+ return 0;
+ }
+ break;
+ } else if (colors[key] == last_pix) {
+ // The color is already there.
+ break;
+ } else {
+ // Some other color sits there.
+ // Do linear conflict resolution.
+ ++key;
+ key &= (MAX_PALETTE_SIZE * 4 - 1); // key mask for 1K buffer.
+ }
+ }
+ }
+ argb += pic->argb_stride;
+ }
+
+ // TODO(skal): could we reuse in_use[] to speed up ApplyPalette()?
+ num_colors = 0;
+ for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) {
+ if (in_use[i]) {
+ palette[num_colors] = colors[i];
+ ++num_colors;
+ }
+ }
+
+ qsort(palette, num_colors, sizeof(*palette), CompareColors);
+ *palette_size = num_colors;
+ return 1;
+}
+
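The palette detection above hashes each new color with the multiplier 0x1e35a7bd and keeps the top 10 bits (the 22-bit right shift), which indexes a 1024-entry table (MAX_PALETTE_SIZE * 4); collisions fall through to linear probing. A standalone sketch of the lookup (the function name FindPaletteSlot is hypothetical); as in AnalyzeAndCreatePalette(), the caller must bail out before the table can fill up:

#include <stdint.h>

#define PAL_HASH_SIZE 1024   // MAX_PALETTE_SIZE (256) * 4 slots

// Return the slot where 'color' is stored, or the free slot where it
// should be inserted. Multiplicative hash + linear probing, as above.
static int FindPaletteSlot(const uint32_t colors[PAL_HASH_SIZE],
                           const uint8_t in_use[PAL_HASH_SIZE],
                           uint32_t color) {
  int key = (int)((0x1e35a7bdu * color) >> 22);   // PALETTE_KEY_RIGHT_SHIFT
  while (in_use[key] && colors[key] != color) {
    key = (key + 1) & (PAL_HASH_SIZE - 1);        // wrap around
  }
  return key;
}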
+static int AnalyzeEntropy(const uint32_t* argb,
+ int width, int height, int argb_stride,
+ double* const nonpredicted_bits,
+ double* const predicted_bits) {
+ int x, y;
+ const uint32_t* last_line = NULL;
+ uint32_t last_pix = argb[0]; // so we're sure that pix_diff == 0
+
+ VP8LHistogram* nonpredicted = NULL;
+ VP8LHistogram* predicted =
+ (VP8LHistogram*)malloc(2 * sizeof(*predicted));
+ if (predicted == NULL) return 0;
+ nonpredicted = predicted + 1;
+
+ VP8LHistogramInit(predicted, 0);
+ VP8LHistogramInit(nonpredicted, 0);
+ for (y = 0; y < height; ++y) {
+ for (x = 0; x < width; ++x) {
+ const uint32_t pix = argb[x];
+ const uint32_t pix_diff = VP8LSubPixels(pix, last_pix);
+ if (pix_diff == 0) continue;
+ if (last_line != NULL && pix == last_line[x]) {
+ continue;
+ }
+ last_pix = pix;
+ {
+ const PixOrCopy pix_token = PixOrCopyCreateLiteral(pix);
+ const PixOrCopy pix_diff_token = PixOrCopyCreateLiteral(pix_diff);
+ VP8LHistogramAddSinglePixOrCopy(nonpredicted, &pix_token);
+ VP8LHistogramAddSinglePixOrCopy(predicted, &pix_diff_token);
+ }
+ }
+ last_line = argb;
+ argb += argb_stride;
+ }
+ *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(nonpredicted);
+ *predicted_bits = VP8LHistogramEstimateBitsBulk(predicted);
+ free(predicted);
+ return 1;
+}
+
+static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) {
+ const WebPPicture* const pic = enc->pic_;
+ assert(pic != NULL && pic->argb != NULL);
+
+ enc->use_palette_ =
+ AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_);
+
+ if (image_hint == WEBP_HINT_GRAPH) {
+ if (enc->use_palette_ && enc->palette_size_ < MAX_COLORS_FOR_GRAPH) {
+ enc->use_palette_ = 0;
+ }
+ }
+
+ if (!enc->use_palette_) {
+ if (image_hint == WEBP_HINT_PHOTO) {
+ enc->use_predict_ = 1;
+ enc->use_cross_color_ = 1;
+ } else {
+ double non_pred_entropy, pred_entropy;
+ if (!AnalyzeEntropy(pic->argb, pic->width, pic->height, pic->argb_stride,
+ &non_pred_entropy, &pred_entropy)) {
+ return 0;
+ }
+ if (pred_entropy < 0.95 * non_pred_entropy) {
+ enc->use_predict_ = 1;
+      // TODO(vikasa): Some correlation was observed between the cross_color
+      // transform and predict. Investigate this further and add a separate
+      // heuristic for setting the use_cross_color flag.
+ enc->use_cross_color_ = 1;
+ }
+ }
+ }
+
+ return 1;
+}
+
+static int GetHuffBitLengthsAndCodes(
+ const VP8LHistogramSet* const histogram_image,
+ HuffmanTreeCode* const huffman_codes) {
+ int i, k;
+ int ok = 1;
+ uint64_t total_length_size = 0;
+ uint8_t* mem_buf = NULL;
+ const int histogram_image_size = histogram_image->size;
+
+ // Iterate over all histograms and get the aggregate number of codes used.
+ for (i = 0; i < histogram_image_size; ++i) {
+ const VP8LHistogram* const histo = histogram_image->histograms[i];
+ HuffmanTreeCode* const codes = &huffman_codes[5 * i];
+ for (k = 0; k < 5; ++k) {
+ const int num_symbols = (k == 0) ? VP8LHistogramNumCodes(histo)
+ : (k == 4) ? NUM_DISTANCE_CODES
+ : 256;
+ codes[k].num_symbols = num_symbols;
+ total_length_size += num_symbols;
+ }
+ }
+
+ // Allocate and Set Huffman codes.
+ {
+ uint16_t* codes;
+ uint8_t* lengths;
+ mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size,
+ sizeof(*lengths) + sizeof(*codes));
+ if (mem_buf == NULL) {
+ ok = 0;
+ goto End;
+ }
+ codes = (uint16_t*)mem_buf;
+ lengths = (uint8_t*)&codes[total_length_size];
+ for (i = 0; i < 5 * histogram_image_size; ++i) {
+ const int bit_length = huffman_codes[i].num_symbols;
+ huffman_codes[i].codes = codes;
+ huffman_codes[i].code_lengths = lengths;
+ codes += bit_length;
+ lengths += bit_length;
+ }
+ }
+
+ // Create Huffman trees.
+ for (i = 0; i < histogram_image_size; ++i) {
+ HuffmanTreeCode* const codes = &huffman_codes[5 * i];
+ VP8LHistogram* const histo = histogram_image->histograms[i];
+ ok = ok && VP8LCreateHuffmanTree(histo->literal_, 15, codes + 0);
+ ok = ok && VP8LCreateHuffmanTree(histo->red_, 15, codes + 1);
+ ok = ok && VP8LCreateHuffmanTree(histo->blue_, 15, codes + 2);
+ ok = ok && VP8LCreateHuffmanTree(histo->alpha_, 15, codes + 3);
+ ok = ok && VP8LCreateHuffmanTree(histo->distance_, 15, codes + 4);
+ }
+
+ End:
+ if (!ok) free(mem_buf);
+ return ok;
+}
+
+static void StoreHuffmanTreeOfHuffmanTreeToBitMask(
+ VP8LBitWriter* const bw, const uint8_t* code_length_bitdepth) {
+  // RFC 1951 will calm you down if you are worried about this funny sequence.
+  // The sequence is tuned from that RFC, but weighted more toward low symbol
+  // counts and more sharply spiked histograms.
+ static const uint8_t kStorageOrder[CODE_LENGTH_CODES] = {
+ 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ };
+ int i;
+ // Throw away trailing zeros:
+ int codes_to_store = CODE_LENGTH_CODES;
+ for (; codes_to_store > 4; --codes_to_store) {
+ if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+ break;
+ }
+ }
+ VP8LWriteBits(bw, 4, codes_to_store - 4);
+ for (i = 0; i < codes_to_store; ++i) {
+ VP8LWriteBits(bw, 3, code_length_bitdepth[kStorageOrder[i]]);
+ }
+}
+
+static void ClearHuffmanTreeIfOnlyOneSymbol(
+ HuffmanTreeCode* const huffman_code) {
+ int k;
+ int count = 0;
+ for (k = 0; k < huffman_code->num_symbols; ++k) {
+ if (huffman_code->code_lengths[k] != 0) {
+ ++count;
+ if (count > 1) return;
+ }
+ }
+ for (k = 0; k < huffman_code->num_symbols; ++k) {
+ huffman_code->code_lengths[k] = 0;
+ huffman_code->codes[k] = 0;
+ }
+}
+
+static void StoreHuffmanTreeToBitMask(
+ VP8LBitWriter* const bw,
+ const HuffmanTreeToken* const tokens, const int num_tokens,
+ const HuffmanTreeCode* const huffman_code) {
+ int i;
+ for (i = 0; i < num_tokens; ++i) {
+ const int ix = tokens[i].code;
+ const int extra_bits = tokens[i].extra_bits;
+ VP8LWriteBits(bw, huffman_code->code_lengths[ix], huffman_code->codes[ix]);
+ switch (ix) {
+ case 16:
+ VP8LWriteBits(bw, 2, extra_bits);
+ break;
+ case 17:
+ VP8LWriteBits(bw, 3, extra_bits);
+ break;
+ case 18:
+ VP8LWriteBits(bw, 7, extra_bits);
+ break;
+ }
+ }
+}
+
+static int StoreFullHuffmanCode(VP8LBitWriter* const bw,
+ const HuffmanTreeCode* const tree) {
+ int ok = 0;
+ uint8_t code_length_bitdepth[CODE_LENGTH_CODES] = { 0 };
+ uint16_t code_length_bitdepth_symbols[CODE_LENGTH_CODES] = { 0 };
+ const int max_tokens = tree->num_symbols;
+ int num_tokens;
+ HuffmanTreeCode huffman_code;
+ HuffmanTreeToken* const tokens =
+ (HuffmanTreeToken*)WebPSafeMalloc((uint64_t)max_tokens, sizeof(*tokens));
+ if (tokens == NULL) return 0;
+
+ huffman_code.num_symbols = CODE_LENGTH_CODES;
+ huffman_code.code_lengths = code_length_bitdepth;
+ huffman_code.codes = code_length_bitdepth_symbols;
+
+ VP8LWriteBits(bw, 1, 0);
+ num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens);
+ {
+ int histogram[CODE_LENGTH_CODES] = { 0 };
+ int i;
+ for (i = 0; i < num_tokens; ++i) {
+ ++histogram[tokens[i].code];
+ }
+
+ if (!VP8LCreateHuffmanTree(histogram, 7, &huffman_code)) {
+ goto End;
+ }
+ }
+
+ StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth);
+ ClearHuffmanTreeIfOnlyOneSymbol(&huffman_code);
+ {
+ int trailing_zero_bits = 0;
+ int trimmed_length = num_tokens;
+ int write_trimmed_length;
+ int length;
+ int i = num_tokens;
+ while (i-- > 0) {
+ const int ix = tokens[i].code;
+ if (ix == 0 || ix == 17 || ix == 18) {
+ --trimmed_length; // discount trailing zeros
+ trailing_zero_bits += code_length_bitdepth[ix];
+ if (ix == 17) {
+ trailing_zero_bits += 3;
+ } else if (ix == 18) {
+ trailing_zero_bits += 7;
+ }
+ } else {
+ break;
+ }
+ }
+ write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12);
+ length = write_trimmed_length ? trimmed_length : num_tokens;
+ VP8LWriteBits(bw, 1, write_trimmed_length);
+ if (write_trimmed_length) {
+ const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1);
+ const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
+ VP8LWriteBits(bw, 3, nbitpairs - 1);
+ assert(trimmed_length >= 2);
+ VP8LWriteBits(bw, nbitpairs * 2, trimmed_length - 2);
+ }
+ StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code);
+ }
+ ok = 1;
+ End:
+ free(tokens);
+ return ok;
+}
+
+static int StoreHuffmanCode(VP8LBitWriter* const bw,
+ const HuffmanTreeCode* const huffman_code) {
+ int i;
+ int count = 0;
+ int symbols[2] = { 0, 0 };
+ const int kMaxBits = 8;
+ const int kMaxSymbol = 1 << kMaxBits;
+
+ // Check whether it's a small tree.
+ for (i = 0; i < huffman_code->num_symbols && count < 3; ++i) {
+ if (huffman_code->code_lengths[i] != 0) {
+ if (count < 2) symbols[count] = i;
+ ++count;
+ }
+ }
+
+ if (count == 0) { // emit minimal tree for empty cases
+ // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
+ VP8LWriteBits(bw, 4, 0x01);
+ return 1;
+ } else if (count <= 2 && symbols[0] < kMaxSymbol && symbols[1] < kMaxSymbol) {
+ VP8LWriteBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols.
+ VP8LWriteBits(bw, 1, count - 1);
+ if (symbols[0] <= 1) {
+ VP8LWriteBits(bw, 1, 0); // Code bit for small (1 bit) symbol value.
+ VP8LWriteBits(bw, 1, symbols[0]);
+ } else {
+ VP8LWriteBits(bw, 1, 1);
+ VP8LWriteBits(bw, 8, symbols[0]);
+ }
+ if (count == 2) {
+ VP8LWriteBits(bw, 8, symbols[1]);
+ }
+ return 1;
+ } else {
+ return StoreFullHuffmanCode(bw, huffman_code);
+ }
+}
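
To make the small-tree branch above concrete, this is the sequence of writes it produces for a code with exactly one used symbol whose index is, say, 200 (a made-up value that needs the 8-bit escape):

    // Illustrative trace of the count <= 2 branch:
    //   VP8LWriteBits(bw, 1, 1);     // small-tree marker
    //   VP8LWriteBits(bw, 1, 0);     // count - 1 == 0: a single symbol
    //   VP8LWriteBits(bw, 1, 1);     // symbol value does not fit in 1 bit...
    //   VP8LWriteBits(bw, 8, 200);   // ...so it is sent as 8 bits
    // 11 bits in total, instead of a full code-length tree.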
+
+static void WriteHuffmanCode(VP8LBitWriter* const bw,
+ const HuffmanTreeCode* const code, int index) {
+ const int depth = code->code_lengths[index];
+ const int symbol = code->codes[index];
+ VP8LWriteBits(bw, depth, symbol);
+}
+
+static void StoreImageToBitMask(
+ VP8LBitWriter* const bw, int width, int histo_bits,
+ const VP8LBackwardRefs* const refs,
+ const uint16_t* histogram_symbols,
+ const HuffmanTreeCode* const huffman_codes) {
+ // x and y trace the position in the image.
+ int x = 0;
+ int y = 0;
+ const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
+ int i;
+ for (i = 0; i < refs->size; ++i) {
+ const PixOrCopy* const v = &refs->refs[i];
+ const int histogram_ix = histogram_symbols[histo_bits ?
+ (y >> histo_bits) * histo_xsize +
+ (x >> histo_bits) : 0];
+ const HuffmanTreeCode* const codes = huffman_codes + 5 * histogram_ix;
+ if (PixOrCopyIsCacheIdx(v)) {
+ const int code = PixOrCopyCacheIdx(v);
+ const int literal_ix = 256 + NUM_LENGTH_CODES + code;
+ WriteHuffmanCode(bw, codes, literal_ix);
+ } else if (PixOrCopyIsLiteral(v)) {
+ static const int order[] = { 1, 2, 0, 3 };
+ int k;
+ for (k = 0; k < 4; ++k) {
+ const int code = PixOrCopyLiteral(v, order[k]);
+ WriteHuffmanCode(bw, codes + k, code);
+ }
+ } else {
+ int bits, n_bits;
+ int code, distance;
+
+ PrefixEncode(v->len, &code, &n_bits, &bits);
+ WriteHuffmanCode(bw, codes, 256 + code);
+ VP8LWriteBits(bw, n_bits, bits);
+
+ distance = PixOrCopyDistance(v);
+ PrefixEncode(distance, &code, &n_bits, &bits);
+ WriteHuffmanCode(bw, codes + 4, code);
+ VP8LWriteBits(bw, n_bits, bits);
+ }
+ x += PixOrCopyLength(v);
+ while (x >= width) {
+ x -= width;
+ ++y;
+ }
+ }
+}
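
In the backward-reference branch above, each length and distance is split by PrefixEncode() into a prefix code plus extra bits. A self-contained sketch of that split, assuming the usual two-top-bits scheme of the lossless format (PrefixSplit() is a hypothetical stand-in, not the committed helper):

    // Hypothetical illustration of the (code, n_bits, bits) split for a
    // length or distance value v >= 1.
    static void PrefixSplit(int v, int* code, int* n_bits, int* bits) {
      int d = v - 1;
      if (d < 2) {                    // values 1 and 2 -> codes 0 and 1
        *code = d;
        *n_bits = 0;
        *bits = 0;
        return;
      }
      {
        int h = 0;                    // h = floor(log2(d))
        while ((d >> (h + 1)) != 0) ++h;
        *n_bits = h - 1;              // all but the two top bits are "extra"
        *bits = d & ((1 << *n_bits) - 1);
        *code = 2 * h + ((d >> (h - 1)) & 1);
      }
    }
    // Example: a copy length of 70 gives d == 69 == 0b1000101, hence
    // code == 12 with 5 extra bits of value 5.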
+
+// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31
+static int EncodeImageNoHuffman(VP8LBitWriter* const bw,
+ const uint32_t* const argb,
+ int width, int height, int quality) {
+ int i;
+ int ok = 0;
+ VP8LBackwardRefs refs;
+ HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } };
+ const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol
+ VP8LHistogramSet* const histogram_image = VP8LAllocateHistogramSet(1, 0);
+ if (histogram_image == NULL) return 0;
+
+ // Calculate backward references from ARGB image.
+ if (!VP8LGetBackwardReferences(width, height, argb, quality, 0, 1, &refs)) {
+ goto Error;
+ }
+ // Build histogram image and symbols from backward references.
+ VP8LHistogramStoreRefs(&refs, histogram_image->histograms[0]);
+
+ // Create Huffman bit lengths and codes for each histogram image.
+ assert(histogram_image->size == 1);
+ if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+ goto Error;
+ }
+
+ // No color cache, no Huffman image.
+ VP8LWriteBits(bw, 1, 0);
+
+ // Store Huffman codes.
+ for (i = 0; i < 5; ++i) {
+ HuffmanTreeCode* const codes = &huffman_codes[i];
+ if (!StoreHuffmanCode(bw, codes)) {
+ goto Error;
+ }
+ ClearHuffmanTreeIfOnlyOneSymbol(codes);
+ }
+
+ // Store actual literals.
+ StoreImageToBitMask(bw, width, 0, &refs, histogram_symbols, huffman_codes);
+ ok = 1;
+
+ Error:
+ free(histogram_image);
+ VP8LClearBackwardRefs(&refs);
+ free(huffman_codes[0].codes);
+ return ok;
+}
+
+static int EncodeImageInternal(VP8LBitWriter* const bw,
+ const uint32_t* const argb,
+ int width, int height, int quality,
+ int cache_bits, int histogram_bits) {
+ int ok = 0;
+ const int use_2d_locality = 1;
+ const int use_color_cache = (cache_bits > 0);
+ const uint32_t histogram_image_xysize =
+ VP8LSubSampleSize(width, histogram_bits) *
+ VP8LSubSampleSize(height, histogram_bits);
+ VP8LHistogramSet* histogram_image =
+ VP8LAllocateHistogramSet(histogram_image_xysize, 0);
+ int histogram_image_size = 0;
+ size_t bit_array_size = 0;
+ HuffmanTreeCode* huffman_codes = NULL;
+ VP8LBackwardRefs refs;
+ uint16_t* const histogram_symbols =
+ (uint16_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize,
+ sizeof(*histogram_symbols));
+ assert(histogram_bits >= MIN_HUFFMAN_BITS);
+ assert(histogram_bits <= MAX_HUFFMAN_BITS);
+ if (histogram_image == NULL || histogram_symbols == NULL) goto Error;
+
+ // Calculate backward references from ARGB image.
+ if (!VP8LGetBackwardReferences(width, height, argb, quality, cache_bits,
+ use_2d_locality, &refs)) {
+ goto Error;
+ }
+ // Build histogram image and symbols from backward references.
+ if (!VP8LGetHistoImageSymbols(width, height, &refs,
+ quality, histogram_bits, cache_bits,
+ histogram_image,
+ histogram_symbols)) {
+ goto Error;
+ }
+ // Create Huffman bit lengths and codes for each histogram image.
+ histogram_image_size = histogram_image->size;
+ bit_array_size = 5 * histogram_image_size;
+ huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
+ sizeof(*huffman_codes));
+ if (huffman_codes == NULL ||
+ !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+ goto Error;
+ }
+
+ // Color Cache parameters.
+ VP8LWriteBits(bw, 1, use_color_cache);
+ if (use_color_cache) {
+ VP8LWriteBits(bw, 4, cache_bits);
+ }
+
+ // Huffman image + meta huffman.
+ {
+ const int write_histogram_image = (histogram_image_size > 1);
+ VP8LWriteBits(bw, 1, write_histogram_image);
+ if (write_histogram_image) {
+ uint32_t* const histogram_argb =
+ (uint32_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize,
+ sizeof(*histogram_argb));
+ int max_index = 0;
+ uint32_t i;
+ if (histogram_argb == NULL) goto Error;
+ for (i = 0; i < histogram_image_xysize; ++i) {
+ const int index = histogram_symbols[i] & 0xffff;
+ histogram_argb[i] = 0xff000000 | (index << 8);
+ if (index >= max_index) {
+ max_index = index + 1;
+ }
+ }
+ histogram_image_size = max_index;
+
+ VP8LWriteBits(bw, 3, histogram_bits - 2);
+ ok = EncodeImageNoHuffman(bw, histogram_argb,
+ VP8LSubSampleSize(width, histogram_bits),
+ VP8LSubSampleSize(height, histogram_bits),
+ quality);
+ free(histogram_argb);
+ if (!ok) goto Error;
+ }
+ }
+
+ // Store Huffman codes.
+ {
+ int i;
+ for (i = 0; i < 5 * histogram_image_size; ++i) {
+ HuffmanTreeCode* const codes = &huffman_codes[i];
+ if (!StoreHuffmanCode(bw, codes)) goto Error;
+ ClearHuffmanTreeIfOnlyOneSymbol(codes);
+ }
+ }
+ // Free combined histograms.
+ free(histogram_image);
+ histogram_image = NULL;
+
+ // Store actual literals.
+ StoreImageToBitMask(bw, width, histogram_bits, &refs,
+ histogram_symbols, huffman_codes);
+ ok = 1;
+
+ Error:
+ if (!ok) free(histogram_image);
+
+ VP8LClearBackwardRefs(&refs);
+ if (huffman_codes != NULL) {
+ free(huffman_codes->codes);
+ free(huffman_codes);
+ }
+ free(histogram_symbols);
+ return ok;
+}
+
+// -----------------------------------------------------------------------------
+// Transforms
+
+// Check whether it would be a good idea to subtract green from red and blue.
+// Only the entropy of the red/blue components is affected, so the other
+// components are not examined.
+static int EvalAndApplySubtractGreen(VP8LEncoder* const enc,
+ int width, int height,
+ VP8LBitWriter* const bw) {
+ if (!enc->use_palette_) {
+ int i;
+ const uint32_t* const argb = enc->argb_;
+ double bit_cost_before, bit_cost_after;
+ VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo));
+ if (histo == NULL) return 0;
+
+ VP8LHistogramInit(histo, 1);
+ for (i = 0; i < width * height; ++i) {
+ const uint32_t c = argb[i];
+ ++histo->red_[(c >> 16) & 0xff];
+ ++histo->blue_[(c >> 0) & 0xff];
+ }
+ bit_cost_before = VP8LHistogramEstimateBits(histo);
+
+ VP8LHistogramInit(histo, 1);
+ for (i = 0; i < width * height; ++i) {
+ const uint32_t c = argb[i];
+ const int green = (c >> 8) & 0xff;
+ ++histo->red_[((c >> 16) - green) & 0xff];
+ ++histo->blue_[((c >> 0) - green) & 0xff];
+ }
+ bit_cost_after = VP8LHistogramEstimateBits(histo);
+ free(histo);
+
+    // Check whether subtracting green lowers the estimated bit cost.
+ enc->use_subtract_green_ = (bit_cost_after < bit_cost_before);
+ if (enc->use_subtract_green_) {
+ VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+ VP8LWriteBits(bw, 2, SUBTRACT_GREEN);
+ VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
+ }
+ }
+ return 1;
+}
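
As a concrete illustration of the transform evaluated above (the arithmetic is modulo 256 and touches only the red and blue channels):

    // Example: the pixel 0xffc86432 has a=0xff, r=0xc8, g=0x64, b=0x32.
    // After subtract-green:
    //   r' = (0xc8 - 0x64) & 0xff = 0x64
    //   b' = (0x32 - 0x64) & 0xff = 0xce
    // so the stored pixel becomes 0xff6464ce; alpha and green are unchanged.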
+
+static int ApplyPredictFilter(const VP8LEncoder* const enc,
+ int width, int height, int quality,
+ VP8LBitWriter* const bw) {
+ const int pred_bits = enc->transform_bits_;
+ const int transform_width = VP8LSubSampleSize(width, pred_bits);
+ const int transform_height = VP8LSubSampleSize(height, pred_bits);
+
+ VP8LResidualImage(width, height, pred_bits, enc->argb_, enc->argb_scratch_,
+ enc->transform_data_);
+ VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+ VP8LWriteBits(bw, 2, PREDICTOR_TRANSFORM);
+ assert(pred_bits >= 2);
+ VP8LWriteBits(bw, 3, pred_bits - 2);
+ if (!EncodeImageNoHuffman(bw, enc->transform_data_,
+ transform_width, transform_height, quality)) {
+ return 0;
+ }
+ return 1;
+}
+
+static int ApplyCrossColorFilter(const VP8LEncoder* const enc,
+ int width, int height, int quality,
+ VP8LBitWriter* const bw) {
+ const int ccolor_transform_bits = enc->transform_bits_;
+ const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits);
+ const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits);
+ const int step = (quality == 0) ? 32 : 8;
+
+ VP8LColorSpaceTransform(width, height, ccolor_transform_bits, step,
+ enc->argb_, enc->transform_data_);
+ VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+ VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM);
+ assert(ccolor_transform_bits >= 2);
+ VP8LWriteBits(bw, 3, ccolor_transform_bits - 2);
+ if (!EncodeImageNoHuffman(bw, enc->transform_data_,
+ transform_width, transform_height, quality)) {
+ return 0;
+ }
+ return 1;
+}
+
+// -----------------------------------------------------------------------------
+
+static void PutLE32(uint8_t* const data, uint32_t val) {
+ data[0] = (val >> 0) & 0xff;
+ data[1] = (val >> 8) & 0xff;
+ data[2] = (val >> 16) & 0xff;
+ data[3] = (val >> 24) & 0xff;
+}
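
A quick worked example of the little-endian packing done by PutLE32() (the size value is made up):

    // PutLE32(buf, 0x00000164) stores the bytes 0x64 0x01 0x00 0x00,
    // i.e. a chunk size of 356 in RIFF's least-significant-byte-first layout.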
+
+static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic,
+ size_t riff_size, size_t vp8l_size) {
+ uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = {
+ 'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P',
+ 'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE,
+ };
+ PutLE32(riff + TAG_SIZE, (uint32_t)riff_size);
+ PutLE32(riff + RIFF_HEADER_SIZE + TAG_SIZE, (uint32_t)vp8l_size);
+ if (!pic->writer(riff, sizeof(riff), pic)) {
+ return VP8_ENC_ERROR_BAD_WRITE;
+ }
+ return VP8_ENC_OK;
+}
+
+static int WriteImageSize(const WebPPicture* const pic,
+ VP8LBitWriter* const bw) {
+ const int width = pic->width - 1;
+ const int height = pic->height - 1;
+ assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION);
+
+ VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, width);
+ VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, height);
+ return !bw->error_;
+}
+
+static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) {
+ VP8LWriteBits(bw, 1, has_alpha);
+ VP8LWriteBits(bw, VP8L_VERSION_BITS, VP8L_VERSION);
+ return !bw->error_;
+}
+
+static WebPEncodingError WriteImage(const WebPPicture* const pic,
+ VP8LBitWriter* const bw,
+ size_t* const coded_size) {
+ WebPEncodingError err = VP8_ENC_OK;
+ const uint8_t* const webpll_data = VP8LBitWriterFinish(bw);
+ const size_t webpll_size = VP8LBitWriterNumBytes(bw);
+ const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size;
+ const size_t pad = vp8l_size & 1;
+ const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad;
+
+ err = WriteRiffHeader(pic, riff_size, vp8l_size);
+ if (err != VP8_ENC_OK) goto Error;
+
+ if (!pic->writer(webpll_data, webpll_size, pic)) {
+ err = VP8_ENC_ERROR_BAD_WRITE;
+ goto Error;
+ }
+
+ if (pad) {
+ const uint8_t pad_byte[1] = { 0 };
+ if (!pic->writer(pad_byte, 1, pic)) {
+ err = VP8_ENC_ERROR_BAD_WRITE;
+ goto Error;
+ }
+ }
+ *coded_size = CHUNK_HEADER_SIZE + riff_size;
+ return VP8_ENC_OK;
+
+ Error:
+ return err;
+}
+
+// -----------------------------------------------------------------------------
+
+// Allocates the memory for the argb (W x H) buffer, the scratch rows used as
+// prediction context, and the transform data.
+static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
+ int width, int height) {
+ WebPEncodingError err = VP8_ENC_OK;
+ const int tile_size = 1 << enc->transform_bits_;
+ const uint64_t image_size = width * height;
+ const uint64_t argb_scratch_size = tile_size * width + width;
+ const uint64_t transform_data_size =
+ (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) *
+ (uint64_t)VP8LSubSampleSize(height, enc->transform_bits_);
+ const uint64_t total_size =
+ image_size + argb_scratch_size + transform_data_size;
+ uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+ if (mem == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+ enc->argb_ = mem;
+ mem += image_size;
+ enc->argb_scratch_ = mem;
+ mem += argb_scratch_size;
+ enc->transform_data_ = mem;
+ enc->current_width_ = width;
+
+ Error:
+ return err;
+}
+
+// Bundles multiple (2, 4 or 8) pixels into a single pixel.
+// 'xs' is the width, in bundled pixels, of the output image.
+static void BundleColorMap(const WebPPicture* const pic,
+ int xbits, uint32_t* bundled_argb, int xs) {
+ int y;
+ const int bit_depth = 1 << (3 - xbits);
+ uint32_t code = 0;
+ const uint32_t* argb = pic->argb;
+ const int width = pic->width;
+ const int height = pic->height;
+
+ for (y = 0; y < height; ++y) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ const int mask = (1 << xbits) - 1;
+ const int xsub = x & mask;
+ if (xsub == 0) {
+ code = 0;
+ }
+ // TODO(vikasa): simplify the bundling logic.
+ code |= (argb[x] & 0xff00) << (bit_depth * xsub);
+ bundled_argb[y * xs + (x >> xbits)] = 0xff000000 | code;
+ }
+ argb += pic->argb_stride;
+ }
+}
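
A worked example of the bundling above for a hypothetical palette of at most 4 colors, i.e. xbits == 2 and bit_depth == 2, so four palette indices share one output pixel:

    // Input pixels at x = 0..3 are already palette-indexed and carry the
    // indices 1, 2, 3, 0 in their green byte (0xff000100, 0xff000200, ...):
    //   xsub == 0:  code |= 0x0100 << 0  -> code == 0x0100
    //   xsub == 1:  code |= 0x0200 << 2  -> code == 0x0900
    //   xsub == 2:  code |= 0x0300 << 4  -> code == 0x3900
    //   xsub == 3:  code |= 0x0000 << 6  -> code == 0x3900
    // The single bundled pixel written out is 0xff003900.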
+
+// Note: Expects "enc->palette_" to be set properly.
+// "enc->palette_" is modified by this call and should not be used afterwards.
+static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
+ VP8LEncoder* const enc, int quality) {
+ WebPEncodingError err = VP8_ENC_OK;
+ int i, x, y;
+ const WebPPicture* const pic = enc->pic_;
+ uint32_t* argb = pic->argb;
+ const int width = pic->width;
+ const int height = pic->height;
+ uint32_t* const palette = enc->palette_;
+ const int palette_size = enc->palette_size_;
+
+  // Replace each input pixel by its corresponding palette index.
+ for (y = 0; y < height; ++y) {
+ for (x = 0; x < width; ++x) {
+ const uint32_t pix = argb[x];
+ for (i = 0; i < palette_size; ++i) {
+ if (pix == palette[i]) {
+ argb[x] = 0xff000000u | (i << 8);
+ break;
+ }
+ }
+ }
+ argb += pic->argb_stride;
+ }
+
+ // Save palette to bitstream.
+ VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+ VP8LWriteBits(bw, 2, COLOR_INDEXING_TRANSFORM);
+ assert(palette_size >= 1);
+ VP8LWriteBits(bw, 8, palette_size - 1);
+ for (i = palette_size - 1; i >= 1; --i) {
+ palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
+ }
+ if (!EncodeImageNoHuffman(bw, palette, palette_size, 1, quality)) {
+ err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+ goto Error;
+ }
+
+ if (palette_size <= 16) {
+ // Image can be packed (multiple pixels per uint32_t).
+ int xbits = 1;
+ if (palette_size <= 2) {
+ xbits = 3;
+ } else if (palette_size <= 4) {
+ xbits = 2;
+ }
+ err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
+ if (err != VP8_ENC_OK) goto Error;
+ BundleColorMap(pic, xbits, enc->argb_, enc->current_width_);
+ }
+
+ Error:
+ return err;
+}
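
For reference, the loop just before EncodeImageNoHuffman() delta-codes the sorted palette component-wise, assuming VP8LSubPixels() is the usual per-channel subtraction modulo 256:

    // Example: the palette { 0xff101010, 0xff202030 } becomes
    //   palette[1] = VP8LSubPixels(0xff202030, 0xff101010) = 0x00101020
    // before being encoded as a one-row image of palette_size pixels.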
+
+// -----------------------------------------------------------------------------
+
+static int GetHistoBits(const WebPConfig* const config,
+ const WebPPicture* const pic) {
+ const int width = pic->width;
+ const int height = pic->height;
+ const size_t hist_size = sizeof(VP8LHistogram);
+ // Make tile size a function of encoding method (Range: 0 to 6).
+ int histo_bits = 7 - config->method;
+ while (1) {
+ const size_t huff_image_size = VP8LSubSampleSize(width, histo_bits) *
+ VP8LSubSampleSize(height, histo_bits) *
+ hist_size;
+ if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break;
+ ++histo_bits;
+ }
+ return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS :
+ (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
+}
+
+static void InitEncParams(VP8LEncoder* const enc) {
+ const WebPConfig* const config = enc->config_;
+ const WebPPicture* const picture = enc->pic_;
+ const int method = config->method;
+ const float quality = config->quality;
+ enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4;
+ enc->histo_bits_ = GetHistoBits(config, picture);
+ enc->cache_bits_ = (quality <= 25.f) ? 0 : 7;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LEncoder
+
+static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config,
+ const WebPPicture* const picture) {
+ VP8LEncoder* const enc = (VP8LEncoder*)calloc(1, sizeof(*enc));
+ if (enc == NULL) {
+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+ return NULL;
+ }
+ enc->config_ = config;
+ enc->pic_ = picture;
+ return enc;
+}
+
+static void VP8LEncoderDelete(VP8LEncoder* enc) {
+ free(enc->argb_);
+ free(enc);
+}
+
+// -----------------------------------------------------------------------------
+// Main call
+
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+ const WebPPicture* const picture,
+ VP8LBitWriter* const bw) {
+ WebPEncodingError err = VP8_ENC_OK;
+ const int quality = (int)config->quality;
+ const int width = picture->width;
+ const int height = picture->height;
+ VP8LEncoder* const enc = VP8LEncoderNew(config, picture);
+ const size_t byte_position = VP8LBitWriterNumBytes(bw);
+
+ if (enc == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ InitEncParams(enc);
+
+ // ---------------------------------------------------------------------------
+  // Analyze image (entropy, palette size, etc.)
+
+ if (!VP8LEncAnalyze(enc, config->image_hint)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ if (enc->use_palette_) {
+ err = ApplyPalette(bw, enc, quality);
+ if (err != VP8_ENC_OK) goto Error;
+ // Color cache is disabled for palette.
+ enc->cache_bits_ = 0;
+ }
+
+  // In case the image was not packed (no palette bundling), copy it now.
+ if (enc->argb_ == NULL) {
+ int y;
+ err = AllocateTransformBuffer(enc, width, height);
+ if (err != VP8_ENC_OK) goto Error;
+ for (y = 0; y < height; ++y) {
+ memcpy(enc->argb_ + y * width,
+ picture->argb + y * picture->argb_stride,
+ width * sizeof(*enc->argb_));
+ }
+ enc->current_width_ = width;
+ }
+
+ // ---------------------------------------------------------------------------
+ // Apply transforms and write transform data.
+
+ if (!EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ if (enc->use_predict_) {
+ if (!ApplyPredictFilter(enc, enc->current_width_, height, quality, bw)) {
+ err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+ goto Error;
+ }
+ }
+
+ if (enc->use_cross_color_) {
+ if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw)) {
+ err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+ goto Error;
+ }
+ }
+
+ VP8LWriteBits(bw, 1, !TRANSFORM_PRESENT); // No more transforms.
+
+ // ---------------------------------------------------------------------------
+ // Estimate the color cache size.
+
+ if (enc->cache_bits_ > 0) {
+ if (!VP8LCalculateEstimateForCacheSize(enc->argb_, enc->current_width_,
+ height, &enc->cache_bits_)) {
+ err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+ goto Error;
+ }
+ }
+
+ // ---------------------------------------------------------------------------
+ // Encode and write the transformed image.
+
+ if (!EncodeImageInternal(bw, enc->argb_, enc->current_width_, height,
+ quality, enc->cache_bits_, enc->histo_bits_)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ if (picture->stats != NULL) {
+ WebPAuxStats* const stats = picture->stats;
+ stats->lossless_features = 0;
+ if (enc->use_predict_) stats->lossless_features |= 1;
+ if (enc->use_cross_color_) stats->lossless_features |= 2;
+ if (enc->use_subtract_green_) stats->lossless_features |= 4;
+ if (enc->use_palette_) stats->lossless_features |= 8;
+ stats->histogram_bits = enc->histo_bits_;
+ stats->transform_bits = enc->transform_bits_;
+ stats->cache_bits = enc->cache_bits_;
+ stats->palette_size = enc->palette_size_;
+ stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position);
+ }
+
+ Error:
+ VP8LEncoderDelete(enc);
+ return err;
+}
+
+int VP8LEncodeImage(const WebPConfig* const config,
+ const WebPPicture* const picture) {
+ int width, height;
+ int has_alpha;
+ size_t coded_size;
+ int percent = 0;
+ WebPEncodingError err = VP8_ENC_OK;
+ VP8LBitWriter bw;
+
+ if (picture == NULL) return 0;
+
+ if (config == NULL || picture->argb == NULL) {
+ err = VP8_ENC_ERROR_NULL_PARAMETER;
+ WebPEncodingSetError(picture, err);
+ return 0;
+ }
+
+ width = picture->width;
+ height = picture->height;
+ if (!VP8LBitWriterInit(&bw, (width * height) >> 1)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ if (!WebPReportProgress(picture, 1, &percent)) {
+ UserAbort:
+ err = VP8_ENC_ERROR_USER_ABORT;
+ goto Error;
+ }
+ // Reset stats (for pure lossless coding)
+ if (picture->stats != NULL) {
+ WebPAuxStats* const stats = picture->stats;
+ memset(stats, 0, sizeof(*stats));
+ stats->PSNR[0] = 99.f;
+ stats->PSNR[1] = 99.f;
+ stats->PSNR[2] = 99.f;
+ stats->PSNR[3] = 99.f;
+ stats->PSNR[4] = 99.f;
+ }
+
+ // Write image size.
+ if (!WriteImageSize(picture, &bw)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ has_alpha = WebPPictureHasTransparency(picture);
+ // Write the non-trivial Alpha flag and lossless version.
+ if (!WriteRealAlphaAndVersion(&bw, has_alpha)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort;
+
+ // Encode main image stream.
+ err = VP8LEncodeStream(config, picture, &bw);
+ if (err != VP8_ENC_OK) goto Error;
+
+ // TODO(skal): have a fine-grained progress report in VP8LEncodeStream().
+ if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort;
+
+ // Finish the RIFF chunk.
+ err = WriteImage(picture, &bw, &coded_size);
+ if (err != VP8_ENC_OK) goto Error;
+
+ if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort;
+
+ // Save size.
+ if (picture->stats != NULL) {
+ picture->stats->coded_size += (int)coded_size;
+ picture->stats->lossless_size = (int)coded_size;
+ }
+
+ if (picture->extra_info != NULL) {
+ const int mb_w = (width + 15) >> 4;
+ const int mb_h = (height + 15) >> 4;
+ memset(picture->extra_info, 0, mb_w * mb_h * sizeof(*picture->extra_info));
+ }
+
+ Error:
+ if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ VP8LBitWriterDestroy(&bw);
+ if (err != VP8_ENC_OK) {
+ WebPEncodingSetError(picture, err);
+ return 0;
+ }
+ return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/enc/vp8li.h b/src/enc/vp8li.h
new file mode 100644
index 00000000..5f3665a8
--- /dev/null
+++ b/src/enc/vp8li.h
@@ -0,0 +1,68 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Lossless encoder: internal header.
+//
+// Author: Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_ENC_VP8LI_H_
+#define WEBP_ENC_VP8LI_H_
+
+#include "./histogram.h"
+#include "../utils/bit_writer.h"
+#include "webp/encode.h"
+#include "webp/format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct {
+ const WebPConfig* config_; // user configuration and parameters
+ const WebPPicture* pic_; // input picture.
+
+ uint32_t* argb_; // Transformed argb image data.
+ uint32_t* argb_scratch_; // Scratch memory for argb rows
+ // (used for prediction).
+ uint32_t* transform_data_; // Scratch memory for transform data.
+ int current_width_; // Corresponds to packed image width.
+
+ // Encoding parameters derived from quality parameter.
+ int histo_bits_;
+ int transform_bits_;
+ int cache_bits_; // If equal to 0, don't use color cache.
+
+ // Encoding parameters derived from image characteristics.
+ int use_cross_color_;
+ int use_subtract_green_;
+ int use_predict_;
+ int use_palette_;
+ int palette_size_;
+ uint32_t palette_[MAX_PALETTE_SIZE];
+} VP8LEncoder;
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// Encodes the picture.
+// Returns 0 if config or picture is NULL or picture doesn't have valid argb
+// input.
+int VP8LEncodeImage(const WebPConfig* const config,
+ const WebPPicture* const picture);
+
+// Encodes the main image stream using the supplied bit writer.
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+ const WebPPicture* const picture,
+ VP8LBitWriter* const bw);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_ENC_VP8LI_H_ */
diff --git a/src/enc/webpenc.c b/src/enc/webpenc.c
index 819dd639..3c275589 100644
--- a/src/enc/webpenc.c
+++ b/src/enc/webpenc.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -14,7 +14,9 @@
#include <string.h>
#include <math.h>
-#include "vp8enci.h"
+#include "./vp8enci.h"
+#include "./vp8li.h"
+#include "../utils/utils.h"
// #define PRINT_MEMORY_INFO
@@ -26,17 +28,15 @@ extern "C" {
#include <stdio.h>
#endif
-#define MAX_DIMENSION 16384 // maximum width/height allowed by the spec
-
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
int WebPGetEncoderVersion(void) {
return (ENC_MAJ_VERSION << 16) | (ENC_MIN_VERSION << 8) | ENC_REV_VERSION;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// WebPPicture
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
static int DummyWriter(const uint8_t* data, size_t data_size,
const WebPPicture* const picture) {
@@ -47,11 +47,11 @@ static int DummyWriter(const uint8_t* data, size_t data_size,
return 1;
}
-int WebPPictureInitInternal(WebPPicture* const picture, int version) {
- if (version != WEBP_ENCODER_ABI_VERSION) {
+int WebPPictureInitInternal(WebPPicture* picture, int version) {
+ if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
return 0; // caller/system version mismatch!
}
- if (picture) {
+ if (picture != NULL) {
memset(picture, 0, sizeof(*picture));
picture->writer = DummyWriter;
WebPEncodingSetError(picture, VP8_ENC_OK);
@@ -59,9 +59,9 @@ int WebPPictureInitInternal(WebPPicture* const picture, int version) {
return 1;
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// VP8Encoder
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
static void ResetSegmentHeader(VP8Encoder* const enc) {
VP8SegmentHeader* const hdr = &enc->segment_hdr_;
@@ -112,11 +112,15 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) {
static void MapConfigToTools(VP8Encoder* const enc) {
const int method = enc->config_->method;
+ const int limit = 100 - enc->config_->partition_limit;
enc->method_ = method;
enc->rd_opt_level_ = (method >= 6) ? 3
: (method >= 5) ? 2
: (method >= 3) ? 1
: 0;
+ enc->max_i4_header_bits_ =
+ 256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block
+ (limit * limit) / (100 * 100); // ... modulated with a quadratic curve.
}
// Memory scaling with dimensions:
@@ -140,8 +144,8 @@ static void MapConfigToTools(VP8Encoder* const enc) {
// LFStats: 2048
// Picture size (yuv): 589824
-static VP8Encoder* InitEncoder(const WebPConfig* const config,
- WebPPicture* const picture) {
+static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
+ WebPPicture* const picture) {
const int use_filter =
(config->filter_strength > 0) || (config->autofilter > 0);
const int mb_w = (picture->width + 15) >> 4;
@@ -161,13 +165,14 @@ static VP8Encoder* InitEncoder(const WebPConfig* const config,
config->autofilter ? sizeof(LFStats) + ALIGN_CST : 0;
VP8Encoder* enc;
uint8_t* mem;
- size_t size = sizeof(VP8Encoder) + ALIGN_CST // main struct
- + cache_size // working caches
- + info_size // modes info
- + preds_size // prediction modes
- + samples_size // top/left samples
- + nz_size // coeff context bits
- + lf_stats_size; // autofilter stats
+ const uint64_t size = (uint64_t)sizeof(VP8Encoder) // main struct
+ + ALIGN_CST // cache alignment
+ + cache_size // working caches
+ + info_size // modes info
+ + preds_size // prediction modes
+ + samples_size // top/left samples
+ + nz_size // coeff context bits
+ + lf_stats_size; // autofilter stats
#ifdef PRINT_MEMORY_INFO
printf("===================================\n");
@@ -195,7 +200,7 @@ static VP8Encoder* InitEncoder(const WebPConfig* const config,
mb_w * mb_h * 384 * sizeof(uint8_t));
printf("===================================\n");
#endif
- mem = (uint8_t*)malloc(size);
+ mem = (uint8_t*)WebPSafeMalloc(size, sizeof(*mem));
if (mem == NULL) {
WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
return NULL;
@@ -240,6 +245,7 @@ static VP8Encoder* InitEncoder(const WebPConfig* const config,
enc->config_ = config;
enc->profile_ = use_filter ? ((config->filter_type == 1) ? 0 : 1) : 2;
enc->pic_ = picture;
+ enc->percent_ = 0;
MapConfigToTools(enc);
VP8EncDspInit();
@@ -248,25 +254,25 @@ static VP8Encoder* InitEncoder(const WebPConfig* const config,
ResetFilterHeader(enc);
ResetBoundaryPredictions(enc);
-#ifdef WEBP_EXPERIMENTAL_FEATURES
VP8EncInitAlpha(enc);
+#ifdef WEBP_EXPERIMENTAL_FEATURES
VP8EncInitLayer(enc);
#endif
return enc;
}
-static void DeleteEncoder(VP8Encoder* enc) {
- if (enc) {
-#ifdef WEBP_EXPERIMENTAL_FEATURES
+static void DeleteVP8Encoder(VP8Encoder* enc) {
+ if (enc != NULL) {
VP8EncDeleteAlpha(enc);
+#ifdef WEBP_EXPERIMENTAL_FEATURES
VP8EncDeleteLayer(enc);
#endif
free(enc);
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
static double GetPSNR(uint64_t err, uint64_t size) {
return err ? 10. * log10(255. * 255. * size / err) : 99.;
@@ -280,11 +286,12 @@ static void FinalizePSNR(const VP8Encoder* const enc) {
stats->PSNR[1] = (float)GetPSNR(sse[1], size / 4);
stats->PSNR[2] = (float)GetPSNR(sse[2], size / 4);
stats->PSNR[3] = (float)GetPSNR(sse[0] + sse[1] + sse[2], size * 3 / 2);
+ stats->PSNR[4] = (float)GetPSNR(sse[3], size);
}
static void StoreStats(VP8Encoder* const enc) {
WebPAuxStats* const stats = enc->pic_->stats;
- if (stats) {
+ if (stats != NULL) {
int i, s;
for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
stats->segment_level[i] = enc->dqm_[i].fstrength_;
@@ -299,19 +306,32 @@ static void StoreStats(VP8Encoder* const enc) {
stats->block_count[i] = enc->block_count_[i];
}
}
+ WebPReportProgress(enc->pic_, 100, &enc->percent_); // done!
}
-int WebPEncodingSetError(WebPPicture* const pic, WebPEncodingError error) {
- assert((int)error <= VP8_ENC_ERROR_BAD_WRITE);
+int WebPEncodingSetError(const WebPPicture* const pic,
+ WebPEncodingError error) {
+ assert((int)error < VP8_ENC_ERROR_LAST);
assert((int)error >= VP8_ENC_OK);
- pic->error_code = error;
+ ((WebPPicture*)pic)->error_code = error;
return 0;
}
-//-----------------------------------------------------------------------------
+int WebPReportProgress(const WebPPicture* const pic,
+ int percent, int* const percent_store) {
+ if (percent_store != NULL && percent != *percent_store) {
+ *percent_store = percent;
+ if (pic->progress_hook && !pic->progress_hook(percent, pic)) {
+ // user abort requested
+ WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
+ return 0;
+ }
+ }
+ return 1; // ok
+}
+//------------------------------------------------------------------------------
-int WebPEncode(const WebPConfig* const config, WebPPicture* const pic) {
- VP8Encoder* enc;
+int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
int ok;
if (pic == NULL)
@@ -323,23 +343,43 @@ int WebPEncode(const WebPConfig* const config, WebPPicture* const pic) {
return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
if (pic->width <= 0 || pic->height <= 0)
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
- if (pic->y == NULL || pic->u == NULL || pic->v == NULL)
- return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
- if (pic->width >= MAX_DIMENSION || pic->height >= MAX_DIMENSION)
+ if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION)
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
- enc = InitEncoder(config, pic);
- if (enc == NULL) return 0; // pic->error is already set.
- ok = VP8EncAnalyze(enc)
- && VP8StatLoop(enc)
- && VP8EncLoop(enc)
+ if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats));
+
+ if (!config->lossless) {
+ VP8Encoder* enc = NULL;
+ if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
+ if (pic->argb != NULL) {
+ if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
+ } else {
+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
+ }
+ }
+
+ enc = InitVP8Encoder(config, pic);
+ if (enc == NULL) return 0; // pic->error is already set.
+    // Note: each of the tasks below accounts for 20% of the progress report.
+ ok = VP8EncAnalyze(enc)
+ && VP8StatLoop(enc)
+ && VP8EncLoop(enc)
+ && VP8EncFinishAlpha(enc)
#ifdef WEBP_EXPERIMENTAL_FEATURES
- && VP8EncFinishAlpha(enc)
- && VP8EncFinishLayer(enc)
+ && VP8EncFinishLayer(enc)
#endif
- && VP8EncWrite(enc);
- StoreStats(enc);
- DeleteEncoder(enc);
+ && VP8EncWrite(enc);
+ StoreStats(enc);
+ if (!ok) {
+ VP8EncFreeBitWriters(enc);
+ }
+ DeleteVP8Encoder(enc);
+ } else {
+ if (pic->argb == NULL)
+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
+
+ ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem.
+ }
return ok;
}
diff --git a/src/utils/bit_reader.c b/src/utils/bit_reader.c
new file mode 100644
index 00000000..1afb1db8
--- /dev/null
+++ b/src/utils/bit_reader.c
@@ -0,0 +1,229 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Boolean decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./bit_reader.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MK(X) (((bit_t)(X) << (BITS)) | (MASK))
+
+//------------------------------------------------------------------------------
+// VP8BitReader
+
+void VP8InitBitReader(VP8BitReader* const br,
+ const uint8_t* const start, const uint8_t* const end) {
+ assert(br != NULL);
+ assert(start != NULL);
+ assert(start <= end);
+ br->range_ = MK(255 - 1);
+ br->buf_ = start;
+ br->buf_end_ = end;
+ br->value_ = 0;
+ br->missing_ = 8; // to load the very first 8bits
+ br->eof_ = 0;
+}
+
+const uint8_t kVP8Log2Range[128] = {
+ 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0
+};
+
+// range = (range << kVP8Log2Range[range]) + trailing 1's
+const bit_t kVP8NewRange[128] = {
+ MK(127), MK(127), MK(191), MK(127), MK(159), MK(191), MK(223), MK(127),
+ MK(143), MK(159), MK(175), MK(191), MK(207), MK(223), MK(239), MK(127),
+ MK(135), MK(143), MK(151), MK(159), MK(167), MK(175), MK(183), MK(191),
+ MK(199), MK(207), MK(215), MK(223), MK(231), MK(239), MK(247), MK(127),
+ MK(131), MK(135), MK(139), MK(143), MK(147), MK(151), MK(155), MK(159),
+ MK(163), MK(167), MK(171), MK(175), MK(179), MK(183), MK(187), MK(191),
+ MK(195), MK(199), MK(203), MK(207), MK(211), MK(215), MK(219), MK(223),
+ MK(227), MK(231), MK(235), MK(239), MK(243), MK(247), MK(251), MK(127),
+ MK(129), MK(131), MK(133), MK(135), MK(137), MK(139), MK(141), MK(143),
+ MK(145), MK(147), MK(149), MK(151), MK(153), MK(155), MK(157), MK(159),
+ MK(161), MK(163), MK(165), MK(167), MK(169), MK(171), MK(173), MK(175),
+ MK(177), MK(179), MK(181), MK(183), MK(185), MK(187), MK(189), MK(191),
+ MK(193), MK(195), MK(197), MK(199), MK(201), MK(203), MK(205), MK(207),
+ MK(209), MK(211), MK(213), MK(215), MK(217), MK(219), MK(221), MK(223),
+ MK(225), MK(227), MK(229), MK(231), MK(233), MK(235), MK(237), MK(239),
+ MK(241), MK(243), MK(245), MK(247), MK(249), MK(251), MK(253), MK(127)
+};
+
+#undef MK
+
+void VP8LoadFinalBytes(VP8BitReader* const br) {
+ assert(br != NULL && br->buf_ != NULL);
+ // Only read 8bits at a time
+ if (br->buf_ < br->buf_end_) {
+ br->value_ |= (bit_t)(*br->buf_++) << ((BITS) - 8 + br->missing_);
+ br->missing_ -= 8;
+ } else {
+ br->eof_ = 1;
+ }
+}
+
+//------------------------------------------------------------------------------
+// Higher-level calls
+
+uint32_t VP8GetValue(VP8BitReader* const br, int bits) {
+ uint32_t v = 0;
+ while (bits-- > 0) {
+ v |= VP8GetBit(br, 0x80) << bits;
+ }
+ return v;
+}
+
+int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
+ const int value = VP8GetValue(br, bits);
+ return VP8Get(br) ? -value : value;
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitReader
+
+#define MAX_NUM_BIT_READ 25
+
+static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
+ 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
+ 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
+};
+
+void VP8LInitBitReader(VP8LBitReader* const br,
+ const uint8_t* const start,
+ size_t length) {
+ size_t i;
+ assert(br != NULL);
+ assert(start != NULL);
+ assert(length < 0xfffffff8u); // can't happen with a RIFF chunk.
+
+ br->buf_ = start;
+ br->len_ = length;
+ br->val_ = 0;
+ br->pos_ = 0;
+ br->bit_pos_ = 0;
+ br->eos_ = 0;
+ br->error_ = 0;
+ for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
+ br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
+ ++br->pos_;
+ }
+}
+
+void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
+ const uint8_t* const buf, size_t len) {
+ assert(br != NULL);
+ assert(buf != NULL);
+ assert(len < 0xfffffff8u); // can't happen with a RIFF chunk.
+ br->eos_ = (br->pos_ >= len);
+ br->buf_ = buf;
+ br->len_ = len;
+}
+
+static void ShiftBytes(VP8LBitReader* const br) {
+ while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
+ br->val_ >>= 8;
+ br->val_ |= ((uint64_t)br->buf_[br->pos_]) << 56;
+ ++br->pos_;
+ br->bit_pos_ -= 8;
+ }
+}
+
+void VP8LFillBitWindow(VP8LBitReader* const br) {
+ if (br->bit_pos_ >= 32) {
+#if defined(__x86_64__) || defined(_M_X64)
+ if (br->pos_ + 8 < br->len_) {
+ br->val_ >>= 32;
+      // The expression below needs a little-endian arch to work correctly.
+      // This gives a large decoding speedup.
+ br->val_ |= *(const uint64_t *)(br->buf_ + br->pos_) << 32;
+ br->pos_ += 4;
+ br->bit_pos_ -= 32;
+ } else {
+ // Slow path.
+ ShiftBytes(br);
+ }
+#else
+ // Always the slow path.
+ ShiftBytes(br);
+#endif
+ }
+ if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
+ br->eos_ = 1;
+ }
+}
+
+uint32_t VP8LReadOneBit(VP8LBitReader* const br) {
+ const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
+ // Flag an error at end_of_stream.
+ if (!br->eos_) {
+ ++br->bit_pos_;
+ if (br->bit_pos_ >= 32) {
+ ShiftBytes(br);
+ }
+ // After this last bit is read, check if eos needs to be flagged.
+ if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
+ br->eos_ = 1;
+ }
+ } else {
+ br->error_ = 1;
+ }
+ return val;
+}
+
+uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
+ uint32_t val = 0;
+ assert(n_bits >= 0);
+  // Flag an error at end_of_stream or if n_bits exceeds the allowed limit.
+ if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
+ // If this read is going to cross the read buffer, set the eos flag.
+ if (br->pos_ == br->len_) {
+ if ((br->bit_pos_ + n_bits) >= 64) {
+ br->eos_ = 1;
+ if ((br->bit_pos_ + n_bits) > 64) return val;
+ }
+ }
+ val = (br->val_ >> br->bit_pos_) & kBitMask[n_bits];
+ br->bit_pos_ += n_bits;
+ if (br->bit_pos_ >= 40) {
+ if (br->pos_ + 5 < br->len_) {
+ br->val_ >>= 40;
+ br->val_ |=
+ (((uint64_t)br->buf_[br->pos_ + 0]) << 24) |
+ (((uint64_t)br->buf_[br->pos_ + 1]) << 32) |
+ (((uint64_t)br->buf_[br->pos_ + 2]) << 40) |
+ (((uint64_t)br->buf_[br->pos_ + 3]) << 48) |
+ (((uint64_t)br->buf_[br->pos_ + 4]) << 56);
+ br->pos_ += 5;
+ br->bit_pos_ -= 40;
+ }
+ if (br->bit_pos_ >= 8) {
+ ShiftBytes(br);
+ }
+ }
+ } else {
+ br->error_ = 1;
+ }
+ return val;
+}
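
A small usage sketch of the reader (buffer contents made up): the lossless bit reader hands out bits least-significant-bit first within each byte.

    #include <stdint.h>
    #include "./bit_reader.h"

    // Hypothetical driver code, illustration only.
    static void ReadNibbles(void) {
      const uint8_t buf[1] = { 0xA5 };       // binary 1010 0101
      VP8LBitReader br;
      VP8LInitBitReader(&br, buf, 1);
      (void)VP8LReadBits(&br, 4);            // -> 0x5: the low nibble first
      (void)VP8LReadBits(&br, 4);            // -> 0xA: then the high nibble
    }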
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/utils/bit_reader.h b/src/utils/bit_reader.h
new file mode 100644
index 00000000..11a40a55
--- /dev/null
+++ b/src/utils/bit_reader.h
@@ -0,0 +1,197 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Boolean decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+// Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_READER_H_
+#define WEBP_UTILS_BIT_READER_H_
+
+#include <assert.h>
+#ifdef _MSC_VER
+#include <stdlib.h> // _byteswap_ulong
+#endif
+#include <string.h> // For memcpy
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BITS 32 // can be 32, 16 or 8
+#define MASK ((((bit_t)1) << (BITS)) - 1)
+#if (BITS == 32)
+typedef uint64_t bit_t; // natural register type
+typedef uint32_t lbit_t; // natural type for memory I/O
+#elif (BITS == 16)
+typedef uint32_t bit_t;
+typedef uint16_t lbit_t;
+#else
+typedef uint32_t bit_t;
+typedef uint8_t lbit_t;
+#endif
+
+//------------------------------------------------------------------------------
+// Bitreader and code-tree reader
+
+typedef struct VP8BitReader VP8BitReader;
+struct VP8BitReader {
+ const uint8_t* buf_; // next byte to be read
+ const uint8_t* buf_end_; // end of read buffer
+ int eof_; // true if input is exhausted
+
+ // boolean decoder
+ bit_t range_; // current range minus 1. In [127, 254] interval.
+ bit_t value_; // current value
+ int missing_; // number of missing bits in value_ (8bit)
+};
+
+// Initialize the bit reader and the boolean decoder.
+void VP8InitBitReader(VP8BitReader* const br,
+ const uint8_t* const start, const uint8_t* const end);
+
+// return the next value made of 'num_bits' bits
+uint32_t VP8GetValue(VP8BitReader* const br, int num_bits);
+static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
+ return VP8GetValue(br, 1);
+}
+
+// return the next 'num_bits'-bit value, negated if the sign bit that
+// follows it is set.
+int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
+
+// Read a bit with proba 'prob'. Speed-critical function!
+extern const uint8_t kVP8Log2Range[128];
+extern const bit_t kVP8NewRange[128];
+
+void VP8LoadFinalBytes(VP8BitReader* const br); // special case for the tail
+
+static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
+ assert(br && br->buf_);
+ // Read 'BITS' bits at a time if possible.
+ if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
+ // convert memory type to register type (with some zero'ing!)
+ bit_t bits;
+ lbit_t in_bits = *(lbit_t*)br->buf_;
+ br->buf_ += (BITS) >> 3;
+#if !defined(__BIG_ENDIAN__)
+#if (BITS == 32)
+#if defined(__i386__) || defined(__x86_64__)
+ __asm__ volatile("bswap %k0" : "=r"(in_bits) : "0"(in_bits));
+ bits = (bit_t)in_bits; // 32b -> 64b zero-extension
+#elif defined(_MSC_VER)
+ bits = _byteswap_ulong(in_bits);
+#else
+ bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00)
+ | ((in_bits << 8) & 0xff0000) | (in_bits << 24);
+#endif // x86
+#elif (BITS == 16)
+ // gcc will recognize a 'rorw $8, ...' here:
+ bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8);
+#endif
+#else // LITTLE_ENDIAN
+ bits = (bit_t)in_bits;
+#endif
+ br->value_ |= bits << br->missing_;
+ br->missing_ -= (BITS);
+ } else {
+ VP8LoadFinalBytes(br); // no need to be inlined
+ }
+}
+
+static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, bit_t split) {
+ const bit_t value_split = split | (MASK);
+  if (br->missing_ > 0) {  // Make sure we have at least BITS bits in 'value_'
+ VP8LoadNewBytes(br);
+ }
+ if (br->value_ > value_split) {
+ br->range_ -= value_split + 1;
+ br->value_ -= value_split + 1;
+ return 1;
+ } else {
+ br->range_ = value_split;
+ return 0;
+ }
+}
+
+static WEBP_INLINE void VP8Shift(VP8BitReader* const br) {
+ // range_ is in [0..127] interval here.
+ const int idx = br->range_ >> (BITS);
+ const int shift = kVP8Log2Range[idx];
+ br->range_ = kVP8NewRange[idx];
+ br->value_ <<= shift;
+ br->missing_ += shift;
+}
+
+static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
+ // It's important to avoid generating a 64bit x 64bit multiply here.
+ // We just need an 8b x 8b after all.
+ const bit_t split =
+ (bit_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
+ const int bit = VP8BitUpdate(br, split);
+ if (br->range_ <= (((bit_t)0x7e << (BITS)) | (MASK))) {
+ VP8Shift(br);
+ }
+ return bit;
+}
+
+static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
+ const bit_t split = (br->range_ >> 1);
+ const int bit = VP8BitUpdate(br, split);
+ VP8Shift(br);
+ return bit ? -v : v;
+}
+
+
+// -----------------------------------------------------------------------------
+// Bitreader
+
+typedef struct {
+ uint64_t val_;
+ const uint8_t* buf_;
+ size_t len_;
+ size_t pos_;
+ int bit_pos_;
+ int eos_;
+ int error_;
+} VP8LBitReader;
+
+void VP8LInitBitReader(VP8LBitReader* const br,
+ const uint8_t* const start,
+ size_t length);
+
+// Sets a new data buffer.
+void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
+ const uint8_t* const buffer, size_t length);
+
+// Reads the specified number of bits from the read buffer.
+// Flags an error if end_of_stream is reached or n_bits exceeds the allowed
+// limit. Flags eos if this read attempt is going to cross the read buffer.
+uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
+
+// Reads one bit from the read buffer. Flags an error at end_of_stream.
+// Flags eos after reading the last bit from the buffer.
+uint32_t VP8LReadOneBit(VP8LBitReader* const br);
+
+// VP8LReadOneBitUnsafe is faster than VP8LReadOneBit, but it can be called only
+// 32 times after the last VP8LFillBitWindow. Any subsequent calls
+// (without VP8LFillBitWindow) will return invalid data.
+static WEBP_INLINE uint32_t VP8LReadOneBitUnsafe(VP8LBitReader* const br) {
+ const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
+ ++br->bit_pos_;
+ return val;
+}
+
+// Advances the read buffer by 4 bytes to make room for the next 32-bit read.
+void VP8LFillBitWindow(VP8LBitReader* const br);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_BIT_READER_H_ */
diff --git a/src/enc/bit_writer.c b/src/utils/bit_writer.c
index 24bbd335..671159ca 100644
--- a/src/enc/bit_writer.c
+++ b/src/utils/bit_writer.c
@@ -1,4 +1,4 @@
-// Copyright 2011 Google Inc.
+// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
@@ -8,33 +8,40 @@
// Bit writing and boolean coder
//
// Author: Skal (pascal.massimino@gmail.com)
+// Vikas Arora (vikaas.arora@gmail.com)
#include <assert.h>
+#include <string.h> // for memcpy()
#include <stdlib.h>
-#include "vp8enci.h"
+#include "./bit_writer.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// VP8BitWriter
static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
uint8_t* new_buf;
size_t new_size;
- const size_t needed_size = bw->pos_ + extra_size;
+ const uint64_t needed_size_64b = (uint64_t)bw->pos_ + extra_size;
+ const size_t needed_size = (size_t)needed_size_64b;
+ if (needed_size_64b != needed_size) {
+ bw->error_ = 1;
+ return 0;
+ }
if (needed_size <= bw->max_pos_) return 1;
+ // If the following line wraps over 32bit, the test just after will catch it.
new_size = 2 * bw->max_pos_;
- if (new_size < needed_size)
- new_size = needed_size;
+ if (new_size < needed_size) new_size = needed_size;
if (new_size < 1024) new_size = 1024;
new_buf = (uint8_t*)malloc(new_size);
if (new_buf == NULL) {
bw->error_ = 1;
return 0;
}
- if (bw->pos_ > 0) memcpy(new_buf, bw->buf_, bw->pos_);
+ memcpy(new_buf, bw->buf_, bw->pos_);
free(bw->buf_);
bw->buf_ = new_buf;
bw->max_pos_ = new_size;
@@ -49,10 +56,8 @@ static void kFlush(VP8BitWriter* const bw) {
bw->nb_bits_ -= 8;
if ((bits & 0xff) != 0xff) {
size_t pos = bw->pos_;
- if (pos + bw->run_ >= bw->max_pos_) { // reallocate
- if (!BitWriterResize(bw, bw->run_ + 1)) {
- return;
- }
+ if (!BitWriterResize(bw, bw->run_ + 1)) {
+ return;
}
if (bits & 0x100) { // overflow -> propagate carry over pending 0xff's
if (pos > 0) bw->buf_[pos - 1]++;
@@ -68,7 +73,7 @@ static void kFlush(VP8BitWriter* const bw) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
// renormalization
static const uint8_t kNorm[128] = { // renorm_sizes[i] = 8 - log2(i)
@@ -84,7 +89,7 @@ static const uint8_t kNorm[128] = { // renorm_sizes[i] = 8 - log2(i)
};
// range = ((range + 1) << kVP8Log2Range[range]) - 1
-const uint8_t kNewRange[128] = {
+static const uint8_t kNewRange[128] = {
127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239,
127, 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239,
247, 127, 131, 135, 139, 143, 147, 151, 155, 159, 163, 167, 171, 175, 179,
@@ -147,7 +152,7 @@ void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits) {
}
}
-//-----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size) {
bw->range_ = 255 - 1;
@@ -178,7 +183,101 @@ int VP8BitWriterAppend(VP8BitWriter* const bw,
return 1;
}
-//-----------------------------------------------------------------------------
+void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
+ if (bw) {
+ free(bw->buf_);
+ memset(bw, 0, sizeof(*bw));
+ }
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitWriter
+
+// Returns 1 on success.
+static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
+ uint8_t* allocated_buf;
+ size_t allocated_size;
+ const size_t current_size = VP8LBitWriterNumBytes(bw);
+ const uint64_t size_required_64b = (uint64_t)current_size + extra_size;
+ const size_t size_required = (size_t)size_required_64b;
+ if (size_required != size_required_64b) {
+ bw->error_ = 1;
+ return 0;
+ }
+ if (bw->max_bytes_ > 0 && size_required <= bw->max_bytes_) return 1;
+ allocated_size = (3 * bw->max_bytes_) >> 1;
+ if (allocated_size < size_required) allocated_size = size_required;
+ // make allocated size multiple of 1k
+ allocated_size = (((allocated_size >> 10) + 1) << 10);
+ allocated_buf = (uint8_t*)malloc(allocated_size);
+ if (allocated_buf == NULL) {
+ bw->error_ = 1;
+ return 0;
+ }
+ memcpy(allocated_buf, bw->buf_, current_size);
+ free(bw->buf_);
+ bw->buf_ = allocated_buf;
+ bw->max_bytes_ = allocated_size;
+ memset(allocated_buf + current_size, 0, allocated_size - current_size);
+ return 1;
+}
+
+int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size) {
+ memset(bw, 0, sizeof(*bw));
+ return VP8LBitWriterResize(bw, expected_size);
+}
+
+void VP8LBitWriterDestroy(VP8LBitWriter* const bw) {
+ if (bw != NULL) {
+ free(bw->buf_);
+ memset(bw, 0, sizeof(*bw));
+ }
+}
+
+void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) {
+ if (n_bits < 1) return;
+#if !defined(__BIG_ENDIAN__)
+ // Technically, this branch of the code can write up to 25 bits at a time,
+ // but in prefix encoding, the maximum number of bits written is 18 at a time.
+ {
+ uint8_t* const p = &bw->buf_[bw->bit_pos_ >> 3];
+ uint32_t v = *(const uint32_t*)p;
+ v |= bits << (bw->bit_pos_ & 7);
+ *(uint32_t*)p = v;
+ bw->bit_pos_ += n_bits;
+ }
+#else // BIG_ENDIAN
+ {
+ uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3];
+ const int bits_reserved_in_first_byte = bw->bit_pos_ & 7;
+ const int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
+    // an implicit & 0xff is assumed for uint8_t arithmetic
+ *p++ |= bits << bits_reserved_in_first_byte;
+ bits >>= 8 - bits_reserved_in_first_byte;
+ if (bits_left_to_write >= 1) {
+ *p++ = bits;
+ bits >>= 8;
+ if (bits_left_to_write >= 9) {
+ *p++ = bits;
+ bits >>= 8;
+ }
+ }
+ assert(n_bits <= 25);
+ *p = bits;
+ bw->bit_pos_ += n_bits;
+ }
+#endif
+ if ((bw->bit_pos_ >> 3) > (bw->max_bytes_ - 8)) {
+ const uint64_t extra_size = 32768ULL + bw->max_bytes_;
+ if (extra_size != (size_t)extra_size ||
+ !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+ bw->bit_pos_ = 0;
+ bw->error_ = 1;
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
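
The resize helpers above (BitWriterResize and VP8LBitWriterResize) share one pattern worth noting: the needed size is computed in 64 bits and checked against size_t before any allocation, so a 32-bit overflow is reported instead of silently wrapping. A standalone sketch of that pattern, with illustrative names (not part of this change):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    static int GrowBuffer(uint8_t** buf, size_t* capacity,
                          size_t used, size_t extra) {
      // Compute the required size in 64 bits first, then check that it still
      // fits in size_t on 32-bit targets.
      const uint64_t needed64 = (uint64_t)used + extra;
      const size_t needed = (size_t)needed64;
      if (needed64 != needed) return 0;       // would overflow size_t
      if (needed <= *capacity) return 1;      // already large enough
      {
        size_t new_size = 2 * *capacity;
        uint8_t* new_buf;
        if (new_size < needed) new_size = needed;
        new_buf = (uint8_t*)malloc(new_size);
        if (new_buf == NULL) return 0;
        if (used > 0) memcpy(new_buf, *buf, used);
        free(*buf);
        *buf = new_buf;
        *capacity = new_size;
      }
      return 1;
    }
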
diff --git a/src/utils/bit_writer.h b/src/utils/bit_writer.h
new file mode 100644
index 00000000..30f71a8f
--- /dev/null
+++ b/src/utils/bit_writer.h
@@ -0,0 +1,123 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Bit writing and boolean coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_WRITER_H_
+#define WEBP_UTILS_BIT_WRITER_H_
+
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Bit-writing
+
+typedef struct VP8BitWriter VP8BitWriter;
+struct VP8BitWriter {
+ int32_t range_; // range-1
+ int32_t value_;
+ int run_; // number of outstanding bits
+ int nb_bits_; // number of pending bits
+ uint8_t* buf_; // internal buffer. Re-allocated regularly. Not owned.
+ size_t pos_;
+ size_t max_pos_;
+ int error_; // true in case of error
+};
+
+// Initialize the object. Allocates some initial memory based on expected_size.
+int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size);
+// Finalize the bitstream coding. Returns a pointer to the internal buffer.
+uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw);
+// Releases any pending memory and zeroes the object. Not a mandatory call;
+// only useful in case of error, when the internal buffer hasn't been grabbed.
+void VP8BitWriterWipeOut(VP8BitWriter* const bw);
+
+int VP8PutBit(VP8BitWriter* const bw, int bit, int prob);
+int VP8PutBitUniform(VP8BitWriter* const bw, int bit);
+void VP8PutValue(VP8BitWriter* const bw, int value, int nb_bits);
+void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits);
+
+// Appends some bytes to the internal buffer. Data is copied.
+int VP8BitWriterAppend(VP8BitWriter* const bw,
+ const uint8_t* data, size_t size);
+
+// Returns the approximate write position (in bits).
+static WEBP_INLINE uint64_t VP8BitWriterPos(const VP8BitWriter* const bw) {
+ return (uint64_t)(bw->pos_ + bw->run_) * 8 + 8 + bw->nb_bits_;
+}
+
+// Returns a pointer to the internal buffer.
+static WEBP_INLINE uint8_t* VP8BitWriterBuf(const VP8BitWriter* const bw) {
+ return bw->buf_;
+}
+// Returns the size of the internal buffer.
+static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) {
+ return bw->pos_;
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitWriter
+// TODO(vikasa): VP8LBitWriter is copied as-is from the lossless code. There is
+// scope for re-using VP8BitWriter; this will be evaluated once the basic
+// lossless encoder is implemented.
+
+typedef struct {
+ uint8_t* buf_;
+ size_t bit_pos_;
+ size_t max_bytes_;
+
+ // After all bits are written, the caller must observe the state of
+ // error_. A value of 1 indicates that a memory allocation failure
+ // has happened during bit writing. A value of 0 indicates successful
+ // writing of bits.
+ int error_;
+} VP8LBitWriter;
+
+static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) {
+ return (bw->bit_pos_ + 7) >> 3;
+}
+
+static WEBP_INLINE uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
+ return bw->buf_;
+}
+
+// Returns 0 in case of memory allocation error.
+int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
+
+void VP8LBitWriterDestroy(VP8LBitWriter* const bw);
+
+// This function writes bits into bytes in increasing addresses, and within
+// a byte least-significant-bit first.
+//
+// The function can write up to 16 bits in one go with WriteBits.
+// Example: let's assume that 3 bits (Rs below) have been written already:
+//
+// BYTE-0       BYTE+1       BYTE+2
+//
+// 0000 0RRR    0000 0000    0000 0000
+//
+// Now, we could write 5 or fewer bits into the MSBs of BYTE-0 by just
+// shifting them left by 3 and OR'ing them into BYTE-0.
+//
+// For n bits, we OR the low bits into the high bits of BYTE-0 and place the
+// remaining bits in BYTE+1 and BYTE+2.
+//
+// VP8LBitWriter's error_ flag is set in case of memory allocation error.
+void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_BIT_WRITER_H_ */
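
A minimal usage sketch of the VP8LBitWriter API above (not part of this change); the function name, bit counts and values are illustrative:

    #include "./bit_writer.h"

    static size_t WriteTwoFields(uint8_t** bytes) {
      VP8LBitWriter bw;
      size_t size = 0;
      if (!VP8LBitWriterInit(&bw, 1024)) return 0;  // pre-allocate ~1KB
      VP8LWriteBits(&bw, 8, 0x47);  // an 8-bit value, least-significant-bit first
      VP8LWriteBits(&bw, 3, 0x5);   // a short 3-bit prefix code
      if (!bw.error_) {             // error_ must be checked after writing
        *bytes = VP8LBitWriterFinish(&bw);
        size = VP8LBitWriterNumBytes(&bw);
      }
      // The buffer still belongs to 'bw'; VP8LBitWriterDestroy() releases it
      // once the caller is done with '*bytes'.
      return size;
    }
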
diff --git a/src/utils/color_cache.c b/src/utils/color_cache.c
new file mode 100644
index 00000000..560f81db
--- /dev/null
+++ b/src/utils/color_cache.c
@@ -0,0 +1,44 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Color Cache for WebP Lossless
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./color_cache.h"
+#include "../utils/utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// VP8LColorCache.
+
+int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
+ const int hash_size = 1 << hash_bits;
+ assert(cc != NULL);
+ assert(hash_bits > 0);
+ cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size,
+ sizeof(*cc->colors_));
+ if (cc->colors_ == NULL) return 0;
+ cc->hash_shift_ = 32 - hash_bits;
+ return 1;
+}
+
+void VP8LColorCacheClear(VP8LColorCache* const cc) {
+ if (cc != NULL) {
+ free(cc->colors_);
+ cc->colors_ = NULL;
+ }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
diff --git a/src/utils/color_cache.h b/src/utils/color_cache.h
new file mode 100644
index 00000000..a5875318
--- /dev/null
+++ b/src/utils/color_cache.h
@@ -0,0 +1,68 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Color Cache for WebP Lossless
+//
+// Authors: Jyrki Alakuijala (jyrki@google.com)
+// Urvang Joshi (urvang@google.com)
+
+#ifndef WEBP_UTILS_COLOR_CACHE_H_
+#define WEBP_UTILS_COLOR_CACHE_H_
+
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Main color cache struct.
+typedef struct {
+ uint32_t *colors_; // color entries
+ int hash_shift_; // Hash shift: 32 - hash_bits.
+} VP8LColorCache;
+
+static const uint32_t kHashMul = 0x1e35a7bd;
+
+static WEBP_INLINE uint32_t VP8LColorCacheLookup(
+ const VP8LColorCache* const cc, uint32_t key) {
+ assert(key <= (~0U >> cc->hash_shift_));
+ return cc->colors_[key];
+}
+
+static WEBP_INLINE void VP8LColorCacheInsert(const VP8LColorCache* const cc,
+ uint32_t argb) {
+ const uint32_t key = (kHashMul * argb) >> cc->hash_shift_;
+ cc->colors_[key] = argb;
+}
+
+static WEBP_INLINE int VP8LColorCacheGetIndex(const VP8LColorCache* const cc,
+ uint32_t argb) {
+ return (kHashMul * argb) >> cc->hash_shift_;
+}
+
+static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc,
+ uint32_t argb) {
+ const uint32_t key = (kHashMul * argb) >> cc->hash_shift_;
+ return cc->colors_[key] == argb;
+}
+
+//------------------------------------------------------------------------------
+
+// Initializes the color cache with 'hash_bits' bits for the keys.
+// Returns false in case of memory error.
+int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits);
+
+// Deletes the memory associated with the color cache.
+void VP8LColorCacheClear(VP8LColorCache* const color_cache);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif // WEBP_UTILS_COLOR_CACHE_H_
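
A minimal usage sketch of the color cache above (not part of this change): a 4-bit cache has 16 entries, and the key is simply (kHashMul * argb) >> hash_shift_. The function name and color value are illustrative:

    #include "./color_cache.h"

    static void ColorCacheExample(void) {
      VP8LColorCache cc;
      const uint32_t green = 0xff00ff00u;            // opaque green, ARGB
      if (!VP8LColorCacheInit(&cc, 4)) return;       // 1 << 4 = 16 entries
      VP8LColorCacheInsert(&cc, green);
      if (VP8LColorCacheContains(&cc, green)) {
        const int key = VP8LColorCacheGetIndex(&cc, green);
        const uint32_t argb = VP8LColorCacheLookup(&cc, (uint32_t)key);
        (void)argb;                                  // == green here
      }
      VP8LColorCacheClear(&cc);
    }
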
diff --git a/src/utils/filters.c b/src/utils/filters.c
new file mode 100644
index 00000000..08f52a3d
--- /dev/null
+++ b/src/utils/filters.c
@@ -0,0 +1,229 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#include "./filters.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Helpful macro.
+
+# define SANITY_CHECK(in, out) \
+ assert(in != NULL); \
+ assert(out != NULL); \
+ assert(width > 0); \
+ assert(height > 0); \
+ assert(bpp > 0); \
+ assert(stride >= width * bpp);
+
+static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
+ uint8_t* dst, int length, int inverse) {
+ int i;
+ if (inverse) {
+ for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
+ } else {
+ for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];
+ }
+}
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
+ int width, int height, int bpp, int stride, int inverse, uint8_t* out) {
+ int h;
+ const uint8_t* preds = (inverse ? out : in);
+ SANITY_CHECK(in, out);
+
+ // Filter line-by-line.
+ for (h = 0; h < height; ++h) {
+ // Leftmost pixel is predicted from above (except for topmost scanline).
+ if (h == 0) {
+ memcpy((void*)out, (const void*)in, bpp);
+ } else {
+ PredictLine(in, preds - stride, out, bpp, inverse);
+ }
+ PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
+ preds += stride;
+ in += stride;
+ out += stride;
+ }
+}
+
+static void HorizontalFilter(const uint8_t* data, int width, int height,
+ int bpp, int stride, uint8_t* filtered_data) {
+ DoHorizontalFilter(data, width, height, bpp, stride, 0, filtered_data);
+}
+
+static void HorizontalUnfilter(const uint8_t* data, int width, int height,
+ int bpp, int stride, uint8_t* recon_data) {
+ DoHorizontalFilter(data, width, height, bpp, stride, 1, recon_data);
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
+ int width, int height, int bpp, int stride, int inverse, uint8_t* out) {
+ int h;
+ const uint8_t* preds = (inverse ? out : in);
+ SANITY_CHECK(in, out);
+
+ // Very first top-left pixel is copied.
+ memcpy((void*)out, (const void*)in, bpp);
+ // Rest of top scan-line is left-predicted.
+ PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
+
+ // Filter line-by-line.
+ for (h = 1; h < height; ++h) {
+ in += stride;
+ out += stride;
+ PredictLine(in, preds, out, bpp * width, inverse);
+ preds += stride;
+ }
+}
+
+static void VerticalFilter(const uint8_t* data, int width, int height,
+ int bpp, int stride, uint8_t* filtered_data) {
+ DoVerticalFilter(data, width, height, bpp, stride, 0, filtered_data);
+}
+
+static void VerticalUnfilter(const uint8_t* data, int width, int height,
+ int bpp, int stride, uint8_t* recon_data) {
+ DoVerticalFilter(data, width, height, bpp, stride, 1, recon_data);
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
+ const int g = a + b - c;
+ return (g < 0) ? 0 : (g > 255) ? 255 : g;
+}
+
+static WEBP_INLINE
+void DoGradientFilter(const uint8_t* in, int width, int height,
+ int bpp, int stride, int inverse, uint8_t* out) {
+ const uint8_t* preds = (inverse ? out : in);
+ int h;
+ SANITY_CHECK(in, out);
+
+ // left prediction for top scan-line
+ memcpy((void*)out, (const void*)in, bpp);
+ PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
+
+ // Filter line-by-line.
+ for (h = 1; h < height; ++h) {
+ int w;
+ preds += stride;
+ in += stride;
+ out += stride;
+ // leftmost pixel: predict from above.
+ PredictLine(in, preds - stride, out, bpp, inverse);
+ for (w = bpp; w < width * bpp; ++w) {
+ const int pred = GradientPredictor(preds[w - bpp],
+ preds[w - stride],
+ preds[w - stride - bpp]);
+ out[w] = in[w] + (inverse ? pred : -pred);
+ }
+ }
+}
+
+static void GradientFilter(const uint8_t* data, int width, int height,
+ int bpp, int stride, uint8_t* filtered_data) {
+ DoGradientFilter(data, width, height, bpp, stride, 0, filtered_data);
+}
+
+static void GradientUnfilter(const uint8_t* data, int width, int height,
+ int bpp, int stride, uint8_t* recon_data) {
+ DoGradientFilter(data, width, height, bpp, stride, 1, recon_data);
+}
+
+#undef SANITY_CHECK
+
+// -----------------------------------------------------------------------------
+// Quick estimate of a potentially interesting filter mode to try, in addition
+// to the default NONE.
+
+#define SMAX 16
+#define SDIFF(a, b) (abs((a) - (b)) >> 4) // Scoring diff, in [0..SMAX)
+
+WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
+ int width, int height, int stride) {
+ int i, j;
+ int bins[WEBP_FILTER_LAST][SMAX];
+ memset(bins, 0, sizeof(bins));
+  // We only sample every other pixel. That's enough.
+ for (j = 2; j < height - 1; j += 2) {
+ const uint8_t* const p = data + j * stride;
+ int mean = p[0];
+ for (i = 2; i < width - 1; i += 2) {
+ const int diff0 = SDIFF(p[i], mean);
+ const int diff1 = SDIFF(p[i], p[i - 1]);
+ const int diff2 = SDIFF(p[i], p[i - width]);
+ const int grad_pred =
+ GradientPredictor(p[i - 1], p[i - width], p[i - width - 1]);
+ const int diff3 = SDIFF(p[i], grad_pred);
+ bins[WEBP_FILTER_NONE][diff0] = 1;
+ bins[WEBP_FILTER_HORIZONTAL][diff1] = 1;
+ bins[WEBP_FILTER_VERTICAL][diff2] = 1;
+ bins[WEBP_FILTER_GRADIENT][diff3] = 1;
+ mean = (3 * mean + p[i] + 2) >> 2;
+ }
+ }
+ {
+ WEBP_FILTER_TYPE filter, best_filter = WEBP_FILTER_NONE;
+ int best_score = 0x7fffffff;
+ for (filter = WEBP_FILTER_NONE; filter < WEBP_FILTER_LAST; ++filter) {
+ int score = 0;
+ for (i = 0; i < SMAX; ++i) {
+ if (bins[filter][i] > 0) {
+ score += i;
+ }
+ }
+ if (score < best_score) {
+ best_score = score;
+ best_filter = filter;
+ }
+ }
+ return best_filter;
+ }
+}
+
+#undef SMAX
+#undef SDIFF
+
+//------------------------------------------------------------------------------
+
+const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST] = {
+ NULL, // WEBP_FILTER_NONE
+ HorizontalFilter, // WEBP_FILTER_HORIZONTAL
+ VerticalFilter, // WEBP_FILTER_VERTICAL
+ GradientFilter // WEBP_FILTER_GRADIENT
+};
+
+const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST] = {
+ NULL, // WEBP_FILTER_NONE
+ HorizontalUnfilter, // WEBP_FILTER_HORIZONTAL
+ VerticalUnfilter, // WEBP_FILTER_VERTICAL
+ GradientUnfilter // WEBP_FILTER_GRADIENT
+};
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/utils/filters.h b/src/utils/filters.h
new file mode 100644
index 00000000..4989bb7b
--- /dev/null
+++ b/src/utils/filters.h
@@ -0,0 +1,54 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_UTILS_FILTERS_H_
+#define WEBP_UTILS_FILTERS_H_
+
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Filters.
+typedef enum {
+ WEBP_FILTER_NONE = 0,
+ WEBP_FILTER_HORIZONTAL,
+ WEBP_FILTER_VERTICAL,
+ WEBP_FILTER_GRADIENT,
+ WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker
+ WEBP_FILTER_BEST,
+ WEBP_FILTER_FAST
+} WEBP_FILTER_TYPE;
+
+typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
+ int bpp, int stride, uint8_t* out);
+
+// Filter the given data using the given predictor.
+// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
+// in raster order.
+// 'bpp' is number of bytes per pixel, and
+// 'stride' is number of bytes per scan line (with possible padding).
+// 'out' should be pre-allocated.
+extern const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
+
+// Reconstruct the original data from the given filtered data.
+extern const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST];
+
+// Fast estimate of a potentially good filter.
+extern WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
+ int width, int height, int stride);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_FILTERS_H_ */
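
A minimal round-trip sketch using the tables above (not part of this change); the function name is illustrative and the buffers are assumed to be pre-allocated to stride * height bytes:

    #include "./filters.h"

    static void FilterRoundTrip(const uint8_t* src, uint8_t* tmp, uint8_t* dst,
                                int width, int height, int bpp, int stride) {
      const WEBP_FILTER_TYPE f = EstimateBestFilter(src, width, height, stride);
      if (f != WEBP_FILTER_NONE) {
        WebPFilters[f](src, width, height, bpp, stride, tmp);    // predict
        WebPUnfilters[f](tmp, width, height, bpp, stride, dst);  // reconstruct
      }
    }
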
diff --git a/src/utils/huffman.c b/src/utils/huffman.c
new file mode 100644
index 00000000..54d85f9f
--- /dev/null
+++ b/src/utils/huffman.c
@@ -0,0 +1,238 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Utilities for building and looking up Huffman trees.
+//
+// Author: Urvang Joshi (urvang@google.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./huffman.h"
+#include "../utils/utils.h"
+#include "webp/format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define NON_EXISTENT_SYMBOL (-1)
+
+static void TreeNodeInit(HuffmanTreeNode* const node) {
+ node->children_ = -1; // means: 'unassigned so far'
+}
+
+static int NodeIsEmpty(const HuffmanTreeNode* const node) {
+ return (node->children_ < 0);
+}
+
+static int IsFull(const HuffmanTree* const tree) {
+ return (tree->num_nodes_ == tree->max_nodes_);
+}
+
+static void AssignChildren(HuffmanTree* const tree,
+ HuffmanTreeNode* const node) {
+ HuffmanTreeNode* const children = tree->root_ + tree->num_nodes_;
+ node->children_ = (int)(children - node);
+ assert(children - node == (int)(children - node));
+ tree->num_nodes_ += 2;
+ TreeNodeInit(children + 0);
+ TreeNodeInit(children + 1);
+}
+
+static int TreeInit(HuffmanTree* const tree, int num_leaves) {
+ assert(tree != NULL);
+ if (num_leaves == 0) return 0;
+ // We allocate maximum possible nodes in the tree at once.
+ // Note that a Huffman tree is a full binary tree; and in a full binary tree
+ // with L leaves, the total number of nodes N = 2 * L - 1.
+ tree->max_nodes_ = 2 * num_leaves - 1;
+ tree->root_ = (HuffmanTreeNode*)WebPSafeMalloc((uint64_t)tree->max_nodes_,
+ sizeof(*tree->root_));
+ if (tree->root_ == NULL) return 0;
+ TreeNodeInit(tree->root_); // Initialize root.
+ tree->num_nodes_ = 1;
+ return 1;
+}
+
+void HuffmanTreeRelease(HuffmanTree* const tree) {
+ if (tree != NULL) {
+ free(tree->root_);
+ tree->root_ = NULL;
+ tree->max_nodes_ = 0;
+ tree->num_nodes_ = 0;
+ }
+}
+
+int HuffmanCodeLengthsToCodes(const int* const code_lengths,
+ int code_lengths_size, int* const huff_codes) {
+ int symbol;
+ int code_len;
+ int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+ int curr_code;
+ int next_codes[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+ int max_code_length = 0;
+
+ assert(code_lengths != NULL);
+ assert(code_lengths_size > 0);
+ assert(huff_codes != NULL);
+
+ // Calculate max code length.
+ for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+ if (code_lengths[symbol] > max_code_length) {
+ max_code_length = code_lengths[symbol];
+ }
+ }
+ if (max_code_length > MAX_ALLOWED_CODE_LENGTH) return 0;
+
+ // Calculate code length histogram.
+ for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+ ++code_length_hist[code_lengths[symbol]];
+ }
+ code_length_hist[0] = 0;
+
+ // Calculate the initial values of 'next_codes' for each code length.
+ // next_codes[code_len] denotes the code to be assigned to the next symbol
+ // of code length 'code_len'.
+ curr_code = 0;
+ next_codes[0] = -1; // Unused, as code length = 0 implies code doesn't exist.
+ for (code_len = 1; code_len <= max_code_length; ++code_len) {
+ curr_code = (curr_code + code_length_hist[code_len - 1]) << 1;
+ next_codes[code_len] = curr_code;
+ }
+
+ // Get symbols.
+ for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+ if (code_lengths[symbol] > 0) {
+ huff_codes[symbol] = next_codes[code_lengths[symbol]]++;
+ } else {
+ huff_codes[symbol] = NON_EXISTENT_SYMBOL;
+ }
+ }
+ return 1;
+}
+
+static int TreeAddSymbol(HuffmanTree* const tree,
+ int symbol, int code, int code_length) {
+ HuffmanTreeNode* node = tree->root_;
+ const HuffmanTreeNode* const max_node = tree->root_ + tree->max_nodes_;
+ while (code_length-- > 0) {
+ if (node >= max_node) {
+ return 0;
+ }
+ if (NodeIsEmpty(node)) {
+ if (IsFull(tree)) return 0; // error: too many symbols.
+ AssignChildren(tree, node);
+ } else if (HuffmanTreeNodeIsLeaf(node)) {
+ return 0; // leaf is already occupied.
+ }
+ node += node->children_ + ((code >> code_length) & 1);
+ }
+ if (NodeIsEmpty(node)) {
+ node->children_ = 0; // turn newly created node into a leaf.
+ } else if (!HuffmanTreeNodeIsLeaf(node)) {
+ return 0; // trying to assign a symbol to already used code.
+ }
+ node->symbol_ = symbol; // Add symbol in this node.
+ return 1;
+}
+
+int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
+ const int* const code_lengths,
+ int code_lengths_size) {
+ int symbol;
+ int num_symbols = 0;
+ int root_symbol = 0;
+
+ assert(tree != NULL);
+ assert(code_lengths != NULL);
+
+ // Find out number of symbols and the root symbol.
+ for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+ if (code_lengths[symbol] > 0) {
+ // Note: code length = 0 indicates non-existent symbol.
+ ++num_symbols;
+ root_symbol = symbol;
+ }
+ }
+
+ // Initialize the tree. Will fail for num_symbols = 0
+ if (!TreeInit(tree, num_symbols)) return 0;
+
+ // Build tree.
+ if (num_symbols == 1) { // Trivial case.
+ const int max_symbol = code_lengths_size;
+ if (root_symbol < 0 || root_symbol >= max_symbol) {
+ HuffmanTreeRelease(tree);
+ return 0;
+ }
+ return TreeAddSymbol(tree, root_symbol, 0, 0);
+ } else { // Normal case.
+ int ok = 0;
+
+ // Get Huffman codes from the code lengths.
+ int* const codes =
+ (int*)WebPSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes));
+ if (codes == NULL) goto End;
+
+ if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) {
+ goto End;
+ }
+
+ // Add symbols one-by-one.
+ for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+ if (code_lengths[symbol] > 0) {
+ if (!TreeAddSymbol(tree, symbol, codes[symbol], code_lengths[symbol])) {
+ goto End;
+ }
+ }
+ }
+ ok = 1;
+ End:
+ free(codes);
+ ok = ok && IsFull(tree);
+ if (!ok) HuffmanTreeRelease(tree);
+ return ok;
+ }
+}
+
+int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
+ const int* const code_lengths,
+ const int* const codes,
+ const int* const symbols, int max_symbol,
+ int num_symbols) {
+ int ok = 0;
+ int i;
+
+ assert(tree != NULL);
+ assert(code_lengths != NULL);
+ assert(codes != NULL);
+ assert(symbols != NULL);
+
+ // Initialize the tree. Will fail if num_symbols = 0.
+ if (!TreeInit(tree, num_symbols)) return 0;
+
+ // Add symbols one-by-one.
+ for (i = 0; i < num_symbols; ++i) {
+ if (codes[i] != NON_EXISTENT_SYMBOL) {
+ if (symbols[i] < 0 || symbols[i] >= max_symbol) {
+ goto End;
+ }
+ if (!TreeAddSymbol(tree, symbols[i], codes[i], code_lengths[i])) {
+ goto End;
+ }
+ }
+ }
+ ok = 1;
+ End:
+ ok = ok && IsFull(tree);
+ if (!ok) HuffmanTreeRelease(tree);
+ return ok;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/utils/huffman.h b/src/utils/huffman.h
new file mode 100644
index 00000000..19f50ece
--- /dev/null
+++ b/src/utils/huffman.h
@@ -0,0 +1,78 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Utilities for building and looking up Huffman trees.
+//
+// Author: Urvang Joshi (urvang@google.com)
+
+#ifndef WEBP_UTILS_HUFFMAN_H_
+#define WEBP_UTILS_HUFFMAN_H_
+
+#include <assert.h>
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// A node of a Huffman tree.
+typedef struct {
+ int symbol_;
+ int children_; // delta offset to both children (contiguous) or 0 if leaf.
+} HuffmanTreeNode;
+
+// Huffman Tree.
+typedef struct HuffmanTree HuffmanTree;
+struct HuffmanTree {
+ HuffmanTreeNode* root_; // all the nodes, starting at root.
+ int max_nodes_; // max number of nodes
+ int num_nodes_; // number of currently occupied nodes
+};
+
+// Returns true if the given node is a leaf of the Huffman tree.
+static WEBP_INLINE int HuffmanTreeNodeIsLeaf(
+ const HuffmanTreeNode* const node) {
+ return (node->children_ == 0);
+}
+
+// Go down one level. Most critical function. 'right_child' must be 0 or 1.
+static WEBP_INLINE const HuffmanTreeNode* HuffmanTreeNextNode(
+ const HuffmanTreeNode* node, int right_child) {
+ return node + node->children_ + right_child;
+}
+
+// Releases the nodes of the Huffman tree.
+// Note: It does NOT free 'tree' itself.
+void HuffmanTreeRelease(HuffmanTree* const tree);
+
+// Builds Huffman tree assuming code lengths are implicitly in symbol order.
+// Returns false in case of error (invalid tree or memory error).
+int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
+ const int* const code_lengths,
+ int code_lengths_size);
+
+// Build a Huffman tree with explicitly given lists of code lengths, codes
+// and symbols. Verifies that all symbols added are smaller than max_symbol.
+// Returns false in case of an invalid symbol, invalid tree or memory error.
+int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
+ const int* const code_lengths,
+ const int* const codes,
+ const int* const symbols, int max_symbol,
+ int num_symbols);
+
+// Utility: converts Huffman code lengths to corresponding Huffman codes.
+// 'huff_codes' should be pre-allocated.
+// Returns false in case of error (memory allocation, invalid codes).
+int HuffmanCodeLengthsToCodes(const int* const code_lengths,
+ int code_lengths_size, int* const huff_codes);
+
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // WEBP_UTILS_HUFFMAN_H_
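
A minimal decoding sketch with the API above (not part of this change). Code lengths {1, 2, 2} give the canonical codes 0, 10 and 11, so following the bits 1 then 0 reaches symbol 1; the function name is illustrative:

    #include "./huffman.h"

    static int DecodeCode10(void) {
      static const int code_lengths[3] = { 1, 2, 2 };  // symbols 0, 1, 2
      HuffmanTree tree;
      const HuffmanTreeNode* node;
      int symbol = -1;
      if (!HuffmanTreeBuildImplicit(&tree, code_lengths, 3)) return -1;
      node = tree.root_;
      node = HuffmanTreeNextNode(node, 1);   // first bit: 1
      node = HuffmanTreeNextNode(node, 0);   // second bit: 0 -> code "10"
      if (HuffmanTreeNodeIsLeaf(node)) symbol = node->symbol_;  // == 1
      HuffmanTreeRelease(&tree);
      return symbol;
    }
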
diff --git a/src/utils/huffman_encode.c b/src/utils/huffman_encode.c
new file mode 100644
index 00000000..2686c665
--- /dev/null
+++ b/src/utils/huffman_encode.c
@@ -0,0 +1,439 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Entropy encoding (Huffman) for webp lossless.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./huffman_encode.h"
+#include "../utils/utils.h"
+#include "webp/format_constants.h"
+
+// -----------------------------------------------------------------------------
+// Util function to optimize the symbol map for RLE coding
+
+// Heuristics for selecting the stride ranges to collapse.
+static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
+ return abs(a - b) < 4;
+}
+
+// Change the population counts in a way that the subsequent
+// Huffman tree compression, especially its RLE part, gives a smaller output.
+static int OptimizeHuffmanForRle(int length, int* const counts) {
+ uint8_t* good_for_rle;
+ // 1) Let's make the Huffman code more compatible with rle encoding.
+ int i;
+ for (; length >= 0; --length) {
+ if (length == 0) {
+ return 1; // All zeros.
+ }
+ if (counts[length - 1] != 0) {
+ // Now counts[0..length - 1] does not have trailing zeros.
+ break;
+ }
+ }
+ // 2) Let's mark all population counts that already can be encoded
+ // with an rle code.
+ good_for_rle = (uint8_t*)calloc(length, 1);
+ if (good_for_rle == NULL) {
+ return 0;
+ }
+ {
+ // Let's not spoil any of the existing good rle codes.
+    // Mark any seq of 0's that is longer than 5 as good_for_rle.
+    // Mark any seq of non-0's that is longer than 7 as good_for_rle.
+ int symbol = counts[0];
+ int stride = 0;
+ for (i = 0; i < length + 1; ++i) {
+ if (i == length || counts[i] != symbol) {
+ if ((symbol == 0 && stride >= 5) ||
+ (symbol != 0 && stride >= 7)) {
+ int k;
+ for (k = 0; k < stride; ++k) {
+ good_for_rle[i - k - 1] = 1;
+ }
+ }
+ stride = 1;
+ if (i != length) {
+ symbol = counts[i];
+ }
+ } else {
+ ++stride;
+ }
+ }
+ }
+ // 3) Let's replace those population counts that lead to more rle codes.
+ {
+ int stride = 0;
+ int limit = counts[0];
+ int sum = 0;
+ for (i = 0; i < length + 1; ++i) {
+ if (i == length || good_for_rle[i] ||
+ (i != 0 && good_for_rle[i - 1]) ||
+ !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
+ if (stride >= 4 || (stride >= 3 && sum == 0)) {
+ int k;
+ // The stride must end, collapse what we have, if we have enough (4).
+ int count = (sum + stride / 2) / stride;
+ if (count < 1) {
+ count = 1;
+ }
+ if (sum == 0) {
+            // Don't let an all-zeros stride be upgraded to ones.
+ count = 0;
+ }
+ for (k = 0; k < stride; ++k) {
+            // We don't want to change the value at counts[i]; it already
+            // belongs to the next stride. Hence the "- 1".
+ counts[i - k - 1] = count;
+ }
+ }
+ stride = 0;
+ sum = 0;
+ if (i < length - 3) {
+          // All interesting strides have a count of at least 4,
+          // at least when they are non-zero.
+ limit = (counts[i] + counts[i + 1] +
+ counts[i + 2] + counts[i + 3] + 2) / 4;
+ } else if (i < length) {
+ limit = counts[i];
+ } else {
+ limit = 0;
+ }
+ }
+ ++stride;
+ if (i != length) {
+ sum += counts[i];
+ if (stride >= 4) {
+ limit = (sum + stride / 2) / stride;
+ }
+ }
+ }
+ }
+ free(good_for_rle);
+ return 1;
+}
+
+typedef struct {
+ int total_count_;
+ int value_;
+ int pool_index_left_;
+ int pool_index_right_;
+} HuffmanTree;
+
+// A comparer function for two Huffman trees: sorts first by 'total count'
+// (larger comes first), and then by 'value' (smaller comes first).
+static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
+ const HuffmanTree* const t1 = (const HuffmanTree*)ptr1;
+ const HuffmanTree* const t2 = (const HuffmanTree*)ptr2;
+ if (t1->total_count_ > t2->total_count_) {
+ return -1;
+ } else if (t1->total_count_ < t2->total_count_) {
+ return 1;
+ } else {
+ if (t1->value_ < t2->value_) {
+ return -1;
+ }
+ if (t1->value_ > t2->value_) {
+ return 1;
+ }
+ return 0;
+ }
+}
+
+static void SetBitDepths(const HuffmanTree* const tree,
+ const HuffmanTree* const pool,
+ uint8_t* const bit_depths, int level) {
+ if (tree->pool_index_left_ >= 0) {
+ SetBitDepths(&pool[tree->pool_index_left_], pool, bit_depths, level + 1);
+ SetBitDepths(&pool[tree->pool_index_right_], pool, bit_depths, level + 1);
+ } else {
+ bit_depths[tree->value_] = level;
+ }
+}
+
+// Create an optimal Huffman tree.
+//
+// (histogram, histogram_size): population counts.
+// tree_depth_limit: maximum bit depth (inclusive) of the codes.
+// bit_depths[]: how many bits are used for the symbol.
+//
+// Returns 0 when an error has occurred.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+//
+// count_min is the value that is faked as the minimum population count, and
+// this minimum is raised until the tree satisfies the maximum depth
+// requirement.
+//
+// This algorithm does not perform well for very long data blocks, especially
+// when the population counts are larger than 2**tree_depth_limit, but we do
+// not plan to use it with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+static int GenerateOptimalTree(const int* const histogram, int histogram_size,
+ int tree_depth_limit,
+ uint8_t* const bit_depths) {
+ int count_min;
+ HuffmanTree* tree_pool;
+ HuffmanTree* tree;
+ int tree_size_orig = 0;
+ int i;
+
+ for (i = 0; i < histogram_size; ++i) {
+ if (histogram[i] != 0) {
+ ++tree_size_orig;
+ }
+ }
+
+ // 3 * tree_size is enough to cover all the nodes representing a
+ // population and all the inserted nodes combining two existing nodes.
+ // The tree pool needs 2 * (tree_size_orig - 1) entities, and the
+ // tree needs exactly tree_size_orig entities.
+ tree = (HuffmanTree*)WebPSafeMalloc(3ULL * tree_size_orig, sizeof(*tree));
+ if (tree == NULL) return 0;
+ tree_pool = tree + tree_size_orig;
+
+  // For block sizes with fewer than 64k symbols we never need to do a
+  // second iteration of this loop.
+ // If we actually start running inside this loop a lot, we would perhaps
+ // be better off with the Katajainen algorithm.
+ assert(tree_size_orig <= (1 << (tree_depth_limit - 1)));
+ for (count_min = 1; ; count_min *= 2) {
+ int tree_size = tree_size_orig;
+ // We need to pack the Huffman tree in tree_depth_limit bits.
+ // So, we try by faking histogram entries to be at least 'count_min'.
+ int idx = 0;
+ int j;
+ for (j = 0; j < histogram_size; ++j) {
+ if (histogram[j] != 0) {
+ const int count =
+ (histogram[j] < count_min) ? count_min : histogram[j];
+ tree[idx].total_count_ = count;
+ tree[idx].value_ = j;
+ tree[idx].pool_index_left_ = -1;
+ tree[idx].pool_index_right_ = -1;
+ ++idx;
+ }
+ }
+
+ // Build the Huffman tree.
+ qsort(tree, tree_size, sizeof(*tree), CompareHuffmanTrees);
+
+ if (tree_size > 1) { // Normal case.
+ int tree_pool_size = 0;
+ while (tree_size > 1) { // Finish when we have only one root.
+ int count;
+ tree_pool[tree_pool_size++] = tree[tree_size - 1];
+ tree_pool[tree_pool_size++] = tree[tree_size - 2];
+ count = tree_pool[tree_pool_size - 1].total_count_ +
+ tree_pool[tree_pool_size - 2].total_count_;
+ tree_size -= 2;
+ {
+ // Search for the insertion point.
+ int k;
+ for (k = 0; k < tree_size; ++k) {
+ if (tree[k].total_count_ <= count) {
+ break;
+ }
+ }
+ memmove(tree + (k + 1), tree + k, (tree_size - k) * sizeof(*tree));
+ tree[k].total_count_ = count;
+ tree[k].value_ = -1;
+
+ tree[k].pool_index_left_ = tree_pool_size - 1;
+ tree[k].pool_index_right_ = tree_pool_size - 2;
+ tree_size = tree_size + 1;
+ }
+ }
+ SetBitDepths(&tree[0], tree_pool, bit_depths, 0);
+ } else if (tree_size == 1) { // Trivial case: only one element.
+ bit_depths[tree[0].value_] = 1;
+ }
+
+ {
+ // Test if this Huffman tree satisfies our 'tree_depth_limit' criteria.
+ int max_depth = bit_depths[0];
+ for (j = 1; j < histogram_size; ++j) {
+ if (max_depth < bit_depths[j]) {
+ max_depth = bit_depths[j];
+ }
+ }
+ if (max_depth <= tree_depth_limit) {
+ break;
+ }
+ }
+ }
+ free(tree);
+ return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Coding of the Huffman tree values
+
+static HuffmanTreeToken* CodeRepeatedValues(int repetitions,
+ HuffmanTreeToken* tokens,
+ int value, int prev_value) {
+ assert(value <= MAX_ALLOWED_CODE_LENGTH);
+ if (value != prev_value) {
+ tokens->code = value;
+ tokens->extra_bits = 0;
+ ++tokens;
+ --repetitions;
+ }
+ while (repetitions >= 1) {
+ if (repetitions < 3) {
+ int i;
+ for (i = 0; i < repetitions; ++i) {
+ tokens->code = value;
+ tokens->extra_bits = 0;
+ ++tokens;
+ }
+ break;
+ } else if (repetitions < 7) {
+ tokens->code = 16;
+ tokens->extra_bits = repetitions - 3;
+ ++tokens;
+ break;
+ } else {
+ tokens->code = 16;
+ tokens->extra_bits = 3;
+ ++tokens;
+ repetitions -= 6;
+ }
+ }
+ return tokens;
+}
+
+static HuffmanTreeToken* CodeRepeatedZeros(int repetitions,
+ HuffmanTreeToken* tokens) {
+ while (repetitions >= 1) {
+ if (repetitions < 3) {
+ int i;
+ for (i = 0; i < repetitions; ++i) {
+ tokens->code = 0; // 0-value
+ tokens->extra_bits = 0;
+ ++tokens;
+ }
+ break;
+ } else if (repetitions < 11) {
+ tokens->code = 17;
+ tokens->extra_bits = repetitions - 3;
+ ++tokens;
+ break;
+ } else if (repetitions < 139) {
+ tokens->code = 18;
+ tokens->extra_bits = repetitions - 11;
+ ++tokens;
+ break;
+ } else {
+ tokens->code = 18;
+ tokens->extra_bits = 0x7f; // 138 repeated 0s
+ ++tokens;
+ repetitions -= 138;
+ }
+ }
+ return tokens;
+}
+
+int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
+ HuffmanTreeToken* tokens, int max_tokens) {
+ HuffmanTreeToken* const starting_token = tokens;
+ HuffmanTreeToken* const ending_token = tokens + max_tokens;
+ const int depth_size = tree->num_symbols;
+ int prev_value = 8; // 8 is the initial value for rle.
+ int i = 0;
+ assert(tokens != NULL);
+ while (i < depth_size) {
+ const int value = tree->code_lengths[i];
+ int k = i + 1;
+ int runs;
+ while (k < depth_size && tree->code_lengths[k] == value) ++k;
+ runs = k - i;
+ if (value == 0) {
+ tokens = CodeRepeatedZeros(runs, tokens);
+ } else {
+ tokens = CodeRepeatedValues(runs, tokens, value, prev_value);
+ prev_value = value;
+ }
+ i += runs;
+ assert(tokens <= ending_token);
+ }
+ (void)ending_token; // suppress 'unused variable' warning
+ return (int)(tokens - starting_token);
+}
+
+// -----------------------------------------------------------------------------
+
+// Pre-reversed 4-bit values.
+static const uint8_t kReversedBits[16] = {
+ 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+ 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+};
+
+static uint32_t ReverseBits(int num_bits, uint32_t bits) {
+ uint32_t retval = 0;
+ int i = 0;
+ while (i < num_bits) {
+ i += 4;
+ retval |= kReversedBits[bits & 0xf] << (MAX_ALLOWED_CODE_LENGTH + 1 - i);
+ bits >>= 4;
+ }
+ retval >>= (MAX_ALLOWED_CODE_LENGTH + 1 - num_bits);
+ return retval;
+}
+
+// Get the actual bit values for a tree of bit depths.
+static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
+ // 0 bit-depth means that the symbol does not exist.
+ int i;
+ int len;
+ uint32_t next_code[MAX_ALLOWED_CODE_LENGTH + 1];
+ int depth_count[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+
+ assert(tree != NULL);
+ len = tree->num_symbols;
+ for (i = 0; i < len; ++i) {
+ const int code_length = tree->code_lengths[i];
+ assert(code_length <= MAX_ALLOWED_CODE_LENGTH);
+ ++depth_count[code_length];
+ }
+ depth_count[0] = 0; // ignore unused symbol
+ next_code[0] = 0;
+ {
+ uint32_t code = 0;
+ for (i = 1; i <= MAX_ALLOWED_CODE_LENGTH; ++i) {
+ code = (code + depth_count[i - 1]) << 1;
+ next_code[i] = code;
+ }
+ }
+ for (i = 0; i < len; ++i) {
+ const int code_length = tree->code_lengths[i];
+ tree->codes[i] = ReverseBits(code_length, next_code[code_length]++);
+ }
+}
+
+// -----------------------------------------------------------------------------
+// Main entry point
+
+int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
+ HuffmanTreeCode* const tree) {
+ const int num_symbols = tree->num_symbols;
+ if (!OptimizeHuffmanForRle(num_symbols, histogram)) {
+ return 0;
+ }
+ if (!GenerateOptimalTree(histogram, num_symbols,
+ tree_depth_limit, tree->code_lengths)) {
+ return 0;
+ }
+ // Create the actual bit codes for the bit lengths.
+ ConvertBitDepthsToSymbols(tree);
+ return 1;
+}
diff --git a/src/utils/huffman_encode.h b/src/utils/huffman_encode.h
new file mode 100644
index 00000000..ada59d78
--- /dev/null
+++ b/src/utils/huffman_encode.h
@@ -0,0 +1,47 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Entropy encoding (Huffman) for webp lossless
+
+#ifndef WEBP_UTILS_HUFFMAN_ENCODE_H_
+#define WEBP_UTILS_HUFFMAN_ENCODE_H_
+
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Struct for holding the tree header in coded form.
+typedef struct {
+ uint8_t code; // value (0..15) or escape code (16,17,18)
+ uint8_t extra_bits; // extra bits for escape codes
+} HuffmanTreeToken;
+
+// Struct to represent the tree codes (depth and bits array).
+typedef struct {
+ int num_symbols; // Number of symbols.
+ uint8_t* code_lengths; // Code lengths of the symbols.
+ uint16_t* codes; // Symbol Codes.
+} HuffmanTreeCode;
+
+// Turn the Huffman tree into a token sequence.
+// Returns the number of tokens used.
+int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
+ HuffmanTreeToken* tokens, int max_tokens);
+
+// Create an optimized tree, and tokenize it.
+int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
+ HuffmanTreeCode* const tree);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif // WEBP_UTILS_HUFFMAN_ENCODE_H_
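
A minimal sketch of building codes from a histogram with the API above (not part of this change). The alphabet size and the depth limit of 15 (assumed here to match MAX_ALLOWED_CODE_LENGTH of the lossless bitstream) are illustrative; note that VP8LCreateHuffmanTree may modify the histogram while optimizing it for RLE:

    #include "./huffman_encode.h"

    #define ALPHABET_SIZE 256  // illustrative alphabet size

    static int BuildCodes(int histogram[ALPHABET_SIZE],
                          uint8_t lengths[ALPHABET_SIZE],
                          uint16_t codes[ALPHABET_SIZE]) {
      HuffmanTreeCode code;
      code.num_symbols = ALPHABET_SIZE;
      code.code_lengths = lengths;   // filled with the bit depth of each symbol
      code.codes = codes;            // filled with the (bit-reversed) codes
      return VP8LCreateHuffmanTree(histogram, 15, &code);
    }
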
diff --git a/src/utils/quant_levels.c b/src/utils/quant_levels.c
new file mode 100644
index 00000000..f6884392
--- /dev/null
+++ b/src/utils/quant_levels.c
@@ -0,0 +1,154 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Quantize levels for a specified number of quantization levels ([2, 256]).
+// Min and max values are preserved (usually 0 and 255 for the alpha plane).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "./quant_levels.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define NUM_SYMBOLS 256
+
+#define MAX_ITER 6 // Maximum number of convergence steps.
+#define ERROR_THRESHOLD 1e-4 // MSE stopping criterion.
+
+// -----------------------------------------------------------------------------
+// Quantize levels.
+
+int QuantizeLevels(uint8_t* const data, int width, int height,
+ int num_levels, uint64_t* const sse) {
+ int freq[NUM_SYMBOLS] = { 0 };
+ int q_level[NUM_SYMBOLS] = { 0 };
+ double inv_q_level[NUM_SYMBOLS] = { 0 };
+ int min_s = 255, max_s = 0;
+ const size_t data_size = height * width;
+ int i, num_levels_in, iter;
+ double last_err = 1.e38, err = 0.;
+ const double err_threshold = ERROR_THRESHOLD * data_size;
+
+ if (data == NULL) {
+ return 0;
+ }
+
+ if (width <= 0 || height <= 0) {
+ return 0;
+ }
+
+ if (num_levels < 2 || num_levels > 256) {
+ return 0;
+ }
+
+ {
+ size_t n;
+ num_levels_in = 0;
+ for (n = 0; n < data_size; ++n) {
+ num_levels_in += (freq[data[n]] == 0);
+ if (min_s > data[n]) min_s = data[n];
+ if (max_s < data[n]) max_s = data[n];
+ ++freq[data[n]];
+ }
+ }
+
+ if (num_levels_in <= num_levels) goto End; // nothing to do!
+
+ // Start with uniformly spread centroids.
+ for (i = 0; i < num_levels; ++i) {
+ inv_q_level[i] = min_s + (double)(max_s - min_s) * i / (num_levels - 1);
+ }
+
+ // Fixed values. Won't be changed.
+ q_level[min_s] = 0;
+ q_level[max_s] = num_levels - 1;
+ assert(inv_q_level[0] == min_s);
+ assert(inv_q_level[num_levels - 1] == max_s);
+
+ // k-Means iterations.
+ for (iter = 0; iter < MAX_ITER; ++iter) {
+ double q_sum[NUM_SYMBOLS] = { 0 };
+ double q_count[NUM_SYMBOLS] = { 0 };
+ int s, slot = 0;
+
+ // Assign classes to representatives.
+ for (s = min_s; s <= max_s; ++s) {
+ // Keep track of the nearest neighbour 'slot'
+ while (slot < num_levels - 1 &&
+ 2 * s > inv_q_level[slot] + inv_q_level[slot + 1]) {
+ ++slot;
+ }
+ if (freq[s] > 0) {
+ q_sum[slot] += s * freq[s];
+ q_count[slot] += freq[s];
+ }
+ q_level[s] = slot;
+ }
+
+ // Assign new representatives to classes.
+ if (num_levels > 2) {
+ for (slot = 1; slot < num_levels - 1; ++slot) {
+ const double count = q_count[slot];
+ if (count > 0.) {
+ inv_q_level[slot] = q_sum[slot] / count;
+ }
+ }
+ }
+
+ // Compute convergence error.
+ err = 0.;
+ for (s = min_s; s <= max_s; ++s) {
+ const double error = s - inv_q_level[q_level[s]];
+ err += freq[s] * error * error;
+ }
+
+ // Check for convergence: we stop as soon as the error is no
+ // longer improving.
+ if (last_err - err < err_threshold) break;
+ last_err = err;
+ }
+
+ // Remap the alpha plane to quantized values.
+ {
+    // The double->int rounding operation can be costly, so we do it
+    // once and for all before remapping. We also perform the data[] -> slot
+    // mapping while we are at it (avoiding one indirection in the final loop).
+ uint8_t map[NUM_SYMBOLS];
+ int s;
+ size_t n;
+ for (s = min_s; s <= max_s; ++s) {
+ const int slot = q_level[s];
+ map[s] = (uint8_t)(inv_q_level[slot] + .5);
+ }
+ // Final pass.
+ for (n = 0; n < data_size; ++n) {
+ data[n] = map[data[n]];
+ }
+ }
+ End:
+ // Store sum of squared error if needed.
+ if (sse != NULL) *sse = (uint64_t)err;
+
+ return 1;
+}
+
+int DequantizeLevels(uint8_t* const data, int width, int height) {
+ if (data == NULL || width <= 0 || height <= 0) return 0;
+ // TODO(skal): implement gradient smoothing.
+ (void)data;
+ (void)width;
+ (void)height;
+ return 1;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/utils/quant_levels.h b/src/utils/quant_levels.h
new file mode 100644
index 00000000..8dd3afee
--- /dev/null
+++ b/src/utils/quant_levels.h
@@ -0,0 +1,39 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Alpha plane quantization utility
+//
+// Author: Vikas Arora (vikasa@google.com)
+
+#ifndef WEBP_UTILS_QUANT_LEVELS_H_
+#define WEBP_UTILS_QUANT_LEVELS_H_
+
+#include <stdlib.h>
+
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Replaces the input 'data' of size 'width'x'height' with 'num_levels'
+// quantized values. If not NULL, 'sse' will contain the sum of squared errors.
+// Valid range for 'num_levels' is [2, 256].
+// Returns false in case of error (data is NULL, or parameters are invalid).
+int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
+ uint64_t* const sse);
+
+// Apply post-processing to input 'data' of size 'width'x'height' assuming
+// that the source was quantized to a reduced number of levels.
+// Returns false in case of error (data is NULL, invalid parameters, ...).
+int DequantizeLevels(uint8_t* const data, int width, int height);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_QUANT_LEVELS_H_ */
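
A minimal sketch for an alpha plane using the API above (not part of this change); 16 levels and the function name are illustrative choices:

    #include "./quant_levels.h"

    static int QuantizeAlpha(uint8_t* alpha, int width, int height) {
      uint64_t sse = 0;
      if (!QuantizeLevels(alpha, width, height, 16, &sse)) return 0;
      // The decoding side may later call DequantizeLevels() on the same plane.
      return DequantizeLevels(alpha, width, height);
    }
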
diff --git a/src/utils/rescaler.c b/src/utils/rescaler.c
new file mode 100644
index 00000000..9825dcbc
--- /dev/null
+++ b/src/utils/rescaler.c
@@ -0,0 +1,152 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./rescaler.h"
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define RFIX 30
+#define MULT_FIX(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
+
+void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
+ uint8_t* const dst, int dst_width, int dst_height,
+ int dst_stride, int num_channels, int x_add, int x_sub,
+ int y_add, int y_sub, int32_t* const work) {
+ wrk->x_expand = (src_width < dst_width);
+ wrk->src_width = src_width;
+ wrk->src_height = src_height;
+ wrk->dst_width = dst_width;
+ wrk->dst_height = dst_height;
+ wrk->dst = dst;
+ wrk->dst_stride = dst_stride;
+ wrk->num_channels = num_channels;
+ // for 'x_expand', we use bilinear interpolation
+ wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
+ wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
+ wrk->y_accum = y_add;
+ wrk->y_add = y_add;
+ wrk->y_sub = y_sub;
+ wrk->fx_scale = (1 << RFIX) / x_sub;
+ wrk->fy_scale = (1 << RFIX) / y_sub;
+ wrk->fxy_scale = wrk->x_expand ?
+ ((int64_t)dst_height << RFIX) / (x_sub * src_height) :
+ ((int64_t)dst_height << RFIX) / (x_add * src_height);
+ wrk->irow = work;
+ wrk->frow = work + num_channels * dst_width;
+}
+
+void WebPRescalerImportRow(WebPRescaler* const wrk,
+ const uint8_t* const src, int channel) {
+ const int x_stride = wrk->num_channels;
+ const int x_out_max = wrk->dst_width * wrk->num_channels;
+ int x_in = channel;
+ int x_out;
+ int accum = 0;
+ if (!wrk->x_expand) {
+ int sum = 0;
+ for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+ accum += wrk->x_add;
+ for (; accum > 0; accum -= wrk->x_sub) {
+ sum += src[x_in];
+ x_in += x_stride;
+ }
+ { // Emit next horizontal pixel.
+ const int32_t base = src[x_in];
+ const int32_t frac = base * (-accum);
+ x_in += x_stride;
+ wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
+ // fresh fractional start for next pixel
+ sum = (int)MULT_FIX(frac, wrk->fx_scale);
+ }
+ }
+ } else { // simple bilinear interpolation
+ int left = src[channel], right = src[channel];
+ for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+ if (accum < 0) {
+ left = right;
+ x_in += x_stride;
+ right = src[x_in];
+ accum += wrk->x_add;
+ }
+ wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
+ accum -= wrk->x_sub;
+ }
+ }
+ // Accumulate the new row's contribution
+ for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+ wrk->irow[x_out] += wrk->frow[x_out];
+ }
+}
+
+uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) {
+ if (wrk->y_accum <= 0) {
+ int x_out;
+ uint8_t* const dst = wrk->dst;
+ int32_t* const irow = wrk->irow;
+ const int32_t* const frow = wrk->frow;
+ const int yscale = wrk->fy_scale * (-wrk->y_accum);
+ const int x_out_max = wrk->dst_width * wrk->num_channels;
+
+ for (x_out = 0; x_out < x_out_max; ++x_out) {
+ const int frac = (int)MULT_FIX(frow[x_out], yscale);
+ const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
+ dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
+ irow[x_out] = frac; // new fractional start
+ }
+ wrk->y_accum += wrk->y_add;
+ wrk->dst += wrk->dst_stride;
+ return dst;
+ } else {
+ return NULL;
+ }
+}
+
+#undef MULT_FIX
+#undef RFIX
+
+//------------------------------------------------------------------------------
+// all-in-one calls
+
+int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
+ const uint8_t* src, int src_stride) {
+ int total_imported = 0;
+ while (total_imported < num_lines && wrk->y_accum > 0) {
+ int channel;
+ for (channel = 0; channel < wrk->num_channels; ++channel) {
+ WebPRescalerImportRow(wrk, src, channel);
+ }
+ src += src_stride;
+ ++total_imported;
+ wrk->y_accum -= wrk->y_sub;
+ }
+ return total_imported;
+}
+
+int WebPRescalerExport(WebPRescaler* const rescaler) {
+ int total_exported = 0;
+ while (WebPRescalerHasPendingOutput(rescaler)) {
+ WebPRescalerExportRow(rescaler);
+ ++total_exported;
+ }
+ return total_exported;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/utils/rescaler.h b/src/utils/rescaler.h
new file mode 100644
index 00000000..88ac1b71
--- /dev/null
+++ b/src/utils/rescaler.h
@@ -0,0 +1,76 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_RESCALER_H_
+#define WEBP_UTILS_RESCALER_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include "webp/types.h"
+
+// Structure used for on-the-fly rescaling
+typedef struct {
+ int x_expand; // true if we're expanding in the x direction
+ int num_channels; // bytes to jump between pixels
+ int fy_scale, fx_scale; // fixed-point scaling factor
+ int64_t fxy_scale; // ''
+ // we need hpel-precise add/sub increments, for the downsampled U/V planes.
+ int y_accum; // vertical accumulator
+ int y_add, y_sub; // vertical increments (add ~= src, sub ~= dst)
+ int x_add, x_sub; // horizontal increments (add ~= src, sub ~= dst)
+ int src_width, src_height; // source dimensions
+ int dst_width, dst_height; // destination dimensions
+ uint8_t* dst;
+ int dst_stride;
+ int32_t* irow, *frow; // work buffer
+} WebPRescaler;
+
+// Initialize a rescaler given scratch area 'work' and dimensions of src & dst.
+void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
+ uint8_t* const dst,
+ int dst_width, int dst_height, int dst_stride,
+ int num_channels,
+ int x_add, int x_sub,
+ int y_add, int y_sub,
+ int32_t* const work);
+
+// Import a row of data and save its contribution in the rescaler.
+// 'channel' denotes the channel number to be imported.
+void WebPRescalerImportRow(WebPRescaler* const rescaler,
+ const uint8_t* const src, int channel);
+
+// Import multiple rows over all channels, until at least one row is ready to
+// be exported. Returns the actual number of lines that were imported.
+int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows,
+ const uint8_t* src, int src_stride);
+
+// Returns true if there are pending output rows ready to be exported.
+static WEBP_INLINE
+int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
+ return (rescaler->y_accum <= 0);
+}
+
+// Export one row from the rescaler. Returns the pointer where output was
+// written, or NULL if no row was pending.
+uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk);
+
+// Export as many rows as possible. Returns the number of rows written.
+int WebPRescalerExport(WebPRescaler* const wrk);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_RESCALER_H_ */
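The declarations above suggest the following caller-side pattern: provide a scratch
area of 2 * dst_width * num_channels int32_t values (irow plus frow), then alternate
WebPRescalerImport() and WebPRescalerExport() until every source row has been fed
in. A minimal sketch (not part of the patch) for a single-channel downscale, using
the "add ~= src, sub ~= dst" convention from the comments above; the function name
and buffers are hypothetical:

#include <stdlib.h>
#include "./rescaler.h"

// Downscales a tightly-packed grayscale image; returns 0 on allocation failure.
static int RescaleGray(const uint8_t* src, int src_width, int src_height,
                       uint8_t* dst, int dst_width, int dst_height) {
  WebPRescaler rescaler;
  int y = 0;
  int32_t* const work =                     // scratch area: irow + frow
      (int32_t*)malloc(2 * dst_width * sizeof(*work));
  if (work == NULL) return 0;
  WebPRescalerInit(&rescaler, src_width, src_height,
                   dst, dst_width, dst_height, dst_width /* dst_stride */,
                   1 /* num_channels */,
                   src_width, dst_width,    // x_add, x_sub
                   src_height, dst_height,  // y_add, y_sub
                   work);
  while (y < src_height) {
    // Feed rows until at least one output row is ready, then flush them all.
    y += WebPRescalerImport(&rescaler, src_height - y,
                            src + y * src_width, src_width);
    WebPRescalerExport(&rescaler);
  }
  free(work);
  return 1;
}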
diff --git a/src/utils/thread.c b/src/utils/thread.c
new file mode 100644
index 00000000..ce89cf9d
--- /dev/null
+++ b/src/utils/thread.c
@@ -0,0 +1,247 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <string.h> // for memset()
+#include "./thread.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef WEBP_USE_THREAD
+
+#if defined(_WIN32)
+
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+#include <process.h>
+
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+
+static int pthread_create(pthread_t* const thread, const void* attr,
+ unsigned int (__stdcall *start)(void*), void* arg) {
+ (void)attr;
+ *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
+ 0, /* unsigned stack_size */
+ start,
+ arg,
+ 0, /* unsigned initflag */
+ NULL); /* unsigned *thrdaddr */
+ if (*thread == NULL) return 1;
+ SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
+ return 0;
+}
+
+static int pthread_join(pthread_t thread, void** value_ptr) {
+ (void)value_ptr;
+ return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
+ CloseHandle(thread) == 0);
+}
+
+// Mutex
+static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
+ (void)mutexattr;
+ InitializeCriticalSection(mutex);
+ return 0;
+}
+
+static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
+ EnterCriticalSection(mutex);
+ return 0;
+}
+
+static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
+ LeaveCriticalSection(mutex);
+ return 0;
+}
+
+static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
+ DeleteCriticalSection(mutex);
+ return 0;
+}
+
+// Condition
+static int pthread_cond_destroy(pthread_cond_t* const condition) {
+ int ok = 1;
+ ok &= (CloseHandle(condition->waiting_sem_) != 0);
+ ok &= (CloseHandle(condition->received_sem_) != 0);
+ ok &= (CloseHandle(condition->signal_event_) != 0);
+ return !ok;
+}
+
+static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
+ (void)cond_attr;
+ condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+ condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+ condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
+ if (condition->waiting_sem_ == NULL ||
+ condition->received_sem_ == NULL ||
+ condition->signal_event_ == NULL) {
+ pthread_cond_destroy(condition);
+ return 1;
+ }
+ return 0;
+}
+
+static int pthread_cond_signal(pthread_cond_t* const condition) {
+ int ok = 1;
+ if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+ // a thread is waiting in pthread_cond_wait: allow it to be notified
+ ok = SetEvent(condition->signal_event_);
+ // wait until the event is consumed so the signaler cannot consume
+ // the event via its own pthread_cond_wait.
+ ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
+ WAIT_OBJECT_0);
+ }
+ return !ok;
+}
+
+static int pthread_cond_wait(pthread_cond_t* const condition,
+ pthread_mutex_t* const mutex) {
+ int ok;
+ // note that there is a consumer available so the signal isn't dropped in
+ // pthread_cond_signal
+ if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+ return 1;
+ // now unlock the mutex so pthread_cond_signal may be issued
+ pthread_mutex_unlock(mutex);
+ ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+ WAIT_OBJECT_0);
+ ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
+ pthread_mutex_lock(mutex);
+ return !ok;
+}
+
+#else // _WIN32
+# define THREADFN void*
+# define THREAD_RETURN(val) val
+#endif
+
+//------------------------------------------------------------------------------
+
+static THREADFN WebPWorkerThreadLoop(void *ptr) { // thread loop
+ WebPWorker* const worker = (WebPWorker*)ptr;
+ int done = 0;
+ while (!done) {
+ pthread_mutex_lock(&worker->mutex_);
+ while (worker->status_ == OK) { // wait in idling mode
+ pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ }
+ if (worker->status_ == WORK) {
+ if (worker->hook) {
+ worker->had_error |= !worker->hook(worker->data1, worker->data2);
+ }
+ worker->status_ = OK;
+ } else if (worker->status_ == NOT_OK) { // finish the worker
+ done = 1;
+ }
+ // signal to the main thread that we're done (for Sync())
+ pthread_cond_signal(&worker->condition_);
+ pthread_mutex_unlock(&worker->mutex_);
+ }
+ return THREAD_RETURN(NULL); // Thread is finished
+}
+
+// main thread state control
+static void WebPWorkerChangeState(WebPWorker* const worker,
+ WebPWorkerStatus new_status) {
+ // no-op when attempting to change state on a thread that didn't come up
+ if (worker->status_ < OK) return;
+
+ pthread_mutex_lock(&worker->mutex_);
+ // wait for the worker to finish
+ while (worker->status_ != OK) {
+ pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ }
+ // assign new status and release the working thread if needed
+ if (new_status != OK) {
+ worker->status_ = new_status;
+ pthread_cond_signal(&worker->condition_);
+ }
+ pthread_mutex_unlock(&worker->mutex_);
+}
+
+#endif  // WEBP_USE_THREAD
+
+//------------------------------------------------------------------------------
+
+void WebPWorkerInit(WebPWorker* const worker) {
+ memset(worker, 0, sizeof(*worker));
+ worker->status_ = NOT_OK;
+}
+
+int WebPWorkerSync(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+ WebPWorkerChangeState(worker, OK);
+#endif
+ assert(worker->status_ <= OK);
+ return !worker->had_error;
+}
+
+int WebPWorkerReset(WebPWorker* const worker) {
+ int ok = 1;
+ worker->had_error = 0;
+ if (worker->status_ < OK) {
+#ifdef WEBP_USE_THREAD
+ if (pthread_mutex_init(&worker->mutex_, NULL) ||
+ pthread_cond_init(&worker->condition_, NULL)) {
+ return 0;
+ }
+ pthread_mutex_lock(&worker->mutex_);
+ ok = !pthread_create(&worker->thread_, NULL, WebPWorkerThreadLoop, worker);
+ if (ok) worker->status_ = OK;
+ pthread_mutex_unlock(&worker->mutex_);
+#else
+ worker->status_ = OK;
+#endif
+ } else if (worker->status_ > OK) {
+ ok = WebPWorkerSync(worker);
+ }
+ assert(!ok || (worker->status_ == OK));
+ return ok;
+}
+
+void WebPWorkerLaunch(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+ WebPWorkerChangeState(worker, WORK);
+#else
+ if (worker->hook)
+ worker->had_error |= !worker->hook(worker->data1, worker->data2);
+#endif
+}
+
+void WebPWorkerEnd(WebPWorker* const worker) {
+ if (worker->status_ >= OK) {
+#ifdef WEBP_USE_THREAD
+ WebPWorkerChangeState(worker, NOT_OK);
+ pthread_join(worker->thread_, NULL);
+ pthread_mutex_destroy(&worker->mutex_);
+ pthread_cond_destroy(&worker->condition_);
+#else
+ worker->status_ = NOT_OK;
+#endif
+ }
+ assert(worker->status_ == NOT_OK);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
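The worker loop above simply calls the user-supplied hook and ORs the negated result
into had_error, which WebPWorkerSync() later returns negated; in other words, a hook
reports failure by returning 0. A minimal sketch of a conforming hook (not part of
the patch; the job structure and names are hypothetical):

#include <stddef.h>

// Hypothetical payload handed to the worker through data1.
typedef struct {
  const unsigned char* input;
  size_t input_size;
  int checksum;                  // filled in by the hook
} MyJob;

// Matches the WebPWorkerHook signature: two opaque pointers in, 0 on error.
static int MyChecksumHook(void* ptr1, void* ptr2) {
  MyJob* const job = (MyJob*)ptr1;
  size_t i;
  (void)ptr2;                    // second argument unused in this sketch
  if (job == NULL || job->input == NULL) return 0;  // -> worker->had_error
  job->checksum = 0;
  for (i = 0; i < job->input_size; ++i) job->checksum += job->input[i];
  return 1;                      // success
}

A 0 return here is latched into worker->had_error by WebPWorkerThreadLoop() (or by
WebPWorkerLaunch() in the non-threaded build) and surfaces as a 0 return from the
next WebPWorkerSync().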
diff --git a/src/utils/thread.h b/src/utils/thread.h
new file mode 100644
index 00000000..3191890b
--- /dev/null
+++ b/src/utils/thread.h
@@ -0,0 +1,86 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_THREAD_H_
+#define WEBP_UTILS_THREAD_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef WEBP_USE_THREAD
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef HANDLE pthread_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
+typedef struct {
+ HANDLE waiting_sem_;
+ HANDLE received_sem_;
+ HANDLE signal_event_;
+} pthread_cond_t;
+
+#else
+
+#include <pthread.h>
+
+#endif /* _WIN32 */
+#endif /* WEBP_USE_THREAD */
+
+// State of the worker thread object
+typedef enum {
+ NOT_OK = 0, // object is unusable
+ OK, // ready to work
+ WORK // busy finishing the current task
+} WebPWorkerStatus;
+
+// Function to be called by the worker thread. Takes two opaque pointers as
+// arguments (data1 and data2), and should return false in case of error.
+typedef int (*WebPWorkerHook)(void*, void*);
+
+// Synchronization object used to launch a job in the worker thread
+typedef struct {
+#ifdef WEBP_USE_THREAD
+ pthread_mutex_t mutex_;
+ pthread_cond_t condition_;
+ pthread_t thread_;
+#endif
+ WebPWorkerStatus status_;
+ WebPWorkerHook hook; // hook to call
+ void* data1; // first argument passed to 'hook'
+ void* data2; // second argument passed to 'hook'
+ int had_error; // return value of the last call to 'hook'
+} WebPWorker;
+
+// Must be called first, before any other method.
+void WebPWorkerInit(WebPWorker* const worker);
+// Must be called to initialize the object and spawn the thread. Re-entrant.
+// Will potentially launch the thread. Returns false in case of error.
+int WebPWorkerReset(WebPWorker* const worker);
+// Make sure the previous work is finished. Returns true if worker->had_error
+// was not set and no error condition was triggered by the working thread.
+int WebPWorkerSync(WebPWorker* const worker);
+// Trigger the thread to call hook() with the data1 and data2 arguments. The
+// hook/data1/data2 values can be changed at any time before calling this
+// function, but must not be changed afterward until the next WebPWorkerSync().
+void WebPWorkerLaunch(WebPWorker* const worker);
+// Kill the thread and terminate the object. WebPWorkerReset() must be called
+// again before the object can be reused.
+void WebPWorkerEnd(WebPWorker* const worker);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_THREAD_H_ */
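A minimal caller-side sketch of the lifecycle documented above (not part of the
patch): Init, assign hook/data, Reset to spawn the thread, Launch the job, Sync to
wait for it, End to tear the thread down. The hook and its data are hypothetical
stand-ins:

#include <string.h>
#include "./thread.h"

// Hypothetical hook: writes the length of the input string into the output slot.
static int StrlenHook(void* input, void* output) {
  const char* const str = (const char*)input;
  int* const len = (int*)output;
  if (str == NULL || len == NULL) return 0;   // error
  *len = (int)strlen(str);
  return 1;                                   // success
}

static int RunOneJob(const char* input, int* const output) {
  WebPWorker worker;
  int ok;
  WebPWorkerInit(&worker);                  // must be called first
  worker.hook = StrlenHook;                 // set hook/data before Launch()
  worker.data1 = (void*)input;
  worker.data2 = output;
  if (!WebPWorkerReset(&worker)) return 0;  // spawns the thread if enabled
  WebPWorkerLaunch(&worker);                // hook(data1, data2) runs in the worker
  // ... the calling thread is free to do other work here ...
  ok = WebPWorkerSync(&worker);             // waits; 0 if the hook reported failure
  WebPWorkerEnd(&worker);                   // joins and releases the thread
  return ok;
}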
diff --git a/src/utils/utils.c b/src/utils/utils.c
new file mode 100644
index 00000000..673b7e28
--- /dev/null
+++ b/src/utils/utils.c
@@ -0,0 +1,44 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Checked memory allocation
+
+static int CheckSizeArguments(uint64_t nmemb, size_t size) {
+ const uint64_t total_size = nmemb * size;
+ if (nmemb == 0) return 1;
+ if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
+ if (total_size != (size_t)total_size) return 0;
+ return 1;
+}
+
+void* WebPSafeMalloc(uint64_t nmemb, size_t size) {
+ if (!CheckSizeArguments(nmemb, size)) return NULL;
+ return malloc((size_t)(nmemb * size));
+}
+
+void* WebPSafeCalloc(uint64_t nmemb, size_t size) {
+ if (!CheckSizeArguments(nmemb, size)) return NULL;
+ return calloc((size_t)nmemb, size);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/src/utils/utils.h b/src/utils/utils.h
new file mode 100644
index 00000000..aa445695
--- /dev/null
+++ b/src/utils/utils.h
@@ -0,0 +1,44 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_UTILS_H_
+#define WEBP_UTILS_UTILS_H_
+
+#include "webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Memory allocation
+
+// This is the maximum memory amount that libwebp will ever try to allocate.
+#define WEBP_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+
+// Size-checking safe malloc/calloc: they verify that the requested size is not
+// too large, otherwise they return NULL. You don't need them for constructs
+// like malloc(sizeof(foo)); use them whenever a picture-dependent size is
+// involved (like: malloc(num_pixels * sizeof(*something))). That's why this
+// safe malloc() borrows its signature from calloc(), pointing at the dangerous
+// underlying multiply involved.
+void* WebPSafeMalloc(uint64_t nmemb, size_t size);
+// Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
+// in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
+void* WebPSafeCalloc(uint64_t nmemb, size_t size);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_UTILS_H_ */
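A minimal sketch of the intended usage (not part of the patch): route every
picture-dependent allocation through WebPSafeMalloc()/WebPSafeCalloc() so a hostile
width/height pair cannot overflow the byte-count computation. The structure and
field names below are hypothetical:

#include <stdint.h>
#include "./utils.h"

typedef struct {
  int width, height;
  uint32_t* argb;
} MyPicture;

static int AllocatePixels(MyPicture* const pic) {
  // The nmemb * size product is checked against the WEBP_MAX_ALLOCABLE_MEMORY
  // limit and against size_t truncation before the underlying malloc() runs.
  const uint64_t num_pixels = (uint64_t)pic->width * pic->height;
  pic->argb = (uint32_t*)WebPSafeMalloc(num_pixels, sizeof(*pic->argb));
  return (pic->argb != NULL);
}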