diff options
author | android-build-team Robot <android-build-team-robot@google.com> | 2019-11-11 21:08:36 +0000 |
---|---|---|
committer | android-build-team Robot <android-build-team-robot@google.com> | 2019-11-11 21:08:36 +0000 |
commit | 8f5f03b20c57d3f531bc4e2af10bcfc128b0f109 (patch) | |
tree | 3fb6763265bdf86328c9a85fa2761670efbb24d5 | |
parent | 2fe1fdfd8f1dbf58b35519d6c7ea25bdb51240c8 (diff) | |
parent | 6444bd7c5c74b582973d8a340bddeae05fcee55e (diff) | |
download | rs-android10-mainline-resolv-release.tar.gz |
Snap for 6001391 from 6444bd7c5c74b582973d8a340bddeae05fcee55e to qt-aml-resolv-release (refs: android-mainline-10.0.0_r8, android10-mainline-resolv-release)
Change-Id: I9cb3304d6916e85389f79aa7f07ebc042e0c1688
-rw-r--r-- | cpp/Android.bp | 10 | ||||
-rw-r--r-- | cpu_ref/Android.bp | 3 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsicResize.cpp | 84 |
3 files changed, 95 insertions, 2 deletions
diff --git a/cpp/Android.bp b/cpp/Android.bp index 35b09cd4..9d9041c7 100644 --- a/cpp/Android.bp +++ b/cpp/Android.bp @@ -65,7 +65,6 @@ cc_defaults { shared_libs: [ "libdl", "liblog", - "libz", ], } @@ -73,8 +72,15 @@ cc_library { name: "libRScpp", defaults: ["libRScpp-defaults"], + header_libs: [ + "libarect_headers", + "libbase_headers", + "libgui_headers", + "libnativebase_headers", + "libnativewindow_headers", + ], + shared_libs: [ - "libgui", "libutils", ], diff --git a/cpu_ref/Android.bp b/cpu_ref/Android.bp index e69c208d..c4099895 100644 --- a/cpu_ref/Android.bp +++ b/cpu_ref/Android.bp @@ -79,6 +79,9 @@ cc_library_shared { x86_64: { cflags: ["-DARCH_X86_HAVE_SSSE3"], srcs: ["rsCpuIntrinsics_x86.cpp"], + avx2: { + cflags: ["-DARCH_X86_HAVE_AVX2", "-mavx2", "-mfma"], + }, }, }, diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp index ff42d796..8a3dd1ae 100644 --- a/cpu_ref/rsCpuIntrinsicResize.cpp +++ b/cpu_ref/rsCpuIntrinsicResize.cpp @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +#if defined(ARCH_X86_HAVE_AVX2) +#include <stdint.h> +#include <x86intrin.h> +#include <xmmintrin.h> +#endif #include "rsCpuIntrinsic.h" #include "rsCpuIntrinsicInlines.h" @@ -78,10 +83,20 @@ static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) + x * (3.f * (p1 - p2) + p3 - p0))); } + +#if defined(ARCH_X86_HAVE_AVX2) +static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { + return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + + _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(4.f), _mm_set1_ps(p2),_mm_set1_ps(p3))) + + x * (_mm_cvtss_f32(_mm_fmadd_ss (_mm_set1_ps(3.f),_mm_set1_ps(p1 - p2),_mm_set1_ps(p3 - p0)))))); + +} +#else static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + x * (3.f * (p1 - p2) + p3 - p0))); } +#endif static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3, float xf, float yf, int width) { @@ -317,7 +332,14 @@ void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info, const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f))); +#else float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; +#endif + + int starty = (int) floor(yf - 1); yf = yf - floor(yf); int maxy = srcHeight - 1; @@ -363,7 +385,11 @@ void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info, #endif while(x1 < x2) { +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f))); +#else float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; +#endif *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); out++; x1++; @@ -384,7 +410,13 @@ void 
RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info, const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f))); +#else float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; +#endif + int starty = (int) floor(yf - 1); yf = yf - floor(yf); int maxy = srcHeight - 1; @@ -430,7 +462,12 @@ void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info, #endif while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f))); +#else float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; +#endif *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); out++; x1++; @@ -451,7 +488,13 @@ void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info, const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f))); +#else float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; +#endif + int starty = (int) floor(yf - 1); yf = yf - floor(yf); int maxy = srcHeight - 1; @@ -497,7 +540,13 @@ void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info, #endif while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f))); +#else float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; +#endif + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); out++; x1++; @@ -518,7 +567,12 @@ void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info, const int srcWidth = 
cp->mAlloc->mHal.drvState.lod[0].dimX; const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f))); +#else float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; +#endif + int starty = (int) floor(yf - 1); yf = yf - floor(yf); int maxy = srcHeight - 1; @@ -537,7 +591,13 @@ void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info, uint32_t x2 = xend; while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f))); +#else float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; +#endif + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); out++; x1++; @@ -558,7 +618,13 @@ void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info, const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f))); +#else float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; +#endif + int starty = (int) floor(yf - 1); yf = yf - floor(yf); int maxy = srcHeight - 1; @@ -577,7 +643,13 @@ void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info, uint32_t x2 = xend; while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f))); +#else float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; +#endif + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); out++; x1++; @@ -598,7 +670,13 @@ void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info, const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; const size_t stride = 
cp->mAlloc->mHal.drvState.lod[0].stride; + +#if defined(ARCH_X86_HAVE_AVX2) + float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f))); +#else float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; +#endif + int starty = (int) floor(yf - 1); yf = yf - floor(yf); int maxy = srcHeight - 1; @@ -617,7 +695,13 @@ void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info, uint32_t x2 = xend; while(x1 < x2) { + +#if defined(ARCH_X86_HAVE_AVX2) + float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f))); +#else float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; +#endif + *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); out++; x1++; |