summary refs log tree commit diff
diff options
context:
space:
mode:
author android-build-team Robot <android-build-team-robot@google.com> 2019-11-11 21:08:36 +0000
committer android-build-team Robot <android-build-team-robot@google.com> 2019-11-11 21:08:36 +0000
commit 8f5f03b20c57d3f531bc4e2af10bcfc128b0f109 (patch)
tree 3fb6763265bdf86328c9a85fa2761670efbb24d5
parent 2fe1fdfd8f1dbf58b35519d6c7ea25bdb51240c8 (diff)
parent 6444bd7c5c74b582973d8a340bddeae05fcee55e (diff)
download rs-android10-mainline-resolv-release.tar.gz
Snap for 6001391 from 6444bd7c5c74b582973d8a340bddeae05fcee55e to qt-aml-resolv-release [android-mainline-10.0.0_r8] [android10-mainline-resolv-release]
Change-Id: I9cb3304d6916e85389f79aa7f07ebc042e0c1688
-rw-r--r-- cpp/Android.bp 10
-rw-r--r-- cpu_ref/Android.bp 3
-rw-r--r-- cpu_ref/rsCpuIntrinsicResize.cpp 84
3 files changed, 95 insertions, 2 deletions
diff --git a/cpp/Android.bp b/cpp/Android.bp
index 35b09cd4..9d9041c7 100644
--- a/cpp/Android.bp
+++ b/cpp/Android.bp
@@ -65,7 +65,6 @@ cc_defaults {
shared_libs: [
"libdl",
"liblog",
- "libz",
],
}
@@ -73,8 +72,15 @@ cc_library {
name: "libRScpp",
defaults: ["libRScpp-defaults"],
+ header_libs: [
+ "libarect_headers",
+ "libbase_headers",
+ "libgui_headers",
+ "libnativebase_headers",
+ "libnativewindow_headers",
+ ],
+
shared_libs: [
- "libgui",
"libutils",
],
diff --git a/cpu_ref/Android.bp b/cpu_ref/Android.bp
index e69c208d..c4099895 100644
--- a/cpu_ref/Android.bp
+++ b/cpu_ref/Android.bp
@@ -79,6 +79,9 @@ cc_library_shared {
x86_64: {
cflags: ["-DARCH_X86_HAVE_SSSE3"],
srcs: ["rsCpuIntrinsics_x86.cpp"],
+ avx2: {
+ cflags: ["-DARCH_X86_HAVE_AVX2", "-mavx2", "-mfma"],
+ },
},
},
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index ff42d796..8a3dd1ae 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+#if defined(ARCH_X86_HAVE_AVX2)
+#include <stdint.h>
+#include <x86intrin.h>
+#include <xmmintrin.h>
+#endif
#include "rsCpuIntrinsic.h"
#include "rsCpuIntrinsicInlines.h"
@@ -78,10 +83,20 @@ static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x)
+ x * (3.f * (p1 - p2) + p3 - p0)));
}
+
+#if defined(ARCH_X86_HAVE_AVX2)
+static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
+ return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 +
+ _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(4.f), _mm_set1_ps(p2),_mm_set1_ps(p3)))
+ + x * (_mm_cvtss_f32(_mm_fmadd_ss (_mm_set1_ps(3.f),_mm_set1_ps(p1 - p2),_mm_set1_ps(p3 - p0))))));
+
+}
+#else
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
+ x * (3.f * (p1 - p2) + p3 - p0)));
}
+#endif
static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
float xf, float yf, int width) {
@@ -317,7 +332,14 @@ void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -363,7 +385,11 @@ void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
#endif
while(x1 < x2) {
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -384,7 +410,13 @@ void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -430,7 +462,12 @@ void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
#endif
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -451,7 +488,13 @@ void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -497,7 +540,13 @@ void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
#endif
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
+
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -518,7 +567,12 @@ void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -537,7 +591,13 @@ void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t x2 = xend;
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
+
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -558,7 +618,13 @@ void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -577,7 +643,13 @@ void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t x2 = xend;
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
+
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -598,7 +670,13 @@ void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -617,7 +695,13 @@ void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t x2 = xend;
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
+
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;