1 files changed, 0 insertions, 452 deletions
diff --git a/common/arm/svc/isvc_iquant_itrans_recon_neon.c b/common/arm/svc/isvc_iquant_itrans_recon_neon.c
index 270adde..8a97fbc 100644
--- a/common/arm/svc/isvc_iquant_itrans_recon_neon.c
+++ b/common/arm/svc/isvc_iquant_itrans_recon_neon.c
@@ -587,193 +587,6 @@ void isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon(
                   vreinterpret_u32_u8(pred23_un), 1);
 }
 
-void isvc_iquant_itrans_recon_chroma_4x4_neon(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-
-    WORD16 i2_rnd_factor = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
-    int16x4x4_t src_16x4x2;
-    int16x4x4_t iscal_16x4x2;
-    int16x4x4_t weigh_16x4x2;
-
-    int16x4_t q0_16x4, q1_16x4, q2_16x4, q3_16x4;
-    int32x4_t q0_32x4, q1_32x4, q2_32x4, q3_32x4;
-    int16x4_t rq1_16x4, rq3_16x4;
-    int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4;
-    int16x8_t x0_16x8, x1_16x8, x2_16x8, x3_16x8;
-    int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4;
-    int16x4x2_t xx0_16x4x2, xx1_16x4x2;
-    int32x2x2_t x0_32x2x2, x1_32x2x2;
-    int16x4_t weigh0_16x4, weigh1_16x4, weigh2_16x4, weigh3_16x4;
-
-    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
-    int16x8_t pred0, pred1, pred2, pred3;
-    int16x8_t rec0, rec1, rec2, rec3;
-    uint8x8_t rec0_un, rec1_un, rec2_un, rec3_un;
-    uint8x8_t out0, out1, out2, out3;
-
-    uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff));
-
-    int16x4_t pos_255_16x4 = vdup_n_s16(((WORD16) UINT8_MAX));
-    int16x4_t neg_255_16x4 = vdup_n_s16(-((WORD16) UINT8_MAX));
-    int32x4_t qp_div_6_32x4 = vdupq_n_s32(u4_qp_div_6);
-    int32x4_t rnd_fact = vdupq_n_s32(i2_rnd_factor);
-
-    UNUSED(i4_iq_start_idx);
-    UNUSED(ps_res);
-    UNUSED(ps_res_pred);
-    UNUSED(u1_res_accumulate);
-
-    src_16x4x2 = vld4_s16(pi2_src);
-    iscal_16x4x2 = vld4_s16((const int16_t *) pu2_iscal_mat);
-    weigh_16x4x2 = vld4_s16((const int16_t *) pu2_weigh_mat);
-
-    weigh0_16x4 = vmul_s16(weigh_16x4x2.val[0], iscal_16x4x2.val[0]);
-    weigh1_16x4 = vmul_s16(weigh_16x4x2.val[1], iscal_16x4x2.val[1]);
-    weigh2_16x4 = vmul_s16(weigh_16x4x2.val[2], iscal_16x4x2.val[2]);
-    weigh3_16x4 = vmul_s16(weigh_16x4x2.val[3], iscal_16x4x2.val[3]);
-
-    q0_32x4 = vmull_s16(weigh0_16x4, src_16x4x2.val[0]);
-    q1_32x4 = vmull_s16(weigh1_16x4, src_16x4x2.val[1]);
-    q2_32x4 = vmull_s16(weigh2_16x4, src_16x4x2.val[2]);
-    q3_32x4 = vmull_s16(weigh3_16x4, src_16x4x2.val[3]);
-
-    q0_32x4 = vaddq_s32(q0_32x4, rnd_fact);
-    q1_32x4 = vaddq_s32(q1_32x4, rnd_fact);
-    q2_32x4 = vaddq_s32(q2_32x4, rnd_fact);
-    q3_32x4 = vaddq_s32(q3_32x4, rnd_fact);
-
-    q0_32x4 = vshlq_s32(q0_32x4, qp_div_6_32x4);
-    q1_32x4 = vshlq_s32(q1_32x4, qp_div_6_32x4);
-    q2_32x4 = vshlq_s32(q2_32x4, qp_div_6_32x4);
-    q3_32x4 = vshlq_s32(q3_32x4, qp_div_6_32x4);
-
-    q0_16x4 = vqshrn_n_s32(q0_32x4, 4);
-    q1_16x4 = vqshrn_n_s32(q1_32x4, 4);
-    q2_16x4 = vqshrn_n_s32(q2_32x4, 4);
-    q3_16x4 = vqshrn_n_s32(q3_32x4, 4);
-
-    q0_16x4 = vset_lane_s16(pi2_dc_src[0], q0_16x4, 0);
-
-    rq1_16x4 = vshr_n_s16(q1_16x4, 1);
-    rq3_16x4 = vshr_n_s16(q3_16x4, 1);
-
-    x0_16x4 = vadd_s16(q0_16x4, q2_16x4);
-    x1_16x4 = vsub_s16(q0_16x4, q2_16x4);
-    x2_16x4 = vsub_s16(rq1_16x4, q3_16x4);
-    x3_16x4 = vadd_s16(q1_16x4, rq3_16x4);
-
-    xx0_16x4 = vadd_s16(x0_16x4, x3_16x4);
-    xx1_16x4 = vadd_s16(x1_16x4, x2_16x4);
-    xx2_16x4 = vsub_s16(x1_16x4, x2_16x4);
-    xx3_16x4 = vsub_s16(x0_16x4, x3_16x4);
-
-    /* row 0 to row 3 */
-    xx0_16x4x2 = vtrn_s16(xx0_16x4, xx1_16x4);
-    xx1_16x4x2 = vtrn_s16(xx2_16x4, xx3_16x4);
-    x0_32x2x2 =
-        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0]));
-    x1_32x2x2 =
-        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1]));
-
-    x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]);
-    x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]);
-    x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]);
-    x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]);
-
-    /* Store Horz transform output into temp */
-    vst1_s16(pi2_tmp, x0_16x4);
-    vst1_s16(pi2_tmp + 4, x1_16x4);
-    vst1_s16(pi2_tmp + 8, x2_16x4);
-    vst1_s16(pi2_tmp + 12, x3_16x4);
-
-    /* vertical inverse transform */
-    rq1_16x4 = vshr_n_s16(x1_16x4, 1);
-    rq3_16x4 = vshr_n_s16(x3_16x4, 1);
-
-    xx0_16x4 = vadd_s16(x0_16x4, x2_16x4);
-    xx1_16x4 = vsub_s16(x0_16x4, x2_16x4);
-    xx2_16x4 = vsub_s16(rq1_16x4, x3_16x4);
-    xx3_16x4 = vadd_s16(x1_16x4, rq3_16x4);
-
-    x0_16x4 = vadd_s16(xx0_16x4, xx3_16x4);
-    x1_16x4 = vadd_s16(xx1_16x4, xx2_16x4);
-    x2_16x4 = vsub_s16(xx1_16x4, xx2_16x4);
-    x3_16x4 = vsub_s16(xx0_16x4, xx3_16x4);
-
-    x0_16x4 = vrshr_n_s16(x0_16x4, 6);
-    x1_16x4 = vrshr_n_s16(x1_16x4, 6);
-    x2_16x4 = vrshr_n_s16(x2_16x4, 6);
-    x3_16x4 = vrshr_n_s16(x3_16x4, 6);
-
-    /* Saturate all values < -255 to -255 and retain the rest as it is */
-    x0_16x4 = vmax_s16(x0_16x4, neg_255_16x4);
-    x1_16x4 = vmax_s16(x1_16x4, neg_255_16x4);
-    x2_16x4 = vmax_s16(x2_16x4, neg_255_16x4);
-    x3_16x4 = vmax_s16(x3_16x4, neg_255_16x4);
-
-    /* Saturate all values > 255 to 255 and retain the rest as it is */
-    x0_16x4 = vmin_s16(x0_16x4, pos_255_16x4);
-    x1_16x4 = vmin_s16(x1_16x4, pos_255_16x4);
-    x2_16x4 = vmin_s16(x2_16x4, pos_255_16x4);
-    x3_16x4 = vmin_s16(x3_16x4, pos_255_16x4);
-
-    x0_16x8 = vreinterpretq_s16_s32(vmovl_s16(x0_16x4));
-    x1_16x8 = vreinterpretq_s16_s32(vmovl_s16(x1_16x4));
-    x2_16x8 = vreinterpretq_s16_s32(vmovl_s16(x2_16x4));
-    x3_16x8 = vreinterpretq_s16_s32(vmovl_s16(x3_16x4));
-
-    pred0_in = vld1_u8((uint8_t *) pu1_pred);
-    pred1_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride));
-    pred2_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride << 1));
-    pred3_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride * 3));
-
-    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
-    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
-    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
-    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
-
-    /* Out pixel = pred + res */
-    rec0 = vaddq_s16(pred0, x0_16x8);
-    rec1 = vaddq_s16(pred1, x1_16x8);
-    rec2 = vaddq_s16(pred2, x2_16x8);
-    rec3 = vaddq_s16(pred3, x3_16x8);
-
-    out0 = vld1_u8(pu1_out);
-    out1 = vld1_u8(pu1_out + i4_out_stride);
-    out2 = vld1_u8(pu1_out + i4_out_stride * 2);
-    out3 = vld1_u8(pu1_out + i4_out_stride * 3);
-
-    /* Convert to 8 bit unsigned with saturation */
-    rec0_un = vqmovun_s16(rec0);
-    rec1_un = vqmovun_s16(rec1);
-    rec2_un = vqmovun_s16(rec2);
-    rec3_un = vqmovun_s16(rec3);
-
-    /* Store in alternate postions */
-    out0 = vbsl_u8(chroma_mask_8x8, rec0_un, out0);
-    out1 = vbsl_u8(chroma_mask_8x8, rec1_un, out1);
-    out2 = vbsl_u8(chroma_mask_8x8, rec2_un, out2);
-    out3 = vbsl_u8(chroma_mask_8x8, rec3_un, out3);
-
-    vst1_u8((pu1_out), out0);
-    vst1_u8((pu1_out + i4_out_stride), out1);
-    vst1_u8((pu1_out + (i4_out_stride << 1)), out2);
-    vst1_u8((pu1_out + ((i4_out_stride << 1) + i4_out_stride)), out3);
-}
-
 void isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon(
     buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
     buffer_container_t *ps_res, buffer_container_t *ps_rec,
@@ -1280,271 +1093,6 @@ void isvc_iquant_itrans_recon_4x4_dc_neon(buffer_container_t *ps_src, buffer_con
     vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 3), vreinterpret_u32_u8(pred3_in), 0);
 }
 
-void isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_res_stride = ps_res->i4_data_stride;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
-    WORD16 i2_it_out;
-    WORD32 i4_iq_out_temp;
-    int16x8_t temp_0;
-    int16x4_t residue_res;
-    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
-    int16x8_t pred0, pred1, pred2, pred3;
-
-    UNUSED(pi2_tmp);
-    UNUSED(ps_res_pred);
-    UNUSED(u1_res_accumulate);
-
-    if(i4_iq_start_idx == 0)
-    {
-        i4_iq_out_temp = pi2_src[0];
-        INV_QUANT(i4_iq_out_temp, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
-    }
-    else
-    {
-        i4_iq_out_temp = pi2_dc_src[0];
-    }
-
-    i2_it_out = ((i4_iq_out_temp + 32) >> 6);
-    temp_0 = vdupq_n_s16(i2_it_out);
-    residue_res = vdup_n_s16(isvc_get_residue(i2_it_out, 0, 0));
-
-    vst1_s16(pi2_res, residue_res);
-    vst1_s16(pi2_res + i4_res_stride, residue_res);
-    vst1_s16(pi2_res + (i4_res_stride << 1), residue_res);
-    vst1_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, residue_res);
-
-    pred0_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred1_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred2_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred3_in = vld1_u8(pu1_pred);
-
-    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
-    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
-    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
-    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
-
-    /* Out pixel = Res + pred */
-    pred0 = vaddq_s16(pred0, temp_0);
-    pred1 = vaddq_s16(pred1, temp_0);
-    pred2 = vaddq_s16(pred2, temp_0);
-    pred3 = vaddq_s16(pred3, temp_0);
-
-    /* Convert to unsigned 8 bit with saturation */
-    pred0_in = vqmovun_s16(pred0);
-    pred1_in = vqmovun_s16(pred1);
-    pred2_in = vqmovun_s16(pred2);
-    pred3_in = vqmovun_s16(pred3);
-
-    vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred0_in), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred1_in), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 2), vreinterpret_u32_u8(pred2_in), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 3), vreinterpret_u32_u8(pred3_in), 0);
-}
-
-void isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
-    WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_res_stride = ps_res->i4_data_stride;
-    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
-    WORD32 i4_iq_out_temp;
-    int16x4_t temp_0;
-    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
-    int16x8_t pred0, pred1, pred2, pred3;
-    int16x8_t pred01_in, pred23_in;
-    uint8x8_t pred01_un, pred23_un;
-
-    int16x4_t resd0_in, resd1_in, resd2_in, resd3_in;
-    int16x8_t resd01_in, resd23_in;
-    int16x4_t pos_255 = vdup_n_s16(((WORD16) UINT8_MAX));
-    int16x4_t neg_255 = vdup_n_s16(-((WORD16) UINT8_MAX));
-
-    UNUSED(pi2_tmp);
-    UNUSED(u1_res_accumulate);
-
-    if(i4_iq_start_idx == 0)
-    {
-        i4_iq_out_temp = pi2_src[0];
-        INV_QUANT(i4_iq_out_temp, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
-    }
-    else
-    {
-        i4_iq_out_temp = pi2_dc_src[0];
-    }
-
-    temp_0 = vdup_n_s16((i4_iq_out_temp + 32) >> 6);
-
-    resd0_in = vld1_s16((int16_t *) pi2_res_pred);
-    resd1_in = vld1_s16((int16_t *) pi2_res_pred + i4_res_pred_stride);
-    resd2_in = vld1_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 2));
-    resd3_in = vld1_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 3));
-
-    /* Add res pred to the res obtained */
-    resd0_in = vadd_s16(resd0_in, temp_0);
-    resd1_in = vadd_s16(resd1_in, temp_0);
-    resd2_in = vadd_s16(resd2_in, temp_0);
-    resd3_in = vadd_s16(resd3_in, temp_0);
-
-    /* Saturate all values < -255 to -255 and retain the rest as it is */
-    resd0_in = vmax_s16(resd0_in, neg_255);
-    resd1_in = vmax_s16(resd1_in, neg_255);
-    resd2_in = vmax_s16(resd2_in, neg_255);
-    resd3_in = vmax_s16(resd3_in, neg_255);
-
-    /* Saturate all values > 255 to 255 and retain the rest as it is */
-    resd0_in = vmin_s16(resd0_in, pos_255);
-    resd1_in = vmin_s16(resd1_in, pos_255);
-    resd2_in = vmin_s16(resd2_in, pos_255);
-    resd3_in = vmin_s16(resd3_in, pos_255);
-
-    vst1_s16(pi2_res, resd0_in);
-    vst1_s16(pi2_res + i4_res_stride, resd1_in);
-    vst1_s16(pi2_res + (i4_res_stride << 1), resd2_in);
-    vst1_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, resd3_in);
-
-    resd01_in = vcombine_s16(resd0_in, resd1_in);
-    resd23_in = vcombine_s16(resd2_in, resd3_in);
-
-    pred0_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred1_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred2_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred3_in = vld1_u8(pu1_pred);
-
-    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
-    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
-    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
-    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
-
-    pred01_in = vcombine_s16(vget_low_s16(pred0), vget_low_s16(pred1));
-    pred23_in = vcombine_s16(vget_low_s16(pred2), vget_low_s16(pred3));
-
-    /* Out pixel = Res + pred */
-    pred01_in = vaddq_s16(pred01_in, resd01_in);
-    pred23_in = vaddq_s16(pred23_in, resd23_in);
-
-    /* Convert to unsigned 8 bit with saturation */
-    pred01_un = vqmovun_s16(pred01_in);
-    pred23_un = vqmovun_s16(pred23_in);
-
-    vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred01_un), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred01_un), 1);
-    vst1_lane_u32((uint32_t *) (pu1_out + (i4_out_stride << 1)), vreinterpret_u32_u8(pred23_un), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + ((i4_out_stride << 1) + i4_out_stride)),
-                  vreinterpret_u32_u8(pred23_un), 1);
-}
-
-void isvc_iquant_itrans_recon_chroma_4x4_dc_neon(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-
-    WORD32 i4_iq_out_temp;
-    int16x8_t temp_0;
-    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
-    int16x8_t pred0, pred1, pred2, pred3;
-    uint8x8_t i4_out_horz_8x8_r0, i4_out_horz_8x8_r1, i4_out_horz_8x8_r2, i4_out_horz_8x8_r3;
-    uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff));
-
-    UNUSED(pi2_src);
-    UNUSED(pu2_iscal_mat);
-    UNUSED(pu2_weigh_mat);
-    UNUSED(u4_qp_div_6);
-    UNUSED(pi2_tmp);
-    UNUSED(i4_iq_start_idx);
-    UNUSED(ps_res);
-    UNUSED(ps_res_pred);
-    UNUSED(u1_res_accumulate);
-
-    i4_iq_out_temp = pi2_dc_src[0];
-    temp_0 = vdupq_n_s16((i4_iq_out_temp + 32) >> 6);
-
-    pred0_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred1_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred2_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred3_in = vld1_u8(pu1_pred);
-
-    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
-    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
-    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
-    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
-
-    /* Out pixel = Res + pred */
-    pred0 = vaddq_s16(pred0, temp_0);
-    pred1 = vaddq_s16(pred1, temp_0);
-    pred2 = vaddq_s16(pred2, temp_0);
-    pred3 = vaddq_s16(pred3, temp_0);
-
-    /* Convert to unsigned 8 bit with saturation */
-    pred0_in = vqmovun_s16(pred0);
-    pred1_in = vqmovun_s16(pred1);
-    pred2_in = vqmovun_s16(pred2);
-    pred3_in = vqmovun_s16(pred3);
-
-    i4_out_horz_8x8_r0 = vld1_u8(pu1_out);
-    i4_out_horz_8x8_r1 = vld1_u8(pu1_out + i4_out_stride);
-    i4_out_horz_8x8_r2 = vld1_u8(pu1_out + i4_out_stride * 2);
-    i4_out_horz_8x8_r3 = vld1_u8(pu1_out + i4_out_stride * 3);
-
-    /* Store out pixels in alternate positions */
-    i4_out_horz_8x8_r0 = vbsl_u8(chroma_mask_8x8, pred0_in, i4_out_horz_8x8_r0);
-    i4_out_horz_8x8_r1 = vbsl_u8(chroma_mask_8x8, pred1_in, i4_out_horz_8x8_r1);
-    i4_out_horz_8x8_r2 = vbsl_u8(chroma_mask_8x8, pred2_in, i4_out_horz_8x8_r2);
-    i4_out_horz_8x8_r3 = vbsl_u8(chroma_mask_8x8, pred3_in, i4_out_horz_8x8_r3);
-
-    vst1_u8((uint8_t *) (pu1_out), i4_out_horz_8x8_r0);
-    vst1_u8((uint8_t *) (pu1_out + i4_out_stride), i4_out_horz_8x8_r1);
-    vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 2), i4_out_horz_8x8_r2);
-    vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 3), i4_out_horz_8x8_r3);
-}
-
 void isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon(
     buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
     buffer_container_t *ps_res, buffer_container_t *ps_rec,