diff options
Diffstat (limited to 'config/arm/config/av1_rtcd.h')
-rw-r--r-- | config/arm/config/av1_rtcd.h | 151 |
1 files changed, 102 insertions, 49 deletions
diff --git a/config/arm/config/av1_rtcd.h b/config/arm/config/av1_rtcd.h index 1a3fa19ca..338b4a087 100644 --- a/config/arm/config/av1_rtcd.h +++ b/config/arm/config/av1_rtcd.h @@ -90,23 +90,37 @@ void aom_dist_wtd_comp_avg_upsampled_pred_c(MACROBLOCKD *xd, const struct AV1Com const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search); -#define aom_dist_wtd_comp_avg_upsampled_pred aom_dist_wtd_comp_avg_upsampled_pred_c +void aom_dist_wtd_comp_avg_upsampled_pred_neon(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, + const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width, + int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref, + int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search); +#define aom_dist_wtd_comp_avg_upsampled_pred aom_dist_wtd_comp_avg_upsampled_pred_neon void aom_highbd_comp_avg_upsampled_pred_c(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd, int subpel_search); -#define aom_highbd_comp_avg_upsampled_pred aom_highbd_comp_avg_upsampled_pred_c +void aom_highbd_comp_avg_upsampled_pred_neon(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, + const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width, + int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd, int subpel_search); +#define aom_highbd_comp_avg_upsampled_pred aom_highbd_comp_avg_upsampled_pred_neon void aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search); -#define aom_highbd_dist_wtd_comp_avg_upsampled_pred aom_highbd_dist_wtd_comp_avg_upsampled_pred_c +void aom_highbd_dist_wtd_comp_avg_upsampled_pred_neon(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, + const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width, + int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, + int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search); +#define aom_highbd_dist_wtd_comp_avg_upsampled_pred aom_highbd_dist_wtd_comp_avg_upsampled_pred_neon void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, const MV *const mv, uint8_t *comp_pred8, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd, int subpel_search); -#define aom_highbd_upsampled_pred aom_highbd_upsampled_pred_c +void aom_highbd_upsampled_pred_neon(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, + const MV *const mv, uint8_t *comp_pred8, int width, int height, int subpel_x_q3, + int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd, int subpel_search); +#define aom_highbd_upsampled_pred aom_highbd_upsampled_pred_neon void aom_quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale); void aom_quantize_b_helper_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale); @@ -120,8 +134,8 @@ void aom_upsampled_pred_neon(MACROBLOCKD *xd, const struct AV1Common *const cm, int subpel_y_q3, const uint8_t *ref, int ref_stride, int subpel_search); #define aom_upsampled_pred aom_upsampled_pred_neon -void av1_apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd); -void av1_apply_selfguided_restoration_neon(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd); +int av1_apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd); +int av1_apply_selfguided_restoration_neon(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd); #define av1_apply_selfguided_restoration av1_apply_selfguided_restoration_neon void av1_apply_temporal_filter_c(const struct yv12_buffer_config *frame_to_filter, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const MV *subblock_mvs, const int *subblock_mses, const int q_factor, const int filter_strength, int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum, uint16_t *count); @@ -137,17 +151,16 @@ int64_t av1_block_error_lp_neon(const int16_t *coeff, const int16_t *dqcoeff, in #define av1_block_error_lp av1_block_error_lp_neon void av1_build_compound_diffwtd_mask_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w); -#define av1_build_compound_diffwtd_mask av1_build_compound_diffwtd_mask_c +void av1_build_compound_diffwtd_mask_neon(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w); +#define av1_build_compound_diffwtd_mask av1_build_compound_diffwtd_mask_neon void av1_build_compound_diffwtd_mask_d16_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd); void av1_build_compound_diffwtd_mask_d16_neon(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd); #define av1_build_compound_diffwtd_mask_d16 av1_build_compound_diffwtd_mask_d16_neon void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd); -#define av1_build_compound_diffwtd_mask_highbd av1_build_compound_diffwtd_mask_highbd_c - -int64_t av1_calc_frame_error_c(const uint8_t *const ref, int stride, const uint8_t *const dst, int p_width, int p_height, int p_stride); -#define av1_calc_frame_error av1_calc_frame_error_c +void av1_build_compound_diffwtd_mask_highbd_neon(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd); +#define av1_build_compound_diffwtd_mask_highbd av1_build_compound_diffwtd_mask_highbd_neon void av1_calc_indices_dim1_c(const int16_t *data, const int16_t *centroids, uint8_t *indices, int64_t *total_dist, int n, int k); void av1_calc_indices_dim1_neon(const int16_t *data, const int16_t *centroids, uint8_t *indices, int64_t *total_dist, int n, int k); @@ -157,35 +170,39 @@ void av1_calc_indices_dim2_c(const int16_t *data, const int16_t *centroids, uint void av1_calc_indices_dim2_neon(const int16_t *data, const int16_t *centroids, uint8_t *indices, int64_t *total_dist, int n, int k); #define av1_calc_indices_dim2 av1_calc_indices_dim2_neon -void av1_calc_proj_params_c( const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int64_t H[2][2], int64_t C[2], const sgr_params_type *params); -#define av1_calc_proj_params av1_calc_proj_params_c +void av1_calc_proj_params_c(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int64_t H[2][2], int64_t C[2], const sgr_params_type *params); +void av1_calc_proj_params_neon(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int64_t H[2][2], int64_t C[2], const sgr_params_type *params); +#define av1_calc_proj_params av1_calc_proj_params_neon -void av1_calc_proj_params_high_bd_c( const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int64_t H[2][2], int64_t C[2], const sgr_params_type *params); -#define av1_calc_proj_params_high_bd av1_calc_proj_params_high_bd_c +void av1_calc_proj_params_high_bd_c(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int64_t H[2][2], int64_t C[2], const sgr_params_type *params); +void av1_calc_proj_params_high_bd_neon(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int64_t H[2][2], int64_t C[2], const sgr_params_type *params); +#define av1_calc_proj_params_high_bd av1_calc_proj_params_high_bd_neon -void av1_cnn_activate_c( float **input, int channels, int width, int height, int stride, ACTIVATION layer_activation); +void av1_cnn_activate_c(float **input, int channels, int width, int height, int stride, ACTIVATION layer_activation); #define av1_cnn_activate av1_cnn_activate_c -void av1_cnn_add_c( float **input, int channels, int width, int height, int stride, const float **add); +void av1_cnn_add_c(float **input, int channels, int width, int height, int stride, const float **add); #define av1_cnn_add av1_cnn_add_c void av1_cnn_batchnorm_c(float **image, int channels, int width, int height, int stride, const float *gamma, const float *beta, const float *mean, const float *std); #define av1_cnn_batchnorm av1_cnn_batchnorm_c -void av1_cnn_convolve_no_maxpool_padding_valid_c( const float **input, int in_width, int in_height, int in_stride, const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride, int start_idx, int cstep, int channel_step); +void av1_cnn_convolve_no_maxpool_padding_valid_c(const float **input, int in_width, int in_height, int in_stride, const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride, int start_idx, int cstep, int channel_step); #define av1_cnn_convolve_no_maxpool_padding_valid av1_cnn_convolve_no_maxpool_padding_valid_c -void av1_cnn_deconvolve_c( const float **input, int in_width, int in_height, int in_stride, const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride); +void av1_cnn_deconvolve_c(const float **input, int in_width, int in_height, int in_stride, const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride); #define av1_cnn_deconvolve av1_cnn_deconvolve_c -bool av1_cnn_predict_c( const float **input, int in_width, int in_height, int in_stride, const CNN_CONFIG *cnn_config, const CNN_THREAD_DATA *thread_data, CNN_MULTI_OUT *output_struct); +bool av1_cnn_predict_c(const float **input, int in_width, int in_height, int in_stride, const CNN_CONFIG *cnn_config, const CNN_THREAD_DATA *thread_data, CNN_MULTI_OUT *output_struct); #define av1_cnn_predict av1_cnn_predict_c void av1_compute_stats_c(int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int16_t *dgd_avg, int16_t *src_avg, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, int64_t *M, int64_t *H, int use_downsampled_wiener_stats); -#define av1_compute_stats av1_compute_stats_c +void av1_compute_stats_neon(int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int16_t *dgd_avg, int16_t *src_avg, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, int64_t *M, int64_t *H, int use_downsampled_wiener_stats); +#define av1_compute_stats av1_compute_stats_neon void av1_compute_stats_highbd_c(int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, int64_t *M, int64_t *H, aom_bit_depth_t bit_depth); -#define av1_compute_stats_highbd av1_compute_stats_highbd_c +void av1_compute_stats_highbd_neon(int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, int64_t *M, int64_t *H, aom_bit_depth_t bit_depth); +#define av1_compute_stats_highbd av1_compute_stats_highbd_neon void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_qn, const int y_step_qn, ConvolveParams *conv_params); #define av1_convolve_2d_scale av1_convolve_2d_scale_c @@ -194,6 +211,10 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params); #define av1_convolve_2d_sr av1_convolve_2d_sr_neon +void av1_convolve_2d_sr_intrabc_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params); +void av1_convolve_2d_sr_intrabc_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params); +#define av1_convolve_2d_sr_intrabc av1_convolve_2d_sr_intrabc_neon + void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn); #define av1_convolve_horiz_rs av1_convolve_horiz_rs_c @@ -201,10 +222,18 @@ void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int d void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn, ConvolveParams *conv_params); #define av1_convolve_x_sr av1_convolve_x_sr_neon +void av1_convolve_x_sr_intrabc_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn, ConvolveParams *conv_params); +void av1_convolve_x_sr_intrabc_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn, ConvolveParams *conv_params); +#define av1_convolve_x_sr_intrabc av1_convolve_x_sr_intrabc_neon + void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn); void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn); #define av1_convolve_y_sr av1_convolve_y_sr_neon +void av1_convolve_y_sr_intrabc_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn); +void av1_convolve_y_sr_intrabc_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn); +#define av1_convolve_y_sr_intrabc av1_convolve_y_sr_intrabc_neon + void av1_dist_wtd_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params); void av1_dist_wtd_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params); #define av1_dist_wtd_convolve_2d av1_dist_wtd_convolve_2d_neon @@ -234,13 +263,12 @@ void av1_dr_prediction_z3_neon(uint8_t *dst, ptrdiff_t stride, int bw, int bh, c #define av1_dr_prediction_z3 av1_dr_prediction_z3_neon double av1_estimate_noise_from_single_plane_c(const uint8_t *src, int height, int width, int stride, int edge_thresh); -#define av1_estimate_noise_from_single_plane av1_estimate_noise_from_single_plane_c +double av1_estimate_noise_from_single_plane_neon(const uint8_t *src, int height, int width, int stride, int edge_thresh); +#define av1_estimate_noise_from_single_plane av1_estimate_noise_from_single_plane_neon void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); -#define av1_filter_intra_edge av1_filter_intra_edge_c - -void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); -#define av1_filter_intra_edge_high av1_filter_intra_edge_high_c +void av1_filter_intra_edge_neon(uint8_t *p, int sz, int strength); +#define av1_filter_intra_edge av1_filter_intra_edge_neon void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode); void av1_filter_intra_predictor_neon(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode); @@ -329,8 +357,8 @@ void av1_fwht4x4_neon(const int16_t *input, tran_low_t *output, int stride); uint32_t av1_get_crc32c_value_c(void *crc_calculator, uint8_t *p, size_t length); #define av1_get_crc32c_value av1_get_crc32c_value_c -void av1_get_horver_correlation_full_c( const int16_t *diff, int stride, int w, int h, float *hcorr, float *vcorr); -void av1_get_horver_correlation_full_neon( const int16_t *diff, int stride, int w, int h, float *hcorr, float *vcorr); +void av1_get_horver_correlation_full_c(const int16_t *diff, int stride, int w, int h, float *hcorr, float *vcorr); +void av1_get_horver_correlation_full_neon(const int16_t *diff, int stride, int w, int h, float *hcorr, float *vcorr); #define av1_get_horver_correlation_full av1_get_horver_correlation_full_neon void av1_get_nz_map_contexts_c(const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts); @@ -338,10 +366,12 @@ void av1_get_nz_map_contexts_neon(const uint8_t *const levels, const int16_t *co #define av1_get_nz_map_contexts av1_get_nz_map_contexts_neon void av1_highbd_apply_temporal_filter_c(const struct yv12_buffer_config *frame_to_filter, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const MV *subblock_mvs, const int *subblock_mses, const int q_factor, const int filter_strength, int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum, uint16_t *count); -#define av1_highbd_apply_temporal_filter av1_highbd_apply_temporal_filter_c +void av1_highbd_apply_temporal_filter_neon(const struct yv12_buffer_config *frame_to_filter, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const MV *subblock_mvs, const int *subblock_mses, const int q_factor, const int filter_strength, int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum, uint16_t *count); +#define av1_highbd_apply_temporal_filter av1_highbd_apply_temporal_filter_neon int64_t av1_highbd_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd); -#define av1_highbd_block_error av1_highbd_block_error_c +int64_t av1_highbd_block_error_neon(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd); +#define av1_highbd_block_error av1_highbd_block_error_neon void av1_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8 av1_highbd_convolve8_c @@ -360,6 +390,10 @@ void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t * void av1_highbd_convolve_2d_sr_neon(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params, int bd); #define av1_highbd_convolve_2d_sr av1_highbd_convolve_2d_sr_neon +void av1_highbd_convolve_2d_sr_intrabc_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_sr_intrabc_neon(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params, int bd); +#define av1_highbd_convolve_2d_sr_intrabc av1_highbd_convolve_2d_sr_intrabc_neon + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -374,10 +408,18 @@ void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *d void av1_highbd_convolve_x_sr_neon(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn, ConvolveParams *conv_params, int bd); #define av1_highbd_convolve_x_sr av1_highbd_convolve_x_sr_neon +void av1_highbd_convolve_x_sr_intrabc_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_x_sr_intrabc_neon(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn, ConvolveParams *conv_params, int bd); +#define av1_highbd_convolve_x_sr_intrabc av1_highbd_convolve_x_sr_intrabc_neon + void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn, int bd); void av1_highbd_convolve_y_sr_neon(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn, int bd); #define av1_highbd_convolve_y_sr av1_highbd_convolve_y_sr_neon +void av1_highbd_convolve_y_sr_intrabc_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn, int bd); +void av1_highbd_convolve_y_sr_intrabc_neon(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn, int bd); +#define av1_highbd_convolve_y_sr_intrabc av1_highbd_convolve_y_sr_intrabc_neon + void av1_highbd_dist_wtd_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params, int bd); void av1_highbd_dist_wtd_convolve_2d_neon(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int subpel_y_qn, ConvolveParams *conv_params, int bd); #define av1_highbd_dist_wtd_convolve_2d av1_highbd_dist_wtd_convolve_2d_neon @@ -404,7 +446,12 @@ void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw, int #define av1_highbd_dr_prediction_z3 av1_highbd_dr_prediction_z3_c double av1_highbd_estimate_noise_from_single_plane_c(const uint16_t *src, int height, int width, int stride, int bit_depth, int edge_thresh); -#define av1_highbd_estimate_noise_from_single_plane av1_highbd_estimate_noise_from_single_plane_c +double av1_highbd_estimate_noise_from_single_plane_neon(const uint16_t *src, int height, int width, int stride, int bit_depth, int edge_thresh); +#define av1_highbd_estimate_noise_from_single_plane av1_highbd_estimate_noise_from_single_plane_neon + +void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength); +void av1_highbd_filter_intra_edge_neon(uint16_t *p, int sz, int strength); +#define av1_highbd_filter_intra_edge av1_highbd_filter_intra_edge_neon void av1_highbd_inv_txfm_add_c(const tran_low_t *input, uint8_t *dest, int stride, const TxfmParam *txfm_param); void av1_highbd_inv_txfm_add_neon(const tran_low_t *input, uint8_t *dest, int stride, const TxfmParam *txfm_param); @@ -484,18 +531,24 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); #define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c -int64_t av1_highbd_pixel_proj_error_c( const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params); +int64_t av1_highbd_pixel_proj_error_c(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params); #define av1_highbd_pixel_proj_error av1_highbd_pixel_proj_error_c void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); #define av1_highbd_quantize_fp av1_highbd_quantize_fp_neon +void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd); +void av1_highbd_upsample_intra_edge_neon(uint16_t *p, int sz, int bd); +#define av1_highbd_upsample_intra_edge av1_highbd_upsample_intra_edge_neon + void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -#define av1_highbd_warp_affine av1_highbd_warp_affine_c +void av1_highbd_warp_affine_neon(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +#define av1_highbd_warp_affine av1_highbd_warp_affine_neon -void av1_highbd_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bd); -#define av1_highbd_wiener_convolve_add_src av1_highbd_wiener_convolve_add_src_c +void av1_highbd_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const WienerConvolveParams *conv_params, int bd); +void av1_highbd_wiener_convolve_add_src_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const WienerConvolveParams *conv_params, int bd); +#define av1_highbd_wiener_convolve_add_src av1_highbd_wiener_convolve_add_src_neon void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x16 av1_inv_txfm2d_add_16x16_c @@ -580,15 +633,15 @@ void av1_lowbd_fwd_txfm_c(const int16_t *src_diff, tran_low_t *coeff, int diff_s void av1_lowbd_fwd_txfm_neon(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TxfmParam *txfm_param); #define av1_lowbd_fwd_txfm av1_lowbd_fwd_txfm_neon -int64_t av1_lowbd_pixel_proj_error_c( const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params); -int64_t av1_lowbd_pixel_proj_error_neon( const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params); +int64_t av1_lowbd_pixel_proj_error_c(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params); +int64_t av1_lowbd_pixel_proj_error_neon(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params); #define av1_lowbd_pixel_proj_error av1_lowbd_pixel_proj_error_neon -void av1_nn_fast_softmax_16_c( const float *input_nodes, float *output); +void av1_nn_fast_softmax_16_c(const float *input_nodes, float *output); #define av1_nn_fast_softmax_16 av1_nn_fast_softmax_16_c -void av1_nn_predict_c( const float *input_nodes, const NN_CONFIG *const nn_config, int reduce_prec, float *const output); -void av1_nn_predict_neon( const float *input_nodes, const NN_CONFIG *const nn_config, int reduce_prec, float *const output); +void av1_nn_predict_c(const float *input_nodes, const NN_CONFIG *const nn_config, int reduce_prec, float *const output); +void av1_nn_predict_neon(const float *input_nodes, const NN_CONFIG *const nn_config, int reduce_prec, float *const output); #define av1_nn_predict av1_nn_predict_neon void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); @@ -631,27 +684,27 @@ void av1_txb_init_levels_neon(const tran_low_t *const coeff, const int width, co #define av1_txb_init_levels av1_txb_init_levels_neon void av1_upsample_intra_edge_c(uint8_t *p, int sz); -#define av1_upsample_intra_edge av1_upsample_intra_edge_c - -void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); -#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c +void av1_upsample_intra_edge_neon(uint8_t *p, int sz); +#define av1_upsample_intra_edge av1_upsample_intra_edge_neon void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_neon(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); #define av1_warp_affine av1_warp_affine_neon void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a, const int16_t *b, int N); -#define av1_wedge_compute_delta_squares av1_wedge_compute_delta_squares_c +void av1_wedge_compute_delta_squares_neon(int16_t *d, const int16_t *a, const int16_t *b, int N); +#define av1_wedge_compute_delta_squares av1_wedge_compute_delta_squares_neon int8_t av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N, int64_t limit); -#define av1_wedge_sign_from_residuals av1_wedge_sign_from_residuals_c +int8_t av1_wedge_sign_from_residuals_neon(const int16_t *ds, const uint8_t *m, int N, int64_t limit); +#define av1_wedge_sign_from_residuals av1_wedge_sign_from_residuals_neon uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); uint64_t av1_wedge_sse_from_residuals_neon(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); #define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_neon -void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params); -void av1_wiener_convolve_add_src_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params); +void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const WienerConvolveParams *conv_params); +void av1_wiener_convolve_add_src_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const WienerConvolveParams *conv_params); #define av1_wiener_convolve_add_src av1_wiener_convolve_add_src_neon void cdef_copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int width, int height); |