aboutsummaryrefslogtreecommitdiff
path: root/shaders/cl/kernel_geo_map.cl
blob: 40b191a07bb0caf017f89b7020e13aa4aec165a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 * kernel_geo_map
 * input_y,      input image, CL_R + CL_UNORM_INT8
 * input_uv, CL_RG + CL_UNORM_INT8
 * geo_table, CL_RGBA + CL_FLOAT
 * output_y,  CL_RGBA + CL_UNSIGNED_INT16
 * output_uv,  CL_RGBA + CL_UNSIGNED_INT16
 *
 * description:
 * the center of geo_table and the center of the output positions are both mapped to (0, 0)
 */

// Compile-time switch for the LSC path: when non-zero, kernel_geo_map takes
// the extra lsc_table / gray_threshold arguments and multiplies every luma
// sample by a gain computed in get_lsc_data. Defaults to 0 (off) unless it
// was already defined before this point.
// NOTE(review): "LSC" presumably stands for lens shading correction - confirm.
#ifndef ENABLE_LSC
#define ENABLE_LSC 0
#endif

// Compile-time switch for the scaling path: when non-zero, kernel_geo_map
// takes the extra left/right scale factors and stable_y_start arguments and
// derives a per-row, per-column scale for the table lookup step.
// Defaults to 0 (off) unless it was already defined before this point.
#ifndef ENABLE_SCALE
#define ENABLE_SCALE 0
#endif

// Fill values used when a sample is flagged out of bound:
// luma becomes 0.0 and each chroma pair becomes (0.5, 0.5).
#define CONST_DATA_Y 0.0f
#define CONST_DATA_UV (float2)(0.5f, 0.5f)

// Number of horizontally adjacent 8-bit luma samples handled per work-item.
// They are packed into a single output texel, i.e. 8 bytes for each
// output pixel of the CL_RGBA + CL_UNSIGNED_INT16 images.
#define PIXEL_RES_STEP_X 8

/*
 * get_geo_mapped_y
 *
 * Produces PIXEL_RES_STEP_X luma samples for one output row segment.
 * Starting at table_pos and advancing by step_x in x for every sample, it
 *   1. looks up the source position in geo_table (.xy of the texel),
 *   2. flags the sample as out of bound when either the table position or
 *      the looked-up source position leaves the [0.0, 1.0] range,
 *   3. stores CONST_DATA_Y for flagged samples, otherwise the .x channel
 *      of input sampled at the looked-up position.
 *
 * input         luma image to sample (normalized coordinates, linear filter)
 * geo_table     lookup image holding source positions
 * table_pos     normalized geo_table position of the first sample
 * step_x        normalized x increment between neighbouring samples
 * out_of_bound  [out] per-sample flag, PIXEL_RES_STEP_X entries
 * input_pos     [out] per-sample source position, PIXEL_RES_STEP_X entries
 * out_y         [out] the luma samples, written element by element
 */
void get_geo_mapped_y (
    __read_only image2d_t input,
    __read_only image2d_t geo_table, float2 table_pos, float step_x,
    bool *out_of_bound, float2 *input_pos, float8 *out_y)
{
    sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
    // Scalar view of the float8 so individual lanes can be indexed in the loop.
    float *luma = (float *) out_y;

    for (int idx = 0; idx < PIXEL_RES_STEP_X; ++idx, table_pos.x += step_x) {
        const bool table_outside =
            (min (table_pos.x, table_pos.y) < 0.0f) ||
            (max (table_pos.x, table_pos.y) > 1.0f);

        // The table is sampled even when table_pos is outside; the sampler
        // clamps to the edge and the result is still reported via input_pos.
        const float2 src_pos = read_imagef (geo_table, sampler, table_pos).xy;
        const bool src_outside =
            (min (src_pos.x, src_pos.y) < 0.0f) ||
            (max (src_pos.x, src_pos.y) > 1.0f);

        input_pos[idx] = src_pos;
        out_of_bound[idx] = table_outside || src_outside;

        // NOTE(review): the range test and the lookup below treat the table
        // contents as normalized (0.0 ~ 1.0) coordinates - confirm that the
        // host fills geo_table that way.
        if (out_of_bound[idx]) {
            luma[idx] = CONST_DATA_Y;
        } else {
            luma[idx] = read_imagef (input, sampler, src_pos).x;
        }
    }
}

/*
 * get_lsc_data
 *
 * Computes a per-sample gain for PIXEL_RES_STEP_X luma samples.
 * The raw gain is the .x channel of lsc_table, sampled with normalized
 * coordinates and linear filtering. It is then blended towards 1.0
 * depending on the brightness of the sample:
 *
 *   ratio = clamp ((gray_threshold.y - output * 255) /
 *                  (gray_threshold.y - gray_threshold.x), 0, 1)
 *   gain  = ratio^2 * (raw_gain - 1) + 1
 *
 * so samples at or above gray_threshold.y (on the 0..255 scale) get a gain
 * of exactly 1.0 and samples at or below gray_threshold.x get the raw gain.
 *
 * lsc_table       gain lookup image
 * g_pos           work-item column (in units of PIXEL_RES_STEP_X samples) and row
 * step_x          normalized coordinate step per sample
 * gray_threshold  (low, high) brightness thresholds on the 0..255 scale
 * output          luma samples in 0.0 ~ 1.0 the gain will be applied to
 * lsc_data        [out] resulting gains
 *
 * NOTE(review): both x and y of the lookup position are scaled by step_x;
 * verify that using the horizontal step for the row coordinate is intended.
 * NOTE(review): gray_threshold.x == gray_threshold.y divides by zero.
 */
void get_lsc_data (
    image2d_t lsc_table, int2 g_pos, float step_x,
    float2 gray_threshold, float8 output, float8 *lsc_data)
{
    sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
    // Scalar view of the float8 so individual lanes can be indexed in the loop.
    float *gain = (float *) lsc_data;

    float2 tap = convert_float2((int2)(g_pos.x * PIXEL_RES_STEP_X, g_pos.y)) * step_x;
    int lane = 0;
    while (lane < PIXEL_RES_STEP_X) {
        gain[lane] = read_imagef (lsc_table, sampler, tap).x;
        tap.x += step_x;
        ++lane;
    }

    const float gray_span = gray_threshold.y - gray_threshold.x;
    float8 weight = clamp ((gray_threshold.y - output * 255.0f) / gray_span, 0.0f, 1.0f);

    (*lsc_data) = weight * weight * ((*lsc_data) - 1.0f) + 1.0f;
}

/*
 * kernel_geo_map
 *
 * Remaps a luma image and a two-channel chroma image through geo_table.
 * Each work-item (g_x, id1) writes
 *   - two output_y texels at (g_x, 2 * id1) and (g_x, 2 * id1 + 1), each
 *     packing PIXEL_RES_STEP_X 8-bit luma samples, and
 *   - one output_uv texel at (g_x, id1) packing four 8-bit UV pairs, taken
 *     at the source positions of samples 0, 2, 4 and 6 of the first row.
 * Samples whose table or source position is out of bound are filled with
 * CONST_DATA_Y / CONST_DATA_UV.
 *
 * input_y, input_uv   source images, sampled with normalized coordinates
 * geo_table           lookup image; .xy is the source position of a sample
 * table_scale_size    divisor turning a sample offset from the output center
 *                     into a normalized geo_table offset
 * left_scale_factor,
 * right_scale_factor,
 * stable_y_start      (ENABLE_SCALE only) scale factors for the left / right
 *                     half and the row from which on no scaling is applied
 * lsc_table,
 * gray_threshold      (ENABLE_LSC only) forwarded to get_lsc_data
 * output_y, output_uv destination images
 * out_size            output size in samples; its center maps to the center
 *                     (0.5, 0.5) of geo_table
 */
__kernel void
kernel_geo_map (
    __read_only image2d_t input_y, __read_only image2d_t input_uv,
    __read_only image2d_t geo_table, float2 table_scale_size,
#if ENABLE_SCALE
    float2 left_scale_factor, float2 right_scale_factor,
    float stable_y_start,
#endif
#if ENABLE_LSC
    __read_only image2d_t lsc_table, float2 gray_threshold,
#endif
    __write_only image2d_t output_y, __write_only image2d_t output_uv, float2 out_size)
{
    const int g_x = get_global_id (0);
    const int g_y_uv = get_global_id (1);
    // One chroma row covers two luma rows.
    const int g_y = get_global_id (1) * 2;
    float8 output_data;
    // Filled by get_geo_mapped_y, which writes PIXEL_RES_STEP_X entries each.
    bool out_of_bound[PIXEL_RES_STEP_X];
    float2 input_pos[PIXEL_RES_STEP_X];
    // No scaling unless ENABLE_SCALE overrides it below.
    float2 scale = 1.0f;

#if ENABLE_SCALE
    // a, b, c describe the parabola a*y^2 + b*y + c whose vertex lies at y_m
    // with value scale_factor.x and which reaches 1.0 at stable_y_start.
    float a, b, c;
    float y_m = stable_y_start * 0.5f;

    // Columns left of the horizontal center use the left factor, the rest the right one.
    float2 scale_factor = (g_x * PIXEL_RES_STEP_X < out_size.x / 2.0f) ? left_scale_factor : right_scale_factor;
    a = (1 - scale_factor.x) / ((stable_y_start - y_m) * (stable_y_start - y_m));
    b = -2 * a * y_m;
    c = 1 - a * stable_y_start * stable_y_start - b * stable_y_start;

    // Vertical scale: 1.0 from stable_y_start on, otherwise interpolated
    // linearly across the columns from left_scale_factor.y to right_scale_factor.y.
    scale.y = (g_y >= stable_y_start) ? 1.0f : ((right_scale_factor.y - left_scale_factor.y) /
              (out_size.x - PIXEL_RES_STEP_X) * g_x * PIXEL_RES_STEP_X + left_scale_factor.y);
    // Row position after vertical scaling about stable_y_start.
    float y_scale = ((float)g_y - stable_y_start) * scale.y + stable_y_start;
    // Horizontal scale: 1.0 from stable_y_start on, scale_factor.x above y_m,
    // and the parabola in between.
    scale.x = (y_scale >= stable_y_start) ? 1.0f : ((y_scale < y_m) ? scale_factor.x : (a * y_scale * y_scale + b * y_scale + c));
#endif

    // Normalized geo_table distance covered by one output sample.
    float2 table_scale_step = 1.0f / (table_scale_size * scale);
    float2 out_map_pos;
    sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

#if ENABLE_SCALE
    out_map_pos.x = (g_x * PIXEL_RES_STEP_X - out_size.x / 2.0f) * table_scale_step.x + 0.5f;
    out_map_pos.y = ((float)g_y - stable_y_start) * table_scale_step.y + stable_y_start / out_size.y;
#else
    // Offset from the output center, converted to table units and re-centered on 0.5.
    out_map_pos = (convert_float2((int2)(g_x * PIXEL_RES_STEP_X, g_y)) - out_size / 2.0f) * table_scale_step + 0.5f;
#endif

    // First luma row (g_y).
    get_geo_mapped_y (input_y, geo_table, out_map_pos, table_scale_step.x, out_of_bound, input_pos, &output_data);

#if ENABLE_LSC
    float8 lsc_data;
    get_lsc_data (lsc_table, (int2)(g_x, g_y), table_scale_step.x, gray_threshold, output_data, &lsc_data);
    output_data = clamp (output_data * lsc_data, 0.0f, 1.0f);
#endif
    // Pack: 8 floats -> 8 bytes -> reinterpreted as 4 ushorts -> widened for write_imageui.
    write_imageui (output_y, (int2)(g_x, g_y), convert_uint4(as_ushort4(convert_uchar8(output_data * 255.0f))));

    // Chroma: reuse the source positions of the first luma row, every second sample.
    output_data.s01 = out_of_bound[0] ? CONST_DATA_UV : read_imagef (input_uv, sampler, input_pos[0]).xy;
    output_data.s23 = out_of_bound[2] ? CONST_DATA_UV : read_imagef (input_uv, sampler, input_pos[2]).xy;
    output_data.s45 = out_of_bound[4] ? CONST_DATA_UV : read_imagef (input_uv, sampler, input_pos[4]).xy;
    output_data.s67 = out_of_bound[6] ? CONST_DATA_UV : read_imagef (input_uv, sampler, input_pos[6]).xy;
    write_imageui (output_uv, (int2)(g_x, g_y_uv), convert_uint4(as_ushort4(convert_uchar8(output_data * 255.0f))));

    // Second luma row (g_y + 1).
#if ENABLE_SCALE
    // The scale depends on the row, so it is recomputed for g_y + 1.
    scale.y = (g_y + 1 >= stable_y_start) ? 1.0f : ((right_scale_factor.y - left_scale_factor.y) /
              (out_size.x - PIXEL_RES_STEP_X) * g_x * PIXEL_RES_STEP_X + left_scale_factor.y);
    y_scale = (g_y + 1 - stable_y_start) * scale.y + stable_y_start;
    scale.x = (y_scale >= stable_y_start) ? 1.0f :
              ((y_scale < y_m) ? scale_factor.x : (a * y_scale * y_scale + b * y_scale + c));

    table_scale_step = 1.0f / (table_scale_size * scale);

    out_map_pos.x = (g_x * PIXEL_RES_STEP_X - out_size.x / 2.0f) * table_scale_step.x + 0.5f;
    out_map_pos.y = ((float)g_y + 1 - stable_y_start) * table_scale_step.y + stable_y_start / out_size.y;
#else
    // Without scaling the next row is simply one table step further down.
    out_map_pos.y += table_scale_step.y;
#endif

    get_geo_mapped_y (input_y, geo_table, out_map_pos, table_scale_step.x, out_of_bound, input_pos, &output_data);

#if ENABLE_LSC
    get_lsc_data (lsc_table, (int2)(g_x, g_y + 1), table_scale_step.x, gray_threshold, output_data, &lsc_data);
    output_data = clamp (output_data * lsc_data, 0.0f, 1.0f);
#endif
    write_imageui (output_y, (int2)(g_x, g_y + 1), convert_uint4(as_ushort4(convert_uchar8(output_data * 255.0f))));
}