由于dxva2解碼得到的數據不宜copy回內存給CPU處理,所以最好的辦法是在GPU上直接進行處理。D3D的像素著色器能夠對像素直接進行操作,實現點運算極其簡單方便,簡單的卷積運算效果也非常好。但D3D9的限制也很多,對于過于復雜的圖像處理則顯得有些不能勝任。
1.點運算
點運算用HLSL非常容易實現,幾乎是公式怎么寫,代碼就怎么寫。以RGB轉灰度圖顯示為例:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
// Grayscale point-operation pixel shader (compiles for ps_2_0).
//
// Converts the sampled RGB texel to a gray level and remaps it with one of
// three point transforms selected by iFlag:
//   iFlag == 0 : linear      s = aValue * gray + bValue / 255
//   iFlag == 1 : logarithmic s = aValue * log(1 + gray)
//   iFlag == 2 : power law   s = aValue * pow(|gray|, bValue)
// Any other iFlag value outputs s = 0 (black).

texture Tex0;          // source image, bound by the host application

int   iFlag  = 0;      // transform selector, see table above
float aValue = 0.0;    // gain / scale factor of the selected transform
float bValue = 0.0;    // offset (mode 0, in 0..255 units) or exponent (mode 2)

sampler2D YTex = sampler_state
{
    Texture   = <Tex0>;
    MipFilter = LINEAR;
    MinFilter = LINEAR;
    MagFilter = LINEAR;
    AddressU  = CLAMP;
    AddressV  = CLAMP;
};

struct PS_INPUT
{
    float2 uvCoords0 : TEXCOORD0;
};

// Returns the remapped gray level replicated into R, G and B, with alpha = 1.
float4 Main( PS_INPUT input ) : COLOR0
{
    // Fetch the texel once instead of once per channel.
    float3 rgb = tex2D( YTex, input.uvCoords0 ).rgb;

    // RGB to gray using the BT.601 luma weights.
    // (Original author's note: not sure this is how it should be displayed;
    // assumed so for now.)
    float gray = dot( rgb, float3( 0.299, 0.587, 0.114 ) );

    float s = 0;
    if (iFlag == 0)
    {
        s = aValue * gray + bValue / 255;
    }
    else if (iFlag == 1)
    {
        s = aValue * log( 1 + gray );
    }
    else if (iFlag == 2)
    {
        // abs() keeps pow() away from a negative base.
        s = aValue * pow( abs( gray ), bValue );
    }

    return float4( s, s, s, 1.0 );
}
點運算如此簡單是因為GPU是并行運算的,我個人認為可以看成是每一個像素點(BGRA)對應一個線程,這大概就是OpenCL中所謂的數據并行。這是一個非常簡單的程序,指令數少,程序結構也很簡單,shader 的版本用2.0就可以輕松編過。
2.卷積運算舉例
指令數較多的情況下,2.0版本的shader就搞不定了,上3.0版本可以做一些簡單的卷積運算。以均值濾波(box blur;下面的代碼是對鄰域像素求平均,而不是取中值)為例:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
|
// Box (mean) blur effect for Direct3D 9, shader model 3.0.
//
// Averages a ksize x ksize neighbourhood around each pixel. Filtering is only
// applied to pixels whose texture coordinate lies strictly inside the
// rectangle (fLeft, fTop) - (fRight, fBottom); every other pixel is passed
// through unchanged. Supported kernel sizes are the odd values 3, 5, 7, 9
// and 11; any other ksize also passes the source pixel through.

texture Tex0;                 // source image, bound by the host application

matrix WorldMatrix;
matrix ViewMatrix;
matrix ProjMatrix;

sampler2D YTex = sampler_state
{
    Texture   = <Tex0>;
    MipFilter = LINEAR;
    MinFilter = LINEAR;
    MagFilter = LINEAR;
    AddressU  = CLAMP;
    AddressV  = CLAMP;
};

struct VS_INPUT
{
    float4 pos   : POSITION;
    float4 color : COLOR0;
    float2 tex   : TEXCOORD0;
};

struct VS_OUTPUT
{
    float4 pos   : POSITION;
    float4 color : COLOR0;
    float2 tex   : TEXCOORD0;
};

// Size used to derive the one-texel step (1 / size). Must be set to a
// non-zero value by the host before the filter is enabled.
float2 g_v4ScreenSize;

int   ksize   = 1;            // kernel edge length; 1 disables filtering

// Filtered region in texture coordinates. With these defaults no coordinate
// in [0,1] satisfies the region test, so nothing is filtered until the host
// sets them.
float fLeft   = -1.0f;
float fTop    = -1.0f;
float fRight  = -1.0f;
float fBottom = -1.0f;

//--------------------------------- BurTechnique --------------------------------------

// Transforms the vertex by World * View * Proj and forwards colour and UV.
VS_OUTPUT MainVS_Screen( VS_INPUT In )
{
    VS_OUTPUT Out = ( VS_OUTPUT )0;

    float4x4 matWorldView = mul( WorldMatrix, ViewMatrix );
    float4x4 matProject   = mul( matWorldView, ProjMatrix );

    Out.pos   = mul( In.pos, matProject );
    Out.tex   = In.tex;
    Out.color = In.color;
    return Out;
}

// Samples the tap at integer grid position (col, row) of the kernel window
// whose top-left tap is at 'origin'.
float3 SampleTap( float2 origin, float2 texel, int col, int row )
{
    return tex2D( YTex, origin + texel * float2( col, row ) ).rgb;
}

// Sums the taps that grow an (n-2) x (n-2) window into an n x n window:
// the two new columns (all n rows) plus the two new rows (first n-2 columns).
// 'n' must be a literal at every call site so that both loops have
// compile-time bounds and can be fully unrolled by the compiler.
float3 SumRing( float2 origin, float2 texel, int n )
{
    float3 acc = float3( 0.0f, 0.0f, 0.0f );

    for (int row = 0; row < n; row++)
    {
        acc += SampleTap( origin, texel, n - 2, row );
        acc += SampleTap( origin, texel, n - 1, row );
    }
    for (int col = 0; col < n - 2; col++)
    {
        acc += SampleTap( origin, texel, col, n - 2 );
        acc += SampleTap( origin, texel, col, n - 1 );
    }
    return acc;
}

// Returns the mean colour of the ksize x ksize window centred on In.tex
// (alpha forced to 1), or the unfiltered texel when the kernel size is
// unsupported or the pixel is outside the filtered region.
float4 MainPS_Screen( VS_INPUT In ) : COLOR0
{
    float4 outColor = tex2D( YTex, In.tex ).rgba;

    // Only odd kernel sizes from 3 to 11 are implemented.
    if (ksize < 3 || ksize > 11 || ksize % 2 == 0)
    {
        return outColor;
    }

    // Leave pixels outside the selected rectangle untouched.
    if (!(In.tex.x < fRight && In.tex.y < fBottom && In.tex.x > fLeft && In.tex.y > fTop))
    {
        return outColor;
    }

    // One-texel step in texture-coordinate units.
    float2 texel = float2( 1.0f / g_v4ScreenSize.x, 1.0f / g_v4ScreenSize.y );

    // Distance from the centre tap to the first tap, in whole texels.
    // The previous expression `x_off * ksize/2` was evaluated in float as
    // (x_off * ksize) / 2, i.e. 1.5 texels for a 3x3 kernel, which shifted the
    // whole window half a texel up-left and put every tap between texels.
    // NOTE(review): assumes In.tex addresses a texel centre; the D3D9
    // half-pixel offset, if any, is the host's responsibility - confirm.
    float  radius = ( ksize - 1 ) * 0.5f;
    float2 origin = In.tex - texel * radius;

    // Start with the top-left tap, then grow the window ring by ring.
    float3 sum = SampleTap( origin, texel, 0, 0 );

    sum += SumRing( origin, texel, 3 );          // ksize >= 3 is guaranteed here
    if (ksize >= 5)
    {
        sum += SumRing( origin, texel, 5 );
    }
    if (ksize >= 7)
    {
        sum += SumRing( origin, texel, 7 );
    }
    if (ksize >= 9)
    {
        sum += SumRing( origin, texel, 9 );
    }
    if (ksize >= 11)
    {
        sum += SumRing( origin, texel, 11 );
    }

    outColor = float4( sum / ( ksize * ksize ), 1.0f );
    return outColor;
}

//--------------------------- Technique ---------------------------
technique BurTechnique
{
    pass P0
    {
        LightEnable[0] = false;
        VertexShader = compile vs_3_0 MainVS_Screen();
        PixelShader  = compile ps_3_0 MainPS_Screen();
    }
}
由于3.0版本的shader似乎不允許pixel shader單獨出現,所以我從點運算用像素著色器實現改為用特效來實現。HLSL語法中有if語句,也有for語句,可是這個程序卻不厭其煩的把所有的都給列出來,而沒有使用for循環。這是因為在實際使用中發現有一些限制,比如if語句的if(A>B),A與B中必須有一個是常量,就像上面見到的那種形式;for循環中間的判斷也是如此,只是在第二層j循環中可以是第一層循環的i,即不可以寫成下面這種循環上界為變量的形式:
1
2
3
4
5
6
7
|
// Illustrative fragment only, not compilable code: the row of dots is the
// article author's placeholder for the loop body.
// It shows the nested-loop shape that, according to the surrounding article,
// the author's shader compiler rejected unless the loop bounds ksize and
// ksize1 are constants (ksize1 may alternatively be the outer index i).
for ( int i=0;i<ksize;i++) { for ( int j=0;j<ksize1;j++) { .......... } } |
以上代碼的ksize與ksize1都必須為常數,例外的情況是ksize1可以為第一層循環的 i 。這個問題不知道后續版本的shader有沒有,反正我當前使用的版本有。
另外有一個需要注意的地方是指令數,2.0版本的shader支持的指令數相當少(ps_2_0只有64條算術指令加32條紋理指令),3.0版本則要多好多(ps_3_0至少保證512個指令槽),我最長寫到了400多條快500條時才導致編譯失敗。還有一個需要提醒的是3.0版本的shader只支持D3D 9.0C以后的。如果要求做更為復雜的圖像處理,可以的話建議上D3D11,compute shader雖然我沒用過,但從介紹來說,應該可以處理一些更為復雜的圖像處理。
以上就是本文的全部內容,希望本文的內容對大家的學習或者工作能帶來一定的幫助,同時也希望多多支持服務器之家!
原文鏈接:http://www.cnblogs.com/betterwgo/p/6403522.html