.file	"crypto/bn/asm/x86-gf2m.s"
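# This file appears to be perlasm output generated from crypto/bn/asm/x86-gf2m.pl
# (a CRYPTOGAMS module by <appro@openssl.org>); it is normally produced by the
# build and not edited by hand.  It provides carry-less (GF(2)[x]) polynomial
# multiplication for 32-bit x86, used by OpenSSL's GF(2^m) bignum routines.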
.text
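# _mul_1x1_mmx: carry-less (GF(2)[x]) 32x32->64-bit multiplication of %eax by
# %ebx; the product is returned in %mm0.  An 8-entry table of a*{0..7} (built
# from the low 30 bits of %eax) is placed on the stack and %ebx is consumed in
# 3-bit windows; bits 30 and 31 of %eax are handled separately via %mm4/%mm5.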
.type	_mul_1x1_mmx,@function
.align	16
_mul_1x1_mmx:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx
	andl	$1073741823,%ecx
	leal	(%edx,%edx,1),%ebp
	movl	$0,(%esp)
	andl	$2147483647,%edx
	movd	%eax,%mm2
	movd	%ebx,%mm3
	movl	%ecx,4(%esp)
	xorl	%edx,%ecx
	pxor	%mm5,%mm5
	pxor	%mm4,%mm4
	movl	%edx,8(%esp)
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)
	pcmpgtd	%mm2,%mm5
	paddd	%mm2,%mm2
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)
	xorl	%edx,%ebp
	pand	%mm3,%mm5
	pcmpgtd	%mm2,%mm4
	movl	%ecx,20(%esp)
	xorl	%ecx,%ebp
	psllq	$31,%mm5
	pand	%mm3,%mm4
	movl	%edx,24(%esp)
	movl	$7,%esi
	movl	%ebp,28(%esp)
	movl	%esi,%ebp
	andl	%ebx,%esi
	shrl	$3,%ebx
	movl	%ebp,%edi
	psllq	$30,%mm4
	andl	%ebx,%edi
	shrl	$3,%ebx
	movd	(%esp,%esi,4),%mm0
	movl	%ebp,%esi
	andl	%ebx,%esi
	shrl	$3,%ebx
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$3,%mm2
	andl	%ebx,%edi
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$6,%mm1
	andl	%ebx,%esi
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$9,%mm2
	andl	%ebx,%edi
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$12,%mm1
	andl	%ebx,%esi
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$15,%mm2
	andl	%ebx,%edi
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$18,%mm1
	andl	%ebx,%esi
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$21,%mm2
	andl	%ebx,%edi
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$24,%mm1
	andl	%ebx,%esi
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2
	pxor	%mm4,%mm0
	psllq	$27,%mm2
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	pxor	%mm5,%mm0
	psllq	$30,%mm1
	addl	$36,%esp
	pxor	%mm1,%mm0
	ret
.size	_mul_1x1_mmx,.-_mul_1x1_mmx
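# _mul_1x1_ialu: the same carry-less 32x32->64-bit multiplication of %eax by
# %ebx as _mul_1x1_mmx, but using only integer ALU instructions; the 64-bit
# product is returned in %edx:%eax.  Bits 30 and 31 of %eax are folded in
# with arithmetic-shift masks instead of MMX registers.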
.type	_mul_1x1_ialu,@function
.align	16
_mul_1x1_ialu:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx
	leal	(,%eax,4),%ebp
	andl	$1073741823,%ecx
	leal	(%eax,%eax,1),%edi
	sarl	$31,%eax
	movl	$0,(%esp)
	andl	$2147483647,%edx
	movl	%ecx,4(%esp)
	xorl	%edx,%ecx
	movl	%edx,8(%esp)
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)
	xorl	%edx,%ebp
	movl	%ecx,20(%esp)
	xorl	%ecx,%ebp
	sarl	$31,%edi
	andl	%ebx,%eax
	movl	%edx,24(%esp)
	andl	%ebx,%edi
	movl	%ebp,28(%esp)
	movl	%eax,%edx
	shll	$31,%eax
	movl	%edi,%ecx
	shrl	$1,%edx
	movl	$7,%esi
	shll	$30,%edi
	andl	%ebx,%esi
	shrl	$2,%ecx
	xorl	%edi,%eax
	shrl	$3,%ebx
	movl	$7,%edi
	andl	%ebx,%edi
	shrl	$3,%ebx
	xorl	%ecx,%edx
	xorl	(%esp,%esi,4),%eax
	movl	$7,%esi
	andl	%ebx,%esi
	shrl	$3,%ebx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$3,%ebp
	andl	%ebx,%edi
	shrl	$29,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$6,%ecx
	andl	%ebx,%esi
	shrl	$26,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$9,%ebp
	andl	%ebx,%edi
	shrl	$23,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$12,%ecx
	andl	%ebx,%esi
	shrl	$20,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$15,%ebp
	andl	%ebx,%edi
	shrl	$17,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$18,%ecx
	andl	%ebx,%esi
	shrl	$14,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$21,%ebp
	andl	%ebx,%edi
	shrl	$11,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$24,%ecx
	andl	%ebx,%esi
	shrl	$8,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp
	movl	%ebp,%ecx
	shll	$27,%ebp
	movl	(%esp,%esi,4),%edi
	shrl	$5,%ecx
	movl	%edi,%esi
	xorl	%ebp,%eax
	shll	$30,%edi
	xorl	%ecx,%edx
	shrl	$2,%esi
	xorl	%edi,%eax
	xorl	%esi,%edx
	addl	$36,%esp
	ret
.size	_mul_1x1_ialu,.-_mul_1x1_ialu
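# bn_GF2m_mul_2x2: multiply two 64-bit binary polynomials a = a1:a0 and
# b = b1:b0, storing the 128-bit carry-less product at r.  This appears to
# correspond to the declaration used by OpenSSL's crypto/bn/bn_gf2m.c:
#
#	void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
#	                     BN_ULONG b1, BN_ULONG b0);
#
# with BN_ULONG being a 32-bit word on x86 and r receiving four words.
# OPENSSL_ia32cap_P is loaded through the GOT (position-independent code) and
# used to choose one of three paths: PCLMULQDQ (a single carry-less multiply),
# MMX, or plain integer ALU.  The latter two use Karatsuba: three 32x32->64
# multiplications a1*b1, a0*b0 and (a1^a0)*(b1^b0).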
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
	call	.L000PIC_me_up
.L000PIC_me_up:
	popl	%edx
	leal	_GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%edx),%edx
	movl	OPENSSL_ia32cap_P@GOT(%edx),%edx
	movl	(%edx),%eax
	movl	4(%edx),%edx
	testl	$8388608,%eax		# MMX available?
	jz	.L001ialu
	testl	$16777216,%eax		# FXSR available?
	jz	.L002mmx
	testl	$2,%edx			# PCLMULQDQ available?
	jz	.L002mmx
	movups	8(%esp),%xmm0		# load a1,a0,b1,b0
	shufps	$177,%xmm0,%xmm0	# low qword = a1:a0, high qword = b1:b0
.byte	102,15,58,68,192,1	# pclmulqdq	$1,%xmm0,%xmm0
	movl	4(%esp),%eax		# r
	movups	%xmm0,(%eax)		# store 128-bit product
	ret
.align	16
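# MMX path: three _mul_1x1_mmx calls (a1*b1, a0*b0, (a1^a0)*(b1^b0)) are
# combined Karatsuba-style; the middle term is split with 32-bit shifts and
# folded into the low and high halves before the result is stored at r.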
.L002mmx:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm7
	movl	28(%esp),%eax
	movl	36(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm6
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	xorl	28(%esp),%eax
	xorl	36(%esp),%ebx
	call	_mul_1x1_mmx
	pxor	%mm7,%mm0
	movl	20(%esp),%eax
	pxor	%mm6,%mm0
	movq	%mm0,%mm2
	psllq	$32,%mm0
	popl	%edi
	psrlq	$32,%mm2
	popl	%esi
	pxor	%mm6,%mm0
	popl	%ebx
	pxor	%mm7,%mm2
	movq	%mm0,(%eax)
	popl	%ebp
	movq	%mm2,8(%eax)
	emms
	ret
.align	16
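# Integer-only path: the same Karatsuba scheme using _mul_1x1_ialu, with the
# partial products kept in a 20-byte stack frame and recombined word by word.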
.L001ialu:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$20,%esp
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,8(%esp)
	movl	%edx,12(%esp)
	movl	48(%esp),%eax
	movl	56(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,(%esp)
	movl	%edx,4(%esp)
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	xorl	48(%esp),%eax
	xorl	56(%esp),%ebx
	call	_mul_1x1_ialu
	movl	40(%esp),%ebp
	movl	(%esp),%ebx
	movl	4(%esp),%ecx
	movl	8(%esp),%edi
	movl	12(%esp),%esi
	xorl	%edx,%eax
	xorl	%ecx,%edx
	xorl	%ebx,%eax
	movl	%ebx,(%ebp)
	xorl	%edi,%edx
	movl	%esi,12(%ebp)
	xorl	%esi,%eax
	addl	$20,%esp
	xorl	%esi,%edx
	popl	%edi
	xorl	%edx,%eax
	popl	%esi
	movl	%edx,8(%ebp)
	popl	%ebx
	movl	%eax,4(%ebp)
	popl	%ebp
	ret
.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
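# The bytes below spell the identification string
# "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".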
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte	62,0
.comm	OPENSSL_ia32cap_P,8,4