/*
 * Fast MD5 implementation for PPC
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */
12#include <asm/ppc_asm.h>
13#include <asm/asm-offsets.h>
14
/*
 * Register allocation
 *
 *   rHP, rWP     pointers: hash state (4 words) and input data
 *   rH0 .. rH3   working copies of the four hash words
 *   rW00 .. rW15 the sixteen 32-bit input words of the current block
 *   rT0, rT1     scratch for the per-step boolean function
 *
 * r5 holds the block count on entry; it is copied to CTR before rH3 is
 * loaded, so reusing it as rH3 is safe.
 * r14-r26 are saved and restored by INITIALIZE/FINALIZE.
 * NOTE(review): r13 is skipped in the rW sequence, presumably because the
 * platform ABI reserves it - confirm before reassigning registers.
 */

/* pointers */
#define rHP	r3
#define rWP	r4

/* hash words */
#define rH0	r0
#define rH1	r6
#define rH2	r7
#define rH3	r5

/* input words held in volatile registers */
#define rW00	r8
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12

/* input words held in registers saved by INITIALIZE */
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

/* temporaries, also saved by INITIALIZE */
#define rT0	r25
#define rT1	r26
42
/*
 * Function prologue: open a stack frame of INT_FRAME_SIZE bytes and store
 * r14-r26 in it. Those are exactly the registers aliased as rW05-rW15,
 * rT0 and rT1. Must be paired with FINALIZE before returning.
 */
#define INITIALIZE \
	PPC_STLU	r1,-INT_FRAME_SIZE(r1);	/* new frame below old sp	*/ \
	SAVE_8GPRS(14, r1);			/* r14 .. r21			*/ \
	SAVE_4GPRS(22, r1);			/* r22 .. r25			*/ \
	SAVE_GPR(26, r1)			/* r26				*/
48
/*
 * Function epilogue, the mirror image of INITIALIZE: reload r14-r26 from
 * the frame and release the INT_FRAME_SIZE bytes again.
 */
#define FINALIZE \
	REST_8GPRS(14, r1);			/* r14 .. r21			*/ \
	REST_4GPRS(22, r1);			/* r22 .. r25			*/ \
	REST_GPR(26, r1);			/* r26				*/ \
	addi		r1,r1,INT_FRAME_SIZE;	/* drop the frame		*/
54
/*
 * Input word access.
 *
 * Little endian: words are read with a plain displacement load, rWP stays
 * fixed while a block is processed and is advanced by 64 once per block.
 *
 * Big endian: words are read byte-reversed with lwbrx. That instruction is
 * index-addressed (no displacement), so the "off" argument is ignored and
 * rWP is advanced by 4 after every word instead; sixteen such increments
 * already leave it at the next block, hence NEXT_BLOCK is empty.
 */
#ifndef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* word at rWP + off		*/
#define INC_PTR				/* pointer fixed within a block	*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* step to the following block	*/
#else
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* byte-swapped word at rWP	*/
#define INC_PTR \
	addi		rWP,rWP,4;	/* step to the following word	*/
#define NEXT_BLOCK			/* pointer already advanced	*/
#endif
68
/*
 * R_00_15 - two consecutive MD5 steps of rounds 0..15.
 *
 * Boolean function: F(x,y,z) = (x and y) or (not x and z).
 * Step 1 updates va from (vb, vc, vd); step 2 updates vd from (va, vb, vc).
 * The instructions of both steps are interleaved; the order is kept as
 * written by the original author and should not be changed casually.
 *
 *   va..vd       the four hash registers in the role order of step 1
 *   m0, m1       registers that receive the two input words (loaded here)
 *   rot0, rot1   rotate-RIGHT counts, i.e. 32 minus the MD5 left rotate
 *   ofs          byte offset of m0 in the block (used by the lwz variant
 *                of LOAD_DATA, ignored by the lwbrx variant)
 *   k0hi, k0lo   constant for step 1, split for addis/addi
 *   k1hi, k1lo   constant for step 2, split for addis/addi
 *
 * Clobbers rT0 and rT1. m0 and m1 are left holding word + constant, which
 * the macros of the later rounds rely on.
 */
#define R_00_15(va, vb, vc, vd, m0, m1, rot0, rot1, ofs, k0hi, k0lo, k1hi, k1lo) \
	LOAD_DATA(m0, ofs)		/*    fetch word for step 1	*/ \
	and		rT0,vb,vc;	/* 1: t0 = vb & vc		*/ \
	INC_PTR				/*    next word (lwbrx variant)	*/ \
	andc		rT1,vd,vb;	/* 1: t1 = vd & ~vb		*/ \
	LOAD_DATA(m1, ofs+4)		/*    fetch word for step 2	*/ \
	or		rT0,rT0,rT1;	/* 1: t0 = F(vb, vc, vd)	*/ \
	addi		m0,m0,k0lo;	/* 1: m0 += low part of k0	*/ \
	add		va,va,rT0;	/* 1: va += F			*/ \
	addis		m0,m0,k0hi;	/* 1: m0 += k0hi << 16		*/ \
	addis		m1,m1,k1hi;	/* 2: m1 += k1hi << 16		*/ \
	add		va,va,m0;	/* 1: va += word + k0		*/ \
	addi		m1,m1,k1lo;	/* 2: m1 += low part of k1	*/ \
	rotrwi		va,va,rot0;	/* 1: va = va ror rot0		*/ \
	add		vd,vd,m1;	/* 2: vd += word + k1		*/ \
	add		va,va,vb;	/* 1: va += vb  (step 1 done)	*/ \
	and		rT0,va,vb;	/* 2: t0 = va & vb		*/ \
	andc		rT1,vc,va;	/* 2: t1 = vc & ~va		*/ \
	or		rT0,rT0,rT1;	/* 2: t0 = F(va, vb, vc)	*/ \
	add		vd,vd,rT0;	/* 2: vd += F			*/ \
	INC_PTR				/*    next word (lwbrx variant)	*/ \
	rotrwi		vd,vd,rot1;	/* 2: vd = vd ror rot1		*/ \
	add		vd,vd,va;	/* 2: vd += va  (step 2 done)	*/
92
/*
 * R_16_31 - two consecutive MD5 steps of rounds 16..31.
 *
 * Boolean function: G(x,y,z) = (x and z) or (y and not z).
 * Step 1 updates va from (vb, vc, vd); step 2 updates vd from (va, vb, vc).
 *
 *   va..vd       the four hash registers in the role order of step 1
 *   m0, m1       registers already holding the input words for the steps
 *   rot0, rot1   rotate-RIGHT counts, i.e. 32 minus the MD5 left rotate
 *   k0hi, k0lo   value added to m0, split for addis/addi
 *   k1hi, k1lo   value added to m1, split for addis/addi
 *
 * Nothing is loaded from memory here: m0/m1 still contain whatever earlier
 * macros added to them, and the k values are added on top of that.
 * Clobbers rT0 and rT1; m0 and m1 are modified in place.
 */
#define R_16_31(va, vb, vc, vd, m0, m1, rot0, rot1, k0hi, k0lo, k1hi, k1lo) \
	andc		rT0,vc,vd;	/* 1: t0 = vc & ~vd		*/ \
	and		rT1,vb,vd;	/* 1: t1 = vb & vd		*/ \
	addi		m0,m0,k0lo;	/* 1: m0 += low part of k0	*/ \
	or		rT0,rT0,rT1;	/* 1: t0 = G(vb, vc, vd)	*/ \
	addis		m0,m0,k0hi;	/* 1: m0 += k0hi << 16		*/ \
	add		va,va,rT0;	/* 1: va += G			*/ \
	addi		m1,m1,k1lo;	/* 2: m1 += low part of k1	*/ \
	add		va,va,m0;	/* 1: va += m0			*/ \
	addis		m1,m1,k1hi;	/* 2: m1 += k1hi << 16		*/ \
	andc		rT0,vb,vc;	/* 2: t0 = vb & ~vc		*/ \
	rotrwi		va,va,rot0;	/* 1: va = va ror rot0		*/ \
	add		va,va,vb;	/* 1: va += vb  (step 1 done)	*/ \
	add		vd,vd,m1;	/* 2: vd += m1			*/ \
	and		rT1,va,vc;	/* 2: t1 = va & vc		*/ \
	or		rT0,rT0,rT1;	/* 2: t0 = G(va, vb, vc)	*/ \
	add		vd,vd,rT0;	/* 2: vd += G			*/ \
	rotrwi		vd,vd,rot1;	/* 2: vd = vd ror rot1		*/ \
	add		vd,vd,va;	/* 2: vd += va  (step 2 done)	*/
112
/*
 * R_32_47 - two consecutive MD5 steps of rounds 32..47.
 *
 * Boolean function: H(x,y,z) = x xor y xor z.
 * Step 1 updates va from (vb, vc, vd); step 2 updates vd from (va, vb, vc).
 * rT0 = vb xor vc is computed once and shared by both steps, since step 1
 * changes neither vb nor vc.
 *
 *   va..vd       the four hash registers in the role order of step 1
 *   m0, m1       registers already holding the input words for the steps
 *   rot0, rot1   rotate-RIGHT counts, i.e. 32 minus the MD5 left rotate
 *   k0hi, k0lo   value added to m0, split for addis/addi
 *   k1hi, k1lo   value added to m1, split for addis/addi
 *
 * Clobbers rT0 and rT1; m0 and m1 are modified in place.
 */
#define R_32_47(va, vb, vc, vd, m0, m1, rot0, rot1, k0hi, k0lo, k1hi, k1lo) \
	xor		rT0,vb,vc;	/*    t0 = vb ^ vc (shared)	*/ \
	addi		m0,m0,k0lo;	/* 1: m0 += low part of k0	*/ \
	xor		rT1,rT0,vd;	/* 1: t1 = H(vb, vc, vd)	*/ \
	addis		m0,m0,k0hi;	/* 1: m0 += k0hi << 16		*/ \
	add		va,va,rT1;	/* 1: va += H			*/ \
	addi		m1,m1,k1lo;	/* 2: m1 += low part of k1	*/ \
	add		va,va,m0;	/* 1: va += m0			*/ \
	addis		m1,m1,k1hi;	/* 2: m1 += k1hi << 16		*/ \
	rotrwi		va,va,rot0;	/* 1: va = va ror rot0		*/ \
	add		vd,vd,m1;	/* 2: vd += m1			*/ \
	add		va,va,vb;	/* 1: va += vb  (step 1 done)	*/ \
	xor		rT1,rT0,va;	/* 2: t1 = H(va, vb, vc)	*/ \
	add		vd,vd,rT1;	/* 2: vd += H			*/ \
	rotrwi		vd,vd,rot1;	/* 2: vd = vd ror rot1		*/ \
	add		vd,vd,va;	/* 2: vd += va  (step 2 done)	*/
129
/*
 * R_48_63 - two consecutive MD5 steps of rounds 48..63.
 *
 * Boolean function: I(x,y,z) = y xor (x or not z).
 * Step 1 updates va from (vb, vc, vd); step 2 updates vd from (va, vb, vc).
 *
 *   va..vd       the four hash registers in the role order of step 1
 *   m0, m1       registers already holding the input words for the steps
 *   rot0, rot1   rotate-RIGHT counts, i.e. 32 minus the MD5 left rotate
 *   k0hi, k0lo   value added to m0, split for addis/addi
 *   k1hi, k1lo   value added to m1, split for addis/addi
 *
 * Clobbers rT0 only (rT1 is not touched); m0 and m1 are modified in place.
 */
#define R_48_63(va, vb, vc, vd, m0, m1, rot0, rot1, k0hi, k0lo, k1hi, k1lo) \
	addi		m0,m0,k0lo;	/* 1: m0 += low part of k0	*/ \
	orc		rT0,vb,vd;	/* 1: t0 = vb | ~vd		*/ \
	addis		m0,m0,k0hi;	/* 1: m0 += k0hi << 16		*/ \
	xor		rT0,rT0,vc;	/* 1: t0 = I(vb, vc, vd)	*/ \
	add		va,va,m0;	/* 1: va += m0			*/ \
	addi		m1,m1,k1lo;	/* 2: m1 += low part of k1	*/ \
	add		va,va,rT0;	/* 1: va += I			*/ \
	addis		m1,m1,k1hi;	/* 2: m1 += k1hi << 16		*/ \
	rotrwi		va,va,rot0;	/* 1: va = va ror rot0		*/ \
	add		va,va,vb;	/* 1: va += vb  (step 1 done)	*/ \
	orc		rT0,va,vc;	/* 2: t0 = va | ~vc		*/ \
	add		vd,vd,m1;	/* 2: vd += m1			*/ \
	xor		rT0,rT0,vb;	/* 2: t0 = I(va, vb, vc)	*/ \
	add		vd,vd,rT0;	/* 2: vd += I			*/ \
	rotrwi		vd,vd,rot1;	/* 2: vd = vd ror rot1		*/ \
	add		vd,vd,va;	/* 2: vd += va  (step 2 done)	*/
147
/*
 * ppc_md5_transform - fold whole 64-byte blocks into an MD5 hash state
 *
 * In:   r3 (rHP)  pointer to the four 32-bit hash words; read at entry and
 *                 written back after every block
 *       r4 (rWP)  pointer to the input data
 *       r5        number of 64-byte blocks to process
 * Out:  hash words at 0/4/8/12(r3) updated; no register result is set
 * Uses: r0, r4-r12, CTR, CR0 are modified; r14-r26 are used but saved and
 *       restored through INITIALIZE/FINALIZE; r3 is left unchanged
 *
 * A block count of zero returns immediately. Without that check the count
 * would be moved to CTR as 0 and bdnz, which decrements before testing,
 * would wrap around and keep processing memory far beyond the input.
 *
 * Constants: every round macro adds its constant into the word register
 * itself and nobody reloads the words afterwards. The hi/lo pairs passed
 * in rounds 16..63 are therefore the differences to the constant that the
 * same word register received in the previous round, not the MD5 table
 * values themselves. Because addi sign-extends its operand, the hi half
 * is one larger than the plain upper 16 bits whenever the lo half is
 * negative.
 *
 * Rotates: rotrwi rotates right, so the counts are 32 minus the left
 * rotate of the MD5 specification (e.g. 25 stands for "rotate left 7").
 */
_GLOBAL(ppc_md5_transform)
	cmpwi		r5,0		/* no blocks requested?		*/
	beqlr				/* return before touching stack	*/

	INITIALIZE

	mtctr		r5		/* CTR = block count; r5 is free now */
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)

ppc_md5_main:
	/* rounds 0..15: load the sixteen input words and apply F */
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45, -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

	/* rounds 16..31: G, words taken in the order 1, 6, 11, 0, ... */
	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee, -8608, 0xf258, -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

	/* rounds 32..47: H, words taken in the order 5, 8, 11, 14, ... */
	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218, -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
		0x4701, -27017, 0xc7bd, -19859)

	/*
	 * rounds 48..63: I, words taken in the order 0, 7, 14, 5, ...
	 * rW00, rW14, rW12 and rW10 are not needed as input words once the
	 * first four macros below have run, so they are reused to fetch the
	 * hash words stored at rHP (the values from before this block) for
	 * the final addition.
	 */
	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988, -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf, -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	lwz		rW00,0(rHP)	/* previous hash word 0		*/
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz		rW14,4(rHP)	/* previous hash word 1		*/
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz		rW12,8(rHP)	/* previous hash word 2		*/
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz		rW10,12(rHP)	/* previous hash word 3		*/
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

	/* add the previous hash to the result and store it back */
	add		rH0,rH0,rW00
	stw		rH0,0(rHP)
	add		rH1,rH1,rW14
	stw		rH1,4(rHP)
	add		rH2,rH2,rW12
	stw		rH2,8(rHP)
	add		rH3,rH3,rW10
	stw		rH3,12(rHP)
	NEXT_BLOCK

	bdnz		ppc_md5_main	/* --CTR != 0: next block	*/

	FINALIZE
	blr