/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
15
/*
 * Copy one 16-byte chunk.  r4 and r6 point 4 bytes *before* the current
 * source/destination position and are advanced by 16 via the final
 * update-form loads/stores.  Clobbers r7-r10.
 */
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)
25
/*
 * Same 16-byte copy as COPY_16_BYTES, but every load/store carries a
 * numeric label 8<n><0..7> so that COPY_16_BYTES_EXCODE(n) can attach
 * exception-table fixups to each individual access.
 */
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)
43
/*
 * Fault fixup code for COPY_16_BYTES_WITHEX(n): adjust the remaining
 * byte count r5 by the 16*n bytes this chunk accounts for, then branch
 * to the common read-fault (104f) or write-fault (105f) handler.  The
 * __ex_table entries map each labelled access to the right fixup.
 */
#define COPY_16_BYTES_EXCODE(n)	\
9 ## n ## 0:			\
	addi	r5,r5,-(16 * n);	\
	b	104f;		\
9 ## n ## 1:			\
	addi	r5,r5,-(16 * n);	\
	b	105f;		\
.section __ex_table,"a";	\
	.align	2;		\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text
62
63 .text
64 .stabs "arch/ppc/lib/",N_SO,0,0,0f
65 .stabs "string.S",N_SO,0,0,0f
66
Stephen Rothwell7dffb722005-10-17 11:50:32 +100067CACHELINE_BYTES = L1_CACHE_BYTES
68LG_CACHELINE_BYTES = L1_CACHE_SHIFT
69CACHELINE_MASK = (L1_CACHE_BYTES-1)
Linus Torvalds1da177e2005-04-16 15:20:36 -070070
/*
 * char *strcpy(char *dst [r3], const char *src [r4])
 * Byte-at-a-time copy up to and including the terminating NUL.
 * Returns dst in r3 (never modified).
 */
_GLOBAL(strcpy)
	addi	r5,r3,-1		/* pre-decrement for stbu/lbzu idiom */
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)		/* fetch next source byte */
	cmpwi	0,r0,0
	stbu	r0,1(r5)		/* store it (NUL included) */
	bne	1b
	blr
79
/*
 * char *strncpy(char *dst [r3], const char *src [r4], size_t n [r5])
 * This clears out any unused part of the destination buffer,
 * just as the libc version does. -- paulus
 */
_GLOBAL(strncpy)
	cmpwi	0,r5,0
	beqlr				/* n == 0: nothing to do */
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b			/* dec ctr, branch if ctr != 0 && !cr0.eq */
	bnelr				/* if we didn't hit a null char, we're done */
	mfctr	r5
	cmpwi	0,r5,0			/* any space left in destination buffer? */
	beqlr				/* we know r0 == 0 here */
2:	stbu	r0,1(r6)		/* clear it out if so */
	bdnz	2b
	blr
99
/*
 * char *strcat(char *dst [r3], const char *src [r4])
 * Scan to the NUL of dst, then append src (including its NUL).
 * Returns dst in r3.
 */
_GLOBAL(strcat)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r5)		/* find end of dst */
	cmpwi	0,r0,0
	bne	1b
	addi	r5,r5,-1		/* back up onto the NUL */
1:	lbzu	r0,1(r4)		/* copy src over it */
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr
112
/*
 * int strcmp(const char *s1 [r3], const char *s2 [r4])
 * Returns the byte difference at the first mismatch (0 if equal).
 */
_GLOBAL(strcmp)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0			/* cr1: end of s1? */
	lbzu	r0,1(r4)
	subf.	r3,r0,r3		/* r3 = *s1 - *s2, cr0 set */
	beqlr	1			/* s1 ended: return the difference */
	beq	1b			/* bytes equal: keep scanning */
	blr
123
/*
 * size_t strlen(const char *s [r3])
 * Returns the length (excluding the NUL) in r3.
 */
_GLOBAL(strlen)
	addi	r4,r3,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	bne	1b
	subf	r3,r3,r4		/* length = &NUL - s */
	blr
131
/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable. -- paulus
 *
 * r3 = dest, r4 = count on entry; r4 becomes the zero fill word.
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4			/* r5 = byte count */
	li	r4,0			/* r4 = fill word (zero) */
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f			/* < 4 bytes: byte loop only */
	stwu	r4,4(r6)		/* first word, possibly unaligned */
	beqlr
	andi.	r0,r6,3			/* realign r6 to a word boundary */
	add	r5,r0,r5
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES	/* offset within cache line */
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1		/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2			/* words up to first line boundary */
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
#if !defined(CONFIG_8xx)
10:	dcbz	r7,r6			/* zero a whole line in the cache */
#else
10:	stw	r4, 4(r6)		/* 8xx: no usable dcbz, store by hand */
	stw	r4, 8(r6)
	stw	r4, 12(r6)
	stw	r4, 16(r6)
#if CACHE_LINE_SIZE >= 32
	stw	r4, 20(r6)
	stw	r4, 24(r6)
	stw	r4, 28(r6)
	stw	r4, 32(r6)
#endif /* CACHE_LINE_SIZE */
#endif
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES	/* bytes left after last line */
	addi	r5,r5,4
2:	srwi	r0,r5,2			/* trailing whole words */
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3			/* trailing bytes */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr
192
/*
 * void *memset(void *dst [r3], int c [r4], size_t n [r5])
 * Word-at-a-time fill with byte/word alignment fixups; returns dst.
 */
_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23		/* replicate low byte of c ... */
	rlwimi	r4,r4,16,0,15		/* ... into all 4 bytes of r4 */
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f			/* < 4 bytes: byte loop only */
	stwu	r4,4(r6)		/* first (possibly unaligned) word */
	beqlr
	andi.	r0,r6,3			/* word-align the pointer */
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2			/* whole words */
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3			/* trailing bytes */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr
217
/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
#if !defined(CONFIG_8xx)
	dcbz	r11,r6			/* pre-zero the dest line: no read-for-ownership */
#endif
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2			/* trailing whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3			/* trailing bytes */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr
298
/*
 * void *memmove(void *dst [r3], const void *src [r4], size_t n [r5])
 * Copies backwards when dst > src so overlapping regions are safe.
 */
_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */
303
/*
 * void *memcpy(void *dst [r3], const void *src [r4], size_t n [r5])
 * Unrolled two-words-per-iteration copy with alignment fixups.
 * Returns dst (r3 is never modified).
 */
_GLOBAL(memcpy)
	srwi.	r7,r5,3			/* r7 = n / 8 */
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)		/* main loop: 8 bytes per pass */
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7			/* n %= 8 */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)		/* one leftover word */
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5			/* leftover bytes */
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4			/* bytes needed to align dest */
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31		/* r7 = remaining n / 8 */
	beq	2b
	mtctr	r7
	b	1b
344
/*
 * backwards_memcpy(dst [r3], src [r4], n [r5])
 * Same structure as memcpy but copies from high addresses downwards,
 * so it is safe when dst overlaps src from above.  Returns dst.
 */
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = n >> 3 */
	add	r6,r3,r5		/* start just past the ends */
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3			/* word-align the dest end */
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)		/* main loop: 8 bytes per pass */
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7			/* n %= 8 */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)		/* one leftover word */
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5			/* leftover bytes */
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0			/* align dest byte-by-byte */
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b
380
/*
 * int memcmp(const void *s1 [r3], const void *s2 [r4], size_t n [r5])
 * Returns the byte difference at the first mismatch, 0 if equal
 * (or if n <= 0).
 */
_GLOBAL(memcmp)
	cmpwi	0,r5,0
	ble-	2f			/* n <= 0: regions compare equal */
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3		/* r3 = *s1 - *s2 */
	bdnzt	2,1b			/* loop while equal and ctr != 0 */
	blr
2:	li	r3,0
	blr
394
/*
 * void *memchr(const void *s [r3], int c [r4], size_t n [r5])
 * Returns a pointer to the first occurrence of c, or NULL.
 */
_GLOBAL(memchr)
	cmpwi	0,r5,0
	ble-	2f			/* n <= 0: not found */
	mtctr	r5
	addi	r3,r3,-1
1:	lbzu	r0,1(r3)
	cmpw	0,r0,r4
	bdnzf	2,1b			/* loop while no match and ctr != 0 */
	beqlr				/* match: r3 already points at it */
2:	li	r3,0			/* not found */
	blr
406
/*
 * unsigned long __copy_tofrom_user(void *to [r3], const void *from [r4],
 *				    unsigned long size [r5])
 * Copy with fault handling: every user-space access has an __ex_table
 * entry pointing at fixup code.  Returns the number of bytes NOT copied
 * (0 on complete success).  Uses dcbz/dcbt on the cacheline loop except
 * on 8xx.  Clobbers r0, r6-r11, ctr.
 */
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

#ifdef CONFIG_8xx
	/* Don't use prefetch on 8xx */
	mtctr	r0
	li	r0,0
53:	COPY_16_BYTES_WITHEX(0)
	bdnz	53b

#else /* not CONFIG_8xx */
	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0		/* ctr = lines - prefetch depth */
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4			/* prefetch a source line ahead */
54:	dcbz	r11,r6			/* pre-zero dest line (may fault) */
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0			/* drain the prefetched lines */
	li	r3,4
	li	r7,0
	bne	114b
#endif /* CONFIG_8xx */

63:	srwi.	r0,r5,2			/* trailing whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3			/* trailing bytes */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0			/* success: 0 bytes left */
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text
629
/*
 * unsigned long __clear_user(void *addr [r3], unsigned long size [r4])
 * Zero a user-space buffer with fault handling; returns the number of
 * bytes NOT cleared (0 on success).
 */
_GLOBAL(__clear_user)
	addi	r6,r3,-4
	li	r3,0			/* assume success */
	li	r5,0			/* fill value */
	cmplwi	0,r4,4
	blt	7f
	/* clear a single word */
11:	stwu	r5,4(r6)
	beqlr
	/* clear word sized chunks */
	andi.	r0,r6,3
	add	r4,r0,r4
	subf	r6,r0,r6
	srwi	r0,r4,2
	andi.	r4,r4,3
	mtctr	r0
	bdz	7f
1:	stwu	r5,4(r6)
	bdnz	1b
	/* clear byte sized chunks */
7:	cmpwi	0,r4,0
	beqlr
	mtctr	r4
	addi	r6,r6,3
8:	stbu	r5,1(r6)
	bdnz	8b
	blr
/* fault fixups: compute bytes remaining */
90:	mr	r3,r4			/* faulted on first word */
	blr
91:	mfctr	r3			/* faulted in word loop */
	slwi	r3,r3,2
	add	r3,r3,r4
	blr
92:	mfctr	r3			/* faulted in byte loop */
	blr

	.section __ex_table,"a"
	.align	2
	.long	11b,90b
	.long	1b,91b
	.long	8b,92b
	.text
672
/*
 * long __strncpy_from_user(char *dst [r3], const char *src [r4],
 *			    long count [r5])
 * Copy at most count bytes of a user string; returns the number of
 * bytes copied (excluding the NUL) or -EFAULT on a faulting access.
 */
_GLOBAL(__strncpy_from_user)
	addi	r6,r3,-1
	addi	r4,r4,-1
	cmpwi	0,r5,0
	beq	2f
	mtctr	r5
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b			/* dec ctr, branch if ctr != 0 && !cr0.eq */
	beq	3f			/* hit the NUL: don't count it */
2:	addi	r6,r6,1
3:	subf	r3,r3,r6		/* bytes copied */
	blr
99:	li	r3,-EFAULT		/* fault while reading src */
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b
	.text
694
/* r3 = str, r4 = len (> 0), r5 = top (highest addr)
 * Returns: length including NUL; len + 1 if no NUL within len bytes;
 * 0 on a faulting access.
 */
_GLOBAL(__strnlen_user)
	addi	r7,r3,-1
	subf	r6,r7,r5		/* top+1 - str */
	cmplw	0,r4,r6
	bge	0f
	mr	r6,r4
0:	mtctr	r6			/* ctr = min(len, top - str) */
1:	lbzu	r0,1(r7)		/* get next byte */
	cmpwi	0,r0,0
	bdnzf	2,1b			/* loop if --ctr != 0 && byte != 0 */
	addi	r7,r7,1
	subf	r3,r3,r7		/* number of bytes we have looked at */
	beqlr				/* return if we found a 0 byte */
	cmpw	0,r3,r4			/* did we look at all len bytes? */
	blt	99f			/* if not, must have hit top */
	addi	r3,r4,1			/* return len + 1 to indicate no null found */
	blr
99:	li	r3,0			/* bad address, return 0 */
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b