difos/target/linux/ubicom32/files/arch/ubicom32/lib/mem_ubicom32.c
2010-02-22 13:54:47 +00:00

343 lines
8.1 KiB
C

/*
* arch/ubicom32/lib/mem_ubicom32.c
* Memory block functions: memcpy(), memset() and memmove().
*
* (C) Copyright 2009, Ubicom, Inc.
*
* This file is part of the Ubicom32 Linux Kernel Port.
*
* The Ubicom32 Linux Kernel Port is free software: you can redistribute
* it and/or modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 2 of the
* License, or (at your option) any later version.
*
* The Ubicom32 Linux Kernel Port is distributed in the hope that it
* will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with the Ubicom32 Linux Kernel Port. If not,
* see <http://www.gnu.org/licenses/>.
*
* Ubicom32 implementation derived from (with many thanks):
* arch/m68knommu
* arch/blackfin
* arch/parisc
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/compiler.h>
/* Upper-case aliases for the likely()/unlikely() branch-prediction hints. */
#define LIKELY likely
/* NOTE(review): UNLIKELY is not referenced by the code visible in this file. */
#define UNLIKELY unlikely
/*
* Integer type used below to hold a pointer value for alignment arithmetic.
* NOTE(review): u32_t is not declared in this file; presumably a 32-bit
* unsigned type provided by one of the headers included above -- confirm.
*/
typedef u32_t addr_t;
/*
* memcpy()
* Copy n bytes from src to dest and return the original value of dest.
*
* Three strategies are tried in order, selected by how the addresses of
* dest and src relate to each other:
* 1. dest and src agree in their low two address bits and n > 6:
* copy 0-3 leading bytes to reach a 4-byte boundary, then 32-bit
* words, then the 0-3 trailing bytes.
* 2. dest and src agree in their low address bit and n > 2:
* copy one leading byte if dest is odd, then 16-bit halfwords,
* then one trailing byte if one is left.
* 3. otherwise: byte-by-byte copy.
*
* Every strategy uses an unrolled run of move instructions that is entered
* through a computed jump: the first pass skips ((-count) & (unroll - 1))
* of the moves, every later pass executes the whole run.
*
* All copies run from low to high addresses; nothing here selects a
* direction for overlapping buffers (memmove() in this file does that).
*/
void *memcpy(void *dest, const void *src, size_t n)
{
/* The asm blocks advance dest, so remember the value to return. */
void *dest_ret = dest;
/*
* Word path: same offset within a 4-byte word for dest and src, so
* aligning dest aligns src too. n > 6 guarantees at least one whole
* word remains after up to 3 alignment bytes are taken off.
*/
if (LIKELY((((addr_t)dest ^ (addr_t)src) & 3) == 0) && LIKELY(n > 6)) {
size_t m;
/* Remove the 0-3 bytes needed to bring dest up to a 4-byte boundary. */
n -= (4 - (addr_t)dest) & 0x03;
/* m = number of whole 32-bit words; the low two bits of n are the tail. */
m = n >> 2;
asm volatile (
" sub.4 d15, #0, %2 \n\t" // set up for jump table: d15 = -m
" and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1)
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.4 a3, (a3,d15) \n\t" // index a3 by d15 entries: skip d15 of the unrolled moves
" bfextu d15, %0, #2 \n\t" // d15 = (dest & 3)
" jmpne.w.f 100f \n\t" // non-zero: dest needs aligning first
" calli a3, 0(a3) \n\t" // 4-byte alignment: jump straight into the table
"100: cmpi d15, #2 \n\t" // compare (dest & 3) with 2
" jmpne.s.f 101f \n\t" // not 2: (dest & 3) is 1 or 3
" move.2 (%0)2++, (%1)2++ \n\t" // copy one halfword to reach the boundary
" calli a3, 0(a3) \n\t" // 2-byte alignment
"101: move.1 (%0)1++, (%1)1++ \n\t" // (dest & 3) is 1 or 3: copy one byte
" jmpgt.s.f 102f \n\t" // 3-byte alignment: that byte was enough
" move.2 (%0)2++, (%1)2++ \n\t" // 1-byte alignment: two more bytes needed
"102: calli a3, 0(a3) \n\t" // enter the table
"200: cmpi %3, #2 \n\t" // tail: n is now 1, 2 or 3
" jmplt.s.f 201f \n\t" // n == 1: just one byte
" move.2 (%0)2++, (%1)2++ \n\t" // n is 2 or 3: copy a halfword
" jmpeq.s.t 2f \n\t" // n == 2: done (NOTE(review): relies on the cmpi flags surviving move.2)
"201: move.1 (%0)1++, (%1)1++ \n\t" // final byte for n == 1 or n == 3
" jmpt.w.t 2f \n\t" // presumably always taken: skip the table and exit
"1: .rept 25 \n\t" // table: 25 + 7 = 32 word moves per pass
" movea (%0)4++, (%1)4++ \n\t" // NOTE(review): why movea here and move.4 below is not evident from this file
" .endr \n\t"
" .rept 7 \n\t"
" move.4 (%0)4++, (%1)4++ \n\t"
" .endr \n\t"
" add.4 %2, #-32, %2 \n\t" // m -= 32
" jmpgt.w.f 1b \n\t" // words remain: run the whole table again
" and.4 %3, #3, %3 \n\t" // check n: keep only the trailing byte count
" jmpne.w.f 200b \n\t" // non-zero: copy the tail
"2: \n\t"
/* %0 = dest, %1 = src, %2 = m (word count), %3 = n (byte count) */
: "+a" (dest), "+a" (src), "+d" (m), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
return dest_ret;
}
/*
* Halfword path: dest and src are both even or both odd. n > 2
* guarantees at least one whole halfword after an alignment byte.
*/
if (LIKELY((((addr_t)dest ^ (addr_t)src) & 1) == 0) && LIKELY(n > 2)) {
size_t m;
/* Remove the leading byte that is copied separately when dest is odd. */
n -= (addr_t)dest & 0x01;
/* m = number of whole 16-bit halfwords; bit 0 of n is the tail. */
m = n >> 1;
asm volatile (
" sub.4 d15, #0, %2 \n\t" // set up for jump table: d15 = -m
" and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1)
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.4 a3, (a3,d15) \n\t" // index a3 by d15 entries: skip d15 of the unrolled moves
" btst %0, #0 \n\t" // check bit 0
" jmpne.w.f 100f \n\t" // dest is odd: copy one byte first
" calli a3, 0(a3) \n\t" // 2-byte alignment: jump straight into the table
"100: move.1 (%0)1++, (%1)1++ \n\t" // leading byte brings dest to an even address
" calli a3, 0(a3) \n\t" // enter the table
"200: move.1 (%0)1++, (%1)1++ \n\t" // tail: one trailing byte
" jmpt.w.t 2f \n\t" // presumably always taken: skip the table and exit
"1: .rept 32 \n\t" // table: 32 halfword moves per pass
" move.2 (%0)2++, (%1)2++ \n\t"
" .endr \n\t"
" add.4 %2, #-32, %2 \n\t" // m -= 32
" jmpgt.w.f 1b \n\t" // halfwords remain: run the whole table again
" and.4 %3, #1, %3 \n\t" // check n: is there a trailing byte?
" jmpne.w.f 200b \n\t" // yes: copy it
"2: \n\t"
/* %0 = dest, %1 = src, %2 = m (halfword count), %3 = n (byte count) */
: "+a" (dest), "+a" (src), "+d" (m), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
return dest_ret;
}
/*
* Byte path: reached when the alignments differ or n is too small for
* the paths above. n may be 0 here, hence the early exit in the asm.
*/
asm volatile (
" sub.4 d15, #0, %2 \n\t" // d15 = -n
" jmpeq.w.f 2f \n\t" // n == 0: nothing to copy
" and.4 d15, #(16-1), d15 \n\t" // d15 = (-n) & (16 - 1)
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.4 a3, (a3,d15) \n\t" // index a3 by d15 entries: skip d15 of the unrolled moves
" calli a3, 0(a3) \n\t" // enter the table
"1: .rept 16 \n\t" // table: 16 byte moves per pass
" move.1 (%0)1++, (%1)1++ \n\t"
" .endr \n\t"
" add.4 %2, #-16, %2 \n\t" // n -= 16
" jmpgt.w.f 1b \n\t" // bytes remain: run the whole table again
"2: \n\t"
/* %0 = dest, %1 = src, %2 = n (byte count) */
: "+a" (dest), "+a" (src), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
return dest_ret;
}
/*
* memset()
* Fill n bytes starting at s with the byte value c and return the
* original value of s.
*
* For n > 6 the fill byte is replicated into all four bytes of a word, s is
* brought up to a 4-byte boundary with byte/halfword stores, the bulk is
* written as 32-bit words through an unrolled store table, and the 0-3
* trailing bytes are written last. For n <= 6 a single pass through an
* 8-entry byte-store table is enough, so that path has no loop.
*/
void *memset(void *s, int c, size_t n)
{
/* The asm blocks advance s, so remember the value to return. */
void *s_ret = s;
/* n > 6 guarantees at least one whole word after up to 3 alignment bytes. */
if (LIKELY(n > 6)) {
size_t m;
/* Remove the 0-3 bytes needed to bring s up to a 4-byte boundary. */
n -= (4 - (addr_t)s) & 0x03;
/* m = number of whole 32-bit words; the low two bits of n are the tail. */
m = n >> 2;
asm volatile (
" sub.4 d15, #0, %2 \n\t" // set up for jump table: d15 = -m
" and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1)
" shmrg.1 %1, %1, %1 \n\t" // first step of replicating the fill byte
" shmrg.2 %1, %1, %1 \n\t" // %1 = (c<<24)|(c<<16)|(c<<8)|c
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.4 a3, (a3,d15) \n\t" // index a3 by d15 entries: skip d15 of the unrolled stores
" bfextu d15, %0, #2 \n\t" // d15 = (s & 3)
" jmpne.w.f 100f \n\t" // non-zero: s needs aligning first
" calli a3, 0(a3) \n\t" // 4-byte alignment: jump straight into the table
"100: cmpi d15, #2 \n\t" // compare (s & 3) with 2
" jmpne.s.f 101f \n\t" // not 2: (s & 3) is 1 or 3
" move.2 (%0)2++, %1 \n\t" // store one halfword to reach the boundary
" calli a3, 0(a3) \n\t" // 2-byte alignment
"101: move.1 (%0)1++, %1 \n\t" // (s & 3) is 1 or 3: store one byte
" jmpgt.s.f 102f \n\t" // 3-byte alignment: that byte was enough
" move.2 (%0)2++, %1 \n\t" // 1-byte alignment: two more bytes needed
"102: calli a3, 0(a3) \n\t" // enter the table
"200: cmpi %3, #2 \n\t" // tail: n is now 1, 2 or 3
" jmplt.s.f 201f \n\t" // n == 1: just one byte
" move.2 (%0)2++, %1 \n\t" // n is 2 or 3: store a halfword
" jmpeq.s.t 2f \n\t" // n == 2: done (NOTE(review): relies on the cmpi flags surviving move.2)
"201: move.1 (%0)1++, %1 \n\t" // final byte for n == 1 or n == 3
" jmpt.w.t 2f \n\t" // presumably always taken: skip the table and exit
"1: .rept 25 \n\t" // table: 25 + 7 = 32 word stores per pass
" movea (%0)4++, %1 \n\t" // NOTE(review): why movea here and move.4 below is not evident from this file
" .endr \n\t"
" .rept 7 \n\t"
" move.4 (%0)4++, %1 \n\t"
" .endr \n\t"
" add.4 %2, #-32, %2 \n\t" // m -= 32
" jmpgt.w.f 1b \n\t" // words remain: run the whole table again
" and.4 %3, #3, %3 \n\t" // keep the low two bits of n: trailing byte count
" jmpne.w.f 200b \n\t" // non-zero: store the tail
"2: \n\t"
/* %0 = s, %1 = c (replicated fill value), %2 = m (word count), %3 = n (byte count) */
: "+a" (s), "+d" (c), "+d" (m), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
return s_ret;
}
/*
* Short fill: n is 0..6 here, so one pass through the 8 byte stores
* below covers every case and no loop-back is needed.
*/
asm volatile (
" sub.4 d15, #0, %2 \n\t" // d15 = -n
" jmpeq.w.f 2f \n\t" // n == 0: nothing to store
" and.4 d15, #(8-1), d15 \n\t" // d15 = (-%2) & (8 - 1)
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.4 a3, (a3,d15) \n\t" // index a3 by d15 entries: skip d15 of the unrolled stores
" calli a3, 0(a3) \n\t" // enter the table
"1: .rept 8 \n\t" // table: 8 byte stores, executed at most once
" move.1 (%0)1++, %1 \n\t"
" .endr \n\t"
"2: \n\t"
/* %0 = s, %1 = c, %2 = n (byte count) */
: "+a" (s), "+d" (c), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
return s_ret;
}
/*
* memmove()
* Copy n bytes from src to dest, choosing the copy direction from the
* relative position of the two pointers, and return dest.
*
* When dest <= src the copy runs from low to high addresses; otherwise
* both pointers are moved to the end of their buffers and the copy runs
* from high to low addresses. If dest, src and n are all even the copy is
* done in 16-bit halfwords, otherwise byte by byte.
*
* As in memcpy(), each copy is an unrolled run of moves entered through a
* computed jump so that the first pass handles the remainder.
*/
void *memmove(void *dest, const void *src, size_t n)
{
char *tmp;
const char *s;
/* The asm blocks below have no zero-length exit, so handle n == 0 here. */
if (n == 0)
return dest;
/* Work on copies so that dest itself is still available to return. */
tmp = dest;
s = src;
/*
* Will perform 16-bit move if possible
* (dest, src and n must all have bit 0 clear).
*/
if (likely((((u32)dest | (u32)src | n) & 1) == 0)) {
if (dest <= src) {
/* Forward halfword copy. */
asm volatile (
" sub.4 d15, #0, %2 \n\t" // set up for jump table: d15 = -n
" and.4 d15, #(32-2), d15 \n\t" // d15 = (- count) & (32 - 2), count in bytes and even
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.2 a3, (a3,d15) \n\t" // index a3 by d15 halfword units (presumably d15 * 2 bytes) to skip moves
" calli a3, 0(a3) \n\t" // enter the table
"1: .rept 16 \n\t" // table: 16 halfword moves = 32 bytes per pass
" move.2 (%0)2++, (%1)2++ \n\t"
" .endr \n\t"
" add.4 %2, #-32, %2 \n\t" // n -= 32 bytes
" jmpgt.w.f 1b \n\t" // bytes remain: run the whole table again
/* %0 = tmp (destination cursor), %1 = s (source cursor), %2 = n */
: "+a" (tmp), "+a" (s), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
} else {
/* Backward halfword copy: start one past the end of each buffer. */
tmp += n;
s += n;
asm volatile (
" sub.4 d15, #0, %2 \n\t" // set up for jump table: d15 = -n
" and.4 d15, #(32-2), d15 \n\t" // d15 = (- count) & (32 - 2), count in bytes and even
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.2 a3, (a3,d15) \n\t" // index a3 by d15 halfword units (presumably d15 * 2 bytes) to skip moves
" calli a3, 0(a3) \n\t" // enter the table
"1: .rept 16 \n\t" // table: 16 halfword moves = 32 bytes per pass
" move.2 -2(%0)++, -2(%1)++ \n\t" // presumably pre-decrement: step both cursors back 2, then move
" .endr \n\t"
" add.4 %2, #-32, %2 \n\t" // n -= 32 bytes
" jmpgt.w.f 1b \n\t" // bytes remain: run the whole table again
/* %0 = tmp (destination cursor), %1 = s (source cursor), %2 = n */
: "+a" (tmp), "+a" (s), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
}
return dest;
}
/* Byte copy: at least one of dest, src or n is odd. */
if (dest <= src) {
/* Forward byte copy. */
asm volatile (
" sub.4 d15, #0, %2 \n\t" // set up for jump table: d15 = -n
" and.4 d15, #(16-1), d15 \n\t" // d15 = (- count) & (16 - 1)
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.4 a3, (a3,d15) \n\t" // index a3 by d15 entries: skip d15 of the unrolled moves
" calli a3, 0(a3) \n\t" // enter the table
"1: .rept 16 \n\t" // table: 16 byte moves per pass
" move.1 (%0)1++, (%1)1++ \n\t"
" .endr \n\t"
" add.4 %2, #-16, %2 \n\t" // n -= 16
" jmpgt.w.f 1b \n\t" // bytes remain: run the whole table again
/* %0 = tmp (destination cursor), %1 = s (source cursor), %2 = n */
: "+a" (tmp), "+a" (s), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
} else {
/* Backward byte copy: start one past the end of each buffer. */
tmp += n;
s += n;
asm volatile (
" sub.4 d15, #0, %2 \n\t" // set up for jump table: d15 = -n
" and.4 d15, #(16-1), d15 \n\t" // d15 = (- count) & (16 - 1)
" moveai a3, #%%hi(1f) \n\t" // a3 = address of label 1 (high part)
" lea.1 a3, %%lo(1f)(a3) \n\t" // a3 = address of label 1 (low part added)
" lea.4 a3, (a3,d15) \n\t" // index a3 by d15 entries: skip d15 of the unrolled moves
" calli a3, 0(a3) \n\t" // enter the table
"1: .rept 16 \n\t" // table: 16 byte moves per pass
" move.1 -1(%0)++, -1(%1)++ \n\t" // presumably pre-decrement: step both cursors back 1, then move
" .endr \n\t"
" add.4 %2, #-16, %2 \n\t" // n -= 16
" jmpgt.w.f 1b \n\t" // bytes remain: run the whole table again
/* %0 = tmp (destination cursor), %1 = s (source cursor), %2 = n */
: "+a" (tmp), "+a" (s), "+d" (n)
:
: "d15", "a3", "memory", "cc"
);
}
return dest;
}