realtek: add rt-loader (runtime loader)
The bootloader of many Realtek switches only supports gzipped kernel images. With limited flash space that might get critical in future versions. For better compression allow support for LZMA compressed images. For this a new loader was developed. Several ideas have been taken over from the existing lzma loader but this has been enhanced to make integration simpler. What is new: - Loader is position independent. No need to define load addresses - Loader identifies device memory on its own - Loader uses "official" upstream kernel lzma uncompress https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/decompress_unlzma.c - Loader uses "official" UNMODIFIED nanoprintf that is used by several bare metal projects. https://github.com/charlesnicholson/nanoprintf Compiled, the loader is just under 12KiB and during boot it will show: rt-loader Found RTL8380M (chip id 6275C) with 256MB Relocate 2924240 bytes from 0x80100000 to 0x8fce0000 Extract kernel with 2900144 bytes from 0x8fce521c to 0x80100000... Extracted kernel size is 9814907 bytes Booting kernel from 0x80100000 ... [ 0.000000] Linux version 6.12.33 ... [ 0.000000] RTL838X model is 83806800 ... Signed-off-by: Markus Stockhausen <markus.stockhausen@gmx.de> Link: https://github.com/openwrt/openwrt/pull/18397 Signed-off-by: Robert Marko <robimarko@gmail.com>
This commit is contained in:
parent
ba2ae60a00
commit
ccbff8bbdd
11 changed files with 2603 additions and 0 deletions
98
target/linux/realtek/image/rt-loader/Makefile
Normal file
98
target/linux/realtek/image/rt-loader/Makefile
Normal file
|
@ -0,0 +1,98 @@
|
|||
# rt-loader make file
|
||||
# (c) 2025 Markus Stockhausen
|
||||
#
|
||||
# This is the make file for the rt-loader (aka runtime or realtek loader). It tries to
|
||||
# avoid copying files around where possible. Therefore it is controlled by the following
|
||||
# input parameters
|
||||
#
|
||||
# KERNEL_IMG_IN: The filename of an LZMA compressed kernel image. This is required
|
||||
# KERNEL_IMG_OUT: The filename of the kernel image with the rt-loader prepended.
|
||||
# If not given it will be created as image.bin into the BUILD_DIR.
|
||||
# BUILD_DIR: The temporary build dir. If not given it will be set to "build".
|
||||
#
|
||||
# To add it into the OpenWrt toolchain just create two new build commands
|
||||
#
|
||||
# define Build/rt-loader
|
||||
# $(MAKE) all clean -C rt-loader CROSS_COMPILE="$(TARGET_CROSS)" \
|
||||
# KERNEL_IMG_IN="$@" KERNEL_IMG_OUT="$@.new" BUILD_DIR="$@.build"
|
||||
# mv "$@.new" "$@"
|
||||
# endef
|
||||
#
|
||||
# define Build/rt-compress
|
||||
# $(STAGING_DIR_HOST)/bin/xz --format=lzma -9 --stdout "$@" > "$@.new"
|
||||
# mv "$@.new" "$@"
|
||||
# endef
|
||||
#
|
||||
# Use them in a new kernel build recipe
|
||||
#
|
||||
# define Device/uimage-rt-loader
|
||||
# KERNEL/rt-loader := kernel-bin | append-dtb | rt-compress | rt-loader
|
||||
# KERNEL := $$(KERNEL/rt-loader) | uImage none
|
||||
# KERNEL_INITRAMFS := $$(KERNEL/rt-loader) | uImage none
|
||||
# endef
|
||||
#
|
||||
# And finally add it to the target device. E.g.
|
||||
#
|
||||
# define Device/linksys_lgs310c
|
||||
# $(Device/uimage-rt-loader)
|
||||
# ...
|
||||
# endef
|
||||
|
||||
# Remove a half-written target when its recipe fails, so a truncated object or
# image is never mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Command-like targets that do not name real files.
.PHONY: all install clean

CC := $(CROSS_COMPILE)gcc
LD := $(CROSS_COMPILE)ld
OBJCOPY := $(CROSS_COMPILE)objcopy
OBJDUMP := $(CROSS_COMPILE)objdump

CFLAGS = -fpic -mabicalls -O2 -fno-builtin-printf -Iinclude

ASFLAGS = -fpic -msoft-float -Iinclude

LDFLAGS = -static -nostdlib -T linker/linker.ld --no-warn-mismatch

# Kept recursive (lazy) on purpose: it is expanded only inside the image.o
# recipe, so objdump is queried once per build and never for "make clean".
O_FORMAT = $(shell $(OBJDUMP) -i | head -2 | grep elf32)

SOURCES = src/startup.S src/main.c src/board.c src/memory.c src/unlzma.c

BUILD_DIR ?= build

# "clean" removes $(BUILD_DIR)/ recursively. An explicitly empty BUILD_DIR
# would turn that into "rm -rf /", so refuse it up front.
ifeq ($(strip $(BUILD_DIR)),)
$(error BUILD_DIR must not be empty)
endif

IMAGE_OBJ := $(BUILD_DIR)/image.o
IMAGE_ELF := $(BUILD_DIR)/image.elf

KERNEL_IMG_OUT ?= $(BUILD_DIR)/image.bin

OBJECTS_C = $(filter %.c,$(SOURCES))
OBJECTS_S = $(filter %.S,$(SOURCES))

# Assembler objects come first so that startup.o (the entry code) is the first
# object handed to the linker.
OBJECTS := $(OBJECTS_S:.S=.o) $(OBJECTS_C:.c=.o)
OBJECTS := $(addprefix $(BUILD_DIR)/,$(OBJECTS)) $(IMAGE_OBJ)

# Everything except a pure "clean" needs the compressed kernel as input.
ifneq ($(MAKECMDGOALS),clean)
ifndef KERNEL_IMG_IN
$(error Compressed kernel image not given via KERNEL_IMG_IN)
endif
endif

all: $(KERNEL_IMG_OUT)

# Nothing to install. The target only exists so that callers may request it.
install:

$(BUILD_DIR)/%.o : %.c
	@mkdir -p $(dir $@)
	$(CC) $(CFLAGS) -c -o $@ $<

$(BUILD_DIR)/%.o : %.S
	@mkdir -p $(dir $@)
	$(CC) $(ASFLAGS) -c -o $@ $<

# Wrap the compressed kernel into an ELF object whose payload lives in section
# .kernel. The directory is created here as well because under "make -j" this
# rule may run before any of the compile rules created $(BUILD_DIR).
$(IMAGE_OBJ): $(KERNEL_IMG_IN)
	@mkdir -p $(dir $@)
	$(OBJCOPY) -I binary -O $(O_FORMAT) --rename-section .data=.kernel $< $@

$(IMAGE_ELF): $(OBJECTS)
	@mkdir -p $(dir $@)
	$(LD) $(LDFLAGS) -o $@ $(OBJECTS)

# Strip the ELF container. The result is the raw loader with attached kernel.
$(KERNEL_IMG_OUT): $(IMAGE_ELF)
	@mkdir -p $(dir $@)
	$(OBJCOPY) -O binary $< $@

clean:
	rm -rf $(BUILD_DIR)/
|
||||
|
14
target/linux/realtek/image/rt-loader/include/board.h
Normal file
14
target/linux/realtek/image/rt-loader/include/board.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* rt-loader header
|
||||
* (c) 2025 Markus Stockhausen
|
||||
*/
|
||||
|
||||
#ifndef _BOARD_H_
#define _BOARD_H_

/* Return the DRAM size in bytes as derived from the DRAM config register. */
unsigned int board_get_memory(void);

/* Write a human readable system name of at most len bytes into buffer. */
void board_get_system(char *buffer, int len);

/* Print a halt message and stop processing. Does not return. */
void board_panic(void);

/* Write one character to the serial console. ctx is not evaluated. */
void board_putchar(int ch, void *ctx);

#endif // _BOARD_H_
|
17
target/linux/realtek/image/rt-loader/include/globals.h
Normal file
17
target/linux/realtek/image/rt-loader/include/globals.h
Normal file
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* rt-loader header
|
||||
* (c) 2025 Markus Stockhausen
|
||||
*/
|
||||
|
||||
#ifndef _GLOBALS_H_
|
||||
#define _GLOBALS_H_
|
||||
|
||||
#define KSEG0 0x80000000
|
||||
#define STACK_SIZE 0x10000
|
||||
#define HEAP_SIZE 0x40000
|
||||
#define MEMORY_ALIGNMENT 32
|
||||
|
||||
#define printf(fmt, ...) npf_pprintf(board_putchar, NULL, fmt, ##__VA_ARGS__)
|
||||
#define snprintf npf_snprintf
|
||||
|
||||
#endif // _GLOBALS_H_
|
30
target/linux/realtek/image/rt-loader/include/memory.h
Normal file
30
target/linux/realtek/image/rt-loader/include/memory.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* rt-loader header
|
||||
* (c) 2025 Markus Stockhausen
|
||||
*/
|
||||
|
||||
#ifndef _MEMORY_H_
|
||||
#define _MEMORY_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include "globals.h"
|
||||
|
||||
#define CACHE_HIT_INVALIDATE_I 0x10
|
||||
#define CACHE_HIT_WRITEBACK_INV_D 0x15
|
||||
|
||||
#define ioread32(reg) (*(volatile int *)(reg))
|
||||
#define iowrite32(val, reg) (*(volatile int *)(reg) = val)
|
||||
|
||||
void flush_cache(void *start_addr, unsigned long size);
|
||||
void free(void *ptr);
|
||||
void *malloc(size_t size);
|
||||
int memcmp(const void *s1, const void *s2, size_t count);
|
||||
void *memmove(void *dst, const void *src, size_t count);
|
||||
void *memcpy(void *dst, const void *src, size_t count);
|
||||
void *memset(void *dst, int value, size_t count);
|
||||
size_t strlen(const char *s);
|
||||
|
||||
extern void *_heap_addr;
|
||||
extern void *_heap_addr_max;
|
||||
|
||||
#endif // _MEMORY_H_
|
1203
target/linux/realtek/image/rt-loader/include/nanoprintf.h
Normal file
1203
target/linux/realtek/image/rt-loader/include/nanoprintf.h
Normal file
File diff suppressed because it is too large
Load diff
41
target/linux/realtek/image/rt-loader/linker/linker.ld
Normal file
41
target/linux/realtek/image/rt-loader/linker/linker.ld
Normal file
|
@ -0,0 +1,41 @@
|
|||
ENTRY(_start)
|
||||
|
||||
SECTIONS {
	.text : {
		*(.text)
	}

	.data : ALIGN(32) {
		*(.sdata*)
		*(.data*)
	}

	/*
	 * In MIPS position independent code (PIC), the global offset table (GOT) is a data
	 * structure used to facilitate access to global variables and functions when the code's
	 * final memory location is not known at compile time. The GOT contains absolute addresses
	 * of global symbols, but is itself located using a relative reference. This allows the
	 * code to be relocated at runtime without modification.
	 */
	.got : ALIGN(32) {
		__got_start = .;
		*(.got*)
		__got_end = .;
	}

	/*
	 * Storage for the compressed kernel image that was integrated into the loader during
	 * link time. No code just binary data.
	 */
	.kernel : ALIGN(1) {
		__kernel_data_start = .;
		KEEP(*(.kernel))
		__kernel_data_end = .;
	}

	.bss (NOLOAD) : ALIGN(4) {
		__bss_start = .;
		*(.bss)
		*(.sbss)
		*(COMMON)
		/*
		 * The startup code clears BSS word by word and stops only when its pointer is
		 * equal to __bss_end. Pad to a word boundary so that an object of odd size in
		 * BSS can not make the loop step over the end marker.
		 */
		. = ALIGN(4);
		__bss_end = .;
	}
}
|
110
target/linux/realtek/image/rt-loader/src/board.c
Normal file
110
target/linux/realtek/image/rt-loader/src/board.c
Normal file
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* rt-loader board functions
|
||||
* (c) 2025 Markus Stockhausen
|
||||
*/
|
||||
|
||||
#include "globals.h"
|
||||
#include "memory.h"
|
||||
#include "nanoprintf.h"
|
||||
|
||||
#define DRAM_CONFIG_REG 0xb8001004
|
||||
#define UART_BUFFER_REG 0xb8002000
|
||||
#define UART_LINE_STATUS_REG 0xb8002014
|
||||
#define UART_TX_READY (1 << 29)
|
||||
|
||||
#define RTL838X_ENABLE_RW_MASK 0x3
|
||||
#define RTL838X_INT_RW_CTRL_REG 0xbb000058
|
||||
#define RTL838X_MODEL_NAME_INFO_REG 0xbb0000d4
|
||||
#define RTL839X_MODEL_NAME_INFO_REG 0xbb000ff0
|
||||
#define RTL83XX_CHIP_INFO_EN 0xa0000000
|
||||
#define RTL93XX_MODEL_NAME_INFO_REG 0xbb000004
|
||||
#define RTL93XX_CHIP_INFO_EN 0xa0000
|
||||
|
||||
/*
|
||||
* board_putchar() is the central function to write to serial console of the device. Some printf
|
||||
* libraries (e.g. https://github.com/mpaland/printf) need a fixed function name like _putchar.
|
||||
* To keep the original library as is, link the two functions with gcc compiler option
|
||||
* -D_putchar=board_putchar
|
||||
*/
|
||||
|
||||
void board_putchar(int ch, void *ctx)
|
||||
{
|
||||
while (!(ioread32(UART_LINE_STATUS_REG) & UART_TX_READY));
|
||||
iowrite32(((int)ch) << 24, UART_BUFFER_REG);
|
||||
|
||||
if (ch == '\n')
|
||||
board_putchar('\r', ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* board_get_memory() does what it is named after. On Realtek switches the DRAM config register
|
||||
* has information about bank count, bus width, ... From that the memory size can be derived.
|
||||
*/
|
||||
|
||||
unsigned int board_get_memory(void)
|
||||
{
|
||||
unsigned int dcr = ioread32(DRAM_CONFIG_REG);
|
||||
char ROWCNTv[] = {11, 12, 13, 14, 15, 16};
|
||||
char COLCNTv[] = {8, 9, 10, 11, 12};
|
||||
char BNKCNTv[] = {1, 2, 3};
|
||||
char BUSWIDv[] = {0, 1, 2};
|
||||
|
||||
return 1 << (BNKCNTv[(dcr >> 28) & 0x3] + BUSWIDv[(dcr >> 24) & 0x3] +
|
||||
ROWCNTv[(dcr >> 20) & 0xf] + COLCNTv[(dcr >> 16) & 0xf]);
|
||||
}
|
||||
|
||||
/*
|
||||
* board_get_system() generates a readable system name that will be printed during startup.
|
||||
* Formatting can be whatever is helpful.
|
||||
*/
|
||||
|
||||
void board_get_system(char *buffer, int len)
|
||||
{
|
||||
unsigned int chip_id, model_id, model_version, chip_version;
|
||||
unsigned int reg, val, act;
|
||||
|
||||
act = RTL93XX_CHIP_INFO_EN;
|
||||
reg = RTL93XX_MODEL_NAME_INFO_REG;
|
||||
val = ioread32(reg);
|
||||
|
||||
if ((val & 0xffec0000) == 0x93000000)
|
||||
goto found;
|
||||
|
||||
act = RTL83XX_CHIP_INFO_EN;
|
||||
reg = RTL839X_MODEL_NAME_INFO_REG;
|
||||
val = ioread32(reg);
|
||||
if ((val & 0xfff80000) == 0x83900000)
|
||||
goto found;
|
||||
|
||||
iowrite32(0x3, RTL838X_INT_RW_CTRL_REG);
|
||||
reg = RTL838X_MODEL_NAME_INFO_REG;
|
||||
val = ioread32(reg);
|
||||
found:
|
||||
model_id = val >> 16;
|
||||
model_version = (val >> 11) & 0x1f;
|
||||
|
||||
iowrite32(act, reg + 4);
|
||||
val = ioread32(reg + 4);
|
||||
chip_id = val & 0xffff;
|
||||
|
||||
if (model_id < 0x9300)
|
||||
chip_version = val >> 16 & 0x1f;
|
||||
else
|
||||
chip_version = val >> 28 & 0x0f;
|
||||
|
||||
snprintf(buffer, len, "RTL%04X%c (chip id %04x%c)",
|
||||
model_id, model_version ? model_version + 64 : 0,
|
||||
chip_id, chip_version ? chip_version + 64 : 0);
|
||||
}
|
||||
|
||||
/*
 * board_panic() is the last resort for critical cases. It announces the halt on the console
 * and then spins forever, so it never returns to the caller. An automatic reboot might be
 * added here some day.
 */

void board_panic(void)
{
	printf("halt system\n");

	for (;;)
		;
}
|
123
target/linux/realtek/image/rt-loader/src/main.c
Normal file
123
target/linux/realtek/image/rt-loader/src/main.c
Normal file
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* rt-loader main program
|
||||
* (c) 2025 Markus Stockhausen
|
||||
*
|
||||
* This code was inspired by the OpenWrt lzma loader. Thanks to
|
||||
*
|
||||
* Copyright (C) 2004 Manuel Novoa III (mjn3@codepoet.org)
|
||||
* Copyright (C) 2005 Mineharu Takahara <mtakahar@yahoo.com>
|
||||
* Copyright (C) 2005 by Oleg I. Vdovikin <oleg@cs.msu.su>
|
||||
* Copyright (C) 2011 Gabor Juhos <juhosg@openwrt.org>
|
||||
*/
|
||||
|
||||
#include "board.h"
|
||||
#include "globals.h"
|
||||
#include "memory.h"
|
||||
|
||||
#define NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS 1
|
||||
#define NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS 0
|
||||
#define NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS 0
|
||||
#define NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS 0
|
||||
#define NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS 0
|
||||
#define NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS 0
|
||||
#define NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS 0
|
||||
#define NANOPRINTF_IMPLEMENTATION
|
||||
#include "nanoprintf.h"
|
||||
|
||||
extern void *_kernel_load_addr;
|
||||
extern void *_kernel_data_addr;
|
||||
extern int _kernel_data_size;
|
||||
extern void *_my_load_addr;
|
||||
extern int _my_load_size;
|
||||
|
||||
extern int unlzma(unsigned char *buf, long in_len,
|
||||
long (*fill)(void*, unsigned long),
|
||||
long (*flush)(void*, unsigned long),
|
||||
unsigned char *output,
|
||||
long *outlen,
|
||||
long *posp,
|
||||
void(*error)(char *x));
|
||||
|
||||
typedef void (*entry_func_t)(unsigned long reg_a0, unsigned long reg_a1,
|
||||
unsigned long reg_a2, unsigned long reg_a3);
|
||||
|
||||
void *relocate(void *src, int len)
|
||||
{
|
||||
void *addr;
|
||||
unsigned int offs;
|
||||
|
||||
/*
|
||||
* Relocate to highest possible memory address. This is usually the RAM size minus some
|
||||
* space for the heap and the stack pointer. As we do not have any highmem features
|
||||
* limit this to 256MB.
|
||||
*/
|
||||
|
||||
offs = (board_get_memory() - STACK_SIZE - HEAP_SIZE - len - 1024) & 0xfff0000;
|
||||
addr = (void *)KSEG0 + offs;
|
||||
|
||||
printf("Relocate %d bytes from 0x%08x to 0x%08x\n", len, src, addr);
|
||||
|
||||
memcpy(addr, src, len);
|
||||
flush_cache(addr, len);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
/*
 * welcome() prints the startup banner: the loader name followed by the detected system name
 * and the memory size in megabytes.
 */
void welcome(void)
{
	char name[80];
	unsigned int megabytes;

	board_get_system(name, sizeof(name));
	megabytes = board_get_memory() >> 20;

	printf("rt-loader\n");
	printf("Running on %s with %dMB\n", name, megabytes);
}
|
||||
|
||||
void decompress_error(char *x)
|
||||
{
|
||||
printf("%s\n", x);
|
||||
}
|
||||
|
||||
void *decompress(void *out, void *in, int len)
|
||||
{
|
||||
long outlen;
|
||||
|
||||
printf("Extract kernel with %d bytes from 0x%08x to 0x%08x ...\n", len, in, out);
|
||||
|
||||
if (unlzma(in, len, 0, 0, out, &outlen, 0, decompress_error))
|
||||
board_panic();
|
||||
|
||||
printf("Extracted kernel size is %d bytes\n", outlen);
|
||||
flush_cache(out, outlen);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void main(unsigned long reg_a0, unsigned long reg_a1,
|
||||
unsigned long reg_a2, unsigned long reg_a3)
|
||||
{
|
||||
entry_func_t fn;
|
||||
|
||||
if (_kernel_load_addr == _my_load_addr) {
|
||||
/*
|
||||
* During first run relocate the whole package to the end of memory. Use
|
||||
* _my_load_size as relocation length. That includes the bss section, aka
|
||||
* uninitialized globals. So it is possible to initialize globals during
|
||||
* first run and have them at hand after relocation.
|
||||
*/
|
||||
|
||||
welcome();
|
||||
fn = relocate(_my_load_addr, _my_load_size);
|
||||
fn(reg_a0, reg_a1, reg_a2, reg_a3);
|
||||
} else {
|
||||
/*
|
||||
* During second run extract the attached kernel image to the memory address
|
||||
* that the loader was loaded to in the first run.
|
||||
*/
|
||||
|
||||
fn = decompress(_kernel_load_addr, _kernel_data_addr, _kernel_data_size);
|
||||
|
||||
printf("Booting kernel from 0x%08x ...\n\n", fn);
|
||||
fn(reg_a0, reg_a1, reg_a2, reg_a3);
|
||||
}
|
||||
}
|
122
target/linux/realtek/image/rt-loader/src/memory.c
Normal file
122
target/linux/realtek/image/rt-loader/src/memory.c
Normal file
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* rt-loader memory functions
|
||||
* (c) 2025 Markus Stockhausen
|
||||
*
|
||||
* This is a small function collection to get some rudimentary memory management working when
|
||||
* running bare metal. None of these functions is optimized but works well for current needs.
|
||||
*/
|
||||
|
||||
#include "board.h"
|
||||
#include "globals.h"
|
||||
#include "memory.h"
|
||||
#include "nanoprintf.h"
|
||||
|
||||
#define CACHE_OP(op, addr) \
|
||||
__asm__ __volatile__( \
|
||||
" .set push \n" \
|
||||
" .set noreorder \n" \
|
||||
" .set mips3\n\t \n" \
|
||||
" cache %0, %1 \n" \
|
||||
" .set pop \n" \
|
||||
: \
|
||||
: "i" (op), "R" (*(unsigned char *)(addr)))
|
||||
|
||||
void flush_cache(void *start_addr, unsigned long size)
|
||||
{
|
||||
/*
|
||||
* MIPS cores may have different cache lines. Most common are 16 and 32 bytes. Avoid
|
||||
* detection routines or multiple implementations and take the lowest known value that
|
||||
* will fit fine for cores with longer cache lines
|
||||
*/
|
||||
|
||||
unsigned long lsize = 16;
|
||||
unsigned long addr = (unsigned long)start_addr & ~(lsize - 1);
|
||||
unsigned long aend = ((unsigned long)start_addr + size - 1) & ~(lsize - 1);
|
||||
|
||||
while (1) {
|
||||
CACHE_OP(CACHE_HIT_INVALIDATE_I, addr);
|
||||
CACHE_OP(CACHE_HIT_WRITEBACK_INV_D, addr);
|
||||
if (addr == aend)
|
||||
break;
|
||||
addr += lsize;
|
||||
}
|
||||
}
|
||||
|
||||
void free(void *ptr)
|
||||
{
|
||||
/* this is only one shot allocation */
|
||||
}
|
||||
|
||||
/*
 * memcmp() compares count bytes of s1 and s2. It returns zero if all bytes are equal.
 * Otherwise the sign of the result tells how the first differing bytes relate, with both
 * bytes interpreted as unsigned char as the C standard requires. A plain char comparison
 * would report bytes of 0x80 and above as smaller than bytes below 0x80 wherever char is a
 * signed type.
 *
 * The pointers are volatile like in the other helpers of this file.
 * NOTE(review): presumably this keeps the compiler from replacing the loop with a call to
 * the library function of the same name - confirm before removing.
 */
int memcmp(const void *s1, const void *s2, size_t count)
{
	const volatile unsigned char *p1 = (const volatile unsigned char *)s1;
	const volatile unsigned char *p2 = (const volatile unsigned char *)s2;

	while (count--) {
		unsigned char a = *p1++;
		unsigned char b = *p2++;

		if (a != b)
			return (int)a - (int)b;
	}

	return 0;
}
|
||||
|
||||
/*
 * memmove() copies count bytes from src to dst and returns dst. The ranges may overlap:
 * if dst lies below src the bytes are copied front to back, if it lies above they are
 * copied back to front. Identical pointers copy nothing.
 *
 * The accesses go through volatile pointers like in the other helpers of this file.
 * NOTE(review): presumably this keeps the compiler from replacing the loops with a call to
 * a library function - confirm before removing.
 */
void *memmove(void *dst, const void *src, size_t count)
{
	volatile char *to = (volatile char *)dst;
	volatile char *from = (volatile char *)src;
	size_t i;

	if (to < from) {
		for (i = 0; i < count; i++)
			to[i] = from[i];
	} else if (to > from) {
		for (i = count; i > 0; i--)
			to[i - 1] = from[i - 1];
	}

	return dst;
}
|
||||
|
||||
/*
 * memcpy() copies count bytes from src to dst and returns dst. It is a thin wrapper around
 * memmove(), so overlapping ranges are handled as well.
 */
void *memcpy(void *dst, const void *src, size_t count)
{
	/* hand the result through, a caller may rely on getting dst back */
	return memmove(dst, src, count);
}
|
||||
|
||||
/*
 * memset() fills count bytes at dst with the value c (converted to a byte) and returns dst,
 * as the C standard requires. Returning the advanced write pointer instead would hand the
 * caller the address behind the filled area.
 *
 * The writes go through a volatile pointer like in the other helpers of this file.
 * NOTE(review): presumably this keeps the compiler from replacing the loop with a call to
 * the library function of the same name - confirm before removing.
 */
void *memset(void *dst, int c, size_t count)
{
	volatile char *d = (volatile char *)dst;

	while (count--)
		*d++ = c;

	return dst;
}
|
||||
|
||||
void *malloc(size_t size)
|
||||
{
|
||||
void *start;
|
||||
|
||||
start = (void *)(((unsigned int)_heap_addr + MEMORY_ALIGNMENT - 1) & ~(MEMORY_ALIGNMENT - 1));
|
||||
if ((start + size) > _heap_addr_max) {
|
||||
printf("malloc(%d) failed. Only %dkB of %dkB heap left.\n",
|
||||
size, (_heap_addr_max - start) >> 10, HEAP_SIZE >> 10);
|
||||
board_panic();
|
||||
}
|
||||
|
||||
_heap_addr += size;
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
size_t strlen(const char *s)
|
||||
{
|
||||
const char *p = s;
|
||||
|
||||
while (*p) ++p;
|
||||
|
||||
return (size_t)(p - s);
|
||||
}
|
182
target/linux/realtek/image/rt-loader/src/startup.S
Normal file
182
target/linux/realtek/image/rt-loader/src/startup.S
Normal file
|
@ -0,0 +1,182 @@
|
|||
# rt-loader assembler startup code
|
||||
# (c) 2025 Markus Stockhausen
|
||||
|
||||
#include "globals.h"
|
||||
|
||||
# This start code allows to run a position independent code (PIC) on bare metal. In that case
|
||||
# all addresses are looked up via the global offset table (GOT). But that must be filled during
|
||||
# this initialization sequence. Without a proper GOT using standard "la" instruction in the code
|
||||
# will not work. Provide a macro that avoids the dependency.
|
||||
|
||||
.macro _LA reg, symbol
|
||||
lui \reg, %hi(\symbol)
|
||||
addi \reg, \reg, %lo(\symbol)
|
||||
add \reg, $t9
|
||||
.endm
|
||||
|
||||
.section .text
|
||||
.globl _start
|
||||
.ent _start
|
||||
_start:
|
||||
.set noreorder
|
||||
|
||||
# Determine current program load address and store it into t9.
|
||||
|
||||
bal _where_am_i
|
||||
nop
|
||||
_where_am_i:
|
||||
move $t9, $ra
|
||||
subu $t9, $t9, 0x8
|
||||
|
||||
|
||||
# Check if this is our first run (_kernel_load_addr = 0?)
|
||||
|
||||
_LA $t6, _kernel_load_addr
|
||||
lw $t7, 0($t6)
|
||||
bne $zero, $t7, _init_done
|
||||
nop
|
||||
|
||||
# During first run store the current load address as the target kernel load address.
|
||||
|
||||
sw $t9, 0($t6)
|
||||
|
||||
# Same for the global variables in the BSS section. Clear them only during the first run. This
|
||||
# way the "global program state" can be copied over to the relocation address.
|
||||
|
||||
_LA $t3, __bss_start
|
||||
_LA $t4, __bss_end
|
||||
_bss_zero:
|
||||
beq $t3, $t4, _init_done
|
||||
nop
|
||||
sw $zero, 0($t3)
|
||||
addiu $t3, $t3, 4
|
||||
b _bss_zero
|
||||
nop
|
||||
|
||||
_init_done:
|
||||
|
||||
# Code is running bare metal and no one initializes the global offset table. After the build
|
||||
# process the table is relative to address 0x0. Starting from anywhere else breaks the program.
|
||||
# A manual update is required during startup. Usually this is quite easy by simply adding the
|
||||
# current load address to all entries.
|
||||
# But this code relocates itself to another memory address and starts itself over. At the new
|
||||
# address it will find a global offset table that fits to the previous execution. To solve this
|
||||
# store a copy of the last load address in got_delta variable and only add the difference after
|
||||
# a relocation. Sequence is as follows
|
||||
#
|
||||
# - U-Boot loads the code to 0x80100000
|
||||
# - U-Boot runs the code at 0x80100000
|
||||
# - code identifies its dynamic start_address = 0x80100000
|
||||
# - code reads (initial) _got_delta = 0x00000000
|
||||
# - code adds 0x80100000 to all GOT entries
|
||||
# - code stores _got_delta with 0x80100000
|
||||
# - code copies itself over to a new location 0x85000000
|
||||
# - code starts itself from 0x85000000
|
||||
# - code identifies its dynamic start_address = 0x85000000
|
||||
# - code reads (pre-filled) _got_delta = 0x80100000
|
||||
# - code adds 0x4f00000 (= 0x85000000 - 0x80100000) to all GOT entries
|
||||
# - ...
|
||||
#
|
||||
|
||||
_LA $t6, _got_delta
|
||||
lw $t5, 0($t6)
|
||||
subu $t7, $t9, $t5
|
||||
sw $t9, 0($t6)
|
||||
_LA $t3, __got_start
|
||||
_LA $t4, __got_end
|
||||
_got_patch:
|
||||
beq $t3, $t4, _got_done
|
||||
nop
|
||||
lw $t5, 0($t3)
|
||||
addu $t5, $t5, $t7
|
||||
sw $t5, 0($t3)
|
||||
addiu $t3, $t3, 4
|
||||
b _got_patch
|
||||
nop
|
||||
_got_done:
|
||||
|
||||
# Linker attached kernel to end of package. Store addresses in global variables
|
||||
|
||||
_LA $t8, _my_load_addr
|
||||
sw $t9, 0($t8)
|
||||
|
||||
_LA $t5, __kernel_data_start
|
||||
_LA $t4, _kernel_data_addr
|
||||
sw $t5, 0($t4)
|
||||
|
||||
_LA $t3, __kernel_data_end
|
||||
subu $t3, $t3, $t5
|
||||
_LA $t4, _kernel_data_size
|
||||
sw $t3, 0($t4)
|
||||
|
||||
# Determine own code size by looking where BSS ends.
|
||||
|
||||
_LA $t3, __bss_end
|
||||
subu $t6, $t3, $t9
|
||||
_LA $t4, _my_load_size
|
||||
sw $t6, 0($t4)
|
||||
|
||||
# Setup heap. It will start directly behind BSS
|
||||
|
||||
addiu $t3, MEMORY_ALIGNMENT
|
||||
li $t4, ~(MEMORY_ALIGNMENT - 1)
|
||||
and $t3, $t4
|
||||
|
||||
_LA $t5, _heap_addr
|
||||
sw $t3, 0($t5)
|
||||
|
||||
li $t4, HEAP_SIZE
|
||||
add $t3, $t4
|
||||
|
||||
_LA $t5, _heap_addr_max
|
||||
sw $t3, 0($t5)
|
||||
|
||||
# Setup stack that is located on top of heap.
|
||||
|
||||
li $t4, STACK_SIZE
|
||||
add $sp, $t3, $t4
|
||||
|
||||
# Adapt t9 so it points to main(). This is needed so main() can find the GOT via t9/gp
|
||||
|
||||
_LA $t8, main
|
||||
move $t9, $t8
|
||||
|
||||
# Call main(). The code above only uses t3-t9, ra and sp, so a0-a3 still hold the values this code was entered with.
|
||||
bal main
|
||||
nop
|
||||
|
||||
.end _start
|
||||
|
||||
.section .data
|
||||
.align 4
|
||||
# delta for global offset table initialization
|
||||
_got_delta:
|
||||
.word 0
|
||||
# current heap address for malloc() / free()
|
||||
.globl _heap_addr
|
||||
_heap_addr:
|
||||
.word 0
|
||||
# maximum heap address
|
||||
.globl _heap_addr_max
|
||||
_heap_addr_max:
|
||||
.word 0
|
||||
# current program load address
|
||||
.globl _my_load_addr
|
||||
_my_load_addr:
|
||||
.word 0
|
||||
# total size of code including attached kernel and bss (uninitialized global variables)
|
||||
.globl _my_load_size
|
||||
_my_load_size:
|
||||
.word 0
|
||||
# target load address of kernel = this programs address during initial run
|
||||
.globl _kernel_load_addr
|
||||
_kernel_load_addr:
|
||||
.word 0
|
||||
# absolute start address of attached kernel
|
||||
.globl _kernel_data_addr
|
||||
_kernel_data_addr:
|
||||
.word 0
|
||||
# size of attached kernel
|
||||
.globl _kernel_data_size
|
||||
_kernel_data_size:
|
||||
.word 0
|
663
target/linux/realtek/image/rt-loader/src/unlzma.c
Normal file
663
target/linux/realtek/image/rt-loader/src/unlzma.c
Normal file
|
@ -0,0 +1,663 @@
|
|||
/* Lzma decompressor for Linux kernel. Shamelessly snarfed
|
||||
*from busybox 1.1.1
|
||||
*
|
||||
*Linux kernel adaptation
|
||||
*Copyright (C) 2006 Alain < alain@knaff.lu >
|
||||
*
|
||||
*Based on small lzma deflate implementation/Small range coder
|
||||
*implementation for lzma.
|
||||
*Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
|
||||
*
|
||||
*Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/)
|
||||
*Copyright (C) 1999-2005 Igor Pavlov
|
||||
*
|
||||
*Copyrights of the parts, see headers below.
|
||||
*
|
||||
*
|
||||
*This program is free software; you can redistribute it and/or
|
||||
*modify it under the terms of the GNU Lesser General Public
|
||||
*License as published by the Free Software Foundation; either
|
||||
*version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
*This program is distributed in the hope that it will be useful,
|
||||
*but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
*MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
*Lesser General Public License for more details.
|
||||
*
|
||||
*You should have received a copy of the GNU Lesser General Public
|
||||
*License along with this library; if not, write to the Free Software
|
||||
*Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
static long long read_int(unsigned char *ptr, int size)
|
||||
{
|
||||
int i;
|
||||
long long ret = 0;
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
ret = (ret << 8) | ptr[size-i-1];
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define ENDIAN_CONVERT(x) \
|
||||
x = (typeof(x))read_int((unsigned char *)&x, sizeof(x))
|
||||
|
||||
|
||||
/* Small range coder implementation for lzma.
|
||||
*Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
|
||||
*
|
||||
*Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/)
|
||||
*Copyright (c) 1999-2005 Igor Pavlov
|
||||
*/
|
||||
|
||||
#include "memory.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define LZMA_IOBUF_SIZE 0x10000
|
||||
|
||||
struct rc {
|
||||
long (*fill)(void*, unsigned long);
|
||||
uint8_t *ptr;
|
||||
uint8_t *buffer;
|
||||
uint8_t *buffer_end;
|
||||
long buffer_size;
|
||||
uint32_t code;
|
||||
uint32_t range;
|
||||
uint32_t bound;
|
||||
void (*error)(char *);
|
||||
};
|
||||
|
||||
|
||||
#define RC_TOP_BITS 24
|
||||
#define RC_MOVE_BITS 5
|
||||
#define RC_MODEL_TOTAL_BITS 11
|
||||
|
||||
|
||||
static long nofill(void *buffer, unsigned long len)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Called twice: once at startup and once in rc_normalize() */
|
||||
static void rc_read(struct rc *rc)
|
||||
{
|
||||
rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE);
|
||||
if (rc->buffer_size <= 0)
|
||||
rc->error("unexpected EOF");
|
||||
rc->ptr = rc->buffer;
|
||||
rc->buffer_end = rc->buffer + rc->buffer_size;
|
||||
}
|
||||
|
||||
/* Called once */
|
||||
static inline void rc_init(struct rc *rc,
|
||||
long (*fill)(void*, unsigned long),
|
||||
char *buffer, long buffer_size)
|
||||
{
|
||||
if (fill)
|
||||
rc->fill = fill;
|
||||
else
|
||||
rc->fill = nofill;
|
||||
rc->buffer = (uint8_t *)buffer;
|
||||
rc->buffer_size = buffer_size;
|
||||
rc->buffer_end = rc->buffer + rc->buffer_size;
|
||||
rc->ptr = rc->buffer;
|
||||
|
||||
rc->code = 0;
|
||||
rc->range = 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
static inline void rc_init_code(struct rc *rc)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
if (rc->ptr >= rc->buffer_end)
|
||||
rc_read(rc);
|
||||
rc->code = (rc->code << 8) | *rc->ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */
|
||||
static void rc_do_normalize(struct rc *rc)
|
||||
{
|
||||
if (rc->ptr >= rc->buffer_end)
|
||||
rc_read(rc);
|
||||
rc->range <<= 8;
|
||||
rc->code = (rc->code << 8) | *rc->ptr++;
|
||||
}
|
||||
static inline void rc_normalize(struct rc *rc)
|
||||
{
|
||||
if (rc->range < (1 << RC_TOP_BITS))
|
||||
rc_do_normalize(rc);
|
||||
}
|
||||
|
||||
/* Called 9 times */
|
||||
/* Why rc_is_bit_0_helper exists?
|
||||
*Because we want to always expose (rc->code < rc->bound) to optimizer
|
||||
*/
|
||||
static inline uint32_t rc_is_bit_0_helper(struct rc *rc, uint16_t *p)
|
||||
{
|
||||
rc_normalize(rc);
|
||||
rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
|
||||
return rc->bound;
|
||||
}
|
||||
static inline int rc_is_bit_0(struct rc *rc, uint16_t *p)
|
||||
{
|
||||
uint32_t t = rc_is_bit_0_helper(rc, p);
|
||||
return rc->code < t;
|
||||
}
|
||||
|
||||
/* Called ~10 times, but very small, thus inlined */
|
||||
static inline void rc_update_bit_0(struct rc *rc, uint16_t *p)
|
||||
{
|
||||
rc->range = rc->bound;
|
||||
*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
|
||||
}
|
||||
static inline void rc_update_bit_1(struct rc *rc, uint16_t *p)
|
||||
{
|
||||
rc->range -= rc->bound;
|
||||
rc->code -= rc->bound;
|
||||
*p -= *p >> RC_MOVE_BITS;
|
||||
}
|
||||
|
||||
/* Called 4 times in unlzma loop */
|
||||
static int rc_get_bit(struct rc *rc, uint16_t *p, int *symbol)
|
||||
{
|
||||
if (rc_is_bit_0(rc, p)) {
|
||||
rc_update_bit_0(rc, p);
|
||||
*symbol *= 2;
|
||||
return 0;
|
||||
} else {
|
||||
rc_update_bit_1(rc, p);
|
||||
*symbol = *symbol * 2 + 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Called once */
|
||||
static inline int rc_direct_bit(struct rc *rc)
|
||||
{
|
||||
rc_normalize(rc);
|
||||
rc->range >>= 1;
|
||||
if (rc->code >= rc->range) {
|
||||
rc->code -= rc->range;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called twice */
/*
 * Decode num_levels bits through the probability tree rooted at p.
 * *symbol starts at 1 and doubles as the tree index while decoding; the
 * leading 1 is subtracted at the end so that *symbol holds the plain value.
 */
static inline void rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol)
{
	int remaining;

	*symbol = 1;
	for (remaining = num_levels; remaining != 0; remaining--)
		rc_get_bit(rc, p + *symbol, symbol);

	*symbol -= 1 << num_levels;
}
|
||||
|
||||
|
||||
/*
|
||||
* Small lzma deflate implementation.
|
||||
* Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
|
||||
*
|
||||
* Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/)
|
||||
* Copyright (C) 1999-2005 Igor Pavlov
|
||||
*/
|
||||
|
||||
|
||||
/*
 * Stream header as it is copied byte by byte from the start of the input.
 * Packed, so the in-memory layout is 1 + 4 + 8 = 13 bytes without padding.
 */
struct lzma_header {
	/* Combined lc/lp/pb properties; unlzma() rejects values >= 9 * 5 * 5 */
	uint8_t pos;
	/* Dictionary size; passed through ENDIAN_CONVERT() after reading */
	uint32_t dict_size;
	/* Size of the uncompressed data; passed through ENDIAN_CONVERT() */
	uint64_t dst_size;
} __attribute__((packed));
|
||||
|
||||
|
||||
/* Number of fixed probability slots and slots per literal coder */
#define LZMA_BASE_SIZE			1846
#define LZMA_LIT_SIZE			768

#define LZMA_NUM_POS_BITS_MAX		4

/* Bit widths of the three length ranges */
#define LZMA_LEN_NUM_LOW_BITS		3
#define LZMA_LEN_NUM_MID_BITS		3
#define LZMA_LEN_NUM_HIGH_BITS		8

/* Offsets inside one length coder */
#define LZMA_LEN_CHOICE			0
#define LZMA_LEN_CHOICE_2		(LZMA_LEN_CHOICE + 1)
#define LZMA_LEN_LOW			(LZMA_LEN_CHOICE_2 + 1)
#define LZMA_LEN_MID			(LZMA_LEN_LOW + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS)))
#define LZMA_LEN_HIGH			(LZMA_LEN_MID + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS)))
#define LZMA_NUM_LEN_PROBS		(LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS))

#define LZMA_NUM_STATES			12
#define LZMA_NUM_LIT_STATES		7

#define LZMA_START_POS_MODEL_INDEX	4
#define LZMA_END_POS_MODEL_INDEX	14
#define LZMA_NUM_FULL_DISTANCES		(1 << (LZMA_END_POS_MODEL_INDEX >> 1))

#define LZMA_NUM_POS_SLOT_BITS		6
#define LZMA_NUM_LEN_TO_POS_STATES	4

#define LZMA_NUM_ALIGN_BITS		4

#define LZMA_MATCH_MIN_LEN		2

/*
 * Offsets of the individual models inside the probability array.
 * With the values above LZMA_LITERAL evaluates to 1846, i.e. LZMA_BASE_SIZE.
 */
#define LZMA_IS_MATCH			0
#define LZMA_IS_REP			(LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
#define LZMA_IS_REP_G0			(LZMA_IS_REP + LZMA_NUM_STATES)
#define LZMA_IS_REP_G1			(LZMA_IS_REP_G0 + LZMA_NUM_STATES)
#define LZMA_IS_REP_G2			(LZMA_IS_REP_G1 + LZMA_NUM_STATES)
#define LZMA_IS_REP_0_LONG		(LZMA_IS_REP_G2 + LZMA_NUM_STATES)
#define LZMA_POS_SLOT			(LZMA_IS_REP_0_LONG + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
#define LZMA_SPEC_POS			(LZMA_POS_SLOT + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS))
#define LZMA_ALIGN			(LZMA_SPEC_POS + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX)
#define LZMA_LEN_CODER			(LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS))
#define LZMA_REP_LEN_CODER		(LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS)
#define LZMA_LITERAL			(LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS)
|
||||
|
||||
|
||||
/* Output side of the decompressor */
struct writer {
	/* Output window: caller supplied, or allocated by unlzma() */
	uint8_t *buffer;
	/* Last byte stored by write_byte(); feeds the literal context */
	uint8_t previous_byte;
	/* Next write index into buffer; reset to 0 after each flush */
	size_t buffer_pos;
	/* Size of the internally allocated buffer (set only in that case) */
	int bufsize;
	/* Bytes already handed to flush(); grows by dict_size per flush */
	size_t global_pos;
	/* Optional output callback; NULL keeps all output in buffer */
	long (*flush)(void*, unsigned long);
	/* Parsed stream header (dict_size and dst_size are read from it) */
	struct lzma_header *header;
};
|
||||
|
||||
/* Decoder state carried from one decoded symbol to the next */
struct cstate {
	/* Index of the current coder state */
	int state;
	/* The four most recent match distances, rep0 being the newest */
	uint32_t rep0;
	uint32_t rep1;
	uint32_t rep2;
	uint32_t rep3;
};
|
||||
|
||||
static inline size_t get_pos(struct writer *wr)
|
||||
{
|
||||
return
|
||||
wr->global_pos + wr->buffer_pos;
|
||||
}
|
||||
|
||||
static inline uint8_t peek_old_byte(struct writer *wr, uint32_t offs)
|
||||
{
|
||||
if (!wr->flush) {
|
||||
int32_t pos;
|
||||
while (offs > wr->header->dict_size)
|
||||
offs -= wr->header->dict_size;
|
||||
pos = wr->buffer_pos - offs;
|
||||
return wr->buffer[pos];
|
||||
} else {
|
||||
uint32_t pos = wr->buffer_pos - offs;
|
||||
while (pos >= wr->header->dict_size)
|
||||
pos += wr->header->dict_size;
|
||||
return wr->buffer[pos];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static inline int write_byte(struct writer *wr, uint8_t byte)
|
||||
{
|
||||
wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte;
|
||||
if (wr->flush && wr->buffer_pos == wr->header->dict_size) {
|
||||
wr->buffer_pos = 0;
|
||||
wr->global_pos += wr->header->dict_size;
|
||||
if (wr->flush((char *)wr->buffer, wr->header->dict_size)
|
||||
!= wr->header->dict_size)
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Repeat the byte found offs positions back in the output.
 * Returns the result of write_byte() (0 on success, -1 on flush failure).
 */
static inline int copy_byte(struct writer *wr, uint32_t offs)
{
	const uint8_t old = peek_old_byte(wr, offs);

	return write_byte(wr, old);
}
|
||||
|
||||
static inline int copy_bytes(struct writer *wr,
|
||||
uint32_t rep0, int len)
|
||||
{
|
||||
do {
|
||||
if (copy_byte(wr, rep0))
|
||||
return -1;
|
||||
len--;
|
||||
} while (len != 0 && wr->buffer_pos < wr->header->dst_size);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static inline int process_bit0(struct writer *wr, struct rc *rc,
|
||||
struct cstate *cst, uint16_t *p,
|
||||
int pos_state, uint16_t *prob,
|
||||
int lc, uint32_t literal_pos_mask) {
|
||||
int mi = 1;
|
||||
rc_update_bit_0(rc, prob);
|
||||
prob = (p + LZMA_LITERAL +
|
||||
(LZMA_LIT_SIZE
|
||||
* (((get_pos(wr) & literal_pos_mask) << lc)
|
||||
+ (wr->previous_byte >> (8 - lc))))
|
||||
);
|
||||
|
||||
if (cst->state >= LZMA_NUM_LIT_STATES) {
|
||||
int match_byte = peek_old_byte(wr, cst->rep0);
|
||||
do {
|
||||
int bit;
|
||||
uint16_t *prob_lit;
|
||||
|
||||
match_byte <<= 1;
|
||||
bit = match_byte & 0x100;
|
||||
prob_lit = prob + 0x100 + bit + mi;
|
||||
if (rc_get_bit(rc, prob_lit, &mi)) {
|
||||
if (!bit)
|
||||
break;
|
||||
} else {
|
||||
if (bit)
|
||||
break;
|
||||
}
|
||||
} while (mi < 0x100);
|
||||
}
|
||||
while (mi < 0x100) {
|
||||
uint16_t *prob_lit = prob + mi;
|
||||
rc_get_bit(rc, prob_lit, &mi);
|
||||
}
|
||||
if (cst->state < 4)
|
||||
cst->state = 0;
|
||||
else if (cst->state < 10)
|
||||
cst->state -= 3;
|
||||
else
|
||||
cst->state -= 6;
|
||||
|
||||
return write_byte(wr, mi);
|
||||
}
|
||||
|
||||
static inline int process_bit1(struct writer *wr, struct rc *rc,
|
||||
struct cstate *cst, uint16_t *p,
|
||||
int pos_state, uint16_t *prob) {
|
||||
int offset;
|
||||
uint16_t *prob_len;
|
||||
int num_bits;
|
||||
int len;
|
||||
|
||||
rc_update_bit_1(rc, prob);
|
||||
prob = p + LZMA_IS_REP + cst->state;
|
||||
if (rc_is_bit_0(rc, prob)) {
|
||||
rc_update_bit_0(rc, prob);
|
||||
cst->rep3 = cst->rep2;
|
||||
cst->rep2 = cst->rep1;
|
||||
cst->rep1 = cst->rep0;
|
||||
cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3;
|
||||
prob = p + LZMA_LEN_CODER;
|
||||
} else {
|
||||
rc_update_bit_1(rc, prob);
|
||||
prob = p + LZMA_IS_REP_G0 + cst->state;
|
||||
if (rc_is_bit_0(rc, prob)) {
|
||||
rc_update_bit_0(rc, prob);
|
||||
prob = (p + LZMA_IS_REP_0_LONG
|
||||
+ (cst->state <<
|
||||
LZMA_NUM_POS_BITS_MAX) +
|
||||
pos_state);
|
||||
if (rc_is_bit_0(rc, prob)) {
|
||||
rc_update_bit_0(rc, prob);
|
||||
|
||||
cst->state = cst->state < LZMA_NUM_LIT_STATES ?
|
||||
9 : 11;
|
||||
return copy_byte(wr, cst->rep0);
|
||||
} else {
|
||||
rc_update_bit_1(rc, prob);
|
||||
}
|
||||
} else {
|
||||
uint32_t distance;
|
||||
|
||||
rc_update_bit_1(rc, prob);
|
||||
prob = p + LZMA_IS_REP_G1 + cst->state;
|
||||
if (rc_is_bit_0(rc, prob)) {
|
||||
rc_update_bit_0(rc, prob);
|
||||
distance = cst->rep1;
|
||||
} else {
|
||||
rc_update_bit_1(rc, prob);
|
||||
prob = p + LZMA_IS_REP_G2 + cst->state;
|
||||
if (rc_is_bit_0(rc, prob)) {
|
||||
rc_update_bit_0(rc, prob);
|
||||
distance = cst->rep2;
|
||||
} else {
|
||||
rc_update_bit_1(rc, prob);
|
||||
distance = cst->rep3;
|
||||
cst->rep3 = cst->rep2;
|
||||
}
|
||||
cst->rep2 = cst->rep1;
|
||||
}
|
||||
cst->rep1 = cst->rep0;
|
||||
cst->rep0 = distance;
|
||||
}
|
||||
cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11;
|
||||
prob = p + LZMA_REP_LEN_CODER;
|
||||
}
|
||||
|
||||
prob_len = prob + LZMA_LEN_CHOICE;
|
||||
if (rc_is_bit_0(rc, prob_len)) {
|
||||
rc_update_bit_0(rc, prob_len);
|
||||
prob_len = (prob + LZMA_LEN_LOW
|
||||
+ (pos_state <<
|
||||
LZMA_LEN_NUM_LOW_BITS));
|
||||
offset = 0;
|
||||
num_bits = LZMA_LEN_NUM_LOW_BITS;
|
||||
} else {
|
||||
rc_update_bit_1(rc, prob_len);
|
||||
prob_len = prob + LZMA_LEN_CHOICE_2;
|
||||
if (rc_is_bit_0(rc, prob_len)) {
|
||||
rc_update_bit_0(rc, prob_len);
|
||||
prob_len = (prob + LZMA_LEN_MID
|
||||
+ (pos_state <<
|
||||
LZMA_LEN_NUM_MID_BITS));
|
||||
offset = 1 << LZMA_LEN_NUM_LOW_BITS;
|
||||
num_bits = LZMA_LEN_NUM_MID_BITS;
|
||||
} else {
|
||||
rc_update_bit_1(rc, prob_len);
|
||||
prob_len = prob + LZMA_LEN_HIGH;
|
||||
offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
|
||||
+ (1 << LZMA_LEN_NUM_MID_BITS));
|
||||
num_bits = LZMA_LEN_NUM_HIGH_BITS;
|
||||
}
|
||||
}
|
||||
|
||||
rc_bit_tree_decode(rc, prob_len, num_bits, &len);
|
||||
len += offset;
|
||||
|
||||
if (cst->state < 4) {
|
||||
int pos_slot;
|
||||
|
||||
cst->state += LZMA_NUM_LIT_STATES;
|
||||
prob =
|
||||
p + LZMA_POS_SLOT +
|
||||
((len <
|
||||
LZMA_NUM_LEN_TO_POS_STATES ? len :
|
||||
LZMA_NUM_LEN_TO_POS_STATES - 1)
|
||||
<< LZMA_NUM_POS_SLOT_BITS);
|
||||
rc_bit_tree_decode(rc, prob,
|
||||
LZMA_NUM_POS_SLOT_BITS,
|
||||
&pos_slot);
|
||||
if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
|
||||
int i, mi;
|
||||
num_bits = (pos_slot >> 1) - 1;
|
||||
cst->rep0 = 2 | (pos_slot & 1);
|
||||
if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
|
||||
cst->rep0 <<= num_bits;
|
||||
prob = p + LZMA_SPEC_POS +
|
||||
cst->rep0 - pos_slot - 1;
|
||||
} else {
|
||||
num_bits -= LZMA_NUM_ALIGN_BITS;
|
||||
while (num_bits--)
|
||||
cst->rep0 = (cst->rep0 << 1) |
|
||||
rc_direct_bit(rc);
|
||||
prob = p + LZMA_ALIGN;
|
||||
cst->rep0 <<= LZMA_NUM_ALIGN_BITS;
|
||||
num_bits = LZMA_NUM_ALIGN_BITS;
|
||||
}
|
||||
i = 1;
|
||||
mi = 1;
|
||||
while (num_bits--) {
|
||||
if (rc_get_bit(rc, prob + mi, &mi))
|
||||
cst->rep0 |= i;
|
||||
i <<= 1;
|
||||
}
|
||||
} else
|
||||
cst->rep0 = pos_slot;
|
||||
if (++(cst->rep0) == 0)
|
||||
return 0;
|
||||
if (cst->rep0 > wr->header->dict_size
|
||||
|| cst->rep0 > get_pos(wr))
|
||||
return -1;
|
||||
}
|
||||
|
||||
len += LZMA_MATCH_MIN_LEN;
|
||||
|
||||
return copy_bytes(wr, cst->rep0, len);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int unlzma(unsigned char *buf, long in_len,
|
||||
long (*fill)(void*, unsigned long),
|
||||
long (*flush)(void*, unsigned long),
|
||||
unsigned char *output,
|
||||
long *outlen,
|
||||
long *posp,
|
||||
void(*error)(char *x))
|
||||
{
|
||||
struct lzma_header header;
|
||||
int lc, pb, lp;
|
||||
uint32_t pos_state_mask;
|
||||
uint32_t literal_pos_mask;
|
||||
uint16_t *p;
|
||||
int num_probs;
|
||||
struct rc rc;
|
||||
int i, mi;
|
||||
struct writer wr;
|
||||
struct cstate cst;
|
||||
unsigned char *inbuf;
|
||||
int ret = -1;
|
||||
|
||||
rc.error = error;
|
||||
|
||||
if (buf)
|
||||
inbuf = buf;
|
||||
else
|
||||
inbuf = malloc(LZMA_IOBUF_SIZE);
|
||||
if (!inbuf) {
|
||||
error("Could not allocate input buffer");
|
||||
goto exit_0;
|
||||
}
|
||||
|
||||
cst.state = 0;
|
||||
cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1;
|
||||
|
||||
wr.header = &header;
|
||||
wr.flush = flush;
|
||||
wr.global_pos = 0;
|
||||
wr.previous_byte = 0;
|
||||
wr.buffer_pos = 0;
|
||||
|
||||
rc_init(&rc, fill, inbuf, in_len);
|
||||
|
||||
for (i = 0; i < sizeof(header); i++) {
|
||||
if (rc.ptr >= rc.buffer_end)
|
||||
rc_read(&rc);
|
||||
((unsigned char *)&header)[i] = *rc.ptr++;
|
||||
}
|
||||
|
||||
if (header.pos >= (9 * 5 * 5)) {
|
||||
error("bad header");
|
||||
goto exit_1;
|
||||
}
|
||||
|
||||
mi = 0;
|
||||
lc = header.pos;
|
||||
while (lc >= 9) {
|
||||
mi++;
|
||||
lc -= 9;
|
||||
}
|
||||
pb = 0;
|
||||
lp = mi;
|
||||
while (lp >= 5) {
|
||||
pb++;
|
||||
lp -= 5;
|
||||
}
|
||||
pos_state_mask = (1 << pb) - 1;
|
||||
literal_pos_mask = (1 << lp) - 1;
|
||||
|
||||
ENDIAN_CONVERT(header.dict_size);
|
||||
ENDIAN_CONVERT(header.dst_size);
|
||||
|
||||
if (header.dict_size == 0)
|
||||
header.dict_size = 1;
|
||||
|
||||
if (output)
|
||||
wr.buffer = output;
|
||||
else {
|
||||
wr.bufsize = MIN(header.dst_size, header.dict_size);
|
||||
wr.buffer = malloc(wr.bufsize);
|
||||
}
|
||||
if (wr.buffer == NULL)
|
||||
goto exit_1;
|
||||
|
||||
num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
|
||||
p = (uint16_t *) malloc(num_probs * sizeof(*p));
|
||||
if (p == NULL)
|
||||
goto exit_2;
|
||||
num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
|
||||
for (i = 0; i < num_probs; i++)
|
||||
p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
|
||||
|
||||
rc_init_code(&rc);
|
||||
|
||||
while (get_pos(&wr) < header.dst_size) {
|
||||
int pos_state = get_pos(&wr) & pos_state_mask;
|
||||
uint16_t *prob = p + LZMA_IS_MATCH +
|
||||
(cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state;
|
||||
if (rc_is_bit_0(&rc, prob)) {
|
||||
if (process_bit0(&wr, &rc, &cst, p, pos_state, prob,
|
||||
lc, literal_pos_mask)) {
|
||||
error("LZMA data is corrupt");
|
||||
goto exit_3;
|
||||
}
|
||||
} else {
|
||||
if (process_bit1(&wr, &rc, &cst, p, pos_state, prob)) {
|
||||
error("LZMA data is corrupt");
|
||||
goto exit_3;
|
||||
}
|
||||
if (cst.rep0 == 0)
|
||||
break;
|
||||
}
|
||||
if (rc.buffer_size <= 0)
|
||||
goto exit_3;
|
||||
}
|
||||
|
||||
*outlen = get_pos(&wr);
|
||||
|
||||
if (posp)
|
||||
*posp = rc.ptr-rc.buffer;
|
||||
if (!wr.flush || wr.flush(wr.buffer, wr.buffer_pos) == wr.buffer_pos)
|
||||
ret = 0;
|
||||
exit_3:
|
||||
free(p);
|
||||
exit_2:
|
||||
if (!output)
|
||||
free(wr.buffer);
|
||||
exit_1:
|
||||
if (!buf)
|
||||
free(inbuf);
|
||||
exit_0:
|
||||
return ret;
|
||||
}
|
Loading…
Reference in a new issue