diff --git a/target/linux/realtek/image/rt-loader/Makefile b/target/linux/realtek/image/rt-loader/Makefile new file mode 100644 index 00000000000..6479db705c8 --- /dev/null +++ b/target/linux/realtek/image/rt-loader/Makefile @@ -0,0 +1,98 @@ +# rt-loader make file +# (c) 2025 Markus Stockhausen +# +# This is the make file for the rt-loader (aka runtime or realtek loader). It tries to +# avoid copying files around where possible. Therefore it is controlled by the following +# input parameters +# +# KERNEL_IMG_IN: The filename of an LZMA compressed kernel image. This is required +# KERNEL_IMG_OUT: The filename of the kernel image with the rt-loader prepended. +# If not given it will be created as image.bin into the BUILD_DIR. +# BUILD_DIR: The temporary build dir. If not given it will be set to "build". +# +# To add it into the OpenWrt toolchain just create two new build commands +# +# define Build/rt-loader +# $(MAKE) all clean -C rt-loader CROSS_COMPILE="$(TARGET_CROSS)" \ +# KERNEL_IMG_IN="$@" KERNEL_IMG_OUT="$@.new" BUILD_DIR="$@.build" +# mv "$@.new" "$@" +# endef +# +# define Build/rt-compress +# $(STAGING_DIR_HOST)/bin/xz --format=lzma -9 --stdout "$@" > "$@.new" +# mv "$@.new" "$@" +# endef +# +# Use them in a new kernel build recipe +# +# define Device/uimage-rt-loader +# KERNEL/rt-loader := kernel-bin | append-dtb | rt-compress | rt-loader +# KERNEL := $$(KERNEL/rt-loader) | uImage none +# KERNEL_INITRAMFS := $$(KERNEL/rt-loader) | uImage none +# endef +# +# And finally add it to the target device. E.g. +# +# define Device/linksys_lgs310c +# $(Device/uimage-rt-loader) +# ... +# endef + +CC := $(CROSS_COMPILE)gcc +LD := $(CROSS_COMPILE)ld +OBJCOPY := $(CROSS_COMPILE)objcopy +OBJDUMP := $(CROSS_COMPILE)objdump + +CFLAGS = -fpic -mabicalls -O2 -fno-builtin-printf -Iinclude + +ASFLAGS = -fpic -msoft-float -Iinclude + +LDFLAGS = -static -nostdlib -T linker/linker.ld --no-warn-mismatch + +O_FORMAT = $(shell $(OBJDUMP) -i | head -2 | grep elf32) + +SOURCES = src/startup.S src/main.c src/board.c src/memory.c src/unlzma.c + +BUILD_DIR ?= build + +IMAGE_OBJ := $(BUILD_DIR)/image.o +IMAGE_ELF := $(BUILD_DIR)/image.elf + +KERNEL_IMG_OUT ?= $(BUILD_DIR)/image.bin + +OBJECTS_C = $(filter %.c,$(SOURCES)) +OBJECTS_S = $(filter %.S,$(SOURCES)) + +OBJECTS := $(OBJECTS_S:.S=.o) $(OBJECTS_C:.c=.o) +OBJECTS := $(patsubst %.o, $(BUILD_DIR)/%.o, $(OBJECTS)) $(IMAGE_OBJ) + +ifneq ($(MAKECMDGOALS),clean) +ifndef KERNEL_IMG_IN +$(error Compressed kernel image not given via KERNEL_IMG_IN) +endif +endif + +all: $(KERNEL_IMG_OUT) + +install: + +$(BUILD_DIR)/%.o : %.c + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) -c -o $@ $< + +$(BUILD_DIR)/%.o : %.S + @mkdir -p $(dir $@) + $(CC) $(ASFLAGS) -c -o $@ $< + +$(IMAGE_OBJ): $(KERNEL_IMG_IN) + $(OBJCOPY) -I binary -O $(O_FORMAT) --rename-section .data=.kernel $< $@ + +$(IMAGE_ELF): $(OBJECTS) + $(LD) $(LDFLAGS) -o $@ $(OBJECTS) + +$(KERNEL_IMG_OUT): $(IMAGE_ELF) + $(OBJCOPY) -O binary $< $@ + +clean: + rm -rf $(BUILD_DIR)/ + diff --git a/target/linux/realtek/image/rt-loader/include/board.h b/target/linux/realtek/image/rt-loader/include/board.h new file mode 100644 index 00000000000..b0d0945890a --- /dev/null +++ b/target/linux/realtek/image/rt-loader/include/board.h @@ -0,0 +1,14 @@ +/* + * rt-loader header + * (c) 2025 Markus Stockhausen + */ + +#ifndef _BOARD_H_ +#define _BOARD_H_ + +unsigned int board_get_memory(void); +void board_get_system(char *buffer, int len); +void board_panic(void); +void board_putchar(int ch, void *ctx);; + +#endif // _BOARD_H_ diff --git a/target/linux/realtek/image/rt-loader/include/globals.h b/target/linux/realtek/image/rt-loader/include/globals.h new file mode 100644 index 00000000000..49052b81559 --- /dev/null +++ b/target/linux/realtek/image/rt-loader/include/globals.h @@ -0,0 +1,17 @@ +/* + * rt-loader header + * (c) 2025 Markus Stockhausen + */ + +#ifndef _GLOBALS_H_ +#define _GLOBALS_H_ + +#define KSEG0 0x80000000 +#define STACK_SIZE 0x10000 +#define HEAP_SIZE 0x40000 +#define MEMORY_ALIGNMENT 32 + +#define printf(fmt, ...) npf_pprintf(board_putchar, NULL, fmt, ##__VA_ARGS__) +#define snprintf npf_snprintf + +#endif // _GLOBALS_H_ diff --git a/target/linux/realtek/image/rt-loader/include/memory.h b/target/linux/realtek/image/rt-loader/include/memory.h new file mode 100644 index 00000000000..80d0f8a2835 --- /dev/null +++ b/target/linux/realtek/image/rt-loader/include/memory.h @@ -0,0 +1,30 @@ +/* + * rt-loader header + * (c) 2025 Markus Stockhausen + */ + +#ifndef _MEMORY_H_ +#define _MEMORY_H_ + +#include +#include "globals.h" + +#define CACHE_HIT_INVALIDATE_I 0x10 +#define CACHE_HIT_WRITEBACK_INV_D 0x15 + +#define ioread32(reg) (*(volatile int *)(reg)) +#define iowrite32(val, reg) (*(volatile int *)(reg) = val) + +void flush_cache(void *start_addr, unsigned long size); +void free(void *ptr); +void *malloc(size_t size); +int memcmp(const void *s1, const void *s2, size_t count); +void *memmove(void *dst, const void *src, size_t count); +void *memcpy(void *dst, const void *src, size_t count); +void *memset(void *dst, int value, size_t count); +size_t strlen(const char *s); + +extern void *_heap_addr; +extern void *_heap_addr_max; + +#endif // _MEMORY_H_ diff --git a/target/linux/realtek/image/rt-loader/include/nanoprintf.h b/target/linux/realtek/image/rt-loader/include/nanoprintf.h new file mode 100644 index 00000000000..a415ad9f0df --- /dev/null +++ b/target/linux/realtek/image/rt-loader/include/nanoprintf.h @@ -0,0 +1,1203 @@ +/* nanoprintf v0.5.5: a tiny embeddable printf replacement written in C. + https://github.com/charlesnicholson/nanoprintf + charles.nicholson+nanoprintf@gmail.com + dual-licensed under 0bsd and unlicense, take your pick. see eof for details. */ + +#ifndef NPF_H_INCLUDED +#define NPF_H_INCLUDED + +#include +#include + +// Define this to fully sandbox nanoprintf inside of a translation unit. +#ifdef NANOPRINTF_VISIBILITY_STATIC + #define NPF_VISIBILITY static +#else + #define NPF_VISIBILITY extern +#endif + +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #define NPF_PRINTF_ATTR(FORMAT_INDEX, VARGS_INDEX) \ + __attribute__((format(printf, FORMAT_INDEX, VARGS_INDEX))) +#else + #define NPF_PRINTF_ATTR(FORMAT_INDEX, VARGS_INDEX) +#endif + +// Public API + +#ifdef __cplusplus +#define NPF_RESTRICT +extern "C" { +#else +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +#define NPF_RESTRICT restrict +#else +#define NPF_RESTRICT +#endif +#endif + +// The npf_ functions all return the number of bytes required to express the +// fully-formatted string, not including the null terminator character. +// The npf_ functions do not return negative values, since the lack of 'l' length +// modifier support makes encoding errors impossible. + +NPF_VISIBILITY int npf_snprintf(char * NPF_RESTRICT buffer, + size_t bufsz, + const char * NPF_RESTRICT format, + ...) NPF_PRINTF_ATTR(3, 4); + +NPF_VISIBILITY int npf_vsnprintf(char * NPF_RESTRICT buffer, + size_t bufsz, + char const * NPF_RESTRICT format, + va_list vlist) NPF_PRINTF_ATTR(3, 0); + +typedef void (*npf_putc)(int c, void *ctx); +NPF_VISIBILITY int npf_pprintf(npf_putc pc, + void * NPF_RESTRICT pc_ctx, + char const * NPF_RESTRICT format, + ...) NPF_PRINTF_ATTR(3, 4); + +NPF_VISIBILITY int npf_vpprintf(npf_putc pc, + void * NPF_RESTRICT pc_ctx, + char const * NPF_RESTRICT format, + va_list vlist) NPF_PRINTF_ATTR(3, 0); + +#ifdef __cplusplus +} +#endif + +#endif // NPF_H_INCLUDED + +/* The implementation of nanoprintf begins here, to be compiled only if + NANOPRINTF_IMPLEMENTATION is defined. In a multi-file library what follows would + be nanoprintf.c. */ + +#ifdef NANOPRINTF_IMPLEMENTATION + +#ifndef NPF_IMPLEMENTATION_INCLUDED +#define NPF_IMPLEMENTATION_INCLUDED + +#include +#include + +// The conversion buffer must fit at least UINT64_MAX in octal format with the leading '0'. +#ifndef NANOPRINTF_CONVERSION_BUFFER_SIZE + #define NANOPRINTF_CONVERSION_BUFFER_SIZE 23 +#endif +#if NANOPRINTF_CONVERSION_BUFFER_SIZE < 23 + #error The size of the conversion buffer must be at least 23 bytes. +#endif + +// Pick reasonable defaults if nothing's been configured. +#if !defined(NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_ALT_FORM_FLAG) + #define NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS 1 + #define NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS 1 + #define NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS 1 + #define NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS 0 + #define NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS 1 + #define NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS 0 + #define NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS 0 + #define NANOPRINTF_USE_ALT_FORM_FLAG 1 +#endif + +// If anything's been configured, everything must be configured. +#ifndef NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif + +// Ensure flags are compatible. +#if (NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1) && \ + (NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 0) + #error Precision format specifiers must be enabled if float support is enabled. +#endif + +// intmax_t / uintmax_t require stdint from c99 / c++11 +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + #ifndef _MSC_VER + #ifdef __cplusplus + #if __cplusplus < 201103L + #error large format specifier support requires C++11 or later. + #endif + #else + #if __STDC_VERSION__ < 199409L + #error nanoprintf requires C99 or later. + #endif + #endif + #endif +#endif + +// Figure out if we can disable warnings with pragmas. +#ifdef __clang__ + #define NPF_CLANG 1 + #define NPF_GCC_PAST_4_6 0 +#else + #define NPF_CLANG 0 + #if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) + #define NPF_GCC_PAST_4_6 1 + #else + #define NPF_GCC_PAST_4_6 0 + #endif +#endif + +#if NPF_CLANG || NPF_GCC_PAST_4_6 + #define NPF_HAVE_GCC_WARNING_PRAGMAS 1 +#else + #define NPF_HAVE_GCC_WARNING_PRAGMAS 0 +#endif + +#if NPF_HAVE_GCC_WARNING_PRAGMAS + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wunused-function" + #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" + #ifdef __cplusplus + #pragma GCC diagnostic ignored "-Wold-style-cast" + #endif + #pragma GCC diagnostic ignored "-Wpadded" + #pragma GCC diagnostic ignored "-Wfloat-equal" + #if NPF_CLANG + #pragma GCC diagnostic ignored "-Wc++98-compat-pedantic" + #pragma GCC diagnostic ignored "-Wcovered-switch-default" + #pragma GCC diagnostic ignored "-Wdeclaration-after-statement" + #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" + #ifndef __APPLE__ + #pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" + #endif + #elif NPF_GCC_PAST_4_6 + #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" + #endif +#endif + +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable:4619) // there is no warning number 'number' + // C4619 has to be disabled first! + #pragma warning(disable:4127) // conditional expression is constant + #pragma warning(disable:4505) // unreferenced local function has been removed + #pragma warning(disable:4514) // unreferenced inline function has been removed + #pragma warning(disable:4701) // potentially uninitialized local variable used + #pragma warning(disable:4706) // assignment within conditional expression + #pragma warning(disable:4710) // function not inlined + #pragma warning(disable:4711) // function selected for inline expansion + #pragma warning(disable:4820) // padding added after struct member + #pragma warning(disable:5039) // potentially throwing function passed to extern C function + #pragma warning(disable:5045) // compiler will insert Spectre mitigation for memory load + #pragma warning(disable:5262) // implicit switch fall-through + #pragma warning(disable:26812) // enum type is unscoped +#endif + +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #define NPF_NOINLINE __attribute__((noinline)) + #define NPF_FORCE_INLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) + #define NPF_NOINLINE __declspec(noinline) + #define NPF_FORCE_INLINE inline __forceinline +#else + #define NPF_NOINLINE + #define NPF_FORCE_INLINE +#endif + +#if (NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1) || \ + (NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1) +enum { + NPF_FMT_SPEC_OPT_NONE, + NPF_FMT_SPEC_OPT_LITERAL, + NPF_FMT_SPEC_OPT_STAR, +}; +#endif + +enum { + NPF_FMT_SPEC_LEN_MOD_NONE, +#if NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS == 1 + NPF_FMT_SPEC_LEN_MOD_SHORT, // 'h' + NPF_FMT_SPEC_LEN_MOD_CHAR, // 'hh' +#endif + NPF_FMT_SPEC_LEN_MOD_LONG, // 'l' + NPF_FMT_SPEC_LEN_MOD_LONG_DOUBLE, // 'L' +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + NPF_FMT_SPEC_LEN_MOD_LARGE_LONG_LONG, // 'll' + NPF_FMT_SPEC_LEN_MOD_LARGE_INTMAX, // 'j' + NPF_FMT_SPEC_LEN_MOD_LARGE_SIZET, // 'z' + NPF_FMT_SPEC_LEN_MOD_LARGE_PTRDIFFT, // 't' +#endif +}; + +enum { + NPF_FMT_SPEC_CONV_NONE, + NPF_FMT_SPEC_CONV_PERCENT, // '%' + NPF_FMT_SPEC_CONV_CHAR, // 'c' + NPF_FMT_SPEC_CONV_STRING, // 's' + NPF_FMT_SPEC_CONV_SIGNED_INT, // 'i', 'd' +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + NPF_FMT_SPEC_CONV_BINARY, // 'b' +#endif + NPF_FMT_SPEC_CONV_OCTAL, // 'o' + NPF_FMT_SPEC_CONV_HEX_INT, // 'x', 'X' + NPF_FMT_SPEC_CONV_UNSIGNED_INT, // 'u' + NPF_FMT_SPEC_CONV_POINTER, // 'p' +#if NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS == 1 + NPF_FMT_SPEC_CONV_WRITEBACK, // 'n' +#endif +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + NPF_FMT_SPEC_CONV_FLOAT_DEC, // 'f', 'F' + NPF_FMT_SPEC_CONV_FLOAT_SCI, // 'e', 'E' + NPF_FMT_SPEC_CONV_FLOAT_SHORTEST, // 'g', 'G' + NPF_FMT_SPEC_CONV_FLOAT_HEX, // 'a', 'A' +#endif +}; + +typedef struct npf_format_spec { +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + int field_width; +#endif +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + int prec; + uint8_t prec_opt; +#endif +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + uint8_t field_width_opt; + char left_justified; // '-' + char leading_zero_pad; // '0' +#endif + char prepend; // ' ' or '+' +#if NANOPRINTF_USE_ALT_FORM_FLAG == 1 + char alt_form; // '#' +#endif + char case_adjust; // 'a' - 'A' , or 0 (must be non-negative to work) + uint8_t length_modifier; + uint8_t conv_spec; +} npf_format_spec_t; + +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 0 + typedef long npf_int_t; + typedef unsigned long npf_uint_t; +#else + typedef intmax_t npf_int_t; + typedef uintmax_t npf_uint_t; +#endif + +typedef struct npf_bufputc_ctx { + char *dst; + size_t len; + size_t cur; +} npf_bufputc_ctx_t; + +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + typedef char npf_size_is_ptrdiff[(sizeof(size_t) == sizeof(ptrdiff_t)) ? 1 : -1]; + typedef ptrdiff_t npf_ssize_t; + typedef size_t npf_uptrdiff_t; +#endif + +#ifdef _MSC_VER + #include +#endif + +#define NPF_MIN(x, y) ((x) <= (y) ? (x) : (y)) +#define NPF_MAX(x, y) ((x) >= (y) ? (x) : (y)) + +static int npf_parse_format_spec(char const *format, npf_format_spec_t *out_spec) { + char const *cur = format; + +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + out_spec->left_justified = 0; + out_spec->leading_zero_pad = 0; +#endif + out_spec->case_adjust = 'a' - 'A'; // lowercase + out_spec->prepend = 0; +#if NANOPRINTF_USE_ALT_FORM_FLAG == 1 + out_spec->alt_form = 0; +#endif + + while (*++cur) { // cur points at the leading '%' character + switch (*cur) { // Optional flags +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + case '-': out_spec->left_justified = '-'; out_spec->leading_zero_pad = 0; continue; + case '0': out_spec->leading_zero_pad = !out_spec->left_justified; continue; +#endif + case '+': out_spec->prepend = '+'; continue; + case ' ': if (out_spec->prepend == 0) { out_spec->prepend = ' '; } continue; +#if NANOPRINTF_USE_ALT_FORM_FLAG == 1 + case '#': out_spec->alt_form = '#'; continue; +#endif + default: break; + } + break; + } + +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + out_spec->field_width = 0; + out_spec->field_width_opt = NPF_FMT_SPEC_OPT_NONE; + if (*cur == '*') { + out_spec->field_width_opt = NPF_FMT_SPEC_OPT_STAR; + ++cur; + } else { + while ((*cur >= '0') && (*cur <= '9')) { + out_spec->field_width_opt = NPF_FMT_SPEC_OPT_LITERAL; + out_spec->field_width = (out_spec->field_width * 10) + (*cur++ - '0'); + } + } +#endif + +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec = 0; + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; + if (*cur == '.') { + ++cur; + if (*cur == '*') { + out_spec->prec_opt = NPF_FMT_SPEC_OPT_STAR; + ++cur; + } else { + if (*cur == '-') { + ++cur; + } else { + out_spec->prec_opt = NPF_FMT_SPEC_OPT_LITERAL; + } + while ((*cur >= '0') && (*cur <= '9')) { + out_spec->prec = (out_spec->prec * 10) + (*cur++ - '0'); + } + } + } +#endif + + uint_fast8_t tmp_conv = NPF_FMT_SPEC_CONV_NONE; + out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_NONE; + switch (*cur++) { // Length modifier +#if NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS == 1 + case 'h': + out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_SHORT; + if (*cur == 'h') { + out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_CHAR; + ++cur; + } + break; +#endif + case 'l': + out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LONG; +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + if (*cur == 'l') { + out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LARGE_LONG_LONG; + ++cur; + } +#endif + break; +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + case 'L': out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LONG_DOUBLE; break; +#endif +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + case 'j': out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LARGE_INTMAX; break; + case 'z': out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LARGE_SIZET; break; + case 't': out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LARGE_PTRDIFFT; break; +#endif + default: --cur; break; + } + + switch (*cur++) { // Conversion specifier + case '%': out_spec->conv_spec = NPF_FMT_SPEC_CONV_PERCENT; +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; + out_spec->prec = 0; +#endif + break; + + case 'c': out_spec->conv_spec = NPF_FMT_SPEC_CONV_CHAR; +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; + out_spec->prec = 0; +#endif + break; + + case 's': out_spec->conv_spec = NPF_FMT_SPEC_CONV_STRING; + break; + + case 'i': + case 'd': tmp_conv = NPF_FMT_SPEC_CONV_SIGNED_INT; goto finish; + case 'o': tmp_conv = NPF_FMT_SPEC_CONV_OCTAL; goto finish; + case 'u': tmp_conv = NPF_FMT_SPEC_CONV_UNSIGNED_INT; goto finish; + case 'X': out_spec->case_adjust = 0; + case 'x': tmp_conv = NPF_FMT_SPEC_CONV_HEX_INT; goto finish; + finish: + out_spec->conv_spec = (uint8_t)tmp_conv; +#if (NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1) && \ + (NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1) + if (out_spec->prec_opt != NPF_FMT_SPEC_OPT_NONE) { out_spec->leading_zero_pad = 0; } +#endif + break; + +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + case 'F': out_spec->case_adjust = 0; + case 'f': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_FLOAT_DEC; + if (out_spec->prec_opt == NPF_FMT_SPEC_OPT_NONE) { out_spec->prec = 6; } + break; + + case 'E': out_spec->case_adjust = 0; + case 'e': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_FLOAT_SCI; + if (out_spec->prec_opt == NPF_FMT_SPEC_OPT_NONE) { out_spec->prec = 6; } + break; + + case 'G': out_spec->case_adjust = 0; + case 'g': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_FLOAT_SHORTEST; + if (out_spec->prec_opt == NPF_FMT_SPEC_OPT_NONE) { out_spec->prec = 6; } + break; + + case 'A': out_spec->case_adjust = 0; + case 'a': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_FLOAT_HEX; + if (out_spec->prec_opt == NPF_FMT_SPEC_OPT_NONE) { out_spec->prec = 6; } + break; +#endif + +#if NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS == 1 + case 'n': + // todo: reject string if flags or width or precision exist + out_spec->conv_spec = NPF_FMT_SPEC_CONV_WRITEBACK; +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; +#endif + break; +#endif + + case 'p': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_POINTER; +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; +#endif + break; + +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + case 'B': + out_spec->case_adjust = 0; + case 'b': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_BINARY; + break; +#endif + + default: return 0; + } + + return (int)(cur - format); +} + +static NPF_NOINLINE int npf_utoa_rev( + npf_uint_t val, char *buf, uint_fast8_t base, char case_adj) { + uint_fast8_t n = 0; + do { + int_fast8_t const d = (int_fast8_t)(val % base); + *buf++ = (char)(((d < 10) ? '0' : ('A' - 10 + case_adj)) + d); + ++n; + val /= base; + } while (val); + return (int)n; +} + +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + +#include + +#if (DBL_MANT_DIG <= 11) && (DBL_MAX_EXP <= 16) + typedef uint_fast16_t npf_double_bin_t; + typedef int_fast8_t npf_ftoa_exp_t; +#elif (DBL_MANT_DIG <= 24) && (DBL_MAX_EXP <= 128) + typedef uint_fast32_t npf_double_bin_t; + typedef int_fast8_t npf_ftoa_exp_t; +#elif (DBL_MANT_DIG <= 53) && (DBL_MAX_EXP <= 1024) + typedef uint_fast64_t npf_double_bin_t; + typedef int_fast16_t npf_ftoa_exp_t; +#else + #error Unsupported width of the double type. +#endif + +// The floating point conversion code works with an unsigned integer type of any size. +#ifndef NANOPRINTF_CONVERSION_FLOAT_TYPE + #define NANOPRINTF_CONVERSION_FLOAT_TYPE unsigned int +#endif +typedef NANOPRINTF_CONVERSION_FLOAT_TYPE npf_ftoa_man_t; + +#if (NANOPRINTF_CONVERSION_BUFFER_SIZE <= UINT_FAST8_MAX) && (UINT_FAST8_MAX <= INT_MAX) + typedef uint_fast8_t npf_ftoa_dec_t; +#else + typedef int npf_ftoa_dec_t; +#endif + +enum { + NPF_DOUBLE_EXP_MASK = DBL_MAX_EXP * 2 - 1, + NPF_DOUBLE_EXP_BIAS = DBL_MAX_EXP - 1, + NPF_DOUBLE_MAN_BITS = DBL_MANT_DIG - 1, + NPF_DOUBLE_BIN_BITS = sizeof(npf_double_bin_t) * CHAR_BIT, + NPF_DOUBLE_SIGN_POS = sizeof(double) * CHAR_BIT - 1, + NPF_FTOA_MAN_BITS = sizeof(npf_ftoa_man_t) * CHAR_BIT, + NPF_FTOA_SHIFT_BITS = + ((NPF_FTOA_MAN_BITS < DBL_MANT_DIG) ? NPF_FTOA_MAN_BITS : DBL_MANT_DIG) - 1 +}; + +/* Generally, floating-point conversion implementations use + grisu2 (https://bit.ly/2JgMggX) and ryu (https://bit.ly/2RLXSg0) algorithms, + which are mathematically exact and fast, but require large lookup tables. + + This implementation was inspired by Wojciech Muła's (zdjęcia@garnek.pl) + algorithm (http://0x80.pl/notesen/2015-12-29-float-to-string.html) and + extended further by adding dynamic scaling and configurable integer width by + Oskars Rubenis (https://github.com/Okarss). */ + +static NPF_FORCE_INLINE npf_double_bin_t npf_double_to_int_rep(double f) { + // Union-cast is UB pre-C11 and in all C++; the compiler optimizes the code below. + npf_double_bin_t bin; + char const *src = (char const *)&f; + char *dst = (char *)&bin; + for (uint_fast8_t i = 0; i < sizeof(f); ++i) { dst[i] = src[i]; } + return bin; +} + +static int npf_ftoa_rev(char *buf, npf_format_spec_t const *spec, double f) { + char const *ret = NULL; + npf_double_bin_t bin = npf_double_to_int_rep(f); + + // Unsigned -> signed int casting is IB and can raise a signal but generally doesn't. + npf_ftoa_exp_t exp = + (npf_ftoa_exp_t)((npf_ftoa_exp_t)(bin >> NPF_DOUBLE_MAN_BITS) & NPF_DOUBLE_EXP_MASK); + + bin &= ((npf_double_bin_t)0x1 << NPF_DOUBLE_MAN_BITS) - 1; + if (exp == (npf_ftoa_exp_t)NPF_DOUBLE_EXP_MASK) { // special value + ret = (bin) ? "NAN" : "FNI"; + goto exit; + } + if (spec->prec > (NANOPRINTF_CONVERSION_BUFFER_SIZE - 2)) { goto exit; } + if (exp) { // normal number + bin |= (npf_double_bin_t)0x1 << NPF_DOUBLE_MAN_BITS; + } else { // subnormal number + ++exp; + } + exp = (npf_ftoa_exp_t)(exp - NPF_DOUBLE_EXP_BIAS); + + uint_fast8_t carry; carry = 0; + npf_ftoa_dec_t end, dec; dec = (npf_ftoa_dec_t)spec->prec; + if (dec +#if NANOPRINTF_USE_ALT_FORM_FLAG == 1 + || spec->alt_form +#endif + ) { + buf[dec++] = '.'; + } + + { // Integer part + npf_ftoa_man_t man_i; + + if (exp >= 0) { + int_fast8_t shift_i = + (int_fast8_t)((exp > NPF_FTOA_SHIFT_BITS) ? (int)NPF_FTOA_SHIFT_BITS : exp); + npf_ftoa_exp_t exp_i = (npf_ftoa_exp_t)(exp - shift_i); + shift_i = (int_fast8_t)(NPF_DOUBLE_MAN_BITS - shift_i); + man_i = (npf_ftoa_man_t)(bin >> shift_i); + + if (exp_i) { + if (shift_i) { + carry = (bin >> (shift_i - 1)) & 0x1; + } + exp = NPF_DOUBLE_MAN_BITS; // invalidate the fraction part + } + + // Scale the exponent from base-2 to base-10. + for (; exp_i; --exp_i) { + if (!(man_i & ((npf_ftoa_man_t)0x1 << (NPF_FTOA_MAN_BITS - 1)))) { + man_i = (npf_ftoa_man_t)(man_i << 1); + man_i = (npf_ftoa_man_t)(man_i | carry); carry = 0; + } else { + if (dec >= NANOPRINTF_CONVERSION_BUFFER_SIZE) { goto exit; } + buf[dec++] = '0'; + carry = (((uint_fast8_t)(man_i % 5) + carry) > 2); + man_i /= 5; + } + } + } else { + man_i = 0; + } + end = dec; + + do { // Print the integer + if (end >= NANOPRINTF_CONVERSION_BUFFER_SIZE) { goto exit; } + buf[end++] = (char)('0' + (char)(man_i % 10)); + man_i /= 10; + } while (man_i); + } + + { // Fraction part + npf_ftoa_man_t man_f; + npf_ftoa_dec_t dec_f = (npf_ftoa_dec_t)spec->prec; + + if (exp < NPF_DOUBLE_MAN_BITS) { + int_fast8_t shift_f = (int_fast8_t)((exp < 0) ? -1 : exp); + npf_ftoa_exp_t exp_f = (npf_ftoa_exp_t)(exp - shift_f); + npf_double_bin_t bin_f = + bin << ((NPF_DOUBLE_BIN_BITS - NPF_DOUBLE_MAN_BITS) + shift_f); + + // This if-else statement can be completely optimized at compile time. + if (NPF_DOUBLE_BIN_BITS > NPF_FTOA_MAN_BITS) { + man_f = (npf_ftoa_man_t)(bin_f >> ((unsigned)(NPF_DOUBLE_BIN_BITS - + NPF_FTOA_MAN_BITS) % + NPF_DOUBLE_BIN_BITS)); + carry = (uint_fast8_t)((bin_f >> ((unsigned)(NPF_DOUBLE_BIN_BITS - + NPF_FTOA_MAN_BITS - 1) % + NPF_DOUBLE_BIN_BITS)) & 0x1); + } else { + man_f = (npf_ftoa_man_t)((npf_ftoa_man_t)bin_f + << ((unsigned)(NPF_FTOA_MAN_BITS - + NPF_DOUBLE_BIN_BITS) % NPF_FTOA_MAN_BITS)); + carry = 0; + } + + // Scale the exponent from base-2 to base-10 and prepare the first digit. + for (uint_fast8_t digit = 0; dec_f && (exp_f < 4); ++exp_f) { + if ((man_f > ((npf_ftoa_man_t)-4 / 5)) || digit) { + carry = (uint_fast8_t)(man_f & 0x1); + man_f = (npf_ftoa_man_t)(man_f >> 1); + } else { + man_f = (npf_ftoa_man_t)(man_f * 5); + if (carry) { man_f = (npf_ftoa_man_t)(man_f + 3); carry = 0; } + if (exp_f < 0) { + buf[--dec_f] = '0'; + } else { + ++digit; + } + } + } + man_f = (npf_ftoa_man_t)(man_f + carry); + carry = (exp_f >= 0); + dec = 0; + } else { + man_f = 0; + } + + if (dec_f) { + // Print the fraction + for (;;) { + buf[--dec_f] = (char)('0' + (char)(man_f >> (NPF_FTOA_MAN_BITS - 4))); + man_f = (npf_ftoa_man_t)(man_f & ~((npf_ftoa_man_t)0xF << (NPF_FTOA_MAN_BITS - 4))); + if (!dec_f) { break; } + man_f = (npf_ftoa_man_t)(man_f * 10); + } + man_f = (npf_ftoa_man_t)(man_f << 4); + } + if (exp < NPF_DOUBLE_MAN_BITS) { + carry &= (uint_fast8_t)(man_f >> (NPF_FTOA_MAN_BITS - 1)); + } + } + + // Round the number + for (; carry; ++dec) { + if (dec >= NANOPRINTF_CONVERSION_BUFFER_SIZE) { goto exit; } + if (dec >= end) { buf[end++] = '0'; } + if (buf[dec] == '.') { continue; } + carry = (buf[dec] == '9'); + buf[dec] = (char)(carry ? '0' : (buf[dec] + 1)); + } + + return (int)end; +exit: + if (!ret) { ret = "RRE"; } + uint_fast8_t i; + for (i = 0; ret[i]; ++i) { buf[i] = (char)(ret[i] + spec->case_adjust); } + return -(int)i; +} + +#endif // NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS + +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 +static int npf_bin_len(npf_uint_t u) { + // Return the length of the binary string format of 'u', preferring intrinsics. + if (!u) { return 1; } + +#ifdef _MSC_VER // Win64, use _BSR64 for everything. If x86, use _BSR when non-large. + #ifdef _M_X64 + #define NPF_HAVE_BUILTIN_CLZ + #define NPF_CLZ _BitScanReverse64 + #elif NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 0 + #define NPF_HAVE_BUILTIN_CLZ + #define NPF_CLZ _BitScanReverse + #endif + #ifdef NPF_HAVE_BUILTIN_CLZ + unsigned long idx; + NPF_CLZ(&idx, u); + return (int)(idx + 1); + #endif +#elif NPF_CLANG || NPF_GCC_PAST_4_6 + #define NPF_HAVE_BUILTIN_CLZ + #if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + #define NPF_CLZ(X) ((sizeof(long long) * CHAR_BIT) - (size_t)__builtin_clzll(X)) + #else + #define NPF_CLZ(X) ((sizeof(long) * CHAR_BIT) - (size_t)__builtin_clzl(X)) + #endif + return (int)NPF_CLZ(u); +#endif + +#ifndef NPF_HAVE_BUILTIN_CLZ + int n; + for (n = 0; u; ++n, u >>= 1); // slow but small software fallback + return n; +#else + #undef NPF_HAVE_BUILTIN_CLZ + #undef NPF_CLZ +#endif +} +#endif + +static void npf_bufputc(int c, void *ctx) { + npf_bufputc_ctx_t *bpc = (npf_bufputc_ctx_t *)ctx; + if (bpc->cur < bpc->len) { bpc->dst[bpc->cur++] = (char)c; } +} + +static void npf_bufputc_nop(int c, void *ctx) { (void)c; (void)ctx; } + +typedef struct npf_cnt_putc_ctx { + npf_putc pc; + void *ctx; + int n; +} npf_cnt_putc_ctx_t; + +static void npf_putc_cnt(int c, void *ctx) { + npf_cnt_putc_ctx_t *pc_cnt = (npf_cnt_putc_ctx_t *)ctx; + ++pc_cnt->n; + pc_cnt->pc(c, pc_cnt->ctx); // sibling-call optimization +} + +#define NPF_PUTC(VAL) do { npf_putc_cnt((int)(VAL), &pc_cnt); } while (0) + +#define NPF_EXTRACT(MOD, CAST_TO, EXTRACT_AS) \ + case NPF_FMT_SPEC_LEN_MOD_##MOD: val = (CAST_TO)va_arg(args, EXTRACT_AS); break + +#define NPF_WRITEBACK(MOD, TYPE) \ + case NPF_FMT_SPEC_LEN_MOD_##MOD: *(va_arg(args, TYPE *)) = (TYPE)pc_cnt.n; break + +int npf_vpprintf(npf_putc pc, void *pc_ctx, char const *format, va_list args) { + npf_format_spec_t fs; + char const *cur = format; + npf_cnt_putc_ctx_t pc_cnt; + pc_cnt.pc = pc; + pc_cnt.ctx = pc_ctx; + pc_cnt.n = 0; + + while (*cur) { + int const fs_len = (*cur != '%') ? 0 : npf_parse_format_spec(cur, &fs); + if (!fs_len) { NPF_PUTC(*cur++); continue; } + cur += fs_len; + + // Extract star-args immediately +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + if (fs.field_width_opt == NPF_FMT_SPEC_OPT_STAR) { + fs.field_width = va_arg(args, int); + if (fs.field_width < 0) { + fs.field_width = -fs.field_width; + fs.left_justified = 1; + } + } +#endif +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + if (fs.prec_opt == NPF_FMT_SPEC_OPT_STAR) { + fs.prec = va_arg(args, int); + if (fs.prec < 0) { fs.prec_opt = NPF_FMT_SPEC_OPT_NONE; } + } +#endif + + union { char cbuf_mem[NANOPRINTF_CONVERSION_BUFFER_SIZE]; npf_uint_t binval; } u; + char *cbuf = u.cbuf_mem, sign_c = 0; + int cbuf_len = 0; + char need_0x = 0; +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + int field_pad = 0; + char pad_c = 0; +#endif +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + int prec_pad = 0; +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + uint_fast8_t zero = 0; +#endif +#endif + + // Extract and convert the argument to string, point cbuf at the text. + switch (fs.conv_spec) { + case NPF_FMT_SPEC_CONV_PERCENT: + *cbuf = '%'; + cbuf_len = 1; + break; + + case NPF_FMT_SPEC_CONV_CHAR: + *cbuf = (char)va_arg(args, int); + cbuf_len = (*cbuf) ? 1 : 0; + break; + + case NPF_FMT_SPEC_CONV_STRING: { + cbuf = va_arg(args, char *); +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + for (char const *s = cbuf; + ((fs.prec_opt == NPF_FMT_SPEC_OPT_NONE) || (cbuf_len < fs.prec)) && cbuf && *s; + ++s, ++cbuf_len); +#else + for (char const *s = cbuf; cbuf && *s; ++s, ++cbuf_len); // strlen +#endif + } break; + + case NPF_FMT_SPEC_CONV_SIGNED_INT: { + npf_int_t val = 0; + switch (fs.length_modifier) { + NPF_EXTRACT(NONE, int, int); +#if NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS == 1 + NPF_EXTRACT(SHORT, short, int); + NPF_EXTRACT(CHAR, signed char, int); +#endif + NPF_EXTRACT(LONG, long, long); +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + NPF_EXTRACT(LARGE_LONG_LONG, long long, long long); + NPF_EXTRACT(LARGE_INTMAX, intmax_t, intmax_t); + NPF_EXTRACT(LARGE_SIZET, npf_ssize_t, npf_ssize_t); + NPF_EXTRACT(LARGE_PTRDIFFT, ptrdiff_t, ptrdiff_t); +#endif + default: break; + } + + sign_c = (val < 0) ? '-' : fs.prepend; + +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + zero = !val; +#endif + // special case, if prec and value are 0, skip + if (!val && (fs.prec_opt != NPF_FMT_SPEC_OPT_NONE) && !fs.prec) { + cbuf_len = 0; + } else +#endif + { + npf_uint_t uval = (npf_uint_t)val; + if (val < 0) { uval = 0 - uval; } + cbuf_len = npf_utoa_rev(uval, cbuf, 10, fs.case_adjust); + } + } break; + +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + case NPF_FMT_SPEC_CONV_BINARY: +#endif + case NPF_FMT_SPEC_CONV_OCTAL: + case NPF_FMT_SPEC_CONV_HEX_INT: + case NPF_FMT_SPEC_CONV_UNSIGNED_INT: + case NPF_FMT_SPEC_CONV_POINTER: { + npf_uint_t val = 0; + + if (fs.conv_spec == NPF_FMT_SPEC_CONV_POINTER) { + val = (npf_uint_t)(uintptr_t)va_arg(args, void *); + } else { + switch (fs.length_modifier) { + NPF_EXTRACT(NONE, unsigned, unsigned); +#if NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS == 1 + NPF_EXTRACT(SHORT, unsigned short, unsigned); + NPF_EXTRACT(CHAR, unsigned char, unsigned); +#endif + NPF_EXTRACT(LONG, unsigned long, unsigned long); +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + NPF_EXTRACT(LARGE_LONG_LONG, unsigned long long, unsigned long long); + NPF_EXTRACT(LARGE_INTMAX, uintmax_t, uintmax_t); + NPF_EXTRACT(LARGE_SIZET, size_t, size_t); + NPF_EXTRACT(LARGE_PTRDIFFT, npf_uptrdiff_t, npf_uptrdiff_t); +#endif + default: break; + } + } + +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + zero = !val; +#endif + if (!val && (fs.prec_opt != NPF_FMT_SPEC_OPT_NONE) && !fs.prec) { + // Zero value and explicitly-requested zero precision means "print nothing". +#if NANOPRINTF_USE_ALT_FORM_FLAG == 1 + if ((fs.conv_spec == NPF_FMT_SPEC_CONV_OCTAL) && fs.alt_form) { + fs.prec = 1; // octal special case, print a single '0' + } +#endif + } else +#endif +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + if (fs.conv_spec == NPF_FMT_SPEC_CONV_BINARY) { + cbuf_len = npf_bin_len(val); u.binval = val; + } else +#endif + { + uint_fast8_t const base = (fs.conv_spec == NPF_FMT_SPEC_CONV_OCTAL) ? + 8u : ((fs.conv_spec == NPF_FMT_SPEC_CONV_UNSIGNED_INT) ? 10u : 16u); + cbuf_len = npf_utoa_rev(val, cbuf, base, fs.case_adjust); + } + +#if NANOPRINTF_USE_ALT_FORM_FLAG == 1 + if (val && fs.alt_form && (fs.conv_spec == NPF_FMT_SPEC_CONV_OCTAL)) { + cbuf[cbuf_len++] = '0'; // OK to add leading octal '0' immediately. + } + + if (val && fs.alt_form) { // 0x or 0b but can't write it yet. + if ((fs.conv_spec == NPF_FMT_SPEC_CONV_HEX_INT) || + (fs.conv_spec == NPF_FMT_SPEC_CONV_POINTER)) { need_0x = 'X'; } +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + else if (fs.conv_spec == NPF_FMT_SPEC_CONV_BINARY) { need_0x = 'B'; } +#endif + if (need_0x) { need_0x = (char)(need_0x + fs.case_adjust); } + } +#endif + } break; + +#if NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS == 1 + case NPF_FMT_SPEC_CONV_WRITEBACK: + switch (fs.length_modifier) { + NPF_WRITEBACK(NONE, int); +#if NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS == 1 + NPF_WRITEBACK(SHORT, short); + NPF_WRITEBACK(CHAR, signed char); +#endif + NPF_WRITEBACK(LONG, long); +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + NPF_WRITEBACK(LARGE_LONG_LONG, long long); + NPF_WRITEBACK(LARGE_INTMAX, intmax_t); + NPF_WRITEBACK(LARGE_SIZET, npf_ssize_t); + NPF_WRITEBACK(LARGE_PTRDIFFT, ptrdiff_t); +#endif + default: break; + } break; +#endif + +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + case NPF_FMT_SPEC_CONV_FLOAT_DEC: + case NPF_FMT_SPEC_CONV_FLOAT_SCI: + case NPF_FMT_SPEC_CONV_FLOAT_SHORTEST: + case NPF_FMT_SPEC_CONV_FLOAT_HEX: { + double val; + if (fs.length_modifier == NPF_FMT_SPEC_LEN_MOD_LONG_DOUBLE) { + val = (double)va_arg(args, long double); + } else { + val = va_arg(args, double); + } + + sign_c = (npf_double_to_int_rep(val) >> NPF_DOUBLE_SIGN_POS) ? '-' : fs.prepend; +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + zero = (val == 0.); +#endif + cbuf_len = npf_ftoa_rev(cbuf, &fs, val); + if (cbuf_len < 0) { // negative means text (not number), so ignore the '0' flag + cbuf_len = -cbuf_len; +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + fs.leading_zero_pad = 0; +#endif + } + } break; +#endif + default: break; + } + +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + // Compute the field width pad character + if (fs.field_width_opt != NPF_FMT_SPEC_OPT_NONE) { + if (fs.leading_zero_pad) { +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + if ((fs.prec_opt != NPF_FMT_SPEC_OPT_NONE) && !fs.prec && zero) { + pad_c = ' '; + } else +#endif + { pad_c = '0'; } + } else { pad_c = ' '; } + } +#endif + + // Compute the number of bytes to truncate or '0'-pad. +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + if (fs.conv_spec != NPF_FMT_SPEC_CONV_STRING) { +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + // float precision is after the decimal point + if ((fs.conv_spec != NPF_FMT_SPEC_CONV_FLOAT_DEC) && + (fs.conv_spec != NPF_FMT_SPEC_CONV_FLOAT_SCI) && + (fs.conv_spec != NPF_FMT_SPEC_CONV_FLOAT_SHORTEST) && + (fs.conv_spec != NPF_FMT_SPEC_CONV_FLOAT_HEX)) +#endif + { prec_pad = NPF_MAX(0, fs.prec - cbuf_len); } + } +#endif + +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + // Given the full converted length, how many pad bytes? + field_pad = fs.field_width - cbuf_len - !!sign_c; + if (need_0x) { field_pad -= 2; } +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + field_pad -= prec_pad; +#endif + field_pad = NPF_MAX(0, field_pad); + + // Apply right-justified field width if requested + if (!fs.left_justified && pad_c) { // If leading zeros pad, sign goes first. + if (pad_c == '0') { + if (sign_c) { NPF_PUTC(sign_c); sign_c = 0; } + // Pad byte is '0', write '0x' before '0' pad chars. + if (need_0x) { NPF_PUTC('0'); NPF_PUTC(need_0x); } + } + while (field_pad-- > 0) { NPF_PUTC(pad_c); } + // Pad byte is ' ', write '0x' after ' ' pad chars but before number. + if ((pad_c != '0') && need_0x) { NPF_PUTC('0'); NPF_PUTC(need_0x); } + } else +#endif + { if (need_0x) { NPF_PUTC('0'); NPF_PUTC(need_0x); } } // no pad, '0x' requested. + + // Write the converted payload + if (fs.conv_spec == NPF_FMT_SPEC_CONV_STRING) { + for (int i = 0; cbuf && (i < cbuf_len); ++i) { NPF_PUTC(cbuf[i]); } + } else { + if (sign_c) { NPF_PUTC(sign_c); } +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + while (prec_pad-- > 0) { NPF_PUTC('0'); } // int precision leads. +#endif +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + if (fs.conv_spec == NPF_FMT_SPEC_CONV_BINARY) { + while (cbuf_len) { NPF_PUTC('0' + ((u.binval >> --cbuf_len) & 1)); } + } else +#endif + { while (cbuf_len-- > 0) { NPF_PUTC(cbuf[cbuf_len]); } } // payload is reversed + } + +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + if (fs.left_justified && pad_c) { // Apply left-justified field width + while (field_pad-- > 0) { NPF_PUTC(pad_c); } + } +#endif + } + + return pc_cnt.n; +} + +#undef NPF_PUTC +#undef NPF_EXTRACT +#undef NPF_WRITEBACK + +int npf_pprintf(npf_putc pc, + void * NPF_RESTRICT pc_ctx, + char const * NPF_RESTRICT format, + ...) { + va_list val; + va_start(val, format); + int const rv = npf_vpprintf(pc, pc_ctx, format, val); + va_end(val); + return rv; +} + +int npf_snprintf(char * NPF_RESTRICT buffer, + size_t bufsz, + const char * NPF_RESTRICT format, + ...) { + va_list val; + va_start(val, format); + int const rv = npf_vsnprintf(buffer, bufsz, format, val); + va_end(val); + return rv; +} + +int npf_vsnprintf(char * NPF_RESTRICT buffer, + size_t bufsz, + char const * NPF_RESTRICT format, + va_list vlist) { + npf_bufputc_ctx_t bufputc_ctx; + bufputc_ctx.dst = buffer; + bufputc_ctx.len = bufsz; + bufputc_ctx.cur = 0; + + npf_putc const pc = buffer ? npf_bufputc : npf_bufputc_nop; + int const n = npf_vpprintf(pc, &bufputc_ctx, format, vlist); + + if (buffer && bufsz) { +#ifdef NANOPRINTF_SNPRINTF_SAFE_EMPTY_STRING_ON_OVERFLOW + buffer[(n < 0 || (unsigned)n >= bufsz) ? 0 : n] = '\0'; +#else + buffer[n < 0 ? 0 : NPF_MIN((unsigned)n, bufsz - 1)] = '\0'; +#endif + } + + return n; +} + +#if NPF_HAVE_GCC_WARNING_PRAGMAS + #pragma GCC diagnostic pop +#endif + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +#endif // NPF_IMPLEMENTATION_INCLUDED +#endif // NANOPRINTF_IMPLEMENTATION + +/* + nanoprintf is dual-licensed under both the "Unlicense" and the + "Zero-Clause BSD" (0BSD) licenses. The intent of this dual-licensing + structure is to make nanoprintf as consumable as possible in as many + environments / countries / companies as possible without any + encumberances. + + The text of the two licenses follows below: + + ============================== UNLICENSE ============================== + + This is free and unencumbered software released into the public domain. + + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to + + ================================ 0BSD ================================= + + Copyright (C) 2019- by Charles Nicholson + + Permission to use, copy, modify, and/or distribute this software for + any purpose with or without fee is hereby granted. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ diff --git a/target/linux/realtek/image/rt-loader/linker/linker.ld b/target/linux/realtek/image/rt-loader/linker/linker.ld new file mode 100644 index 00000000000..dd1fb5aaa00 --- /dev/null +++ b/target/linux/realtek/image/rt-loader/linker/linker.ld @@ -0,0 +1,41 @@ +ENTRY(_start) + +SECTIONS { + .text : { + *(.text) + } + + .data : ALIGN(32) { + *(.sdata*) + *(.data*) + } +/* + * In MIPS position independent code (PIC), the global offset table (GOT) is a data structure + * used to facilitate access to global variables and functions when the code's final memory + * location is not known at compile time. The GOT contains absolute addresses of global symbols, + * but is itself located using a relative reference. This allows the code to be relocated at + * runtime without modification. + */ + .got : ALIGN(32) { + __got_start = .; + *(.got*) + __got_end = .; + } +/* + * Storage for the compressed kernel image that was integrated into the loader during link time. + * No code just binary data. + */ + .kernel : ALIGN(1) { + __kernel_data_start = .; + KEEP(*(.kernel)) + __kernel_data_end = .; + } + + .bss (NOLOAD) : ALIGN(4) { + __bss_start = .; + *(.bss) + *(.sbss) + *(COMMON) + __bss_end = .; + } +} diff --git a/target/linux/realtek/image/rt-loader/src/board.c b/target/linux/realtek/image/rt-loader/src/board.c new file mode 100644 index 00000000000..d6d5865673d --- /dev/null +++ b/target/linux/realtek/image/rt-loader/src/board.c @@ -0,0 +1,110 @@ +/* + * rt-loader board functions + * (c) 2025 Markus Stockhausen + */ + +#include "globals.h" +#include "memory.h" +#include "nanoprintf.h" + +#define DRAM_CONFIG_REG 0xb8001004 +#define UART_BUFFER_REG 0xb8002000 +#define UART_LINE_STATUS_REG 0xb8002014 +#define UART_TX_READY (1 << 29) + +#define RTL838X_ENABLE_RW_MASK 0x3 +#define RTL838X_INT_RW_CTRL_REG 0xbb000058 +#define RTL838X_MODEL_NAME_INFO_REG 0xbb0000d4 +#define RTL839X_MODEL_NAME_INFO_REG 0xbb000ff0 +#define RTL83XX_CHIP_INFO_EN 0xa0000000 +#define RTL93XX_MODEL_NAME_INFO_REG 0xbb000004 +#define RTL93XX_CHIP_INFO_EN 0xa0000 + +/* + * board_putchar() is the central function to write to serial console of the device. Some printf + * libraries (e.g. https://github.com/mpaland/printf) need a fixed function name like _putchar. + * To keep the original library as is, link the two functions with gcc compiler option + * -D_putchar=board_putchar + */ + +void board_putchar(int ch, void *ctx) +{ + while (!(ioread32(UART_LINE_STATUS_REG) & UART_TX_READY)); + iowrite32(((int)ch) << 24, UART_BUFFER_REG); + + if (ch == '\n') + board_putchar('\r', ctx); +} + +/* + * board_get_memory() does what it is named after. On Realtek switches the DRAM config register + * has information about bank count, bus width, ... From that the memory size can be derived. + */ + +unsigned int board_get_memory(void) +{ + unsigned int dcr = ioread32(DRAM_CONFIG_REG); + char ROWCNTv[] = {11, 12, 13, 14, 15, 16}; + char COLCNTv[] = {8, 9, 10, 11, 12}; + char BNKCNTv[] = {1, 2, 3}; + char BUSWIDv[] = {0, 1, 2}; + + return 1 << (BNKCNTv[(dcr >> 28) & 0x3] + BUSWIDv[(dcr >> 24) & 0x3] + + ROWCNTv[(dcr >> 20) & 0xf] + COLCNTv[(dcr >> 16) & 0xf]); +} + +/* + * board_get_system() generates a readable system name that will be printed during startup. + * Formatting can be whatever is helpful. + */ + +void board_get_system(char *buffer, int len) +{ + unsigned int chip_id, model_id, model_version, chip_version; + unsigned int reg, val, act; + + act = RTL93XX_CHIP_INFO_EN; + reg = RTL93XX_MODEL_NAME_INFO_REG; + val = ioread32(reg); + + if ((val & 0xffec0000) == 0x93000000) + goto found; + + act = RTL83XX_CHIP_INFO_EN; + reg = RTL839X_MODEL_NAME_INFO_REG; + val = ioread32(reg); + if ((val & 0xfff80000) == 0x83900000) + goto found; + + iowrite32(0x3, RTL838X_INT_RW_CTRL_REG); + reg = RTL838X_MODEL_NAME_INFO_REG; + val = ioread32(reg); +found: + model_id = val >> 16; + model_version = (val >> 11) & 0x1f; + + iowrite32(act, reg + 4); + val = ioread32(reg + 4); + chip_id = val & 0xffff; + + if (model_id < 0x9300) + chip_version = val >> 16 & 0x1f; + else + chip_version = val >> 28 & 0x0f; + + snprintf(buffer, len, "RTL%04X%c (chip id %04x%c)", + model_id, model_version ? model_version + 64 : 0, + chip_id, chip_version ? chip_version + 64 : 0); +} + +/* + * board_panic() is called in critical cases. Whatever is needed can be done here. Maybe + * an automatic reboot can be issued some day. For now just halt processing. + */ + +void board_panic(void) +{ + printf("halt system\n"); + while (1) { + } +} diff --git a/target/linux/realtek/image/rt-loader/src/main.c b/target/linux/realtek/image/rt-loader/src/main.c new file mode 100644 index 00000000000..747881ea067 --- /dev/null +++ b/target/linux/realtek/image/rt-loader/src/main.c @@ -0,0 +1,123 @@ +/* + * rt-loader main program + * (c) 2025 Markus Stockhausen + * + * This code was inspired by the OpenWrt lzma loader. Thanks to + * + * Copyright (C) 2004 Manuel Novoa III (mjn3@codepoet.org) + * Copyright (C) 2005 Mineharu Takahara + * Copyright (C) 2005 by Oleg I. Vdovikin + * Copyright (C) 2011 Gabor Juhos + */ + +#include "board.h" +#include "globals.h" +#include "memory.h" + +#define NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS 1 +#define NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS 0 +#define NANOPRINTF_USE_SMALL_FORMAT_SPECIFIERS 0 +#define NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS 0 +#define NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS 0 +#define NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS 0 +#define NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS 0 +#define NANOPRINTF_IMPLEMENTATION +#include "nanoprintf.h" + +extern void *_kernel_load_addr; +extern void *_kernel_data_addr; +extern int _kernel_data_size; +extern void *_my_load_addr; +extern int _my_load_size; + +extern int unlzma(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *output, + long *outlen, + long *posp, + void(*error)(char *x)); + +typedef void (*entry_func_t)(unsigned long reg_a0, unsigned long reg_a1, + unsigned long reg_a2, unsigned long reg_a3); + +void *relocate(void *src, int len) +{ + void *addr; + unsigned int offs; + + /* + * Relocate to highest possible memory address. This is usually the RAM size minus some + * space for the heap and the stack pointer. As we do not have any highmem features + * limit this to 256MB. + */ + + offs = (board_get_memory() - STACK_SIZE - HEAP_SIZE - len - 1024) & 0xfff0000; + addr = (void *)KSEG0 + offs; + + printf("Relocate %d bytes from 0x%08x to 0x%08x\n", len, src, addr); + + memcpy(addr, src, len); + flush_cache(addr, len); + + return addr; +} + +void welcome(void) +{ + char system[80]; + + board_get_system(system, sizeof(system)); + + printf("rt-loader\n"); + printf("Running on %s with %dMB\n", system, board_get_memory() >> 20); +} + +void decompress_error(char *x) +{ + printf("%s\n", x); +} + +void *decompress(void *out, void *in, int len) +{ + long outlen; + + printf("Extract kernel with %d bytes from 0x%08x to 0x%08x ...\n", len, in, out); + + if (unlzma(in, len, 0, 0, out, &outlen, 0, decompress_error)) + board_panic(); + + printf("Extracted kernel size is %d bytes\n", outlen); + flush_cache(out, outlen); + + return out; +} + +void main(unsigned long reg_a0, unsigned long reg_a1, + unsigned long reg_a2, unsigned long reg_a3) +{ + entry_func_t fn; + + if (_kernel_load_addr == _my_load_addr) { + /* + * During first run relocate the whole package to the end of memory. Use + * _my_load_size as relocation length. That includes the bss section, aka + * uninitialized globals. So it is possible to initialize globals during + * first run and have them at hand after relocation. + */ + + welcome(); + fn = relocate(_my_load_addr, _my_load_size); + fn(reg_a0, reg_a1, reg_a2, reg_a3); + } else { + /* + * During second run extract the attached kernel image to the memory address + * that the loader was loaded to in the first run. + */ + + fn = decompress(_kernel_load_addr, _kernel_data_addr, _kernel_data_size); + + printf("Booting kernel from 0x%08x ...\n\n", fn); + fn(reg_a0, reg_a1, reg_a2, reg_a3); + } +} diff --git a/target/linux/realtek/image/rt-loader/src/memory.c b/target/linux/realtek/image/rt-loader/src/memory.c new file mode 100644 index 00000000000..6ff5a448974 --- /dev/null +++ b/target/linux/realtek/image/rt-loader/src/memory.c @@ -0,0 +1,122 @@ +/* + * rt-loader memory functions + * (c) 2025 Markus Stockhausen + * + * This is a small function collection to get some rudimentary memory management working when + * running bare metal. None of these functions is optimized but works well for current needs. + */ + +#include "board.h" +#include "globals.h" +#include "memory.h" +#include "nanoprintf.h" + +#define CACHE_OP(op, addr) \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set noreorder \n" \ + " .set mips3\n\t \n" \ + " cache %0, %1 \n" \ + " .set pop \n" \ + : \ + : "i" (op), "R" (*(unsigned char *)(addr))) + +void flush_cache(void *start_addr, unsigned long size) +{ + /* + * MIPS cores may have different cache lines. Most common are 16 and 32 bytes. Avoid + * detection routines or multiple implementations and take the lowest known value that + * will fit fine for cores with longer cache lines + */ + + unsigned long lsize = 16; + unsigned long addr = (unsigned long)start_addr & ~(lsize - 1); + unsigned long aend = ((unsigned long)start_addr + size - 1) & ~(lsize - 1); + + while (1) { + CACHE_OP(CACHE_HIT_INVALIDATE_I, addr); + CACHE_OP(CACHE_HIT_WRITEBACK_INV_D, addr); + if (addr == aend) + break; + addr += lsize; + } +} + +void free(void *ptr) +{ + /* this is only one shot allocation */ +} + +int memcmp(const void *s1, const void *s2, size_t count) +{ + volatile char *p1 = (volatile char *)s1; + volatile char *p2 = (volatile char *)s2; + + while (count--) { + if (*p1 != *p2) + return (int)(*p1) - (int)(*p2); + + p1++; + p2++; + } + + return 0; +} + +void *memmove(void *dst, const void *src, size_t count) +{ + volatile char *d = (volatile char *)dst; + volatile char *s = (volatile char *)src; + + if (d < s) { + while (count--) + *d++ = *s++; + } else if (d > s) { + d += count; + s += count; + while (count--) + *--d = *--s; + } + + return dst; +} + +void *memcpy(void *dst, const void *src, size_t count) +{ + memmove(dst, src, count); +} + +void *memset(void *dst, int c, size_t count) +{ + volatile char *d = (volatile char *)dst; + + while (count--) + *d++ = c; + + return (void *)d; +} + +void *malloc(size_t size) +{ + void *start; + + start = (void *)(((unsigned int)_heap_addr + MEMORY_ALIGNMENT - 1) & ~(MEMORY_ALIGNMENT - 1)); + if ((start + size) > _heap_addr_max) { + printf("malloc(%d) failed. Only %dkB of %dkB heap left.\n", + size, (_heap_addr_max - start) >> 10, HEAP_SIZE >> 10); + board_panic(); + } + + _heap_addr += size; + + return start; +} + +size_t strlen(const char *s) +{ + const char *p = s; + + while (*p) ++p; + + return (size_t)(p - s); +} diff --git a/target/linux/realtek/image/rt-loader/src/startup.S b/target/linux/realtek/image/rt-loader/src/startup.S new file mode 100644 index 00000000000..898f7e1a163 --- /dev/null +++ b/target/linux/realtek/image/rt-loader/src/startup.S @@ -0,0 +1,182 @@ +# rt-loader assembler startup code +# (c) 2025 Markus Stockhausen + +#include "globals.h" + +# This start code allows to run a position independent code (PIC) on bare metal. In that case +# all addresses are looked up via the global offset table (GOT). But that must be filled during +# this initialization sequence. Without a proper GOT using standard "la" instruction in the code +# will not work. Provide a macro that avoids the dependency. + +.macro _LA reg, symbol + lui \reg, %hi(\symbol) + addi \reg, \reg, %lo(\symbol) + add \reg, $t9 +.endm + + .section .text + .globl _start + .ent _start +_start: + .set noreorder + +# Determine current program load address and store it into t9. + + bal _where_am_i + nop +_where_am_i: + move $t9, $ra + subu $t9, $t9, 0x8 + + +# Check if this our first run (_kernel_load_addr = 0?) + + _LA $t6, _kernel_load_addr + lw $t7, 0($t6) + bne $zero, $t7, _init_done + nop + +# During first run store the current load address as the target kernel load address. + + sw $t9, 0($t6) + +# Same for the global variables in the BSS section. Clear them only during the first run. This +# way the "global program state" can be copied over to the relocation address. + + _LA $t3, __bss_start + _LA $t4, __bss_end +_bss_zero: + beq $t3, $t4, _init_done + nop + sw $zero, 0($t3) + addiu $t3, $t3, 4 + b _bss_zero + nop + +_init_done: + +# Code is running bare metal and no one initializes the global offset table. After the build +# process the table is relative to address 0x0. Starting from anywhere else breaks the program. +# A manual update is required during startup. Usually this is quite easy by simply adding the +# current load address to all entries. +# But this code relocates itself to another memory address and starts itself over. At the new +# address it will find a global offset table that fits to the previous execution. To solve this +# store a copy of the last load address in got_delta variable and only add the difference after +# a relocation. Sequence is as follows +# +# - U-Boot loads the code to 0x80100000 +# - U-Boot runs the code at 0x80100000 +# - code identifies its dynamic start_address = 0x80100000 +# - code reads (initial) _got_delta = 0x00000000 +# - code adds 0x80100000 to all GOT entries +# - code stores _got_delta with 0x80100000 +# - code copies itself over to a new location 0x85000000 +# - code starts itself from 0x85000000 +# - code identifies its dynamic start_address = 0x85000000 +# - code reads (pre-filled) _got_delta = 0x80100000 +# - code adds 0x4f00000 (= 0x85000000 - 0x80100000) to all GOT entries +# - ... +# + + _LA $t6, _got_delta + lw $t5, 0($t6) + subu $t7, $t9, $t5 + sw $t9, 0($t6) + _LA $t3, __got_start + _LA $t4, __got_end +_got_patch: + beq $t3, $t4, _got_done + nop + lw $t5, 0($t3) + addu $t5, $t5, $t7 + sw $t5, 0($t3) + addiu $t3, $t3, 4 + b _got_patch + nop +_got_done: + +# Linker attached kernel to end of package. Store addresses in global variables + + _LA $t8, _my_load_addr + sw $t9, 0($t8) + + _LA $t5, __kernel_data_start + _LA $t4, _kernel_data_addr + sw $t5, 0($t4) + + _LA $t3, __kernel_data_end + subu $t3, $t3, $t5 + _LA $t4, _kernel_data_size + sw $t3, 0($t4) + +# Determine own code size by looking where BSS ends. + + _LA $t3, __bss_end + subu $t6, $t3, $t9 + _LA $t4, _my_load_size + sw $t6, 0($t4) + +# Setup heap. It will start directly behind BSS + + addiu $t3, MEMORY_ALIGNMENT + li $t4, ~(MEMORY_ALIGNMENT - 1) + and $t3, $t4 + + _LA $t5, _heap_addr + sw $t3, 0($t5) + + li $t4, HEAP_SIZE + add $t3, $t4 + + _LA $t5, _heap_addr_max + sw $t3, 0($t5) + +# Setup stack that is located on top of heap. + + li $t4, STACK_SIZE + add $sp, $t3, $t4 + +# Adapt t9 so it points to main(). This is needed so main() can find the GOT via t9/gp + + _LA $t8, main + move $t9, $t8 + +# Call main() with parameters a0, a3, __kernel_start, __kernel_end + bal main + nop + + .end _start + + .section .data + .align 4 +# delta for global offset table initialization +_got_delta: + .word 0 +# current heap address for malloc() / free() + .globl _heap_addr +_heap_addr: + .word 0 +# maximum heap address + .globl _heap_addr_max +_heap_addr_max: + .word 0 +# current program load address + .globl _my_load_addr +_my_load_addr: + .word 0 +# total size of code including attached kernel and bss (uninitialized global variables) + .globl _my_load_size +_my_load_size: + .word 0 +# target load address of kernel = this programs address during initial run + .globl _kernel_load_addr +_kernel_load_addr: + .word 0 +# absolute start address of attached kernel + .globl _kernel_data_addr +_kernel_data_addr: + .word 0 +# size of attached kernel + .globl _kernel_data_size +_kernel_data_size: + .word 0 diff --git a/target/linux/realtek/image/rt-loader/src/unlzma.c b/target/linux/realtek/image/rt-loader/src/unlzma.c new file mode 100644 index 00000000000..a7ddc004ea9 --- /dev/null +++ b/target/linux/realtek/image/rt-loader/src/unlzma.c @@ -0,0 +1,663 @@ +/* Lzma decompressor for Linux kernel. Shamelessly snarfed + *from busybox 1.1.1 + * + *Linux kernel adaptation + *Copyright (C) 2006 Alain < alain@knaff.lu > + * + *Based on small lzma deflate implementation/Small range coder + *implementation for lzma. + *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) + *Copyright (C) 1999-2005 Igor Pavlov + * + *Copyrights of the parts, see headers below. + * + * + *This program is free software; you can redistribute it and/or + *modify it under the terms of the GNU Lesser General Public + *License as published by the Free Software Foundation; either + *version 2.1 of the License, or (at your option) any later version. + * + *This program is distributed in the hope that it will be useful, + *but WITHOUT ANY WARRANTY; without even the implied warranty of + *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + *Lesser General Public License for more details. + * + *You should have received a copy of the GNU Lesser General Public + *License along with this library; if not, write to the Free Software + *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +static long long read_int(unsigned char *ptr, int size) +{ + int i; + long long ret = 0; + + for (i = 0; i < size; i++) + ret = (ret << 8) | ptr[size-i-1]; + return ret; +} + +#define ENDIAN_CONVERT(x) \ + x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) + + +/* Small range coder implementation for lzma. + *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) + *Copyright (c) 1999-2005 Igor Pavlov + */ + +#include "memory.h" +#include +#include + +#define LZMA_IOBUF_SIZE 0x10000 + +struct rc { + long (*fill)(void*, unsigned long); + uint8_t *ptr; + uint8_t *buffer; + uint8_t *buffer_end; + long buffer_size; + uint32_t code; + uint32_t range; + uint32_t bound; + void (*error)(char *); +}; + + +#define RC_TOP_BITS 24 +#define RC_MOVE_BITS 5 +#define RC_MODEL_TOTAL_BITS 11 + + +static long nofill(void *buffer, unsigned long len) +{ + return -1; +} + +/* Called twice: once at startup and once in rc_normalize() */ +static void rc_read(struct rc *rc) +{ + rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); + if (rc->buffer_size <= 0) + rc->error("unexpected EOF"); + rc->ptr = rc->buffer; + rc->buffer_end = rc->buffer + rc->buffer_size; +} + +/* Called once */ +static inline void rc_init(struct rc *rc, + long (*fill)(void*, unsigned long), + char *buffer, long buffer_size) +{ + if (fill) + rc->fill = fill; + else + rc->fill = nofill; + rc->buffer = (uint8_t *)buffer; + rc->buffer_size = buffer_size; + rc->buffer_end = rc->buffer + rc->buffer_size; + rc->ptr = rc->buffer; + + rc->code = 0; + rc->range = 0xFFFFFFFF; +} + +static inline void rc_init_code(struct rc *rc) +{ + int i; + + for (i = 0; i < 5; i++) { + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; + } +} + + +/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ +static void rc_do_normalize(struct rc *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} +static inline void rc_normalize(struct rc *rc) +{ + if (rc->range < (1 << RC_TOP_BITS)) + rc_do_normalize(rc); +} + +/* Called 9 times */ +/* Why rc_is_bit_0_helper exists? + *Because we want to always expose (rc->code < rc->bound) to optimizer + */ +static inline uint32_t rc_is_bit_0_helper(struct rc *rc, uint16_t *p) +{ + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); + return rc->bound; +} +static inline int rc_is_bit_0(struct rc *rc, uint16_t *p) +{ + uint32_t t = rc_is_bit_0_helper(rc, p); + return rc->code < t; +} + +/* Called ~10 times, but very small, thus inlined */ +static inline void rc_update_bit_0(struct rc *rc, uint16_t *p) +{ + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; +} +static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) +{ + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; +} + +/* Called 4 times in unlzma loop */ +static int rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) +{ + if (rc_is_bit_0(rc, p)) { + rc_update_bit_0(rc, p); + *symbol *= 2; + return 0; + } else { + rc_update_bit_1(rc, p); + *symbol = *symbol * 2 + 1; + return 1; + } +} + +/* Called once */ +static inline int rc_direct_bit(struct rc *rc) +{ + rc_normalize(rc); + rc->range >>= 1; + if (rc->code >= rc->range) { + rc->code -= rc->range; + return 1; + } + return 0; +} + +/* Called twice */ +static inline void rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) +{ + int i = num_levels; + + *symbol = 1; + while (i--) + rc_get_bit(rc, p + *symbol, symbol); + *symbol -= 1 << num_levels; +} + + +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + */ + + +struct lzma_header { + uint8_t pos; + uint32_t dict_size; + uint64_t dst_size; +} __attribute__ ((packed)) ; + + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LZMA_NUM_POS_BITS_MAX 4 + +#define LZMA_LEN_NUM_LOW_BITS 3 +#define LZMA_LEN_NUM_MID_BITS 3 +#define LZMA_LEN_NUM_HIGH_BITS 8 + +#define LZMA_LEN_CHOICE 0 +#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) +#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) +#define LZMA_LEN_MID (LZMA_LEN_LOW \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) +#define LZMA_LEN_HIGH (LZMA_LEN_MID \ + +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) +#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) + +#define LZMA_NUM_STATES 12 +#define LZMA_NUM_LIT_STATES 7 + +#define LZMA_START_POS_MODEL_INDEX 4 +#define LZMA_END_POS_MODEL_INDEX 14 +#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) + +#define LZMA_NUM_POS_SLOT_BITS 6 +#define LZMA_NUM_LEN_TO_POS_STATES 4 + +#define LZMA_NUM_ALIGN_BITS 4 + +#define LZMA_MATCH_MIN_LEN 2 + +#define LZMA_IS_MATCH 0 +#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) +#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) +#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) +#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) +#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_SPEC_POS (LZMA_POS_SLOT \ + +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) +#define LZMA_ALIGN (LZMA_SPEC_POS \ + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) +#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) +#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) +#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) + + +struct writer { + uint8_t *buffer; + uint8_t previous_byte; + size_t buffer_pos; + int bufsize; + size_t global_pos; + long (*flush)(void*, unsigned long); + struct lzma_header *header; +}; + +struct cstate { + int state; + uint32_t rep0, rep1, rep2, rep3; +}; + +static inline size_t get_pos(struct writer *wr) +{ + return + wr->global_pos + wr->buffer_pos; +} + +static inline uint8_t peek_old_byte(struct writer *wr, uint32_t offs) +{ + if (!wr->flush) { + int32_t pos; + while (offs > wr->header->dict_size) + offs -= wr->header->dict_size; + pos = wr->buffer_pos - offs; + return wr->buffer[pos]; + } else { + uint32_t pos = wr->buffer_pos - offs; + while (pos >= wr->header->dict_size) + pos += wr->header->dict_size; + return wr->buffer[pos]; + } + +} + +static inline int write_byte(struct writer *wr, uint8_t byte) +{ + wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; + if (wr->flush && wr->buffer_pos == wr->header->dict_size) { + wr->buffer_pos = 0; + wr->global_pos += wr->header->dict_size; + if (wr->flush((char *)wr->buffer, wr->header->dict_size) + != wr->header->dict_size) + return -1; + } + return 0; +} + + +static inline int copy_byte(struct writer *wr, uint32_t offs) +{ + return write_byte(wr, peek_old_byte(wr, offs)); +} + +static inline int copy_bytes(struct writer *wr, + uint32_t rep0, int len) +{ + do { + if (copy_byte(wr, rep0)) + return -1; + len--; + } while (len != 0 && wr->buffer_pos < wr->header->dst_size); + + return len; +} + +static inline int process_bit0(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob, + int lc, uint32_t literal_pos_mask) { + int mi = 1; + rc_update_bit_0(rc, prob); + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE + * (((get_pos(wr) & literal_pos_mask) << lc) + + (wr->previous_byte >> (8 - lc)))) + ); + + if (cst->state >= LZMA_NUM_LIT_STATES) { + int match_byte = peek_old_byte(wr, cst->rep0); + do { + int bit; + uint16_t *prob_lit; + + match_byte <<= 1; + bit = match_byte & 0x100; + prob_lit = prob + 0x100 + bit + mi; + if (rc_get_bit(rc, prob_lit, &mi)) { + if (!bit) + break; + } else { + if (bit) + break; + } + } while (mi < 0x100); + } + while (mi < 0x100) { + uint16_t *prob_lit = prob + mi; + rc_get_bit(rc, prob_lit, &mi); + } + if (cst->state < 4) + cst->state = 0; + else if (cst->state < 10) + cst->state -= 3; + else + cst->state -= 6; + + return write_byte(wr, mi); +} + +static inline int process_bit1(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob) { + int offset; + uint16_t *prob_len; + int num_bits; + int len; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + cst->rep3 = cst->rep2; + cst->rep2 = cst->rep1; + cst->rep1 = cst->rep0; + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob = p + LZMA_LEN_CODER; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G0 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + prob = (p + LZMA_IS_REP_0_LONG + + (cst->state << + LZMA_NUM_POS_BITS_MAX) + + pos_state); + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + + cst->state = cst->state < LZMA_NUM_LIT_STATES ? + 9 : 11; + return copy_byte(wr, cst->rep0); + } else { + rc_update_bit_1(rc, prob); + } + } else { + uint32_t distance; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G1 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep1; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G2 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep2; + } else { + rc_update_bit_1(rc, prob); + distance = cst->rep3; + cst->rep3 = cst->rep2; + } + cst->rep2 = cst->rep1; + } + cst->rep1 = cst->rep0; + cst->rep0 = distance; + } + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; + prob = p + LZMA_REP_LEN_CODER; + } + + prob_len = prob + LZMA_LEN_CHOICE; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_LOW + + (pos_state << + LZMA_LEN_NUM_LOW_BITS)); + offset = 0; + num_bits = LZMA_LEN_NUM_LOW_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_CHOICE_2; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_MID + + (pos_state << + LZMA_LEN_NUM_MID_BITS)); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits = LZMA_LEN_NUM_MID_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_HIGH; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits = LZMA_LEN_NUM_HIGH_BITS; + } + } + + rc_bit_tree_decode(rc, prob_len, num_bits, &len); + len += offset; + + if (cst->state < 4) { + int pos_slot; + + cst->state += LZMA_NUM_LIT_STATES; + prob = + p + LZMA_POS_SLOT + + ((len < + LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); + rc_bit_tree_decode(rc, prob, + LZMA_NUM_POS_SLOT_BITS, + &pos_slot); + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + int i, mi; + num_bits = (pos_slot >> 1) - 1; + cst->rep0 = 2 | (pos_slot & 1); + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + cst->rep0 <<= num_bits; + prob = p + LZMA_SPEC_POS + + cst->rep0 - pos_slot - 1; + } else { + num_bits -= LZMA_NUM_ALIGN_BITS; + while (num_bits--) + cst->rep0 = (cst->rep0 << 1) | + rc_direct_bit(rc); + prob = p + LZMA_ALIGN; + cst->rep0 <<= LZMA_NUM_ALIGN_BITS; + num_bits = LZMA_NUM_ALIGN_BITS; + } + i = 1; + mi = 1; + while (num_bits--) { + if (rc_get_bit(rc, prob + mi, &mi)) + cst->rep0 |= i; + i <<= 1; + } + } else + cst->rep0 = pos_slot; + if (++(cst->rep0) == 0) + return 0; + if (cst->rep0 > wr->header->dict_size + || cst->rep0 > get_pos(wr)) + return -1; + } + + len += LZMA_MATCH_MIN_LEN; + + return copy_bytes(wr, cst->rep0, len); +} + + + +int unlzma(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *output, + long *outlen, + long *posp, + void(*error)(char *x)) +{ + struct lzma_header header; + int lc, pb, lp; + uint32_t pos_state_mask; + uint32_t literal_pos_mask; + uint16_t *p; + int num_probs; + struct rc rc; + int i, mi; + struct writer wr; + struct cstate cst; + unsigned char *inbuf; + int ret = -1; + + rc.error = error; + + if (buf) + inbuf = buf; + else + inbuf = malloc(LZMA_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input buffer"); + goto exit_0; + } + + cst.state = 0; + cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; + + wr.header = &header; + wr.flush = flush; + wr.global_pos = 0; + wr.previous_byte = 0; + wr.buffer_pos = 0; + + rc_init(&rc, fill, inbuf, in_len); + + for (i = 0; i < sizeof(header); i++) { + if (rc.ptr >= rc.buffer_end) + rc_read(&rc); + ((unsigned char *)&header)[i] = *rc.ptr++; + } + + if (header.pos >= (9 * 5 * 5)) { + error("bad header"); + goto exit_1; + } + + mi = 0; + lc = header.pos; + while (lc >= 9) { + mi++; + lc -= 9; + } + pb = 0; + lp = mi; + while (lp >= 5) { + pb++; + lp -= 5; + } + pos_state_mask = (1 << pb) - 1; + literal_pos_mask = (1 << lp) - 1; + + ENDIAN_CONVERT(header.dict_size); + ENDIAN_CONVERT(header.dst_size); + + if (header.dict_size == 0) + header.dict_size = 1; + + if (output) + wr.buffer = output; + else { + wr.bufsize = MIN(header.dst_size, header.dict_size); + wr.buffer = malloc(wr.bufsize); + } + if (wr.buffer == NULL) + goto exit_1; + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = (uint16_t *) malloc(num_probs * sizeof(*p)); + if (p == NULL) + goto exit_2; + num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + + rc_init_code(&rc); + + while (get_pos(&wr) < header.dst_size) { + int pos_state = get_pos(&wr) & pos_state_mask; + uint16_t *prob = p + LZMA_IS_MATCH + + (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; + if (rc_is_bit_0(&rc, prob)) { + if (process_bit0(&wr, &rc, &cst, p, pos_state, prob, + lc, literal_pos_mask)) { + error("LZMA data is corrupt"); + goto exit_3; + } + } else { + if (process_bit1(&wr, &rc, &cst, p, pos_state, prob)) { + error("LZMA data is corrupt"); + goto exit_3; + } + if (cst.rep0 == 0) + break; + } + if (rc.buffer_size <= 0) + goto exit_3; + } + + *outlen = get_pos(&wr); + + if (posp) + *posp = rc.ptr-rc.buffer; + if (!wr.flush || wr.flush(wr.buffer, wr.buffer_pos) == wr.buffer_pos) + ret = 0; +exit_3: + free(p); +exit_2: + if (!output) + free(wr.buffer); +exit_1: + if (!buf) + free(inbuf); +exit_0: + return ret; +}