libs: introduce lmo - Lua Machine Objects, an implementation of binary hash tables

This commit is contained in:
Jo-Philipp Wich 2009-07-09 15:04:27 +00:00
parent fb64c14609
commit d9d3c71435
9 changed files with 872 additions and 0 deletions

46
libs/lmo/Makefile Normal file
View file

@ -0,0 +1,46 @@
# Use the shared build configuration of the surrounding source tree when
# ../../build/config.mk exists; otherwise fall back to the local
# standalone.mk so the library can be built on its own.
ifneq (,$(wildcard ../../build/config.mk))
include ../../build/config.mk
include ../../build/module.mk
include ../../build/gccconfig.mk
else
include standalone.mk
endif
# Additional linker / compiler flags used only for lmo (empty by default).
LMO_LDFLAGS =
LMO_CFLAGS =

# File names of the three build products created below src/.
LMO_SO = lmo.so
LMO_PO2LMO = po2lmo
LMO_LOOKUP = lookup

# Object lists: code shared by every product first, then one list per product.
LMO_COMMON_OBJ = $(addprefix src/,lmo_core.o lmo_hash.o)
LMO_PO2LMO_OBJ = $(addprefix src/,lmo_po2lmo.o)
LMO_LOOKUP_OBJ = $(addprefix src/,lmo_lookup.o)
LMO_LUALIB_OBJ = $(addprefix src/,lmo_lualib.o)
# Compile one C source file into the object file of the same base name.
# $(FPIC) is passed for every object, including those that are later linked
# into the command line tools.
%.o: %.c
$(COMPILE) $(LMO_CFLAGS) $(LUA_CFLAGS) $(FPIC) -c -o $@ $<
# Build all products: the Lua module (shared object), the po2lmo converter
# and the lookup tool, then stage the module below dist/.
# NOTE(review): build-clean is a normal prerequisite, so every run removes
# the previous objects and rebuilds everything; with `make -j` it could
# presumably run concurrently with the object rules listed after it -
# confirm before relying on parallel builds.
compile: build-clean $(LMO_COMMON_OBJ) $(LMO_PO2LMO_OBJ) $(LMO_LOOKUP_OBJ) $(LMO_LUALIB_OBJ)
$(LINK) $(SHLIB_FLAGS) $(LMO_LDFLAGS) -o src/$(LMO_SO) \
$(LMO_COMMON_OBJ) $(LMO_LUALIB_OBJ)
$(LINK) $(LMO_LDFLAGS) -o src/$(LMO_PO2LMO) $(LMO_COMMON_OBJ) $(LMO_PO2LMO_OBJ)
$(LINK) $(LMO_LDFLAGS) -o src/$(LMO_LOOKUP) $(LMO_COMMON_OBJ) $(LMO_LOOKUP_OBJ)
mkdir -p dist$(LUA_LIBRARYDIR)
cp src/$(LMO_SO) dist$(LUA_LIBRARYDIR)/$(LMO_SO)
# Copy the staged Lua module from dist/ into the Lua library directory.
# The destination is created first: without it, copying the single staged
# file onto a missing path would create a regular file with the directory's
# name instead of installing into it.
install: build
	mkdir -p $(LUA_LIBRARYDIR)
	cp -pR dist$(LUA_LIBRARYDIR)/* $(LUA_LIBRARYDIR)
# Remove all objects and linked products from src/.
# Product names are taken from the LMO_* variables (the same ones the link
# commands use) so a renamed product is still cleaned up.
clean: build-clean

build-clean:
	rm -f src/*.o src/$(LMO_LOOKUP) src/$(LMO_PO2LMO) src/$(LMO_SO)
# Targets that build only the po2lmo converter and copy it into ../../build.
# None of them creates a file of its own name, so they are declared phony;
# otherwise a stray file called e.g. "host-clean" would disable the target.
.PHONY: host-compile host-install host-clean

# Rebuild the shared and converter objects from scratch and link po2lmo.
host-compile: build-clean host-clean $(LMO_COMMON_OBJ) $(LMO_PO2LMO_OBJ)
	$(LINK) $(LMO_LDFLAGS) -o src/$(LMO_PO2LMO) $(LMO_COMMON_OBJ) $(LMO_PO2LMO_OBJ)

# Place the freshly linked converter in the shared build directory.
host-install: host-compile
	cp src/$(LMO_PO2LMO) ../../build/$(LMO_PO2LMO)

# Remove a previously installed converter from the shared build directory.
host-clean:
	rm -f ../../build/$(LMO_PO2LMO)

72
libs/lmo/src/lmo.h Normal file
View file

@ -0,0 +1,72 @@
/*
* lmo - Lua Machine Objects - General header
*
* Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _LMO_H_
#define _LMO_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <errno.h>
/*
 * sfh_get16(d): read the two bytes at address d as one 16 bit value, the
 * byte at the lower address being the least significant one.
 * With GCC on i386 this is a direct 16 bit load through a uint16_t pointer;
 * everywhere else the value is assembled from two single byte reads.
 */
#if (defined(__GNUC__) && defined(__i386__))
#define sfh_get16(d) (*((const uint16_t *) (d)))
#else
#define sfh_get16(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
/*
 * One index record of an archive.
 * The four 32 bit fields are what po2lmo writes per entry and what
 * lmo_open() reads back; `next` is never written to a file and only links
 * the records into a singly linked list in memory.
 */
struct lmo_entry {
uint32_t key_id;	/* sfh_hash() of the key string */
uint32_t val_id;	/* sfh_hash() of the value string */
uint32_t offset;	/* byte offset of the value inside the data section */
uint32_t length;	/* length of the value in bytes (without padding) */
struct lmo_entry *next;	/* next record in the list, NULL at the end */
} __attribute__((packed));
typedef struct lmo_entry lmo_entry_t;
/*
 * Handle describing an opened archive, as produced by lmo_open() and
 * released by lmo_close().
 */
typedef struct lmo_archive {
	int          fd;      /* descriptor of the opened archive file */
	uint32_t     length;  /* number of bytes mapped at `mmap` */
	lmo_entry_t *index;   /* head of the in-memory index list */
	char        *mmap;    /* read-only mapping of the archive data */
} lmo_archive_t;
/* Hash the `len` bytes starting at `data`; returns 0 for NULL or len <= 0. */
uint32_t sfh_hash(const char * data, int len);
/*
 * Buffer holding the text of the most recent error.
 * NOTE(review): this line defines the object in a header that several
 * source files include. Toolchains that do not merge such duplicate
 * definitions (e.g. when building with -fno-common) will presumably fail at
 * link time - consider declaring it `extern` here and defining it once in
 * lmo_core.c.
 */
char _lmo_error[1024];
/* Return the text of the most recent error. */
const char * lmo_error(void);
/* Open the archive `file`; returns NULL on failure, see lmo_error(). */
lmo_archive_t * lmo_open(const char *file);
/*
 * Copy at most `len` bytes of the value stored for `key` into `dest`.
 * Returns the number of bytes copied, or -1 if the key is not present.
 * The copied data is not NUL terminated.
 */
int lmo_lookup(lmo_archive_t *ar, const char *key, char *dest, int len);
/* Release an archive handle obtained from lmo_open(). */
void lmo_close(lmo_archive_t *ar);
#endif

231
libs/lmo/src/lmo_core.c Normal file
View file

@ -0,0 +1,231 @@
/*
* lmo - Lua Machine Objects - Base functions
*
* Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "lmo.h"
extern char _lmo_error[1024];
/*
 * Read one 32 bit value stored in network byte order (big endian) from fd.
 *
 * fd:  descriptor to read from
 * val: receives the value converted to host byte order
 *
 * Returns 4 on success and -1 if fewer than four bytes could be read, in
 * which case *val is left untouched.
 */
static int lmo_read32( int fd, uint32_t *val )
{
	uint8_t buffer[4];

	if( read(fd, buffer, sizeof(buffer)) < (ssize_t) sizeof(buffer) )
		return -1;

	/*
	 * Assemble the value bytewise instead of casting the byte buffer to
	 * uint32_t *: the buffer has no alignment guarantee and the cast
	 * violates the aliasing rules. The result equals ntohl() of the raw
	 * bytes on every host.
	 */
	*val = ((uint32_t) buffer[0] << 24) |
	       ((uint32_t) buffer[1] << 16) |
	       ((uint32_t) buffer[2] <<  8) |
	        (uint32_t) buffer[3];

	return 4;
}
/*
 * Store `message` in the shared error buffer, optionally followed by
 * ": " and the strerror() text for the current errno.
 *
 * Always returns NULL.
 */
static char * error(const char *message, int add_errno)
{
	memset(_lmo_error, 0, sizeof(_lmo_error));

	if( !add_errno )
	{
		snprintf(_lmo_error, sizeof(_lmo_error), "%s", message);
		return NULL;
	}

	snprintf(_lmo_error, sizeof(_lmo_error),
		"%s: %s", message, strerror(errno));

	return NULL;
}
/*
 * Return the text stored by the most recent failing call.
 * The returned pointer refers to the shared buffer _lmo_error, so its
 * contents change when a later call records another error.
 */
const char * lmo_error(void)
{
return _lmo_error;
}
/*
 * Open the archive `file`, load its index and map its data section.
 *
 * Layout as read here: the last four bytes of the file hold the offset of
 * the index (network byte order). The index is a sequence of records of
 * four 32 bit values (key_id, val_id, offset, length) that runs from that
 * offset up to the trailing offset word. Everything in front of the index
 * is mapped read-only.
 *
 * Returns a handle that must be released with lmo_close(), or NULL on
 * failure; lmo_error() then returns a description.
 */
lmo_archive_t * lmo_open(const char *file)
{
	int in = -1;
	uint32_t idx_offset = 0;
	off_t idx_end;
	off_t pos;
	struct stat s;

	lmo_archive_t *ar    = NULL;
	lmo_entry_t   *head  = NULL;
	lmo_entry_t   *entry = NULL;

	if( stat(file, &s) == -1 )
	{
		error("Can not stat file", 1);
		goto cleanup;
	}

	if( (in = open(file, O_RDONLY)) == -1 )
	{
		error("Can not open file", 1);
		goto cleanup;
	}

	/*
	 * sizeof() yields an unsigned value; negating it directly produces a
	 * huge positive number which only happens to work where off_t has the
	 * same width as size_t. Convert first, negate afterwards.
	 */
	if( lseek(in, -((off_t) sizeof(uint32_t)), SEEK_END) == -1 )
	{
		error("Can not seek to eof", 1);
		goto cleanup;
	}

	if( lmo_read32(in, &idx_offset) != 4 )
	{
		error("Unexpected EOF while reading index offset", 0);
		goto cleanup;
	}

	/* the index must start in front of the trailing offset word */
	idx_end = s.st_size - (off_t) sizeof(uint32_t);

	if( (off_t) idx_offset > idx_end )
	{
		error("Invalid index offset", 0);
		goto cleanup;
	}

	if( lseek(in, idx_offset, SEEK_SET) == -1 )
	{
		error("Can not seek to index offset", 1);
		goto cleanup;
	}

	if( (ar = (lmo_archive_t *) malloc(sizeof(lmo_archive_t))) == NULL )
	{
		error("Out of memory", 0);
		goto cleanup;
	}

	/* initialise every member so the cleanup path never reads garbage */
	ar->fd     = in;
	ar->length = idx_offset;
	ar->index  = NULL;
	ar->mmap   = NULL;

	for( pos = idx_offset; pos < idx_end; pos += 4 * sizeof(uint32_t) )
	{
		if( (entry = (lmo_entry_t *) malloc(sizeof(lmo_entry_t))) == NULL )
		{
			error("Out of memory", 0);
			goto cleanup;
		}

		if( (lmo_read32(ar->fd, &entry->key_id) != 4) ||
		    (lmo_read32(ar->fd, &entry->val_id) != 4) ||
		    (lmo_read32(ar->fd, &entry->offset) != 4) ||
		    (lmo_read32(ar->fd, &entry->length) != 4)
		) {
			/* not linked into the list yet, release it here */
			free(entry);
			error("Unexpected EOF while reading index entry", 0);
			goto cleanup;
		}

		/* a value must lie completely inside the mapped data section */
		if( (entry->offset > idx_offset) ||
		    (entry->length > idx_offset - entry->offset)
		) {
			free(entry);
			error("Invalid index entry", 0);
			goto cleanup;
		}

		entry->next = head;
		head = entry;
	}

	ar->index = head;

	if( lseek(ar->fd, 0, SEEK_SET) == -1 )
	{
		error("Can not seek to start", 1);
		goto cleanup;
	}

	if( (ar->mmap = mmap(NULL, ar->length, PROT_READ, MAP_SHARED, ar->fd, 0)) == MAP_FAILED )
	{
		error("Failed to memory map archive contents", 1);
		goto cleanup;
	}

	return ar;

cleanup:
	if( in > -1 )
		close(in);

	/* release all index records collected so far */
	while( head != NULL )
	{
		entry = head->next;
		free(head);
		head = entry;
	}

	if( ar != NULL )
	{
		if( (ar->mmap != NULL) && (ar->mmap != MAP_FAILED) )
			munmap(ar->mmap, ar->length);

		free(ar);
	}

	return NULL;
}
/*
 * Release everything owned by an archive handle: the index list, the
 * memory mapping, the file descriptor and the handle itself.
 * Passing NULL is allowed and does nothing.
 */
void lmo_close(lmo_archive_t *ar)
{
	lmo_entry_t *cur;
	lmo_entry_t *following;

	if( ar == NULL )
		return;

	/* walk the list, remembering the successor before freeing a node */
	for( cur = ar->index; cur != NULL; cur = following )
	{
		following = cur->next;
		free(cur);
	}

	if( (ar->mmap != NULL) && (ar->mmap != MAP_FAILED) )
		munmap(ar->mmap, ar->length);

	close(ar->fd);
	free(ar);
}
/*
 * Look up `key` in the archive and copy its value into `dest`.
 *
 * ar:   archive handle from lmo_open()
 * key:  NUL terminated key string
 * dest: destination buffer
 * len:  capacity of dest in bytes
 *
 * At most `len` bytes are copied; the data is NOT NUL terminated.
 * Returns the number of bytes copied, or -1 if the key is not present,
 * an argument is invalid, or the matching record points outside the
 * mapped data.
 */
int lmo_lookup(lmo_archive_t *ar, const char *key, char *dest, int len)
{
	uint32_t look_key;
	int copy_len;
	lmo_entry_t *entry;

	/* a negative len would turn into a huge unsigned value below */
	if( (ar == NULL) || (key == NULL) || (dest == NULL) || (len < 0) )
		return -1;

	look_key = sfh_hash(key, strlen(key));

	for( entry = ar->index; entry != NULL; entry = entry->next )
	{
		if( entry->key_id != look_key )
			continue;

		/* never read beyond the mapped region */
		if( (entry->offset > ar->length) ||
		    (entry->length > ar->length - entry->offset)
		)
			return -1;

		/* when the value is shorter than len it also fits into an int */
		copy_len = ((uint32_t) len > entry->length) ? (int) entry->length : len;
		memcpy(dest, &ar->mmap[entry->offset], copy_len);

		return copy_len;
	}

	return -1;
}

53
libs/lmo/src/lmo_hash.c Normal file
View file

@ -0,0 +1,53 @@
/*
* Hash function from http://www.azillionmonkeys.com/qed/hash.html
* Copyright (C) 2004-2008 by Paul Hsieh
*/
#include "lmo.h"
/*
 * Read the two bytes at p as one 16 bit value, lower address first
 * (least significant). Yields the same value as the sfh_get16() macro but
 * never performs an unaligned 16 bit load.
 */
static uint32_t sfh_load16(const char *p)
{
	const uint8_t *b = (const uint8_t *) p;

	return ((uint32_t) b[1] << 8) + (uint32_t) b[0];
}

/*
 * Paul Hsieh's "SuperFastHash" over the `len` bytes at `data`.
 *
 * Returns 0 if data is NULL or len <= 0.
 *
 * The trailing one or three bytes are deliberately treated as SIGNED
 * chars. Plain `char` is signed on some targets and unsigned on others, so
 * reading the tail through `char` gives different hashes for bytes >= 0x80
 * depending on the platform the code runs on. Fixing the interpretation
 * to signed keeps the values produced where char is signed and makes every
 * other platform agree with them.
 */
uint32_t sfh_hash(const char * data, int len)
{
	uint32_t hash = len, tmp;
	int rem;

	if (len <= 0 || data == NULL) return 0;

	rem = len & 3;
	len >>= 2;

	/* Main loop: consume four bytes per round */
	for (; len > 0; len--) {
		hash += sfh_load16(data);
		tmp   = (sfh_load16(data + 2) << 11) ^ hash;
		hash  = (hash << 16) ^ tmp;
		data += 2 * sizeof(uint16_t);
		hash += hash >> 11;
	}

	/* Handle the remaining one to three bytes */
	switch (rem) {
		case 3:
			hash += sfh_load16(data);
			hash ^= hash << 16;
			/* sign extend, then shift as unsigned (well defined) */
			hash ^= ((uint32_t)(int32_t)(signed char) data[sizeof(uint16_t)]) << 18;
			hash += hash >> 11;
			break;

		case 2:
			hash += sfh_load16(data);
			hash ^= hash << 11;
			hash += hash >> 17;
			break;

		case 1:
			hash += (uint32_t)(int32_t)(signed char) *data;
			hash ^= hash << 10;
			hash += hash >> 1;
	}

	/* Force "avalanching" of final 127 bits */
	hash ^= hash << 3;
	hash += hash >> 5;
	hash ^= hash << 4;
	hash += hash >> 17;
	hash ^= hash << 25;
	hash += hash >> 6;

	return hash;
}

58
libs/lmo/src/lmo_lookup.c Normal file
View file

@ -0,0 +1,58 @@
/*
* lmo - Lua Machine Objects - Lookup utility
*
* Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "lmo.h"
extern char _lmo_error[1024];
/*
 * Print an error message and terminate with exit status 1.
 * The message goes to stderr so that it can not be mistaken for a looked up
 * value on stdout.
 */
static void die(const char *msg)
{
	fprintf(stderr, "Error: %s\n", msg);
	exit(1);
}
/*
 * Print the command line synopsis to stderr and terminate with exit
 * status 1. `name` is the program name to show.
 */
static void usage(const char *name)
{
	fprintf(stderr, "Usage: %s input.lmo key\n", name);
	exit(1);
}
/*
 * lookup input.lmo key
 *
 * Opens the archive and prints the value stored for `key` followed by a
 * newline. Nothing is printed when the key is unknown. Exits with status 1
 * on usage errors or when the archive can not be opened.
 */
int main(int argc, char *argv[])
{
	char val[4096];
	int len;
	lmo_archive_t *ar = NULL;

	if( argc != 3 )
		usage(argv[0]);

	if( (ar = lmo_open(argv[1])) == NULL )
		die(lmo_error());

	/*
	 * lmo_lookup() copies raw bytes and does not NUL terminate them, so
	 * print exactly the number of bytes it reported.
	 */
	if( (len = lmo_lookup(ar, argv[2], val, sizeof(val))) > -1 )
		printf("%.*s\n", len, val);

	lmo_close(ar);

	return 0;
}

124
libs/lmo/src/lmo_lualib.c Normal file
View file

@ -0,0 +1,124 @@
/*
* lmo - Lua Machine Objects - Lua library binding
*
* Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "lmo_lualib.h"
extern char _lmo_error[1024];
/*
 * lmo.open(filename)
 *
 * Returns an archive userdata on success, or nil plus an error message.
 *
 * The userdata is created (and given its metatable) BEFORE the archive is
 * opened: lua_newuserdata() does not return NULL but raises a Lua error
 * when memory is exhausted, which would leak an already opened archive.
 * A userdata whose slot is still NULL is harmless because the __gc handler
 * skips NULL slots.
 */
static int lmo_L_open(lua_State *L) {
	const char *filename = luaL_checkstring(L, 1);
	lmo_archive_t **udata;

	udata = lua_newuserdata(L, sizeof(lmo_archive_t *));
	*udata = NULL;

	luaL_getmetatable(L, LMO_ARCHIVE_META);
	lua_setmetatable(L, -2);

	if( (*udata = lmo_open(filename)) != NULL )
		return 1;

	lua_pushnil(L);
	lua_pushstring(L, lmo_error());
	return 2;
}
/*
 * lmo.hash(string)
 *
 * Returns sfh_hash() of the argument as a Lua number. The length is taken
 * with strlen(), i.e. hashing stops at the first NUL byte.
 */
static int lmo_L_hash(lua_State *L) {
	const char *str = luaL_checkstring(L, 1);

	lua_pushnumber(L, sfh_hash(str, strlen(str)));
	return 1;
}
/*
 * archive:lookup(key)
 *
 * Returns the value stored for `key` as a string, or nil if the key is not
 * present. Raises an argument error when the archive has already been
 * closed (its slot is NULL then and must not be dereferenced).
 */
static int lmo_L_lookup(lua_State *L) {
	lmo_archive_t **ar = luaL_checkudata(L, 1, LMO_ARCHIVE_META);
	const char *key = luaL_checkstring(L, 2);
	uint32_t hash;
	lmo_entry_t *e;

	luaL_argcheck(L, *ar != NULL, 1, "archive is closed");

	hash = sfh_hash(key, strlen(key));

	for( e = (*ar)->index; e != NULL; e = e->next )
	{
		if( e->key_id == hash )
		{
			lua_pushlstring(L, &(*ar)->mmap[e->offset], e->length);
			return 1;
		}
	}

	lua_pushnil(L);
	return 1;
}
/*
 * archive:close() and __gc handler.
 *
 * Closes the archive held by the userdata, if any, and leaves the slot set
 * to NULL so that calling it again does nothing.
 */
static int lmo_L__gc(lua_State *L) {
	lmo_archive_t **slot = luaL_checkudata(L, 1, LMO_ARCHIVE_META);
	lmo_archive_t *archive = *slot;

	*slot = NULL;

	if( archive != NULL )
		lmo_close(archive);

	return 0;
}
/*
 * __tostring handler: returns a short description of the archive.
 *
 * A closed archive (NULL slot) is reported as such instead of being
 * dereferenced. The length is cast to int because lua_pushfstring()'s
 * `%d` expects an int argument.
 */
static int lmo_L__tostring(lua_State *L) {
	lmo_archive_t **ar = luaL_checkudata(L, 1, LMO_ARCHIVE_META);

	if( *ar == NULL )
		lua_pushstring(L, "LMO Archive (closed)");
	else
		lua_pushfstring(L, "LMO Archive (%d bytes)", (int) (*ar)->length);

	return 1;
}
/*
 * method table: functions registered in the archive metatable.
 * "close" and "__gc" share one handler, so an explicitly closed archive is
 * not closed a second time on garbage collection.
 */
static const luaL_reg M[] = {
{"close", lmo_L__gc},
{"lookup", lmo_L_lookup},
{"__tostring", lmo_L__tostring},
{"__gc", lmo_L__gc},
{NULL, NULL}
};
/*
 * module table: functions registered in the module table by luaopen_lmo().
 * The {NULL, NULL} entry terminates the list.
 */
static const luaL_reg R[] = {
{"open", lmo_L_open},
{"hash", lmo_L_hash},
{NULL, NULL}
};
/*
 * Module entry point.
 *
 * Registers the archive metatable (methods from M, __index pointing to
 * itself) and the module table (functions from R), publishes both as
 * globals under their registry names and returns the module table.
 *
 * The archive metatable is set up first and a copy of the module table is
 * kept on the stack: lua_setglobal() pops its value, so without that copy
 * `return 1` would hand back whatever happened to be on top of the stack
 * instead of the module.
 */
LUALIB_API int luaopen_lmo(lua_State *L) {
	/* metatable for archive userdata */
	luaL_newmetatable(L, LMO_ARCHIVE_META);
	luaL_register(L, NULL, M);
	lua_pushvalue(L, -1);
	lua_setfield(L, -2, "__index");
	lua_setglobal(L, LMO_ARCHIVE_META);

	/* module table */
	luaL_newmetatable(L, LMO_LUALIB_META);
	luaL_register(L, NULL, R);
	lua_pushvalue(L, -1);
	lua_setfield(L, -2, "__index");
	lua_pushvalue(L, -1);
	lua_setglobal(L, LMO_LUALIB_META);

	/* the remaining copy of the module table is the return value */
	return 1;
}

33
libs/lmo/src/lmo_lualib.h Normal file
View file

@ -0,0 +1,33 @@
/*
* lmo - Lua Machine Objects - Lua library header
*
* Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _LMO_LUALIB_H_
#define _LMO_LUALIB_H_
#include <lua.h>
#include <lualib.h>
#include <lauxlib.h>
#include "lmo.h"
/* Name under which the module table is registered and published. */
#define LMO_LUALIB_META "lmo"
/* Name of the metatable attached to archive userdata. */
#define LMO_ARCHIVE_META "lmo.archive"
/* Module entry point; sets up both tables named above. */
LUALIB_API int luaopen_lmo(lua_State *L);
#endif

199
libs/lmo/src/lmo_po2lmo.c Normal file
View file

@ -0,0 +1,199 @@
/*
* lmo - Lua Machine Objects - PO to LMO conversion tool
*
* Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "lmo.h"
/*
 * Report a fatal error on stderr and terminate with exit status 1.
 */
static void die(const char *msg)
{
	static const char fmt[] = "Error: %s\n";

	fprintf(stderr, fmt, msg);
	exit(1);
}
/*
 * Show the command line synopsis on stderr and terminate with exit
 * status 1. `name` is the program name to display.
 */
static void usage(const char *name)
{
	static const char fmt[] = "Usage: %s input.po output.lmo\n";

	fprintf(stderr, fmt, name);
	exit(1);
}
/*
 * fwrite() wrapper that terminates the program when not every requested
 * item could be written (the original only noticed when nothing at all was
 * written). Arguments are those of fwrite().
 */
static void print(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
	if( fwrite(ptr, size, nmemb, stream) != nmemb )
		die("Failed to write output file");
}
/*
 * Copy the contents of the first double quoted string found in `src` to
 * `dest`.
 *
 * A backslash is dropped and the character following it is copied
 * literally (so \" does not end the string). Only the first `len`
 * characters of src are examined, which also bounds the number of bytes
 * written to dest (including the terminator) to `len`.
 *
 * dest is always NUL terminated once an opening quote has been seen, even
 * if the closing quote is missing; dest is left untouched otherwise.
 *
 * Returns the length of the extracted string, or -1 if src contains no
 * opening quote within the examined range.
 */
static int extract_string(const char *src, char *dest, int len)
{
	const int src_len = (int) strlen(src);	/* evaluated once, not per round */
	int pos;
	int out = 0;		/* number of characters written to dest */
	int esc = 0;		/* previous character was an unescaped backslash */
	int started = 0;	/* opening quote has been seen */

	for( pos = 0; (pos < src_len) && (pos < len); pos++ )
	{
		const char c = src[pos];

		if( !started )
		{
			started = (c == '"');
			continue;
		}

		if( esc )
		{
			dest[out++] = c;
			esc = 0;
		}
		else if( c == '\\' )
		{
			esc = 1;
		}
		else if( c != '"' )
		{
			dest[out++] = c;
		}
		else
		{
			break;
		}
	}

	if( !started )
		return -1;

	/* out is at most len - 1 here: the opening quote itself is never copied */
	dest[out] = '\0';

	return out;
}
/*
 * po2lmo input.po output.lmo
 *
 * Reads msgid / msgstr pairs from the PO file and writes an archive made of
 *   1. the values, each padded with NUL bytes to a multiple of four bytes,
 *   2. one index record per entry (key hash, value hash, offset, length),
 *   3. the offset of the index as the final four bytes,
 * all numbers in network byte order. If no entry was written the output
 * file is removed again.
 *
 * Parser states:
 *   0  waiting for a `msgid "` line
 *   1  key read, waiting for the `msgstr "` line
 *   2  msgstr started with an empty string, collecting continuation lines
 *   3  entry complete, written out when the next line is processed
 *
 * NOTE(review): the line read while state == 3 is consumed by the flush
 * branch and never examined as a possible msgid, and state 2 treats every
 * line containing a quoted string as a continuation. Entries apparently
 * need a separating line between them - confirm against real .po input.
 */
int main(int argc, char *argv[])
{
	char line[4096];
	char key[4096];
	char val[4096];
	char tmp[4096];

	int state  = 0;
	int offset = 0;
	int length = 0;

	FILE *in;
	FILE *out;

	lmo_entry_t *head  = NULL;
	lmo_entry_t *entry = NULL;

	if( (argc != 3) || ((in = fopen(argv[1], "r")) == NULL) || ((out = fopen(argv[2], "w")) == NULL) )
		usage(argv[0]);

	/* clear every buffer with its own size, including the scratch buffer */
	memset(line, 0, sizeof(line));
	memset(key, 0, sizeof(key));
	memset(val, 0, sizeof(val));
	memset(tmp, 0, sizeof(tmp));

	/* one extra round after EOF lets a pending entry (state >= 2) be flushed */
	while( (NULL != fgets(line, sizeof(line), in)) || (state >= 2 && feof(in)) )
	{
		if( state == 0 && strstr(line, "msgid \"") == line )
		{
			switch(extract_string(line, key, sizeof(key)))
			{
				case -1:
					die("Syntax error in msgid");
				case 0:
					continue;
				default:
					state = 1;
			}
		}
		else if( state == 1 && strstr(line, "msgstr \"") == line )
		{
			switch(extract_string(line, val, sizeof(val)))
			{
				case -1:
					die("Syntax error in msgstr");
				case 0:
					state = 2;
					break;
				default:
					state = 3;
			}
		}
		else if( state == 2 )
		{
			switch(extract_string(line, tmp, sizeof(tmp)))
			{
				case -1:
					state = 3;
					break;
				default:
					/* refuse to overflow val instead of writing past it */
					if( strlen(val) + strlen(tmp) >= sizeof(val) )
						die("Translation string too long");

					strcat(val, tmp);
			}
		}
		else if( state == 3 )
		{
			if( strlen(key) > 0 && strlen(val) > 0 )
			{
				if( (entry = (lmo_entry_t *) malloc(sizeof(lmo_entry_t))) != NULL )
				{
					/* clear the whole record, not just pointer-size bytes */
					memset(entry, 0, sizeof(*entry));

					/* value length rounded up to a multiple of four */
					length = strlen(val) + ((4 - (strlen(val) % 4)) % 4);

					entry->key_id = htonl(sfh_hash(key, strlen(key)));
					entry->val_id = htonl(sfh_hash(val, strlen(val)));
					entry->offset = htonl(offset);
					entry->length = htonl(strlen(val));

					/* val is zero filled behind the string, giving the padding */
					print(val, length, 1, out);
					offset += length;

					entry->next = head;
					head = entry;
				}
				else
				{
					die("Out of memory");
				}
			}

			state = 0;
			memset(key, 0, sizeof(key));
			memset(val, 0, sizeof(val));
		}

		memset(line, 0, sizeof(line));
	}

	/* index records, newest entry first */
	entry = head;
	while( entry != NULL )
	{
		print(&entry->key_id, sizeof(uint32_t), 1, out);
		print(&entry->val_id, sizeof(uint32_t), 1, out);
		print(&entry->offset, sizeof(uint32_t), 1, out);
		print(&entry->length, sizeof(uint32_t), 1, out);
		entry = entry->next;
	}

	if( offset > 0 )
	{
		/* trailer: offset at which the index starts */
		offset = htonl(offset);
		print(&offset, sizeof(uint32_t), 1, out);

		/* push stdio's buffer to the kernel first, fsync() can not see it */
		fflush(out);
		fsync(fileno(out));
		fclose(out);
	}
	else
	{
		/* nothing was written: do not leave an empty archive behind */
		fclose(out);
		unlink(argv[2]);
	}

	fclose(in);
	return(0);
}

56
libs/lmo/standalone.mk Normal file
View file

@ -0,0 +1,56 @@
# Lua tool chain and installation directories.
LUAC = luac
LUAC_OPTIONS = -s
LUA_TARGET ?= source

LUA_MODULEDIR = /usr/local/share/lua/5.1
LUA_LIBRARYDIR = /usr/local/lib/lua/5.1

OS ?= $(shell uname)

# Locate Lua through pkg-config, trying the package names lua5.1, lua-5.1
# and lua in that order; LUA_LIBS falls back to the first static liblua.a
# found in the usual library directories.
# These are simply expanded (:=) so that pkg-config runs once while the
# makefile is read rather than on every expansion, i.e. every compile.
LUA_SHLIBS := $(shell pkg-config --silence-errors --libs lua5.1 || pkg-config --silence-errors --libs lua-5.1 || pkg-config --silence-errors --libs lua)
LUA_LIBS := $(if $(LUA_SHLIBS),$(LUA_SHLIBS),$(firstword $(wildcard /usr/lib/liblua.a /usr/local/lib/liblua.a /opt/local/lib/liblua.a)))
LUA_CFLAGS := $(shell pkg-config --silence-errors --cflags lua5.1 || pkg-config --silence-errors --cflags lua-5.1 || pkg-config --silence-errors --cflags lua)

# C tool chain.
CC = gcc
AR = ar
RANLIB = ranlib
CFLAGS = -O2
FPIC = -fPIC
EXTRA_CFLAGS = --std=gnu99
WFLAGS = -Wall -Werror -pedantic
CPPFLAGS =
COMPILE = $(CC) $(CPPFLAGS) $(CFLAGS) $(EXTRA_CFLAGS) $(WFLAGS)

# Flags for linking a loadable module differ on Darwin.
ifeq ($(OS),Darwin)
SHLIB_FLAGS = -bundle -undefined dynamic_lookup
else
SHLIB_FLAGS = -shared
endif

LINK = $(CC) $(LDFLAGS)
# Every target below is a command rather than a file, so all of them are
# declared phony (luabuild, gccbuild and luastrip were missing before).
.PHONY: all build luabuild gccbuild compile luacompile luasource luastrip clean luaclean

# Default goal.
all: build

# A build consists of the Lua part and the C part.
build: luabuild gccbuild

# LUA_TARGET selects how the Lua files are prepared: source, strip or compile.
luabuild: lua$(LUA_TARGET)

gccbuild: compile

# Hooks without a recipe; the including Makefile may attach one.
compile:

clean: luaclean
# Stage the files below root/ and lua/ into dist/.
# Both copies are best effort: their error output is discarded and a failure
# (for instance a missing source directory) is ignored. Any .svn directory
# that was copied along is removed afterwards.
luasource:
mkdir -p dist$(LUA_MODULEDIR)
cp -pR root/* dist 2>/dev/null || true
cp -pR lua/* dist$(LUA_MODULEDIR) 2>/dev/null || true
for i in $$(find dist -name .svn); do rm -rf $$i || true; done
# Stage the Lua sources, then rewrite every *.lua file below dist/ in place
# with comments removed: the perl one-liner reads the whole file, deletes
# --[[ ... ]] block comments and lines consisting only of a -- comment, and
# writes the result back to the same file.
luastrip: luasource
for i in $$(find dist -type f -name '*.lua'); do perl -e 'undef $$/; open( F, "< $$ARGV[0]" ) || die $$!; $$src = <F>; close F; $$src =~ s/--\[\[.*?\]\](--)?//gs; $$src =~ s/^\s*--.*?\n//gm; open( F, "> $$ARGV[0]" ) || die $$!; print F $$src; close F' $$i; done
# Stage the Lua sources, then compile every *.lua file below dist/ in place
# with $(LUAC), except files named debug.lua.
# The pattern is quoted so that the shell hands it to find unchanged; left
# unquoted it is expanded by the shell whenever the current directory
# contains *.lua files, which breaks the find expression.
luacompile: luasource
	for i in $$(find dist -name '*.lua' -not -name debug.lua); do $(LUAC) $(LUAC_OPTIONS) -o $$i $$i; done
# Remove the staging directory created by the lua* targets.
luaclean:
rm -rf dist