libs/json: add proper support for decoding unicode escapes

This commit is contained in:
Jo-Philipp Wich 2013-12-03 15:04:32 +00:00
parent e97596f42d
commit 297d368ecc

View file

@ -15,16 +15,16 @@ $Id$
Decoder: Decoder:
Info: Info:
null will be decoded to luci.json.null if first parameter of Decoder() is true null will be decoded to luci.json.null if first parameter of Decoder() is true
Example: Example:
decoder = luci.json.Decoder() decoder = luci.json.Decoder()
luci.ltn12.pump.all(luci.ltn12.source.string("decodableJSON"), decoder:sink()) luci.ltn12.pump.all(luci.ltn12.source.string("decodableJSON"), decoder:sink())
luci.util.dumptable(decoder:get()) luci.util.dumptable(decoder:get())
Known issues: Known issues:
unicode escapes \uXXXX are converted to UTF-8; malformed surrogate sequences are not reported as errors
Encoder: Encoder:
Info: Info:
Accepts numbers, strings, nil, booleans as they are Accepts numbers, strings, nil, booleans as they are
@ -33,12 +33,13 @@ Encoder:
Mixed tables will lose their associative values during conversion
Iterator functions will be encoded as an array of their return values Iterator functions will be encoded as an array of their return values
Non-iterator functions will probably corrupt the encoder Non-iterator functions will probably corrupt the encoder
Example: Example:
encoder = luci.json.Encoder(encodableData) encoder = luci.json.Encoder(encodableData)
luci.ltn12.pump.all(encoder:source(), luci.ltn12.sink.file(io.open("someFile", "w")))
]]-- ]]--
local nixio = require "nixio"
local util = require "luci.util" local util = require "luci.util"
local table = require "table" local table = require "table"
local string = require "string" local string = require "string"
@ -54,6 +55,11 @@ local ipairs = ipairs
local next = next local next = next
local pcall = pcall local pcall = pcall
local band = nixio.bit.band
local bor = nixio.bit.bor
local rshift = nixio.bit.rshift
local char = string.char
local getmetatable = getmetatable local getmetatable = getmetatable
--- LuCI JSON-Library --- LuCI JSON-Library
@ -98,7 +104,7 @@ end
-- @name Encoder -- @name Encoder
-- @param data Lua-Object to be encoded. -- @param data Lua-Object to be encoded.
-- @param buffersize Blocksize of returned data source. -- @param buffersize Blocksize of returned data source.
-- @param fastescape Use non-standard escaping (don't escape control chars) -- @param fastescape Use non-standard escaping (don't escape control chars)
-- @return JSON-Encoder -- @return JSON-Encoder
Encoder = util.class() Encoder = util.class()
@ -107,7 +113,7 @@ function Encoder.__init__(self, data, buffersize, fastescape)
self.buffersize = buffersize or 512 self.buffersize = buffersize or 512
self.buffer = "" self.buffer = ""
self.fastescape = fastescape self.fastescape = fastescape
getmetatable(self).__call = Encoder.source getmetatable(self).__call = Encoder.source
end end
@ -122,19 +128,19 @@ function Encoder.source(self)
else else
return nil, data return nil, data
end end
end end
end end
--- Encode a single Lua value by handing it to the handler registered for its type.
-- @param data	Lua value to encode
-- @param start	When set, this is the outermost call: the remaining buffer
--				is yielded, followed by an empty yield
function Encoder.dispatch(self, data, start)
	local handler = self.parsers[type(data)]
	handler(self, data)

	-- Nested calls only append to the buffer; nothing to flush here.
	if not start then
		return
	end

	if #self.buffer > 0 then
		coroutine.yield(self.buffer)
	end

	-- Empty yield after the last buffered chunk.
	coroutine.yield()
end
@ -149,13 +155,13 @@ function Encoder.put(self, chunk)
coroutine.yield(self.buffer .. chunk:sub(written + 1, fbuffer)) coroutine.yield(self.buffer .. chunk:sub(written + 1, fbuffer))
written = fbuffer written = fbuffer
while #chunk - written > self.buffersize do while #chunk - written > self.buffersize do
fbuffer = written + self.buffersize fbuffer = written + self.buffersize
coroutine.yield(chunk:sub(written + 1, fbuffer)) coroutine.yield(chunk:sub(written + 1, fbuffer))
written = fbuffer written = fbuffer
end end
self.buffer = chunk:sub(written + 1) self.buffer = chunk:sub(written + 1)
else else
self.buffer = self.buffer .. chunk self.buffer = self.buffer .. chunk
@ -197,7 +203,7 @@ function Encoder.parse_iter(self, obj)
if type(obj) == "table" and (#obj == 0 and next(obj)) then if type(obj) == "table" and (#obj == 0 and next(obj)) then
self:put("{") self:put("{")
local first = true local first = true
for key, entry in pairs(obj) do for key, entry in pairs(obj) do
first = first or self:put(",") first = first or self:put(",")
first = first and false first = first and false
@ -205,12 +211,12 @@ function Encoder.parse_iter(self, obj)
self:put(":") self:put(":")
self:dispatch(entry) self:dispatch(entry)
end end
self:put("}") self:put("}")
else else
self:put("[") self:put("[")
local first = true local first = true
if type(obj) == "table" then if type(obj) == "table" then
for i=1, #obj do for i=1, #obj do
first = first or self:put(",") first = first or self:put(",")
@ -222,10 +228,10 @@ function Encoder.parse_iter(self, obj)
first = first or self:put(",") first = first or self:put(",")
first = first and nil first = first and nil
self:dispatch(entry) self:dispatch(entry)
end end
end end
self:put("]") self:put("]")
end end
end end
@ -236,7 +242,7 @@ Encoder.parsers = {
['string'] = Encoder.parse_string, ['string'] = Encoder.parse_string,
['boolean'] = Encoder.parse_bool, ['boolean'] = Encoder.parse_bool,
['function'] = Encoder.parse_iter ['function'] = Encoder.parse_iter
} }
--- Create a new JSON-Decoder. --- Create a new JSON-Decoder.
@ -270,37 +276,37 @@ end
--- Decode values from the given chunk, fetching more input when it runs dry.
-- @param chunk		Current input chunk
-- @param src_err	Error reported by the source; raised once the input is used up
-- @param strict	When set, return as soon as one value has been parsed
-- @return			In strict mode: remaining chunk and the parsed value.
--					Otherwise nothing; the value is stored in self.data
function Decoder.dispatch(self, chunk, src_err, strict)
	local value
	local have_value = false

	while chunk do
		-- Skip over empty chunks until there is data or the source ends.
		while chunk and #chunk < 1 do
			chunk = self:fetch()
		end

		if not chunk then
			assert(not strict, "Unexpected EOS")
			break
		end

		-- Pick the handler from the first character of the chunk.
		local lead = chunk:sub(1, 1)
		local handler = self.parsers[lead]
		if not handler and lead:match("%s") then
			handler = self.parse_space
		end
		if not handler and lead:match("[0-9-]") then
			handler = self.parse_number
		end
		if not handler then
			error("Unexpected char '%s'" % lead)
		end

		local parsed
		chunk, parsed = handler(self, chunk)

		-- Whitespace produces no value; anything else is the decoded object.
		if handler ~= self.parse_space then
			assert(not have_value, "Scope violation: Too many objects")
			value = parsed
			have_value = true

			if strict then
				return chunk, value
			end
		end
	end

	assert(not src_err, src_err)
	assert(have_value, "Unexpected EOS")

	self.data = value
end
@ -318,7 +324,7 @@ function Decoder.fetch_atleast(self, chunk, bytes)
assert(nchunk, "Unexpected EOS") assert(nchunk, "Unexpected EOS")
chunk = chunk .. nchunk chunk = chunk .. nchunk
end end
return chunk return chunk
end end
@ -339,7 +345,7 @@ end
function Decoder.parse_space(self, chunk) function Decoder.parse_space(self, chunk)
local start = chunk:find("[^%s]") local start = chunk:find("[^%s]")
while not start do while not start do
chunk = self:fetch() chunk = self:fetch()
if not chunk then if not chunk then
@ -347,13 +353,13 @@ function Decoder.parse_space(self, chunk)
end end
start = chunk:find("[^%s]") start = chunk:find("[^%s]")
end end
return chunk:sub(start) return chunk:sub(start)
end end
--- Consume a fixed literal from the start of the chunk.
-- @param chunk		Current input chunk
-- @param literal	Exact character sequence expected at the start of the chunk
-- @param value		Value to return for this literal
-- @return			Remaining chunk after the literal, and value
function Decoder.parse_literal(self, chunk, literal, value)
	local len = #literal

	-- Make sure enough input is buffered to compare the whole literal.
	chunk = self:fetch_atleast(chunk, len)

	local head = chunk:sub(1, len)
	assert(head == literal, "Invalid character sequence")

	return chunk:sub(len + 1), value
end
@ -392,7 +398,7 @@ function Decoder.parse_string(self, chunk)
local spos = chunk:find('[\\"]') local spos = chunk:find('[\\"]')
if spos then if spos then
str = str .. chunk:sub(1, spos - 1) str = str .. chunk:sub(1, spos - 1)
local char = chunk:sub(spos, spos) local char = chunk:sub(spos, spos)
if char == '"' then -- String end if char == '"' then -- String end
chunk = chunk:sub(spos + 1) chunk = chunk:sub(spos + 1)
@ -404,7 +410,7 @@ function Decoder.parse_string(self, chunk)
else else
str = str .. chunk str = str .. chunk
chunk = self:fetch() chunk = self:fetch()
assert(chunk, "Unexpected EOS while parsing a string") assert(chunk, "Unexpected EOS while parsing a string")
end end
end end
@ -412,12 +418,41 @@ function Decoder.parse_string(self, chunk)
end end
--- Convert a single \uXXXX escape into its UTF-8 byte sequence.
-- The escape is passed as the values of its two hex digit pairs.
-- JSON writes code points above U+FFFF as a surrogate pair of two consecutive
-- escapes. A high surrogate is therefore remembered on the decoder instance
-- and merged with the low surrogate passed to the next call, instead of
-- encoding each half separately (which is not well-formed UTF-8).
-- Limitation: a pending high surrogate is only cleared by the next call, so
-- malformed input with plain text between the two halves is still merged.
-- @param s1	Value of the first two hex digits (high byte)
-- @param s2	Value of the last two hex digits (low byte)
-- @return		UTF-8 encoded string; "" for a high surrogate still waiting
--				for its partner, "?" for each value that cannot be encoded
function Decoder.utf8_encode(self, s1, s2)
	local n = s1 * 256 + s2
	local lost = ""

	-- Surrogate pairing needs somewhere to keep state between calls; without
	-- a table receiver each escape is encoded on its own as before.
	if type(self) == "table" then
		local high = self.pending_surrogate
		self.pending_surrogate = nil

		if n >= 0xDC00 and n <= 0xDFFF then
			-- A low surrogate is only valid directly after a high surrogate.
			if not high then
				return "?"
			end
			n = 0x10000 + (high - 0xD800) * 0x400 + (n - 0xDC00)
		else
			-- The previous high surrogate never received its partner.
			if high then
				lost = "?"
			end
			if n >= 0xD800 and n <= 0xDBFF then
				self.pending_surrogate = n
				return lost
			end
		end
	end

	if n >= 0 and n <= 0x7F then
		return lost .. char(n)
	elseif n >= 0 and n <= 0x7FF then
		return lost .. char(
			bor(band(rshift(n,  6), 0x1F), 0xC0),
			bor(band(n,             0x3F), 0x80)
		)
	elseif n >= 0 and n <= 0xFFFF then
		return lost .. char(
			bor(band(rshift(n, 12), 0x0F), 0xE0),
			bor(band(rshift(n,  6), 0x3F), 0x80),
			bor(band(n,             0x3F), 0x80)
		)
	elseif n >= 0 and n <= 0x10FFFF then
		-- Only reachable for code points assembled from a surrogate pair.
		return lost .. char(
			bor(band(rshift(n, 18), 0x07), 0xF0),
			bor(band(rshift(n, 12), 0x3F), 0x80),
			bor(band(rshift(n,  6), 0x3F), 0x80),
			bor(band(n,             0x3F), 0x80)
		)
	else
		return lost .. "?"
	end
end
function Decoder.parse_escape(self, chunk) function Decoder.parse_escape(self, chunk)
local str = "" local str = ""
chunk = self:fetch_atleast(chunk:sub(2), 1) chunk = self:fetch_atleast(chunk:sub(2), 1)
local char = chunk:sub(1, 1) local char = chunk:sub(1, 1)
chunk = chunk:sub(2) chunk = chunk:sub(2)
if char == '"' then if char == '"' then
return chunk, '"' return chunk, '"'
elseif char == "\\" then elseif char == "\\" then
@ -427,9 +462,8 @@ function Decoder.parse_escape(self, chunk)
local s1, s2 = chunk:sub(1, 2), chunk:sub(3, 4) local s1, s2 = chunk:sub(1, 2), chunk:sub(3, 4)
s1, s2 = tonumber(s1, 16), tonumber(s2, 16) s1, s2 = tonumber(s1, 16), tonumber(s2, 16)
assert(s1 and s2, "Invalid Unicode character") assert(s1 and s2, "Invalid Unicode character")
-- ToDo: Unicode support return chunk:sub(5), self:utf8_encode(s1, s2)
return chunk:sub(5), s1 == 0 and string.char(s2) or ""
elseif char == "/" then elseif char == "/" then
return chunk, "/" return chunk, "/"
elseif char == "b" then elseif char == "b" then
@ -452,18 +486,18 @@ function Decoder.parse_array(self, chunk)
chunk = chunk:sub(2) chunk = chunk:sub(2)
local array = {} local array = {}
local nextp = 1 local nextp = 1
local chunk, object = self:parse_delimiter(chunk, "%]") local chunk, object = self:parse_delimiter(chunk, "%]")
if object then if object then
return chunk, array return chunk, array
end end
repeat repeat
chunk, object = self:dispatch(chunk, nil, true) chunk, object = self:dispatch(chunk, nil, true)
table.insert(array, nextp, object) table.insert(array, nextp, object)
nextp = nextp + 1 nextp = nextp + 1
chunk, object = self:parse_delimiter(chunk, ",%]") chunk, object = self:parse_delimiter(chunk, ",%]")
assert(object, "Delimiter expected") assert(object, "Delimiter expected")
until object == "]" until object == "]"
@ -486,12 +520,12 @@ function Decoder.parse_object(self, chunk)
repeat repeat
chunk = self:parse_space(chunk) chunk = self:parse_space(chunk)
assert(chunk, "Unexpected EOS") assert(chunk, "Unexpected EOS")
chunk, name = self:parse_string(chunk) chunk, name = self:parse_string(chunk)
chunk, object = self:parse_delimiter(chunk, ":") chunk, object = self:parse_delimiter(chunk, ":")
assert(object, "Separator expected") assert(object, "Separator expected")
chunk, object = self:dispatch(chunk, nil, true) chunk, object = self:dispatch(chunk, nil, true)
array[name] = object array[name] = object
@ -519,7 +553,7 @@ function Decoder.parse_delimiter(self, chunk, delimiter)
end end
Decoder.parsers = { Decoder.parsers = {
['"'] = Decoder.parse_string, ['"'] = Decoder.parse_string,
['t'] = Decoder.parse_true, ['t'] = Decoder.parse_true,
['f'] = Decoder.parse_false, ['f'] = Decoder.parse_false,