M README +2 -2
@@ 16,8 16,8 @@ Parser notes:
exist at the time of writing, as listed in "Parsing JSON is a
Minefield" (http://seriot.ch/parsing_json.php)
- * Parses JSON from string objects, explodes strings, parses in two
- passes: therefore not suitable for large input files
+ * Two-pass parser using naive exploded strings, therefore not very
+ fast and not suitable for large input files
* Only supports UTF-8 input, not UTF-16 or UTF-32. Doesn't check
that JSON strings are valid UTF-8 -- the caller must do that --
M test.sh +46 -2
@@ 5,13 5,19 @@ set -eu
pass=0
fail=0
-for testfile in testfiles/test_parsing/*.json ; do
+# Check that each of the parser test "pass" files (y_*.json) is
+# successfully parsed, and that each of the "reject" files (n_*.json)
+# is rejected. We also have a file merged-in.json which has a single
+# array containing all of the contents of all the "pass" files; this
+# should also be successfully parsed.
+
+for testfile in testfiles/test_parsing/*.json testfiles/merged/merged-in.json ; do
output=$(./test "$testfile")
base=$(basename $testfile)
case $base in
- y_*) if [ -n "$output" ]
+ y_*|merged*) if [ -n "$output" ]
then pass=$(($pass + 1))
echo "--- pass: $base"
else fail=$(($fail + 1))
@@ 29,6 35,44 @@ for testfile in testfiles/test_parsing/*
done
+# Process the merged file that has all of the "pass" file contents,
+# and check that the output is something that can also be parsed
+
+m_in=testfiles/merged/merged-in.json
+m_out=testfiles/merged/merged-out.json
+m_expected=testfiles/merged/merged-expected.json
+m_collapsed=testfiles/merged/expected-collapsed.json
+
+./test "$m_in" > "$m_out"
+
+reread=$(./test "$m_out")
+if [ -n "$reread" ]
+then pass=$(($pass + 1))
+     echo "--- pass: merged file conversion can be re-read"
+else fail=$(($fail + 1))
+     echo "*** FAIL: merged file conversion cannot be re-read"
+fi
+
+# Now check that the output from the above matches an expected file
+# that we generated and hand-checked earlier
+
+perl -p -e 's/\n//gs' < "$m_expected" > "$m_collapsed"
+echo >> "$m_collapsed" # to match actual output
+
+if cmp -s "$m_out" "$m_collapsed" ; then
+    pass=$(($pass + 1))
+    echo "--- pass: merged file contents match"
+else
+    fail=$(($fail + 1))
+    echo "*** FAIL: merged file contents differ"
+    echo
+    od -c "$m_out" > "$m_out.od"
+    od -c "$m_collapsed" > "$m_collapsed.od"
+    echo "Diff of char/octal dumps (output on left, expected on right):"
+    sdiff -w156 "$m_out.od" "$m_collapsed.od" || true # non-zero on difference; don't abort under set -e
+    rm "$m_out.od" "$m_collapsed.od"
+fi
+
echo
echo "Passed: $pass"
echo "Failed: $fail"
A => testfiles/merged/merged-expected.json +97 -0
@@ 0,0 1,97 @@
+[
+[[]],
+[],
+[""],
+["a"],
+[false],
+[null,1,"1",{}],
+[null],
+[1],
+[1],
+[1,null,null,null,2],
+[2],
+[0],
+[0],
+[4],
+[-1E-78],
+[200],
+[1.23E67],
+[0],
+[-123],
+[-1],
+[0],
+[1E22],
+[0.01],
+[100],
+[1.23E47],
+[1.23456E80],
+[0.01],
+[100],
+[123],
+[123.456789],
+{"asd":"sdf"},
+{"a":"b","a":"b"},
+{"a":"c","a":"b"},
+{},
+{"":0},
+{"foo\u0000bar":42},
+{"max":1E28,"min":-1E28},
+{"dfg":"fgh","asd":"sdf"},
+{"id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","x":[{"id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}]},
+{"a":[]},
+{"title":"Полтора Землекопа"},
+{"a":"b"},
+["`Īካ"],
+[""],
+[""],
+["\"\\/\b\f\n\r\t"],
+["\\u0000"],
+["\""],
+["a/*b*/c/*d//e"],
+["\\a"],
+["\\n"],
+["\u0012"],
+[""],
+["asd"],
+["asd"],
+[""],
+["new line"],
+[""],
+[""],
+[""],
+["\u0000"],
+[","],
+["π"],
+["asd "],
+" ",
+[""],
+["ࠡ"],
+["ģ"],
+["
"],
+["
"],
+["new\nline"],
+["aクリス"],
+[""],
+["⍂㈴⍂"],
+["\\"],
+["\""],
+["ꙭ"],
+[""],
+[""],
+[""],
+[""],
+[""],
+[""],
+["€𝄞"],
+["aa"],
+false,
+42,
+-0.1,
+null,
+"asd",
+true,
+"",
+["a"],
+[true],
+[]
+]
A => testfiles/merged/merged-in.json +102 -0
@@ 0,0 1,102 @@
+[
+[[] ],
+[],
+[""],
+["a"],
+[false],
+[null, 1, "1", {}],
+[null],
+[1
+],
+ [1],
+[1,null,null,null,2],
+[2] ,
+[0e+1],
+[0e1],
+[ 4],
+[-0.000000000000000000000000000000000000000000000000000000000000000000000000000001]
+,
+[20e1],
+[123e65],
+[-0],
+[-123],
+[-1],
+[-0],
+[1E22],
+[1E-2],
+[1E+2],
+[123e45],
+[123.456e78],
+[1e-2],
+[1e+2],
+[123],
+[123.456789],
+{"asd":"sdf"},
+{"a":"b","a":"b"},
+{"a":"b","a":"c"},
+{},
+{"":0},
+{"foo\u0000bar": 42},
+{ "min": -1.0e+28, "max": 1.0e+28 },
+{"asd":"sdf", "dfg":"fgh"},
+{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"},
+{"a":[]},
+{"title":"\u041f\u043e\u043b\u0442\u043e\u0440\u0430 \u0417\u0435\u043c\u043b\u0435\u043a\u043e\u043f\u0430" },
+{
+"a": "b"
+},
+["\u0060\u012a\u12AB"],
+["\uD801\udc37"],
+["\ud83d\ude39\ud83d\udc8d"],
+["\"\\\/\b\f\n\r\t"],
+["\\u0000"],
+["\""],
+["a/*b*/c/*d//e"],
+["\\a"],
+["\\n"],
+["\u0012"],
+["\uFFFF"],
+["asd"],
+[ "asd"],
+["\uDBFF\uDFFF"],
+["new\u00A0line"],
+[""],
+[""],
+[""],
+["\u0000"],
+["\u002c"],
+["π"],
+["asd "],
+" ",
+["\uD834\uDd1e"],
+["\u0821"],
+["\u0123"],
+["
"],
+["
"],
+["new\u000Aline"],
+["\u0061\u30af\u30EA\u30b9"],
+[""],
+["⍂㈴⍂"],
+["\u005C"],
+["\u0022"],
+["\uA66D"],
+["\uDBFF\uDFFE"],
+["\uD83F\uDFFE"],
+["\u200B"],
+["\u2064"],
+["\uFDD0"],
+["\uFFFE"],
+["€𝄞"],
+["aa"],
+false,
+42,
+-0.1,
+null,
+"asd",
+true,
+"",
+["a"]
+,
+[true],
+ []
+]