# HG changeset patch # User Chris Cannam # Date 1492960850 -3600 # Sun Apr 23 16:20:50 2017 +0100 # Node ID 10da24f50102d14138fe375ed7f0e54a9595bdb8 # Parent 5b42cd83b539bd545b86b0e6d4e606ab0dbfd3b0 Add output comparison test diff --git a/README b/README --- a/README +++ b/README @@ -16,8 +16,8 @@ exist at the time of writing, as listed in "Parsing JSON is a Minefield" (http://seriot.ch/parsing_json.php) - * Parses JSON from string objects, explodes strings, parses in two - passes: therefore not suitable for large input files + * Two-pass parser using naive exploded strings, therefore not very + fast and not suitable for large input files * Only supports UTF-8 input, not UTF-16 or UTF-32. Doesn't check that JSON strings are valid UTF-8 -- the caller must do that -- diff --git a/test.sh b/test.sh --- a/test.sh +++ b/test.sh @@ -5,13 +5,19 @@ pass=0 fail=0 -for testfile in testfiles/test_parsing/*.json ; do +# Check that each of the parser test "pass" files (y_*.json) is +# successfully parsed, and that each of the "reject" files (n_*.json) +# is rejected. We also have a file merged-in.json which has a single +# array containing all of the contents of all the "pass" files; this +# should also be successfully parsed. + +for testfile in testfiles/test_parsing/*.json testfiles/merged/merged-in.json ; do output=$(./test "$testfile") base=$(basename $testfile) case $base in - y_*) if [ -n "$output" ] + y_*|merged*) if [ -n "$output" ] then pass=$(($pass + 1)) echo "--- pass: $base" else fail=$(($fail + 1)) @@ -29,6 +35,44 @@ done +# Process the merged file that has all of the "pass" file contents, +# and check that the output is something that can also be parsed + +m_in=testfiles/merged/merged-in.json +m_out=testfiles/merged/merged-out.json +m_expected=testfiles/merged/merged-expected.json +m_collapsed=testfiles/merged/expected-collapsed.json + +./test $m_in > $m_out + +reread=$(./test $m_out) +if [ -n "$reread" ] +then pass=$(($pass + 1)) + echo "--- pass: merged file conversion can be re-read" +else fail=$(($fail + 1)) + echo "*** FAIL: merged file conversion cannot be re-read" +fi + +# Now check that the output from the above matches an expected file +# that we generated and hand-checked earlier + +cat $m_expected | perl -p -e 's/\n//gs' > $m_collapsed +echo >> $m_collapsed # to match actual output + +if cmp -s $m_out $m_collapsed ; then + pass=$(($pass + 1)) + echo "--- pass: merged file contents match" +else + fail=$(($fail + 1)) + echo "*** FAIL: merged file contents differ" + echo + od -c $m_out > $m_out.od + od -c $m_collapsed > $m_collapsed.od + echo "Diff of char/octal dumps (output on left, expected on right):" + sdiff -w156 $m_out.od $m_collapsed.od || true + rm $m_out.od $m_collapsed.od +fi + echo echo "Passed: $pass" echo "Failed: $fail" diff --git a/testfiles/merged/merged-expected.json b/testfiles/merged/merged-expected.json new file mode 100644 --- /dev/null +++ b/testfiles/merged/merged-expected.json @@ -0,0 +1,97 @@ +[ +[[]], +[], +[""], +["a"], +[false], +[null,1,"1",{}], +[null], +[1], +[1], +[1,null,null,null,2], +[2], +[0], +[0], +[4], +[-1E-78], +[200], +[1.23E67], +[0], +[-123], +[-1], +[0], +[1E22], +[0.01], +[100], +[1.23E47], +[1.23456E80], +[0.01], +[100], +[123], +[123.456789], +{"asd":"sdf"}, +{"a":"b","a":"b"}, +{"a":"c","a":"b"}, +{}, +{"":0}, +{"foo\u0000bar":42}, +{"max":1E28,"min":-1E28}, +{"dfg":"fgh","asd":"sdf"}, +{"id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","x":[{"id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}]}, +{"a":[]}, +{"title":"Полтора Землекопа"}, +{"a":"b"}, +["`Īካ"], +[""], +[""], +["\"\\/\b\f\n\r\t"], +["\\u0000"], +["\""], +["a/*b*/c/*d//e"], +["\\a"], +["\\n"], +["\u0012"], +["￿"], +["asd"], +["asd"], +[""], +["new line"], +["􏿿"], +["𛿿"], +["￿"], +["\u0000"], +[","], +["π"], +["asd "], +" ", +[""], +["ࠡ"], +["ģ"], +["
"], +["
"], +["new\nline"], +["aクリス"], +[""], +["⍂㈴⍂"], +["\\"], +["\""], +["ꙭ"], +[""], +[""], +["​"], +["⁤"], +["﷐"], +["￾"], +["€𝄞"], +["aa"], +false, +42, +-0.1, +null, +"asd", +true, +"", +["a"], +[true], +[] +] diff --git a/testfiles/merged/merged-in.json b/testfiles/merged/merged-in.json new file mode 100644 --- /dev/null +++ b/testfiles/merged/merged-in.json @@ -0,0 +1,102 @@ +[ +[[] ], +[], +[""], +["a"], +[false], +[null, 1, "1", {}], +[null], +[1 +], + [1], +[1,null,null,null,2], +[2] , +[0e+1], +[0e1], +[ 4], +[-0.000000000000000000000000000000000000000000000000000000000000000000000000000001] +, +[20e1], +[123e65], +[-0], +[-123], +[-1], +[-0], +[1E22], +[1E-2], +[1E+2], +[123e45], +[123.456e78], +[1e-2], +[1e+2], +[123], +[123.456789], +{"asd":"sdf"}, +{"a":"b","a":"b"}, +{"a":"b","a":"c"}, +{}, +{"":0}, +{"foo\u0000bar": 42}, +{ "min": -1.0e+28, "max": 1.0e+28 }, +{"asd":"sdf", "dfg":"fgh"}, +{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}, +{"a":[]}, +{"title":"\u041f\u043e\u043b\u0442\u043e\u0440\u0430 \u0417\u0435\u043c\u043b\u0435\u043a\u043e\u043f\u0430" }, +{ +"a": "b" +}, +["\u0060\u012a\u12AB"], +["\uD801\udc37"], +["\ud83d\ude39\ud83d\udc8d"], +["\"\\\/\b\f\n\r\t"], +["\\u0000"], +["\""], +["a/*b*/c/*d//e"], +["\\a"], +["\\n"], +["\u0012"], +["\uFFFF"], +["asd"], +[ "asd"], +["\uDBFF\uDFFF"], +["new\u00A0line"], +["􏿿"], +["𛿿"], +["￿"], +["\u0000"], +["\u002c"], +["π"], +["asd "], +" ", +["\uD834\uDd1e"], +["\u0821"], +["\u0123"], +["
"], +["
"], +["new\u000Aline"], +["\u0061\u30af\u30EA\u30b9"], +[""], +["⍂㈴⍂"], +["\u005C"], +["\u0022"], +["\uA66D"], +["\uDBFF\uDFFE"], +["\uD83F\uDFFE"], +["\u200B"], +["\u2064"], +["\uFDD0"], +["\uFFFE"], +["€𝄞"], +["aa"], +false, +42, +-0.1, +null, +"asd", +true, +"", +["a"] +, +[true], + [] +]