10da24f50102 — Chris Cannam 7 years ago
Add output comparison test
4 files changed, 247 insertions(+), 4 deletions(-)

M README
M test.sh
A => testfiles/merged/merged-expected.json
A => testfiles/merged/merged-in.json
M README +2 -2
@@ 16,8 16,8 @@ Parser notes:
    exist at the time of writing, as listed in "Parsing JSON is a
    Minefield" (http://seriot.ch/parsing_json.php)
  
- * Parses JSON from string objects, explodes strings, parses in two
-   passes: therefore not suitable for large input files
+ * Two-pass parser using naive exploded strings, therefore not very
+   fast and not suitable for large input files
 
  * Only supports UTF-8 input, not UTF-16 or UTF-32. Doesn't check
    that JSON strings are valid UTF-8 -- the caller must do that --

          
M test.sh +46 -2
@@ 5,13 5,19 @@ set -eu
 pass=0
 fail=0
 
-for testfile in testfiles/test_parsing/*.json ; do
+# Check that each of the parser test "pass" files (y_*.json) is
+# successfully parsed, and that each of the "reject" files (n_*.json)
+# is rejected. We also have a file merged-in.json which has a single
+# array containing all of the contents of all the "pass" files; this
+# should also be successfully parsed.
+
+for testfile in testfiles/test_parsing/*.json testfiles/merged/merged-in.json ; do
 
     output=$(./test "$testfile")
     base=$(basename $testfile)
     
     case $base in
-	y_*) if [ -n "$output" ]
+	y_*|merged*) if [ -n "$output" ]
 	     then pass=$(($pass + 1))
 		  echo "--- pass: $base"
 	     else fail=$(($fail + 1))

          
@@ 29,6 35,44 @@ for testfile in testfiles/test_parsing/*
     
 done
 
+# Process the merged file that has all of the "pass" file contents,
+# and check that the output is something that can also be parsed
+
+m_in=testfiles/merged/merged-in.json
+m_out=testfiles/merged/merged-out.json
+m_expected=testfiles/merged/merged-expected.json
+m_collapsed=testfiles/merged/expected-collapsed.json
+
+./test $m_in > $m_out
+
+reread=$(./test $m_out)
+if [ -n "$reread" ]
+then pass=$(($pass + 1))
+     echo "--- pass: merged file conversion can be re-read"
+else fail=$(($fail + 1))
+     echo "*** FAIL: merged file conversion cannot be re-read"
+fi
+
+# Now check that the output from the above matches an expected file
+# that we generated and hand-checked earlier
+
+cat $m_expected | perl -p -e 's/\n//gs' > $m_collapsed
+echo >> $m_collapsed # to match actual output
+
+if cmp -s $m_out $m_collapsed ; then
+    pass=$(($pass + 1))
+    echo "--- pass: merged file contents match"
+else
+    fail=$(($fail + 1))
+    echo "*** FAIL: merged file contents differ"
+    echo
+    od -c $m_out > $m_out.od
+    od -c $m_collapsed > $m_collapsed.od
+    echo "Diff of char/octal dumps (output on left, expected on right):"
+    sdiff -w156 $m_out.od $m_collapsed.od || true
+    rm $m_out.od $m_collapsed.od
+fi
+
 echo
 echo "Passed: $pass"
 echo "Failed: $fail"

          
A => testfiles/merged/merged-expected.json +97 -0
@@ 0,0 1,97 @@ 
+[
+[[]],
+[],
+[""],
+["a"],
+[false],
+[null,1,"1",{}],
+[null],
+[1],
+[1],
+[1,null,null,null,2],
+[2],
+[0],
+[0],
+[4],
+[-1E-78],
+[200],
+[1.23E67],
+[0],
+[-123],
+[-1],
+[0],
+[1E22],
+[0.01],
+[100],
+[1.23E47],
+[1.23456E80],
+[0.01],
+[100],
+[123],
+[123.456789],
+{"asd":"sdf"},
+{"a":"b","a":"b"},
+{"a":"c","a":"b"},
+{},
+{"":0},
+{"foo\u0000bar":42},
+{"max":1E28,"min":-1E28},
+{"dfg":"fgh","asd":"sdf"},
+{"id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","x":[{"id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}]},
+{"a":[]},
+{"title":"Полтора Землекопа"},
+{"a":"b"},
+["`Īካ"],
+[""],
+[""],
+["\"\\/\b\f\n\r\t"],
+["\\u0000"],
+["\""],
+["a/*b*/c/*d//e"],
+["\\a"],
+["\\n"],
+["\u0012"],
+["￿"],
+["asd"],
+["asd"],
+[""],
+["new line"],
+["􏿿"],
+["𛿿"],
+["￿"],
+["\u0000"],
+[","],
+["π"],
+["asd "],
+" ",
+[""],
+["ࠡ"],
+["ģ"],
+["
"],
+["
"],
+["new\nline"],
+["aクリス"],
+[""],
+["⍂㈴⍂"],
+["\\"],
+["\""],
+["ꙭ"],
+[""],
+[""],
+["​"],
+["⁤"],
+["﷐"],
+["￾"],
+["€𝄞"],
+["aa"],
+false,
+42,
+-0.1,
+null,
+"asd",
+true,
+"",
+["a"],
+[true],
+[]
+]

          
A => testfiles/merged/merged-in.json +102 -0
@@ 0,0 1,102 @@ 
+[
+[[]   ],
+[],
+[""],
+["a"],
+[false],
+[null, 1, "1", {}],
+[null],
+[1
+],
+ [1],
+[1,null,null,null,2],
+[2] ,
+[0e+1],
+[0e1],
+[ 4],
+[-0.000000000000000000000000000000000000000000000000000000000000000000000000000001]
+,
+[20e1],
+[123e65],
+[-0],
+[-123],
+[-1],
+[-0],
+[1E22],
+[1E-2],
+[1E+2],
+[123e45],
+[123.456e78],
+[1e-2],
+[1e+2],
+[123],
+[123.456789],
+{"asd":"sdf"},
+{"a":"b","a":"b"},
+{"a":"b","a":"c"},
+{},
+{"":0},
+{"foo\u0000bar": 42},
+{ "min": -1.0e+28, "max": 1.0e+28 },
+{"asd":"sdf", "dfg":"fgh"},
+{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"},
+{"a":[]},
+{"title":"\u041f\u043e\u043b\u0442\u043e\u0440\u0430 \u0417\u0435\u043c\u043b\u0435\u043a\u043e\u043f\u0430" },
+{
+"a": "b"
+},
+["\u0060\u012a\u12AB"],
+["\uD801\udc37"],
+["\ud83d\ude39\ud83d\udc8d"],
+["\"\\\/\b\f\n\r\t"],
+["\\u0000"],
+["\""],
+["a/*b*/c/*d//e"],
+["\\a"],
+["\\n"],
+["\u0012"],
+["\uFFFF"],
+["asd"],
+[ "asd"],
+["\uDBFF\uDFFF"],
+["new\u00A0line"],
+["􏿿"],
+["𛿿"],
+["￿"],
+["\u0000"],
+["\u002c"],
+["π"],
+["asd "],
+" ",
+["\uD834\uDd1e"],
+["\u0821"],
+["\u0123"],
+["
"],
+["
"],
+["new\u000Aline"],
+["\u0061\u30af\u30EA\u30b9"],
+[""],
+["⍂㈴⍂"],
+["\u005C"],
+["\u0022"],
+["\uA66D"],
+["\uDBFF\uDFFE"],
+["\uD83F\uDFFE"],
+["\u200B"],
+["\u2064"],
+["\uFDD0"],
+["\uFFFE"],
+["€𝄞"],
+["aa"],
+false,
+42,
+-0.1,
+null,
+"asd",
+true,
+"",
+["a"]
+,
+[true],
+ []
+]