From 2b4530f9c535027816aa46d06a6a565b97bd305c Mon Sep 17 00:00:00 2001 From: Stefan Majewsky Date: Mon, 11 Aug 2025 12:38:24 +0200 Subject: add package jsonmatch --- jsonmatch/diff_test.go | 392 +++++++++++++++++++++++++++++++++++++++++++++++++ jsonmatch/interface.go | 279 +++++++++++++++++++++++++++++++++++ jsonmatch/machinery.go | 294 +++++++++++++++++++++++++++++++++++++ 3 files changed, 965 insertions(+) create mode 100644 jsonmatch/diff_test.go create mode 100644 jsonmatch/interface.go create mode 100644 jsonmatch/machinery.go (limited to 'jsonmatch') diff --git a/jsonmatch/diff_test.go b/jsonmatch/diff_test.go new file mode 100644 index 0000000..f0f4fdd --- /dev/null +++ b/jsonmatch/diff_test.go @@ -0,0 +1,392 @@ +// SPDX-FileCopyrightText: 2025 Stefan Majewsky +// SPDX-License-Identifier: Apache-2.0 + +package jsonmatch_test + +import ( + "encoding/json" + "errors" + "fmt" + "strings" + "testing" + + . "github.com/majewsky/gg/internal/test" + "github.com/majewsky/gg/jsonmatch" + . 
"github.com/majewsky/gg/option" +) + +// assert that types implement the expected interfaces +// (CaptureField needs dynamic casts because CaptureField() returns `any`) +var ( + _ jsonmatch.Diffable = jsonmatch.Array{} + _ jsonmatch.Diffable = jsonmatch.Object{} + _ jsonmatch.Diffable = jsonmatch.Null() + _ jsonmatch.Diffable = jsonmatch.Scalar("foo") + _ jsonmatch.Diffable = jsonmatch.Scalar(42) + _ jsonmatch.Diffable = jsonmatch.Scalar(false) + _ = jsonmatch.CaptureField(Some(1).AsPointer()).(json.Marshaler) + _ = jsonmatch.CaptureField(Some(1).AsPointer()).(json.Unmarshaler) +) + +func TestCanonicalizesActualPayload(t *testing.T) { + testCases := [][]byte{ + // all of these are functionally identical, so they should produce an empty diff + // against our expectations regardless of key order and whitespace + []byte(`{"data": {"qux":[5,null,15], "foo": 42, "bar": "hello world"}}`), + []byte(`{"data":{"bar":"hello world","foo":42,"qux":[5,null,15]}}`), + []byte(`{ + "data": { + "bar": "hello world", + "qux": [ + 5, + null, + 15 + ], + "foo": 42 + } + }`), + } + + for _, message := range testCases { + t.Logf("message = %q", message) + + // we test with several variants of `expected` using different underlying + // types that represent identical JSON payloads, but in different ways + match := jsonmatch.Object{ + "data": jsonmatch.Object{ + "foo": 42, + "bar": "hello world", + "qux": []any{5, nil, 15}, + }, + } + AssertEqual(t, match.DiffAgainst(message), nil) + + // changing the type of `data` to map[string]any does not change anything at all; + // using the jsonmatch.Object name on this level is mostly syntactic sugar to communicate intent + match = jsonmatch.Object{ + "data": map[string]any{ + "foo": 42, + "bar": "hello world", + "qux": []any{5, nil, 15}, + }, + } + AssertEqual(t, match.DiffAgainst(message), nil) + + // this is using subtypes that our logic cannot recurse into + // (map[opaqueString]any instead of map[string]any and []Option[int] instead of []any); 
+ // comparison will be less granular and only be able to fail on the level of the opaque subtype, but it will still work + type opaqueString string + match = jsonmatch.Object{ + "data": map[opaqueString]any{ + "foo": 42, + "bar": "hello world", + "qux": []Option[int]{Some(5), None[int](), Some(15)}, + }, + } + AssertEqual(t, match.DiffAgainst(message), nil) + + // this is using a specific struct type instead of a map[string]any, which results in a different serialization + // (map[string]any serializes with keys sorted alphabetically, but structs serialize with keys sorted by field declaration order; + // jsonmatch knows how to normalize this and thus correctly reports an empty diff because the serializations are identical except for field order) + match = jsonmatch.Object{ + "data": struct { + Foo int `json:"foo"` + Bar string `json:"bar"` + Qux []Option[int] `json:"qux"` + }{ + Foo: 42, + Bar: "hello world", + Qux: []Option[int]{Some(5), None[int](), Some(15)}, + }, + } + AssertEqual(t, match.DiffAgainst(message), nil) + + // to try and trip up the normalization shown above, this match deliberately contains an unmarshalable object; + // jsonmatch should recognize that marshaling and unmarshaling does not work and skip the normalization + match = jsonmatch.Object{ + "data": unmarshalableObject{}, + } + AssertEqual(t, match.DiffAgainst(message), []jsonmatch.Diff{{ + Kind: "type mismatch", + Pointer: "/data", + ExpectedJSON: ``, + ActualJSON: `{"bar":"hello world","foo":42,"qux":[5,null,15]}`, + }}) + } +} + +func TestCapturesFields(t *testing.T) { + const ( + uuid1 = "2cff2c65-f775-4ed5-8f86-be0998b19781" + uuid2 = "ce38aa5c-62ed-4367-a2f8-cbe2d73094a8" + ) + message := fmt.Appendf(nil, `{"objects":[{"id":"%s","tags":["foo"]},{"id":"%s","tags":["bar"]}]}`, uuid1, uuid2) + + // check that CaptureField() works as intended when contained within one of the supported container types + type opaqueString string + var ( + capturedUUID1 string + capturedUUID2 string + 
capturedTag1 opaqueString // check that capturing also works for custom types + ) + match := jsonmatch.Object{ + "objects": []jsonmatch.Object{ + { + "id": jsonmatch.CaptureField(&capturedUUID1), + "tags": []string{"foo"}, + }, + { + "id": jsonmatch.CaptureField(&capturedUUID2), + "tags": []any{jsonmatch.CaptureField(&capturedTag1)}, + }, + }, + } + + AssertEqual(t, match.DiffAgainst(message), nil) + AssertEqual(t, capturedUUID1, uuid1) + AssertEqual(t, capturedUUID2, uuid2) + AssertEqual(t, capturedTag1, "bar") + + // check that CaptureField() complains when unmarshaling JSON messages into incompatible types + var ( + capturedUUID3 int + ) + match = jsonmatch.Object{ + "objects": []jsonmatch.Object{ + { + "id": jsonmatch.CaptureField(&capturedUUID3), + "tags": []string{"foo"}, + }, + { + "id": uuid2, + "tags": []string{"bar"}, + }, + }, + } + + AssertEqual(t, match.DiffAgainst(message), []jsonmatch.Diff{{ + Kind: "cannot unmarshal into capture slot (json: cannot unmarshal string into Go value of type int)", + Pointer: "/objects/0/id", + ExpectedJSON: "", + ActualJSON: fmt.Sprintf("%q", uuid1), + }}) + + // check that CaptureField() does not work when contained within unsupported types + // + // This is a restriction that could be lifted in the future, but it would involve using advanced + // reflection shenanigans that complicate the implementation. The fact that this example uses + // somewhat contrived types to even be able to place a capture inside another structure shows that + // this restriction ought not be too problematic in practice. 
+ capturedUUID1 = "unset" + capturedUUID2 = "unset" + capturedTag1 = "unset" + match = jsonmatch.Object{ + "objects": []struct { + ID any `json:"id"` + Tags []any `json:"tags"` + }{ + { + ID: jsonmatch.CaptureField(&capturedUUID1), + Tags: []any{"foo"}, + }, + { + ID: jsonmatch.CaptureField(&capturedUUID2), + Tags: []any{jsonmatch.CaptureField(&capturedTag1)}, + }, + }, + } + + AssertEqual(t, match.DiffAgainst(message), []jsonmatch.Diff{{ + Kind: "value mismatch", + Pointer: "/objects", + ActualJSON: fmt.Sprintf(`[{"id":"%s","tags":["foo"]},{"id":"%s","tags":["bar"]}]`, uuid1, uuid2), + ExpectedJSON: `[{"id":"unset","tags":["foo"]},{"id":"unset","tags":["unset"]}]`, + }}) +} + +func TestFailsOnValueMismatch(t *testing.T) { + message := []byte(`{"users": [ + {"id":23,"name":"Alice","tags":[{"name":"admin"},{"name":"senior"}]}, + {"id":42,"name":"Bob","tags":[{"name":"support"}]} + ]}`) + match := jsonmatch.Object{ + "users": []map[string]any{ // also side-note, because we did not have it anywhere else, this covers recursion into []map[string]any + { + "id": 23, + "name": "Alicia", // should be "Alice" + "status": "fixing stuff", // unexpected field + "tags": []jsonmatch.Object{{"name": "administrator"}}, // name should be "admin"; second list entry missing + }, + { + // "id" field is missing + "name": "Bob", + "tags": []jsonmatch.Object{{"name": "support"}, {"name": "postmaster"}}, // unexpected list entry + }, + }, + } + + AssertEqual(t, match.DiffAgainst(message), []jsonmatch.Diff{ + { + Kind: "value mismatch", + Pointer: "/users/0/name", + ActualJSON: `"Alice"`, + ExpectedJSON: `"Alicia"`, + }, + { + Kind: "value mismatch", + Pointer: "/users/0/tags/0/name", + ActualJSON: `"admin"`, + ExpectedJSON: `"administrator"`, + }, + { + Kind: "value mismatch", + Pointer: "/users/0/tags/1", + ActualJSON: `{"name":"senior"}`, + ExpectedJSON: ``, + }, + { + Kind: "value mismatch", + Pointer: "/users/0/status", + ActualJSON: ``, + ExpectedJSON: `"fixing stuff"`, + }, + { + 
Kind: "value mismatch", + Pointer: "/users/1/id", + ActualJSON: `42`, + ExpectedJSON: ``, + }, + { + Kind: "value mismatch", + Pointer: "/users/1/tags/1", + ActualJSON: ``, + ExpectedJSON: `{"name":"postmaster"}`, + }, + }) +} + +func TestFailsOnTypeMismatch(t *testing.T) { + // several JSON values with incompatible JSON-level types, paired with their code-level representation + testCases := []struct { + JSON string + Data any + Scalar Option[jsonmatch.Diffable] // for testing calls to jsonmatch.Scalar().DiffAgainst() (see below) + }{ + { + JSON: `null`, + Data: nil, + Scalar: Some(jsonmatch.Null()), + }, + { + JSON: `true`, + Data: true, + Scalar: Some(jsonmatch.Scalar(true)), + }, + { + JSON: `42`, + Data: 42, + Scalar: Some(jsonmatch.Scalar(42)), + }, + { + JSON: `"foo"`, + Data: "foo", + Scalar: Some(jsonmatch.Scalar("foo")), + }, + { + JSON: `{"value":42}`, + Data: map[string]any{"value": 42}, + Scalar: None[jsonmatch.Diffable](), + }, + { + JSON: `[42]`, + Data: []any{42}, + Scalar: None[jsonmatch.Diffable](), + }} + + for idx1, tc1 := range testCases { + objectMessage := fmt.Appendf(nil, `{"payload":%s}`, tc1.JSON) + arrayMessage := fmt.Appendf(nil, `[1,%s]`, tc1.JSON) + plainMessage := []byte(tc1.JSON) + + for idx2, tc2 := range testCases { + // type mismatch inside of an object + objectMatch := jsonmatch.Object{"payload": tc2.Data} + if idx1 == idx2 { + // if we chose matching JSON and data types, then everything works as intended + AssertEqual(t, objectMatch.DiffAgainst(objectMessage), nil) + } else { + // otherwise we expect a "type mismatch" error + AssertEqual(t, objectMatch.DiffAgainst(objectMessage), []jsonmatch.Diff{{ + Kind: "type mismatch", + Pointer: "/payload", + ActualJSON: tc1.JSON, + ExpectedJSON: tc2.JSON, + }}) + } + + // type mismatch inside of an array + arrayMatch := jsonmatch.Array{1, tc2.Data} + if idx1 == idx2 { + AssertEqual(t, arrayMatch.DiffAgainst(arrayMessage), nil) + } else { + AssertEqual(t, 
arrayMatch.DiffAgainst(arrayMessage), []jsonmatch.Diff{{ + Kind: "type mismatch", + Pointer: "/1", + ActualJSON: tc1.JSON, + ExpectedJSON: tc2.JSON, + }}) + } + + // type mismatch for plain scalar + if scalarMatch, ok := tc2.Scalar.Unpack(); ok { + if idx1 == idx2 { + AssertEqual(t, scalarMatch.DiffAgainst(plainMessage), nil) + } else { + AssertEqual(t, scalarMatch.DiffAgainst(plainMessage), []jsonmatch.Diff{{ + Kind: "type mismatch", + Pointer: "", + ActualJSON: tc1.JSON, + ExpectedJSON: tc2.JSON, + }}) + } + } + } + } +} + +func TestFailsOnUnmarshalError(t *testing.T) { + // all of these things are definitely not valid JSON messages + testCases := [][]byte{ + // empty string + []byte(""), + // looks like text/plain + []byte("Not found\n"), + // looks like text/yaml + []byte("data:\n- 23\n- 42\n"), + // incomplete JSON + []byte(`{"data":[23,`), + // this one is not even a valid UTF-8 string + []byte("a\xffb\xC0\xAFc\xff"), + } + match := jsonmatch.Object{ + "data": jsonmatch.Array{23, 42}, + } + + for _, message := range testCases { + diffs := match.DiffAgainst(message) + if AssertEqual(t, len(diffs), 1) { + diff := diffs[0] + AssertEqual(t, strings.HasPrefix(diff.Kind, "unmarshal error ("), true) + AssertEqual(t, strings.HasSuffix(diff.Kind, ")"), true) + AssertEqual(t, diff.Pointer, "") + AssertEqual(t, diff.ExpectedJSON, `{"data":[23,42]}`) + AssertEqual(t, strings.ReplaceAll(diff.ActualJSON, "\uFFFD", ""), strings.ToValidUTF8(string(message), "")) + } + } +} + +type unmarshalableObject struct{} + +func (unmarshalableObject) MarshalJSON() ([]byte, error) { + return nil, errors.New("this object is unmarshalable") +} diff --git a/jsonmatch/interface.go b/jsonmatch/interface.go new file mode 100644 index 0000000..3c10cc3 --- /dev/null +++ b/jsonmatch/interface.go @@ -0,0 +1,279 @@ +// SPDX-FileCopyrightText: 2025 Stefan Majewsky +// SPDX-License-Identifier: Apache-2.0 + +// Package jsonmatch implements matching of encoded JSON payloads against fixed assertions. 
+// The interface is most suited for unit tests, and intended for functions that return encoded JSON payloads (such as HTTP API handlers). +// Below is an example of how package jsonmatch can be used together with only the standard library. +// +// In all likelihood, you will already have your own test assertion library to use on top of std. +// Package jsonmatch is intended to be low-level enough to be easy to integrate with whatever assertion library you like to use. +// +// import ( +// "net/http" +// "net/http/httptest" +// "testing" +// +// "github.com/majewsky/gg/jsonmatch" +// ) +// +// func TestJSONMatchOfResponseBody(t *testing.T) { +// // this example assumes that the implementation being tested +// // has an HTTP handler implementing GET /v1/things +// var h http.Handler = buildAPIHandler() +// +// // use net/http/httptest to run a request +// req := httptest.NewRequest(http.MethodGet, "/v1/things", nil) +// resp := httptest.NewRecorder() +// h.ServeHTTP(resp, req) +// if resp.Code != http.StatusOK { +// t.Error("unexpected error") +// } +// +// // check that the response payload contains the data that we expect +// expected := jsonmatch.Object{ +// "things": []jsonmatch.Object{ +// { "id": 1, "name": "First thing" }, +// { "id": 2, "name": "Second thing" }, +// }, +// } +// for _, diff := range expected.DiffAgainst(resp.Body.Bytes()) { +// if diff.Pointer == "" { +// t.Errorf("%s: expected %s, but got %s", diff.Kind, diff.ExpectedJSON, diff.ActualJSON) +// } else { +// t.Errorf("%s at %s: expected %s, but got %s", diff.Kind, diff.Pointer, diff.ExpectedJSON, diff.ActualJSON) +// } +// } +// } +// +// # Assertion format +// +// As shown in the example above, this package revolves around writing out assertions for how a JSON payload looks in your test's source code.
+// +// expected := jsonmatch.Object{ +// "things": []jsonmatch.Object{ +// { "id": 1, "name": "First thing" }, +// { "id": 2, "name": "Second thing" }, +// }, +// "keywords": jsonmatch.Array{"example", "test"}, +// } +// diffs := expected.DiffAgainst(actual) +// +// The example above demonstrates the recommended style: +// - All scalar values in the assertion (bools, numbers, strings and nulls) use the respective predeclared Go value types. +// - Objects use the jsonmatch.Object type instead of map[string]any. +// - Arrays of only objects use the []jsonmatch.Object type. +// - Other arrays use the jsonmatch.Array type instead of []any or more specific array/slice types. +// +// It is possible to write jsonmatch.Object as map[string]any and jsonmatch.Array as []any, like this: +// +// expected := map[string]any{ +// "things": []map[string]any{ +// { "id": 1, "name": "First thing" }, +// { "id": 2, "name": "Second thing" }, +// }, +// "keywords": []any{"example", "test"}, +// } +// diffs := jsonmatch.Object(expected).DiffAgainst(actual) +// +// We do not recommend this style, as using the jsonmatch.Object and jsonmatch.Array identifiers better communicates the intent of the literal. +// +// # Recommendation: Do not use complex types in assertions +// +// We recommend avoiding more specific types than basic maps, slices and predeclared value types in the assertion. +// It is tempting to reuse types from the implementation, but this risks repeating errors from the implementation in the test. +// Consider the following example: +// +// // from the implementation +// type Thing struct { +// ID int `json:"id"` +// Name string `json:"naem"` +// } +// +// expected := jsonmatch.Object{ +// "things": []Thing{ +// { ID: 1, Name: "First thing" }, +// { ID: 2, Name: "Second thing" }, +// }, +// "keywords": jsonmatch.Array{"example", "test"}, +// } +// diffs := expected.DiffAgainst(actual) +// +// In this example, we have made a mistake in the implementation. 
+// The field "name" has been misspelled, so it will be marshalled as "naem" instead.
+// Because the test serializes its expectation through the same type as the implementation, it will not be able to uncover this error.
+// This example might be a bit contrived, but keeping test logic separate from implementation logic is especially important for types using advanced marshalling logic through custom implementations of the json.Marshaler and json.Unmarshaler interfaces.
+//
+// # Capturing nondeterministic data
+//
+// Sometimes, JSON payloads may contain randomly-generated fields like UUIDs or non-deterministic data like timestamps that cannot be predicted when writing the test code.
+// For these situations, package jsonmatch provides the CaptureField function.
+// The example below shows a test exercising a PUT endpoint to create an object, capturing the object's ID while asserting on the rest of the response, and then using that ID to exercise a GET endpoint that displays the created object.
+//
+//	req1 := httptest.NewRequest(http.MethodPut, "/v1/things/new", strings.NewReader(`{"name":"hello"}`))
+//	// ...
+//
+//	var uuid string
+//	diffs := jsonmatch.Object {
+//		"thing": jsonmatch.Object {
+//			"id": jsonmatch.CaptureField(&uuid),
+//			"name": "hello",
+//		},
+//	}.DiffAgainst(resp1.Body.Bytes())
+//	// ...
+//
+//	req2 := httptest.NewRequest(http.MethodGet, "/v1/things", nil)
+//	// ...
+//
+//	diffs = jsonmatch.Object {
+//		"things": []jsonmatch.Object {
+//			{
+//				"id": uuid,
+//				"name": "hello",
+//			},
+//		},
+//	}.DiffAgainst(resp2.Body.Bytes())
+//	// ...
+package jsonmatch
+
+import (
+	"encoding/json"
+	"errors"
+)
+
+// Diffable is the common interface of types Object, Array, Scalar and Null from this package.
+// The DiffAgainst function compares the value contained in the Diffable against an encoded JSON payload.
+//
+// The implementation will try to generate diffs as granularly as possible.
+// For example:
+//
+//	expected := jsonmatch.Object{
+//		"things": []jsonmatch.Object{
+//			{ "id": 1, "name": "First thing" },
+//			{ "id": 2, "name": "Second thing" },
+//		},
+//	}
+//	actual := []byte(`{"things": [{"id": 1, "name": "First widget"}, {"id": 3, "name": "Second thing"}]}`)
+//	// this call...
+//	diffs := expected.DiffAgainst(actual)
+//	// ...will return something like this
+//	diffs = []jsonmatch.Diff{
+//		{ Kind: "value mismatch", Pointer: "/things/0/name", ExpectedJSON: `"First thing"`, ActualJSON: `"First widget"` },
+//		{ Kind: "value mismatch", Pointer: "/things/1/id", ExpectedJSON: "2", ActualJSON: "3" },
+//	}
+//
+// However, the implementation will only recurse into substructures of the following well-known types: jsonmatch.Object, map[string]any, jsonmatch.Array, []any, []map[string]any, []jsonmatch.Object.
+// Any other map, array, slice, struct or pointer type will be treated as a black box:
+// If its JSON serialization differs from that of the respective section of the actual payload, a diff will be generated for its entirety only, not for any specific subfields.
+type Diffable interface {
+	// DiffAgainst takes the encoded JSON payload and returns one Diff per
+	// detected difference; the result is empty when the payload matches.
+	DiffAgainst([]byte) []Diff
+}
+
+// compile-time assertions that the types of this package implement Diffable
+var (
+	_ Diffable = Array{}
+	_ Diffable = Object{}
+	_ Diffable = scalar{}
+)
+
+// Array implements diffing against an encoded JSON payload that is expected to contain an array.
+// Please refer to the package documentation for how to use this type.
+type Array []any
+
+// DiffAgainst implements the Diffable interface.
+// The receiver is converted to a plain []any before it is handed to the shared diffing entrypoint.
+func (a Array) DiffAgainst(buf []byte) []Diff {
+	return diffAgainst([]any(a), buf)
+}
+
+// Object implements diffing against an encoded JSON payload that is expected to contain an object.
+// Please refer to the package documentation for how to use this type.
+type Object map[string]any
+
+// DiffAgainst implements the Diffable interface.
+func (o Object) DiffAgainst(buf []byte) []Diff {
+	// hand the shared entrypoint a plain map so that its type switch sees map[string]any
+	var expected map[string]any = o
+	return diffAgainst(expected, buf)
+}
+
+// Null returns a Diffable for an encoded JSON payload that is expected to contain just the value `null`.
+// It is only needed on the top level of the JSON payload.
+// Within type Object or type Array, put a `nil` directly.
+func Null() Diffable {
+	return scalar{Value: nil}
+}
+
+// Scalar returns a Diffable for an encoded JSON payload that is expected to contain just a scalar value (a number, string or boolean).
+// It is only needed on the top level of the JSON payload.
+// Within type Object or type Array, put the value directly.
+func Scalar[S ScalarValue](value S) Diffable {
+	return scalar{Value: value}
+}
+
+// scalar is the Diffable behind Null() and Scalar().
+// Value holds the expected value; nil stands for JSON `null`.
+type scalar struct {
+	Value any
+}
+
+// DiffAgainst implements the Diffable interface.
+func (s scalar) DiffAgainst(buf []byte) []Diff {
+	expected := s.Value
+	return diffAgainst(expected, buf)
+}
+
+// ScalarValue is the type constraint listing every type that can be given to func Scalar:
+// booleans, all integer and floating-point kinds, and strings (including types derived from them).
+type ScalarValue interface {
+	~bool |
+		~int | ~int8 | ~int16 | ~int32 | ~int64 |
+		~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr |
+		~float32 | ~float64 |
+		~string
+}
+
+// Diff describes a single difference between the actual encoded JSON payload given to a DiffAgainst() call, and the expectation encoded in the object that DiffAgainst() was called on.
+// See type Diffable for details on how diffing works.
+type Diff struct {
+	// Kind explains the type of difference.
+	// No stability guarantee is given for the values that can occur in this field.
+	// Values in this field are expected to read well when formatted using fmt.Sprintf("%s at %s", diff.Kind, diff.Pointer).
+	Kind string
+	// Pointer explains where the difference occurred within the Diffable.
+	// If ExpectedJSON and ActualJSON refer to the whole Diffable and the whole encoded JSON payload, then Pointer is the empty string.
+	Pointer Pointer
+	// A serialization of the respective part of the Diffable, or an error message or type description wrapped in angle brackets.
+	ExpectedJSON string
+	// A serialization of the respective part of the actual payload.
+	ActualJSON string
+}
+
+// Pointer is a JSON pointer (RFC 6901) that references a particular JSON value relative to the root of the encoded JSON payload that was given to DiffAgainst().
+// It appears in type Diff.
+//
+// This type is intended to become synonymous with encoding/json/jsontext.Pointer once that type is stabilized.
+type Pointer string
+
+// CaptureField returns a capture slot that can be placed in a jsonmatch.Object or jsonmatch.Array instance to capture individual non-deterministic values during an assertion.
+// Please refer to the package documentation for details and usage examples.
+//
+// Capture slots only work inside data structures that DiffAgainst() knows how to recurse into.
+// Please refer to the documentation on type Diffable for details.
+func CaptureField[T any](target *T) any {
+	// NOTE: Generics are used in the signature only to force callers to pass
+	// `target` as a pointer. The slot itself stores the pointer as `any`,
+	// because a type without type arguments is easier to match by reflection.
+	return capturedField{PointerToTarget: target}
+}
+
+// capturedField is the capture slot returned by CaptureField().
+// PointerToTarget is the pointer that the matching part of the payload gets unmarshaled into.
+type capturedField struct {
+	PointerToTarget any
+}
+
+// MarshalJSON implements the json.Marshaler interface by transparently marshaling the contained value.
+//
+// Because of this, a `capturedField` looks like its payload when it is
+// serialized for a "type mismatch" or "value mismatch" error message.
+func (f capturedField) MarshalJSON() ([]byte, error) {
+	target := f.PointerToTarget
+	return json.Marshal(target)
+}
+
+// UnmarshalJSON implements the json.Unmarshaler interface by always throwing an error.
+//
+// This implementation ensures that `capturedField` is not placed into a
+// container that DiffAgainst() does not know how to recurse into.
+func (f capturedField) UnmarshalJSON(buf []byte) error {
+	return errors.New("cannot unmarshal into jsonmatch.CaptureField()")
+}
diff --git a/jsonmatch/machinery.go b/jsonmatch/machinery.go
new file mode 100644
index 0000000..172c82a
--- /dev/null
+++ b/jsonmatch/machinery.go
@@ -0,0 +1,294 @@
+// SPDX-FileCopyrightText: 2025 Stefan Majewsky
+// SPDX-License-Identifier: Apache-2.0
+
+package jsonmatch
+
+import (
+	"encoding/json"
+	"fmt"
+	"maps"
+	"slices"
+	"strconv"
+	"strings"
+
+	. "github.com/majewsky/gg/option"
+)
+
+// marshalExpectedForDiff serializes a part of the expectation for display in Diff.ExpectedJSON.
+// If the value cannot be marshaled, a placeholder in angle brackets naming its type is returned instead.
+func marshalExpectedForDiff(value any) string {
+	// `expected` values can technically contain any sort of nonsense,
+	// so we want to print something useful even if marshaling fails
+	buf, err := json.Marshal(value)
+	if err != nil {
+		// the format string needs a verb for `value`; with an empty format,
+		// Sprintf would only emit a "%!(EXTRA ...)" complaint
+		return fmt.Sprintf("<unmarshalable %T>", value)
+	}
+	return string(buf)
+}
+
+// marshalActualForDiff serializes a part of the decoded actual payload for display in Diff.ActualJSON.
+func marshalActualForDiff(value any) string {
+	// `actual` values are always safe to marshal because they were
+	// unmarshaled from JSON into `any` and thus can only contain JSON-safe types
+	buf, err := json.Marshal(value)
+	if err != nil {
+		// this line is therefore unreachable in tests and only exists as defense in depth
+		return fmt.Sprintf("<marshal error: %s>", err.Error())
+	}
+	return string(buf)
+}
+
+// Given a string that is probably a JSON message, look at the first non-blank
+// character to determine what kind of value the JSON message has on its top level.
+// The exact character returned does not matter; this is only used to check if two messages are vaguely of the same type.
+// Anything that does not start like a JSON value (including the angle-bracket
+// placeholders produced by the marshal helpers) is reported as '?'.
+func kindForJSONMessage(s string) byte {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		// defense in depth: this function should never be called on functionally empty inputs
+		return '?'
+	}
+	b := s[0]
+	switch b {
+	case '{', '[', '"', 'n':
+		return b // object, array, string or null respectively
+	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
+		return '0' // number (NOTE: leading + is not allowed, leading decimal dot is not allowed without a 0 before it)
+	case 't', 'f':
+		return 'b' // boolean
+	default:
+		return '?' // syntax error
+	}
+}
+
+// Entrypoint into this file coming from all DiffAgainst() implementations.
+// Decodes `buf` and compares it against `expected`. If `buf` is not valid
+// JSON, a single "unmarshal error" diff for the whole payload is returned.
+func diffAgainst(expected any, buf []byte) []Diff {
+	var actual any
+	err := json.Unmarshal(buf, &actual)
+	if err != nil {
+		return []Diff{{
+			Kind:         fmt.Sprintf("unmarshal error (%s)", err.Error()),
+			Pointer:      "",
+			ExpectedJSON: marshalExpectedForDiff(expected),
+			// the payload may not even be valid UTF-8, so make it printable
+			ActualJSON: strings.ToValidUTF8(string(buf), "\uFFFD"),
+		}}
+	}
+
+	// While recursing through the object, we maintain a `path` that identifies
+	// where we are in the callstack, e.g. when comparing
+	//
+	//	actual   = { "foo": { "bar": [ 5, 23 ] } }
+	//	expected = { "foo": { "bar": [ 5, 42 ] } }
+	//
+	// we would generate a diff at Path = {"foo", "bar", 1}. Since diffs are
+	// usually rare, we only build Pointer strings out of these paths when we
+	// really need them. During recursion, `path` is maintained as a sequence of
+	// path fragments, most of which are constants to keep allocations to a
+	// minimum. WARNING: Because the `path` slice is heavily reused across nested
+	// function calls, it is not safe to store references to the `path` slice.
+	path := make([]pathElement, 0, 32)
+	return getDiffsForValue(path, expected, actual)
+}
+
+// pathElement is one step of the path described in diffAgainst():
+// an object key if Key is set, otherwise an array index in Index.
+type pathElement struct {
+	Key   Option[string]
+	Index int
+}
+
+// keyElement builds the path step for descending into the object field `key`.
+func keyElement(key string) pathElement { return pathElement{Some(key), 0} }
+
+// indexElement builds the path step for descending into the array element `idx`.
+func indexElement(idx int) pathElement { return pathElement{None[string](), idx} }
+
+// pathIntoPointer renders a path as a JSON pointer.
+// The empty path becomes the empty pointer, which refers to the whole document.
+func pathIntoPointer(path []pathElement) Pointer {
+	if len(path) == 0 {
+		return ""
+	}
+	// the leading empty fragment makes strings.Join emit the initial "/"
+	fragments := make([]string, len(path)+1)
+	fragments[0] = ""
+	for idx, elem := range path {
+		if key, ok := elem.Key.Unpack(); ok {
+			fragments[idx+1] = keyIntoPointerFragment(key)
+		} else {
+			fragments[idx+1] = strconv.Itoa(elem.Index)
+		}
+	}
+	return Pointer(strings.Join(fragments, "/"))
+}
+
+// keyIntoPointerFragment escapes an object key for use as one fragment of a JSON pointer.
+// The key is rendered like a JSON string without the surrounding quotes,
+// then "~" and "/" are escaped as "~0" and "~1".
+func keyIntoPointerFragment(key string) string {
+	// the error is deliberately ignored: `key` is a plain string, which
+	// encoding/json encodes without reporting errors
+	buf, _ := json.Marshal(key)
+	s := string(buf)
+	s = strings.TrimPrefix(s, "\"")
+	s = strings.TrimSuffix(s, "\"")
+	// "~" must be escaped first, otherwise the "~" of a freshly inserted "~1" would be escaped again
+	s = strings.ReplaceAll(s, "~", "~0")
+	s = strings.ReplaceAll(s, "/", "~1")
+	return s
+}
+
+// NOTE: getDiffsForValue is the main part of the recursion to generate the diff.
+// It compares one expected value against the corresponding decoded part of
+// the actual payload and returns all diffs found at or below `path`.
+// Well-known container types are recursed into, capture slots are filled,
+// and everything else is compared by its JSON serialization as a black box.
+func getDiffsForValue(path []pathElement, expected, actual any) []Diff {
+	// specialized handling for relevant recursible or capturable types
+	switch expected := expected.(type) {
+	case map[string]any:
+		return getDiffsForObject(path, expected, actual)
+	case Object:
+		return getDiffsForObject(path, expected, actual)
+	case []any:
+		return getDiffsForArray(path, expected, actual)
+	case Array:
+		// a nested jsonmatch.Array must be recursed into just like []any;
+		// otherwise capture slots inside it would never be filled
+		return getDiffsForArray(path, expected, actual)
+	case []map[string]any:
+		return getDiffsForArray(path, asAnySlice(expected), actual)
+	case []Object:
+		return getDiffsForArray(path, asAnySlice(expected), actual)
+	case capturedField:
+		return getDiffsForCapturedField(path, expected, actual)
+	case nil:
+		// this case is handled separately from the generic comparison below
+		if actual == nil {
+			return nil
+		}
+		return []Diff{{
+			Kind:         "type mismatch",
+			Pointer:      pathIntoPointer(path),
+			ExpectedJSON: "null",
+			ActualJSON:   marshalActualForDiff(actual),
+		}}
+	}
+
+	// generic handling for values or structures that we do not recurse into further:
+	// check that `expected` encodes to JSON in an equivalent way to `actual`
+	actualJSON := marshalActualForDiff(actual)
+	expectedJSON := marshalExpectedForDiff(expected)
+	if expectedJSON == actualJSON {
+		return nil
+	}
+
+	// if `expected` is using a custom type, we might have to do some heavy lifting:
+	// `actual` has all its objects as map[string]any, so keys serialize in sorted order,
+	// but `expected` might have struct type instead, where keys serialize in declaration order;
+	// this can be normalized by roundtripping `expectedJSON` through map[string]any once
+	// (if any of these steps fail, this is intentionally not an error because it's only a last resort)
+	var roundtrip any
+	if err := json.Unmarshal([]byte(expectedJSON), &roundtrip); err == nil {
+		if buf, err := json.Marshal(roundtrip); err == nil {
+			expectedJSON = string(buf)
+			if expectedJSON == actualJSON {
+				return nil
+			}
+		}
+	}
+
+	kind := "value mismatch"
+	if kindForJSONMessage(actualJSON) != kindForJSONMessage(expectedJSON) {
+		kind = "type mismatch"
+	}
+	return []Diff{{
+		Kind:         kind,
+		Pointer:      pathIntoPointer(path),
+		ExpectedJSON: expectedJSON,
+		ActualJSON:   actualJSON,
+	}}
+}
+
+// asAnySlice copies a typed slice into a []any of the same length,
+// so that getDiffsForArray can recurse into its elements.
+func asAnySlice[T any](values []T) []any {
+	result := make([]any, len(values))
+	for idx, val := range values {
+		result[idx] = val
+	}
+	return result
+}
+
+// getDiffsForObject compares an expected object against `actual`.
+// If `actual` is not a JSON object, a single "type mismatch" diff is returned.
+func getDiffsForObject(path []pathElement, expected map[string]any, actual any) []Diff {
+	if actual, ok := actual.(map[string]any); ok {
+		return getDiffsForConfirmedObject(path, expected, actual)
+	}
+	return []Diff{{
+		Kind:         "type mismatch",
+		Pointer:      pathIntoPointer(path),
+		ExpectedJSON: marshalExpectedForDiff(expected),
+		ActualJSON:   marshalActualForDiff(actual),
+	}}
+}
+
+// getDiffsForConfirmedObject compares two objects field by field.
+// Keys are visited in sorted order to make the order of diffs deterministic:
+// first all keys of `actual`, then the keys that only exist in `expected`.
+// A field that exists only on one side is reported with an empty string on the other side.
+func getDiffsForConfirmedObject(path []pathElement, expected, actual map[string]any) (diffs []Diff) {
+	// recurse into all fields
+	for _, key := range slices.Sorted(maps.Keys(actual)) {
+		// NOTE: `subpath` may share its backing array with `path`, so it must
+		// only be used within this iteration
+		subpath := append(path, keyElement(key))
+		expectedValue, exists := expected[key]
+		if exists {
+			diffs = append(diffs, getDiffsForValue(subpath, expectedValue, actual[key])...)
+		} else {
+			diffs = append(diffs, Diff{
+				Kind:         "value mismatch",
+				Pointer:      pathIntoPointer(subpath),
+				ExpectedJSON: "",
+				ActualJSON:   marshalActualForDiff(actual[key]),
+			})
+		}
+	}
+	for _, key := range slices.Sorted(maps.Keys(expected)) {
+		_, exists := actual[key]
+		if !exists {
+			subpath := append(path, keyElement(key))
+			diffs = append(diffs, Diff{
+				Kind:         "value mismatch",
+				Pointer:      pathIntoPointer(subpath),
+				ExpectedJSON: marshalExpectedForDiff(expected[key]),
+				ActualJSON:   "",
+			})
+		}
+	}
+
+	return diffs
+}
+
+// getDiffsForArray compares an expected array against `actual`.
+// If `actual` is not a JSON array, a single "type mismatch" diff is returned.
+func getDiffsForArray(path []pathElement, expected []any, actual any) []Diff {
+	if actual, ok := actual.([]any); ok {
+		return getDiffsForConfirmedArray(path, expected, actual)
+	}
+	return []Diff{{
+		Kind:         "type mismatch",
+		Pointer:      pathIntoPointer(path),
+		ExpectedJSON: marshalExpectedForDiff(expected),
+		ActualJSON:   marshalActualForDiff(actual),
+	}}
+}
+
+// getDiffsForConfirmedArray compares two arrays element by element.
+// An element that exists only on one side (because the arrays differ in length)
+// is reported with an empty string on the other side.
+func getDiffsForConfirmedArray(path []pathElement, expected, actual []any) (diffs []Diff) {
+	// recurse into all elements
+	for idx := range max(len(actual), len(expected)) {
+		// NOTE: `subpath` may share its backing array with `path`, so it must
+		// only be used within this iteration
+		subpath := append(path, indexElement(idx))
+		switch {
+		case idx >= len(actual):
+			diffs = append(diffs, Diff{
+				Kind:         "value mismatch",
+				Pointer:      pathIntoPointer(subpath),
+				ActualJSON:   "",
+				ExpectedJSON: marshalExpectedForDiff(expected[idx]),
+			})
+		case idx >= len(expected):
+			diffs = append(diffs, Diff{
+				Kind:         "value mismatch",
+				Pointer:      pathIntoPointer(subpath),
+				ActualJSON:   marshalActualForDiff(actual[idx]),
+				ExpectedJSON: "",
+			})
+		default:
+			diffs = append(diffs, getDiffsForValue(subpath, expected[idx], actual[idx])...)
+		}
+	}
+
+	return diffs
+}
+
+// getDiffsForCapturedField fills a capture slot by unmarshaling the respective
+// part of the actual payload into the slot's target.
+// A diff is only generated if the payload does not fit into the target's type.
+func getDiffsForCapturedField(path []pathElement, expected capturedField, actual any) []Diff {
+	actualJSON := marshalActualForDiff(actual)
+	err := json.Unmarshal([]byte(actualJSON), expected.PointerToTarget)
+	if err != nil {
+		return []Diff{{
+			Kind:       fmt.Sprintf("cannot unmarshal into capture slot (%s)", err.Error()),
+			Pointer:    pathIntoPointer(path),
+			ActualJSON: actualJSON,
+			// describe the slot by its type; printing the pointer itself would
+			// only show a meaningless memory address
+			ExpectedJSON: fmt.Sprintf("<capture slot of type %T>", expected.PointerToTarget),
+		}}
+	}
+	return nil
+}
-- cgit v1.2.3