diff options
Diffstat (limited to 'columnar')
| -rw-r--r-- | columnar/columnar.go | 174 | ||||
| -rw-r--r-- | columnar/columnar_test.go | 118 |
2 files changed, 292 insertions, 0 deletions
diff --git a/columnar/columnar.go b/columnar/columnar.go new file mode 100644 index 0000000..7236901 --- /dev/null +++ b/columnar/columnar.go @@ -0,0 +1,174 @@ +// SPDX-FileCopyrightText: 2026 Stefan Majewsky <majewsky@gmx.net> +// SPDX-License-Identifier: Apache-2.0 + +// Package columnar provides efficient encoding of lists of objects in a columnar JSON format. +// +// The standard way of encoding a list of objects in JSON looks like this: +// +// [ +// { "id": 1, "first_name": "Alice", "last_name": "Allison", "married": false }, +// { "id": 2, "first_name": "Bob", "last_name": "Burger", "married": true }, +// { "id": 3, "first_name": "Carol", "last_name": "Callagher", "married": true } +// ] +// +// Encoding the same list in a columnar fashion results in this: +// +// { +// "id": [1, 2, 3], +// "first_name": ["Alice", "Bob", "Carol"], +// "last_name": ["Allison", "Burger", "Callagher"], +// "married": [false, true, true] +// } +// +// In this example, changing the encoding from row-wise to columnar reduced the +// (minified) size of the JSON encoding from 202 to 124 bytes. +// +// This package eliminates the boilerplate code that would be associated with +// converting a list of objects into the respective columnar form before +// marshaling, and vice versa after unmarshaling. +package columnar + +import ( + "encoding/json" + "fmt" + "maps" + "reflect" + "slices" +) + +// NOTE: naming convention for variables +// +// single-letter = original type (t = reflect.Type, v = reflect.Value, f = reflect.StructField) +// with "c" prefix = columnar type (ct = reflect.Type, cv = reflect.Value, cf = reflect.StructField) + +// prove interface implementations +var _ interface { + json.Marshaler + json.Unmarshaler +} = &List[bool]{} + +// cache for auto-generated columnar struct types +var columnarListTypes = map[reflect.Type]reflect.Type{} + +// List provides columnar marshaling for lists of objects. 
+// T must be a struct type or a pointer to one, otherwise all methods on this type will panic. +// +// Please refer to the package docstring for how this type is marshaled. +type List[T any] []T + +func foreachRelevantField(t reflect.Type, action func(f reflect.StructField)) { + for idx := range t.NumField() { + f := t.Field(idx) + if f.PkgPath == "" { + action(f) + } + } +} + +func getColumnarType(t reflect.Type) reflect.Type { + if t.Kind() != reflect.Struct { + zero := reflect.New(t).Elem().Interface() + panic(fmt.Sprintf("type %T is not a struct or pointer to a struct", zero)) + } + + result, ok := columnarListTypes[t] + if ok { + return result + } + + var fields []reflect.StructField + foreachRelevantField(t, func(f reflect.StructField) { + fields = append(fields, reflect.StructField{ + Name: f.Name, + Type: reflect.SliceOf(f.Type), + Tag: f.Tag, + }) + }) + + result = reflect.StructOf(fields) + columnarListTypes[t] = result + return result +} + +// MarshalJSON implements the [json.Marshaler] interface. +func (l List[T]) MarshalJSON() ([]byte, error) { + t := reflect.TypeFor[T]() + for t.Kind() == reflect.Pointer { + t = t.Elem() + } + ct := getColumnarType(t) + cv := reflect.New(ct).Elem() + + columns := make(map[string]reflect.Value, t.NumField()) + foreachRelevantField(t, func(f reflect.StructField) { + column := reflect.MakeSlice(reflect.SliceOf(f.Type), len(l), len(l)) + cv.FieldByName(f.Name).Set(column) + columns[f.Name] = column + }) + if len(columns) == 0 { + zero := reflect.New(t).Elem().Interface() + return nil, fmt.Errorf("%[1]T has no exported fields", zero) + } + + for idx, elem := range l { + v := reflect.ValueOf(elem) + for v.Kind() == reflect.Pointer { + v = v.Elem() + } + foreachRelevantField(t, func(f reflect.StructField) { + columns[f.Name].Index(idx).Set(v.FieldByIndex(f.Index)) + }) + } + + return json.Marshal(cv.Interface()) +} + +// UnmarshalJSON implements the [json.Unmarshaler] interface. 
+func (l *List[T]) UnmarshalJSON(buf []byte) error { + t := reflect.TypeFor[T]() + for t.Kind() == reflect.Pointer { + t = t.Elem() + } + ct := getColumnarType(t) + cv := reflect.New(ct) + + err := json.Unmarshal(buf, cv.Interface()) + if err != nil { + return err + } + cv = cv.Elem() + + columns := make(map[string]reflect.Value, t.NumField()) + lengths := make(map[int]int) + foreachRelevantField(t, func(f reflect.StructField) { + column := cv.FieldByName(f.Name) + columns[f.Name] = column + lengths[column.Len()]++ + }) + + switch len(lengths) { + case 0: + zero := reflect.New(t).Elem().Interface() + return fmt.Errorf("%[1]T has no exported fields", zero) + case 1: + for length := range lengths { + *l = make(List[T], length) + break + } + default: + return fmt.Errorf("cannot unmarshal from columns with inconsistent lengths %v", slices.Sorted(maps.Keys(lengths))) + } + + for idx := range *l { + v := reflect.ValueOf(&(*l)[idx]).Elem() + for v.Kind() == reflect.Pointer { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + foreachRelevantField(t, func(f reflect.StructField) { + v.FieldByIndex(f.Index).Set(columns[f.Name].Index(idx)) + }) + } + return nil +} diff --git a/columnar/columnar_test.go b/columnar/columnar_test.go new file mode 100644 index 0000000..17dc964 --- /dev/null +++ b/columnar/columnar_test.go @@ -0,0 +1,118 @@ +// SPDX-FileCopyrightText: 2026 Stefan Majewsky <majewsky@gmx.net> +// SPDX-License-Identifier: Apache-2.0 + +package columnar_test + +import ( + "encoding/json" + "sync" + "testing" + + "go.xyrillian.de/gg/columnar" + . 
"go.xyrillian.de/gg/internal/test" + "go.xyrillian.de/gg/jsonmatch" +) + +func testSuccessfulJSONRoundtrip[V any](t *testing.T, list []V, encoded jsonmatch.Diffable) { + buf, err := json.Marshal(columnar.List[V](list)) + if err != nil { + t.Fatal(err) + } + for _, diff := range encoded.DiffAgainst(buf) { + t.Error(diff.String()) + } + + var unmarshaled columnar.List[V] + err = json.Unmarshal(buf, &unmarshaled) + if err != nil { + t.Fatal(err) + } + AssertEqual(t, []V(unmarshaled), list) +} + +func TestJSONRoundtripBasic(t *testing.T) { + // try the example from the package docstring + type Person struct { + ID int `json:"id"` + FirstName string `json:"first_name"` + LastName string `json:"last_name"` + Married bool `json:"married"` + } + + testSuccessfulJSONRoundtrip(t, + []Person{ + {ID: 1, FirstName: "Alice", LastName: "Allison", Married: false}, + {ID: 2, FirstName: "Bob", LastName: "Burger", Married: true}, + {ID: 3, FirstName: "Carol", LastName: "Callagher", Married: true}, + }, + jsonmatch.Object{ + "id": jsonmatch.Array{1, 2, 3}, + "first_name": jsonmatch.Array{"Alice", "Bob", "Carol"}, + "last_name": jsonmatch.Array{"Allison", "Burger", "Callagher"}, + "married": jsonmatch.Array{false, true, true}, + }, + ) +} + +func TestJSONRoundtripWithSpecialFields(t *testing.T) { + type Point struct { + X float64 `json:"x"` + Y float64 `json:"y"` + } + type Event struct { + *Point `json:"location"` // embedded field, also with pointer + Name string // without tag -> uses field name as key + nameMutex sync.RWMutex //nolint:unused // not exported -> ignored + } + + testSuccessfulJSONRoundtrip(t, + []Event{ + {Point: &Point{X: 0, Y: 0}, Name: "origin"}, + {Point: &Point{X: 2, Y: 4}, Name: "somewhere"}, + }, + jsonmatch.Object{ + "location": jsonmatch.Array{jsonmatch.Object{"x": 0, "y": 0}, jsonmatch.Object{"x": 2, "y": 4}}, + "Name": jsonmatch.Array{"origin", "somewhere"}, + }, + ) +} + +func TestJSONRoundtripResolvesPointers(t *testing.T) { + type Record struct { + Foo 
int + Bar int + } + testSuccessfulJSONRoundtrip(t, + []***Record{ + PointerTo(PointerTo(PointerTo(Record{1, 2}))), + PointerTo(PointerTo(PointerTo(Record{2, 4}))), + PointerTo(PointerTo(PointerTo(Record{3, 6}))), + }, + jsonmatch.Object{ + "Foo": jsonmatch.Array{1, 2, 3}, + "Bar": jsonmatch.Array{2, 4, 6}, + }, + ) +} + +func TestJSONRoundtripErrors(t *testing.T) { + type nothingPublic struct { + foo int + bar int + } + + _, err := json.Marshal(columnar.List[nothingPublic]{{1, 2}}) + AssertEqual(t, err.Error(), `json: error calling MarshalJSON for type columnar.List[go.xyrillian.de/gg/columnar_test.nothingPublic·5]: columnar_test.nothingPublic has no exported fields`) + err = json.Unmarshal([]byte(`{}`), PointerTo(columnar.List[nothingPublic]{})) + AssertEqual(t, err.Error(), `columnar_test.nothingPublic has no exported fields`) +} + +func TestJSONUnmarshalFromInconsistentLengths(t *testing.T) { + type Record struct { + Foo int + Bar int + } + + err := json.Unmarshal([]byte(`{"Foo":[1,2],"Bar":[3,4,5]}`), PointerTo(columnar.List[Record]{})) + AssertEqual(t, err.Error(), `cannot unmarshal from columns with inconsistent lengths [2 3]`) +} |
