aboutsummaryrefslogtreecommitdiff
path: root/columnar
diff options
context:
space:
mode:
authorStefan Majewsky <majewsky@gmx.net>2026-06-02 13:21:40 +0200
committerStefan Majewsky <majewsky@gmx.net>2026-06-02 13:21:40 +0200
commite8724868977c2d3a8f9c883789893aabc77cda9f (patch)
tree79c2ccf120f164753199191817f6d288af232712 /columnar
parentcadbdaeac471c21758b37c3117693a95e8010ba8 (diff)
downloadgo-gg-e8724868977c2d3a8f9c883789893aabc77cda9f.tar.gz
add package columnar
Diffstat (limited to 'columnar')
-rw-r--r--columnar/columnar.go174
-rw-r--r--columnar/columnar_test.go118
2 files changed, 292 insertions, 0 deletions
diff --git a/columnar/columnar.go b/columnar/columnar.go
new file mode 100644
index 0000000..7236901
--- /dev/null
+++ b/columnar/columnar.go
@@ -0,0 +1,174 @@
+// SPDX-FileCopyrightText: 2026 Stefan Majewsky <majewsky@gmx.net>
+// SPDX-License-Identifier: Apache-2.0
+
+// Package columnar provides efficient encoding of lists of objects in a columnar JSON format.
+//
+// The standard way of encoding a list of objects in JSON looks like this:
+//
+// [
+// { "id": 1, "first_name": "Alice", "last_name": "Allison", "married": false },
+// { "id": 2, "first_name": "Bob", "last_name": "Burger", "married": true },
+// { "id": 3, "first_name": "Carol", "last_name": "Callagher", "married": true }
+// ]
+//
+// Encoding the same list in a columnar fashion results in this:
+//
+// {
+// "id": [1, 2, 3],
+// "first_name": ["Alice", "Bob", "Carol"],
+// "last_name": ["Allison", "Burger", "Callagher"],
+// "married": [false, true, true]
+// }
+//
+// In this example, changing the encoding from row-wise to columnar reduced the
+// (minified) size of the JSON encoding from 202 to 124 bytes.
+//
+// This package eliminates the boilerplate code that would be associated with
+// converting a list of objects into the respective columnar form before
+// marshaling, and vice versa after unmarshaling.
+package columnar
+
+import (
+	"encoding/json"
+	"fmt"
+	"maps"
+	"reflect"
+	"slices"
+	"sync"
+)
+
+// NOTE: naming convention for variables
+//
+// single-letter = original type (t = reflect.Type, v = reflect.Value, f = reflect.StructField)
+// with "c" prefix = columnar type (ct = reflect.Type, cv = reflect.Value, cf = reflect.StructField)
+
+// Compile-time proof that *List[T] satisfies both JSON interfaces.
+var (
+	_ json.Marshaler   = (*List[bool])(nil)
+	_ json.Unmarshaler = (*List[bool])(nil)
+)
+
+// columnarListTypes memoizes the auto-generated columnar struct type for each
+// original struct type, keyed by the original type. Within this file it is
+// only read and written by getColumnarType.
+var columnarListTypes = make(map[reflect.Type]reflect.Type)
+
+// List provides columnar marshaling for lists of objects.
+// T must be a struct type or a pointer to one, otherwise all methods on this type will panic.
+//
+// A List marshals to a single JSON object holding one array per exported
+// field of T; the i-th entry of each array belongs to the i-th list element.
+// Unmarshaling reverses this and fails if the arrays differ in length.
+//
+// Please refer to the package docstring for how this type is marshaled.
+type List[T any] []T
+
+// foreachRelevantField invokes action once for every exported field of the
+// struct type t, in declaration order. Unexported fields are skipped.
+func foreachRelevantField(t reflect.Type, action func(f reflect.StructField)) {
+	for i, n := 0, t.NumField(); i < n; i++ {
+		field := t.Field(i)
+		if !field.IsExported() {
+			continue
+		}
+		action(field)
+	}
+}
+
+// columnarListTypesMu guards columnarListTypes. The JSON methods of List can
+// be called from several goroutines at once, and unsynchronized access to a
+// map from multiple goroutines is a data race.
+var columnarListTypesMu sync.RWMutex
+
+// getColumnarType returns the columnar counterpart of the struct type t: a
+// struct with one field per exported field of t, carrying the same name and
+// struct tag, whose type is a slice of the original field type.
+//
+// Results are cached per type. It panics if t is not a struct type; callers
+// are expected to strip pointer indirections first.
+func getColumnarType(t reflect.Type) reflect.Type {
+	if t.Kind() != reflect.Struct {
+		// Format the type itself rather than a zero value of it: for interface
+		// types the zero value is a nil interface, which %T renders as "<nil>".
+		panic(fmt.Sprintf("type %s is not a struct or pointer to a struct", t))
+	}
+
+	columnarListTypesMu.RLock()
+	result, ok := columnarListTypes[t]
+	columnarListTypesMu.RUnlock()
+	if ok {
+		return result
+	}
+
+	// Build the type outside of the lock. If two goroutines race to this
+	// point, both derive the type from the same field list and the later
+	// write merely replaces the cache entry with an equivalent one.
+	fields := make([]reflect.StructField, 0, t.NumField())
+	foreachRelevantField(t, func(f reflect.StructField) {
+		fields = append(fields, reflect.StructField{
+			Name: f.Name,
+			Type: reflect.SliceOf(f.Type),
+			Tag:  f.Tag,
+		})
+	})
+	result = reflect.StructOf(fields)
+
+	columnarListTypesMu.Lock()
+	columnarListTypes[t] = result
+	columnarListTypesMu.Unlock()
+	return result
+}
+
+// MarshalJSON implements the [json.Marshaler] interface.
+//
+// The list is transposed into a struct of slices (one slice per exported
+// field of T, each as long as the list) which is then marshaled with
+// [json.Marshal], so the struct tags of T apply to the columns.
+//
+// An error is returned if T has no exported fields or if the list contains a
+// nil pointer. It panics if T is neither a struct nor a pointer to one.
+func (l List[T]) MarshalJSON() ([]byte, error) {
+	t := reflect.TypeFor[T]()
+	for t.Kind() == reflect.Pointer {
+		t = t.Elem()
+	}
+	ct := getColumnarType(t)
+	cv := reflect.New(ct).Elem()
+
+	// allocate every column up front and remember it by field name
+	columns := make(map[string]reflect.Value, t.NumField())
+	foreachRelevantField(t, func(f reflect.StructField) {
+		column := reflect.MakeSlice(reflect.SliceOf(f.Type), len(l), len(l))
+		cv.FieldByName(f.Name).Set(column)
+		columns[f.Name] = column
+	})
+	if len(columns) == 0 {
+		zero := reflect.New(t).Elem().Interface()
+		return nil, fmt.Errorf("%[1]T has no exported fields", zero)
+	}
+
+	for idx, elem := range l {
+		v := reflect.ValueOf(elem)
+		for v.Kind() == reflect.Pointer {
+			// Dereferencing a nil pointer yields the zero reflect.Value, on
+			// which the field access below would panic.
+			if v.IsNil() {
+				return nil, fmt.Errorf("cannot marshal nil pointer at index %d", idx)
+			}
+			v = v.Elem()
+		}
+		foreachRelevantField(t, func(f reflect.StructField) {
+			columns[f.Name].Index(idx).Set(v.FieldByIndex(f.Index))
+		})
+	}
+
+	return json.Marshal(cv.Interface())
+}
+
+// UnmarshalJSON implements the [json.Unmarshaler] interface.
+//
+// The input is decoded into a struct of slices (one slice per exported field
+// of T) and then transposed back into a list. All columns must have the same
+// length, which becomes the length of the list; otherwise an error is
+// returned and the receiver is left untouched. Fields tagged `json:"-"` are
+// never decoded by encoding/json, so they are excluded from the length check
+// and keep their zero value.
+//
+// An error is also returned if T has no exported fields. It panics if T is
+// neither a struct nor a pointer to one.
+func (l *List[T]) UnmarshalJSON(buf []byte) error {
+	t := reflect.TypeFor[T]()
+	for t.Kind() == reflect.Pointer {
+		t = t.Elem()
+	}
+	ct := getColumnarType(t)
+	cv := reflect.New(ct)
+
+	err := json.Unmarshal(buf, cv.Interface())
+	if err != nil {
+		return err
+	}
+	cv = cv.Elem()
+
+	columns := make(map[string]reflect.Value, t.NumField())
+	lengths := make(map[int]int) // column length -> number of columns with that length
+	hasExportedFields := false
+	foreachRelevantField(t, func(f reflect.StructField) {
+		hasExportedFields = true
+		// encoding/json skips fields whose tag is exactly "-", so such a column
+		// is always empty and must not count as an inconsistent length.
+		if f.Tag.Get("json") == "-" {
+			return
+		}
+		column := cv.FieldByName(f.Name)
+		columns[f.Name] = column
+		lengths[column.Len()]++
+	})
+	if !hasExportedFields {
+		zero := reflect.New(t).Elem().Interface()
+		return fmt.Errorf("%[1]T has no exported fields", zero)
+	}
+
+	switch len(lengths) {
+	case 0:
+		// every exported field is excluded from JSON: nothing to decode
+		*l = make(List[T], 0)
+	case 1:
+		for length := range lengths {
+			*l = make(List[T], length)
+		}
+	default:
+		return fmt.Errorf("cannot unmarshal from columns with inconsistent lengths %v", slices.Sorted(maps.Keys(lengths)))
+	}
+
+	for idx := range *l {
+		v := reflect.ValueOf(&(*l)[idx]).Elem()
+		// allocate through every level of pointer indirection in T
+		for v.Kind() == reflect.Pointer {
+			v.Set(reflect.New(v.Type().Elem()))
+			v = v.Elem()
+		}
+		foreachRelevantField(t, func(f reflect.StructField) {
+			column, ok := columns[f.Name]
+			if !ok {
+				return // field is excluded from JSON
+			}
+			v.FieldByIndex(f.Index).Set(column.Index(idx))
+		})
+	}
+	return nil
+}
diff --git a/columnar/columnar_test.go b/columnar/columnar_test.go
new file mode 100644
index 0000000..17dc964
--- /dev/null
+++ b/columnar/columnar_test.go
@@ -0,0 +1,118 @@
+// SPDX-FileCopyrightText: 2026 Stefan Majewsky <majewsky@gmx.net>
+// SPDX-License-Identifier: Apache-2.0
+
+package columnar_test
+
+import (
+ "encoding/json"
+ "sync"
+ "testing"
+
+ "go.xyrillian.de/gg/columnar"
+ . "go.xyrillian.de/gg/internal/test"
+ "go.xyrillian.de/gg/jsonmatch"
+)
+
+// testSuccessfulJSONRoundtrip marshals list as a columnar.List, reports every
+// difference between the produced JSON and the expected encoding, and then
+// checks that unmarshaling that JSON yields the original list again.
+func testSuccessfulJSONRoundtrip[V any](t *testing.T, list []V, encoded jsonmatch.Diffable) {
+	t.Helper() // attribute failures to the calling test, not to this helper
+
+	buf, err := json.Marshal(columnar.List[V](list))
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, diff := range encoded.DiffAgainst(buf) {
+		t.Error(diff.String())
+	}
+
+	var unmarshaled columnar.List[V]
+	err = json.Unmarshal(buf, &unmarshaled)
+	if err != nil {
+		t.Fatal(err)
+	}
+	AssertEqual(t, []V(unmarshaled), list)
+}
+
+// TestJSONRoundtripBasic round-trips the example from the package docstring.
+func TestJSONRoundtripBasic(t *testing.T) {
+	type Person struct {
+		ID        int    `json:"id"`
+		FirstName string `json:"first_name"`
+		LastName  string `json:"last_name"`
+		Married   bool   `json:"married"`
+	}
+
+	people := []Person{
+		{ID: 1, FirstName: "Alice", LastName: "Allison", Married: false},
+		{ID: 2, FirstName: "Bob", LastName: "Burger", Married: true},
+		{ID: 3, FirstName: "Carol", LastName: "Callagher", Married: true},
+	}
+	want := jsonmatch.Object{
+		"id":         jsonmatch.Array{1, 2, 3},
+		"first_name": jsonmatch.Array{"Alice", "Bob", "Carol"},
+		"last_name":  jsonmatch.Array{"Allison", "Burger", "Callagher"},
+		"married":    jsonmatch.Array{false, true, true},
+	}
+
+	testSuccessfulJSONRoundtrip(t, people, want)
+}
+
+// TestJSONRoundtripWithSpecialFields covers an embedded pointer field, a field
+// without a JSON tag, and an unexported field.
+func TestJSONRoundtripWithSpecialFields(t *testing.T) {
+	type Point struct {
+		X float64 `json:"x"`
+		Y float64 `json:"y"`
+	}
+	type Event struct {
+		*Point    `json:"location"` // embedded field, also with pointer
+		Name      string            // without tag -> uses field name as key
+		nameMutex sync.RWMutex      //nolint:unused // not exported -> ignored
+	}
+
+	events := []Event{
+		{Point: &Point{X: 0, Y: 0}, Name: "origin"},
+		{Point: &Point{X: 2, Y: 4}, Name: "somewhere"},
+	}
+	want := jsonmatch.Object{
+		"location": jsonmatch.Array{
+			jsonmatch.Object{"x": 0, "y": 0},
+			jsonmatch.Object{"x": 2, "y": 4},
+		},
+		"Name": jsonmatch.Array{"origin", "somewhere"},
+	}
+
+	testSuccessfulJSONRoundtrip(t, events, want)
+}
+
+// TestJSONRoundtripResolvesPointers checks that list elements behind several
+// levels of pointer indirection are marshaled and unmarshaled like plain structs.
+func TestJSONRoundtripResolvesPointers(t *testing.T) {
+	type Record struct {
+		Foo int
+		Bar int
+	}
+
+	records := []***Record{
+		PointerTo(PointerTo(PointerTo(Record{1, 2}))),
+		PointerTo(PointerTo(PointerTo(Record{2, 4}))),
+		PointerTo(PointerTo(PointerTo(Record{3, 6}))),
+	}
+	want := jsonmatch.Object{
+		"Foo": jsonmatch.Array{1, 2, 3},
+		"Bar": jsonmatch.Array{2, 4, 6},
+	}
+
+	testSuccessfulJSONRoundtrip(t, records, want)
+}
+
+// TestJSONRoundtripErrors checks the errors reported when the element type
+// has no exported fields.
+func TestJSONRoundtripErrors(t *testing.T) {
+	type nothingPublic struct {
+		foo int
+		bar int
+	}
+
+	_, err := json.Marshal(columnar.List[nothingPublic]{{1, 2}})
+	if err == nil {
+		// fail cleanly instead of panicking on err.Error() below
+		t.Fatal("expected Marshal to fail for a type without exported fields")
+	}
+	AssertEqual(t, err.Error(), `json: error calling MarshalJSON for type columnar.List[go.xyrillian.de/gg/columnar_test.nothingPublic·5]: columnar_test.nothingPublic has no exported fields`)
+
+	err = json.Unmarshal([]byte(`{}`), PointerTo(columnar.List[nothingPublic]{}))
+	if err == nil {
+		t.Fatal("expected Unmarshal to fail for a type without exported fields")
+	}
+	AssertEqual(t, err.Error(), `columnar_test.nothingPublic has no exported fields`)
+}
+
+// TestJSONUnmarshalFromInconsistentLengths checks that columns of differing
+// lengths are rejected with an error listing the observed lengths.
+func TestJSONUnmarshalFromInconsistentLengths(t *testing.T) {
+	type Record struct {
+		Foo int
+		Bar int
+	}
+
+	err := json.Unmarshal([]byte(`{"Foo":[1,2],"Bar":[3,4,5]}`), PointerTo(columnar.List[Record]{}))
+	if err == nil {
+		// fail cleanly instead of panicking on err.Error() below
+		t.Fatal("expected Unmarshal to fail for columns of different lengths")
+	}
+	AssertEqual(t, err.Error(), `cannot unmarshal from columns with inconsistent lengths [2 3]`)
+}