diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f0daedc --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,178 @@ +# Changes + +## 2.0.0-beta.4 + +- `Open` and `FromBytes` now accept options. +- `IncludeNetworksWithoutData` and `IncludeAliasedNetworks` now return a + `NetworksOption` rather than being one themselves. This was done to improve + the documentation organization. + +## 2.0.0-beta.3 - 2025-02-16 + +- `Open` will now fall back to loading the database in memory if the + file-system does not support `mmap`. Pull request by database64128. GitHub + #163. +- Made significant improvements to the Windows memory-map handling. GitHub + #162. +- Fix an integer overflow on large databases when using a 32-bit architecture. + See ipinfo/mmdbctl#33. + +## 2.0.0-beta.2 - 2024-11-14 + +- Allow negative indexes for arrays when using `DecodePath`. #152 +- Add `IncludeNetworksWithoutData` option for `Networks` and `NetworksWithin`. + #155 and #156 + +## 2.0.0-beta.1 - 2024-08-18 + +This is the first beta of the v2 releases. Go 1.23 is required. I don't expect +to do a final release until Go 1.24 is available. See #141 for the v2 roadmap. + +Notable changes: + +- `(*Reader).Lookup` now takes only the IP address and returns a `Result`. + `Lookup(ip, &rec)` would now become `Lookup(ip).Decode(&rec)`. +- `(*Reader).LookupNetwork` has been removed. To get the network for a result, + use `(Result).Prefix()`. +- `(*Reader).LookupOffset` now _takes_ an offset and returns a `Result`. + `Result` has an `Offset()` method that returns the offset value. + `(*Reader).Decode` has been removed. +- Use of `net.IP` and `*net.IPNet` has been replaced with `netip.Addr` and + `netip.Prefix`. +- You may now decode a particular path within a database record using + `(Result).DecodePath`. For instance, to decode just the country code in + GeoLite2 Country to a string called `code`, you might do something like + `Lookup(ip).DecodePath(&code, "country", "iso_code")`. 
Strings should be used + for map keys and ints for array indexes. +- `(*Reader).Networks` and `(*Reader).NetworksWithin` now return a Go 1.23 + iterator of `Result` values. Aliased networks are now skipped by default. If + you wish to include them, use the `IncludeAliasedNetworks` option. + +## 1.13.1 - 2024-06-28 + +- Return the `*net.IPNet` in canonical form when using `NetworksWithin` to look + up a network more specific than the one in the database. Previously, the `IP` + field on the `*net.IPNet` would be set to the IP from the lookup network + rather than the first IP of the network. +- `NetworksWithin` will now correctly handle an `*net.IPNet` parameter that is + not in canonical form. This issue would only occur if the `*net.IPNet` was + manually constructed, as `net.ParseCIDR` returns the value in canonical form + even if the input string is not. + +## 1.13.0 - 2024-06-03 + +- Go 1.21 or greater is now required. +- The error messages when decoding have been improved. #119 + +## 1.12.0 - 2023-08-01 + +- The `wasi` target is now built without memory-mapping support. Pull request + by Alex Kashintsev. GitHub #114. +- When decoding to a map of non-scalar, non-interface types such as a + `map[string]map[string]any`, the decoder failed to zero out the value for the + map elements, which could result in incorrect decoding. Reported by JT Olio. + GitHub #115. + +## 1.11.0 - 2023-06-18 + +- `wasm` and `wasip1` targets are now built without memory-mapping support. + Pull request by Randy Reddig. GitHub #110. + +**Full Changelog**: +https://github.com/oschwald/maxminddb-golang/compare/v1.10.0...v1.11.0 + +## 1.10.0 - 2022-08-07 + +- Set Go version in go.mod file to 1.18. + +## 1.9.0 - 2022-03-26 + +- Set the minimum Go version in the go.mod file to 1.17. +- Updated dependencies. +- Minor performance improvements to the custom deserializer feature added in + 1.8.0. 
+ +## 1.8.0 - 2020-11-23 + +- Added `maxminddb.SkipAliasedNetworks` option to `Networks` and + `NetworksWithin` methods. When set, this option will cause the iterator to + skip networks that are aliases of the IPv4 tree. +- Added experimental custom deserializer support. This allows much more control + over the deserialization. The API is subject to change and you should use at + your own risk. + +## 1.7.0 - 2020-06-13 + +- Add `NetworksWithin` method. This returns an iterator that traverses all + networks in the database that are contained in the given network. Pull + request by Olaf Alders. GitHub #65. + +## 1.6.0 - 2019-12-25 + +- This module now uses Go modules. Requested by Matthew Rothenberg. GitHub #49. +- Plan 9 is now supported. Pull request by Jacob Moody. GitHub #61. +- Documentation fixes. Pull request by Olaf Alders. GitHub #62. +- Thread-safety is now mentioned in the documentation. Requested by Ken + Sedgwick. GitHub #39. +- Fix off-by-one error in file offset safety check. Reported by Will Storey. + GitHub #63. + +## 1.5.0 - 2019-09-11 + +- Drop support for Go 1.7 and 1.8. +- Minor performance improvements. + +## 1.4.0 - 2019-08-28 + +- Add the method `LookupNetwork`. This returns the network that the record + belongs to as well as a boolean indicating whether there was a record for the + IP address in the database. GitHub #59. +- Improve performance. + +## 1.3.1 - 2019-08-28 + +- Fix issue with the finalizer running too early on Go 1.12 when using the + Verify method. Reported by Robert-André Mauchin. GitHub #55. +- Remove unnecessary call to reflect.ValueOf. PR by SenseyeDeveloper. GitHub + #53. + +## 1.3.0 - 2018-02-25 + +- The methods on the `maxminddb.Reader` struct now return an error if called on + a closed database reader. Previously, this could cause a segmentation + violation when using a memory-mapped file. 
+- The `Close` method on the `maxminddb.Reader` struct now sets the underlying + buffer to nil, even when using `FromBytes` or `Open` on Google App Engine. +- No longer uses constants from `syscall` + +## 1.2.1 - 2018-01-03 + +- Fix incorrect index being used when decoding into anonymous struct fields. PR + #42 by Andy Bursavich. + +## 1.2.0 - 2017-05-05 + +- The database decoder now does bound checking when decoding data from the + database. This is to help ensure that the reader does not panic when given a + corrupt database to decode. Closes #37. +- The reader will now return an error on a data structure with a depth greater + than 512. This is done to prevent the possibility of a stack overflow on a + cyclic data structure in a corrupt database. This matches the maximum depth + allowed by `libmaxminddb`. All MaxMind databases currently have a depth of + less than five. + +## 1.1.0 - 2016-12-31 + +- Added appengine build tag for Windows. When enabled, memory-mapping will be + disabled in the Windows build as it is for the non-Windows build. Pull + request #35 by Ingo Oeser. +- SetFinalizer is now used to unmap files if the user fails to close the + reader. Using `r.Close()` is still recommended for most use cases. +- Previously, an unsafe conversion between `[]byte` and string was used to + avoid unnecessary allocations when decoding struct keys. The decoder now + relies on a compiler optimization on `string([]byte)` map lookups to achieve + this rather than using `unsafe`. + +## 1.0.0 - 2016-11-09 + +New release for those using tagged releases. diff --git a/errors.go b/errors.go index f141f61..bffa4ca 100644 --- a/errors.go +++ b/errors.go @@ -1,46 +1,13 @@ package maxminddb -import ( - "fmt" - "reflect" -) - -// InvalidDatabaseError is returned when the database contains invalid data -// and cannot be parsed. 
-type InvalidDatabaseError struct { - message string -} - -func newOffsetError() InvalidDatabaseError { - return InvalidDatabaseError{"unexpected end of database"} -} - -func newInvalidDatabaseError(format string, args ...any) InvalidDatabaseError { - return InvalidDatabaseError{fmt.Sprintf(format, args...)} -} +import "github.com/oschwald/maxminddb-golang/v2/internal/mmdberrors" -func (e InvalidDatabaseError) Error() string { - return e.message -} +type ( + // InvalidDatabaseError is returned when the database contains invalid data + // and cannot be parsed. + InvalidDatabaseError = mmdberrors.InvalidDatabaseError -// UnmarshalTypeError is returned when the value in the database cannot be -// assigned to the specified data type. -type UnmarshalTypeError struct { - Type reflect.Type - Value string -} - -func newUnmarshalTypeStrError(value string, rType reflect.Type) UnmarshalTypeError { - return UnmarshalTypeError{ - Type: rType, - Value: value, - } -} - -func newUnmarshalTypeError(value any, rType reflect.Type) UnmarshalTypeError { - return newUnmarshalTypeStrError(fmt.Sprintf("%v (%T)", value, value), rType) -} - -func (e UnmarshalTypeError) Error() string { - return fmt.Sprintf("maxminddb: cannot unmarshal %s into type %s", e.Value, e.Type) -} + // UnmarshalTypeError is returned when the value in the database cannot be + // assigned to the specified data type. 
+ UnmarshalTypeError = mmdberrors.UnmarshalTypeError +) diff --git a/decoder.go b/internal/decoder/decoder.go similarity index 80% rename from decoder.go rename to internal/decoder/decoder.go index 273f170..9383d9e 100644 --- a/decoder.go +++ b/internal/decoder/decoder.go @@ -1,16 +1,25 @@ -package maxminddb +package decoder import ( "encoding/binary" + "errors" "fmt" "math" "math/big" "reflect" "sync" + + "github.com/oschwald/maxminddb-golang/v2/internal/mmdberrors" ) -type decoder struct { +type Cacher interface { + Load(key uint) (any, bool) + Store(key uint, value any) +} + +type Decoder struct { buffer []byte + cache Cacher } type dataType int @@ -41,9 +50,28 @@ const ( maximumDataStructureDepth = 512 ) -func (d *decoder) decode(offset uint, result reflect.Value, depth int) (uint, error) { +func New(buffer []byte, cache Cacher) Decoder { + return Decoder{buffer: buffer, cache: cache} +} + +func (d *Decoder) Decode(offset uint, v any) error { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Ptr || rv.IsNil() { + return errors.New("result param must be a pointer") + } + + if dser, ok := v.(deserializer); ok { + _, err := d.decodeToDeserializer(offset, dser, 0, false) + return err + } + + _, err := d.decode(offset, rv, 0) + return err +} + +func (d *Decoder) decode(offset uint, result reflect.Value, depth int) (uint, error) { if depth > maximumDataStructureDepth { - return 0, newInvalidDatabaseError( + return 0, mmdberrors.NewInvalidDatabaseError( "exceeded maximum data structure depth; database is likely corrupt", ) } @@ -59,14 +87,14 @@ func (d *decoder) decode(offset uint, result reflect.Value, depth int) (uint, er return d.decodeFromType(typeNum, size, newOffset, result, depth+1) } -func (d *decoder) decodeToDeserializer( +func (d *Decoder) decodeToDeserializer( offset uint, dser deserializer, depth int, getNext bool, ) (uint, error) { if depth > maximumDataStructureDepth { - return 0, newInvalidDatabaseError( + return 0, 
mmdberrors.NewInvalidDatabaseError( "exceeded maximum data structure depth; database is likely corrupt", ) } @@ -89,11 +117,16 @@ func (d *decoder) decodeToDeserializer( return d.decodeFromTypeToDeserializer(typeNum, size, newOffset, dser, depth+1) } -func (d *decoder) decodePath( +func (d *Decoder) DecodePath( offset uint, path []any, - result reflect.Value, + v any, ) error { + result := reflect.ValueOf(v) + if result.Kind() != reflect.Ptr || result.IsNil() { + return errors.New("result param must be a pointer") + } + PATH: for i, v := range path { var ( @@ -173,17 +206,17 @@ PATH: return err } -func (d *decoder) decodeCtrlData(offset uint) (dataType, uint, uint, error) { +func (d *Decoder) decodeCtrlData(offset uint) (dataType, uint, uint, error) { newOffset := offset + 1 if offset >= uint(len(d.buffer)) { - return 0, 0, 0, newOffsetError() + return 0, 0, 0, mmdberrors.NewOffsetError() } ctrlByte := d.buffer[offset] typeNum := dataType(ctrlByte >> 5) if typeNum == _Extended { if newOffset >= uint(len(d.buffer)) { - return 0, 0, 0, newOffsetError() + return 0, 0, 0, mmdberrors.NewOffsetError() } typeNum = dataType(d.buffer[newOffset] + 7) newOffset++ @@ -194,7 +227,7 @@ func (d *decoder) decodeCtrlData(offset uint) (dataType, uint, uint, error) { return typeNum, size, newOffset, err } -func (d *decoder) sizeFromCtrlByte( +func (d *Decoder) sizeFromCtrlByte( ctrlByte byte, offset uint, typeNum dataType, @@ -212,7 +245,7 @@ func (d *decoder) sizeFromCtrlByte( bytesToRead = size - 28 newOffset := offset + bytesToRead if newOffset > uint(len(d.buffer)) { - return 0, 0, newOffsetError() + return 0, 0, mmdberrors.NewOffsetError() } if size == 29 { return 29 + uint(d.buffer[offset]), offset + 1, nil @@ -229,7 +262,7 @@ func (d *decoder) sizeFromCtrlByte( return size, newOffset, nil } -func (d *decoder) decodeFromType( +func (d *Decoder) decodeFromType( dtype dataType, size uint, offset uint, @@ -252,7 +285,7 @@ func (d *decoder) decodeFromType( // For the remaining 
types, size is the byte size if offset+size > uint(len(d.buffer)) { - return 0, newOffsetError() + return 0, mmdberrors.NewOffsetError() } switch dtype { case _Bytes: @@ -274,11 +307,11 @@ func (d *decoder) decodeFromType( case _Uint128: return d.unmarshalUint128(size, offset, result) default: - return 0, newInvalidDatabaseError("unknown type: %d", dtype) + return 0, mmdberrors.NewInvalidDatabaseError("unknown type: %d", dtype) } } -func (d *decoder) decodeFromTypeToDeserializer( +func (d *Decoder) decodeFromTypeToDeserializer( dtype dataType, size uint, offset uint, @@ -305,7 +338,7 @@ func (d *decoder) decodeFromTypeToDeserializer( // For the remaining types, size is the byte size if offset+size > uint(len(d.buffer)) { - return 0, newOffsetError() + return 0, mmdberrors.NewOffsetError() } switch dtype { case _Bytes: @@ -321,7 +354,10 @@ func (d *decoder) decodeFromTypeToDeserializer( v, offset := d.decodeInt(size, offset) return offset, dser.Int32(int32(v)) case _String: - v, offset := d.decodeString(size, offset) + v, offset, err := d.decodeString(size, offset) + if err != nil { + return 0, err + } return offset, dser.String(v) case _Uint16: v, offset := d.decodeUint(size, offset) @@ -336,13 +372,13 @@ func (d *decoder) decodeFromTypeToDeserializer( v, offset := d.decodeUint128(size, offset) return offset, dser.Uint128(v) default: - return 0, newInvalidDatabaseError("unknown type: %d", dtype) + return 0, mmdberrors.NewInvalidDatabaseError("unknown type: %d", dtype) } } func unmarshalBool(size, offset uint, result reflect.Value) (uint, error) { if size > 1 { - return 0, newInvalidDatabaseError( + return 0, mmdberrors.NewInvalidDatabaseError( "the MaxMind DB file's data section contains bad data (bool size of %v)", size, ) @@ -359,7 +395,7 @@ func unmarshalBool(size, offset uint, result reflect.Value) (uint, error) { return newOffset, nil } } - return newOffset, newUnmarshalTypeError(value, result.Type()) + return newOffset, mmdberrors.NewUnmarshalTypeError(value, 
result.Type()) } // indirect follows pointers and create values as necessary. This is @@ -393,7 +429,7 @@ func indirect(result reflect.Value) reflect.Value { var sliceType = reflect.TypeOf([]byte{}) -func (d *decoder) unmarshalBytes(size, offset uint, result reflect.Value) (uint, error) { +func (d *Decoder) unmarshalBytes(size, offset uint, result reflect.Value) (uint, error) { value, newOffset := d.decodeBytes(size, offset) switch result.Kind() { @@ -408,12 +444,12 @@ func (d *decoder) unmarshalBytes(size, offset uint, result reflect.Value) (uint, return newOffset, nil } } - return newOffset, newUnmarshalTypeError(value, result.Type()) + return newOffset, mmdberrors.NewUnmarshalTypeError(value, result.Type()) } -func (d *decoder) unmarshalFloat32(size, offset uint, result reflect.Value) (uint, error) { +func (d *Decoder) unmarshalFloat32(size, offset uint, result reflect.Value) (uint, error) { if size != 4 { - return 0, newInvalidDatabaseError( + return 0, mmdberrors.NewInvalidDatabaseError( "the MaxMind DB file's data section contains bad data (float32 size of %v)", size, ) @@ -430,12 +466,12 @@ func (d *decoder) unmarshalFloat32(size, offset uint, result reflect.Value) (uin return newOffset, nil } } - return newOffset, newUnmarshalTypeError(value, result.Type()) + return newOffset, mmdberrors.NewUnmarshalTypeError(value, result.Type()) } -func (d *decoder) unmarshalFloat64(size, offset uint, result reflect.Value) (uint, error) { +func (d *Decoder) unmarshalFloat64(size, offset uint, result reflect.Value) (uint, error) { if size != 8 { - return 0, newInvalidDatabaseError( + return 0, mmdberrors.NewInvalidDatabaseError( "the MaxMind DB file's data section contains bad data (float 64 size of %v)", size, ) @@ -445,7 +481,7 @@ func (d *decoder) unmarshalFloat64(size, offset uint, result reflect.Value) (uin switch result.Kind() { case reflect.Float32, reflect.Float64: if result.OverflowFloat(value) { - return 0, newUnmarshalTypeError(value, result.Type()) + return 0, 
mmdberrors.NewUnmarshalTypeError(value, result.Type()) } result.SetFloat(value) return newOffset, nil @@ -455,12 +491,12 @@ func (d *decoder) unmarshalFloat64(size, offset uint, result reflect.Value) (uin return newOffset, nil } } - return newOffset, newUnmarshalTypeError(value, result.Type()) + return newOffset, mmdberrors.NewUnmarshalTypeError(value, result.Type()) } -func (d *decoder) unmarshalInt32(size, offset uint, result reflect.Value) (uint, error) { +func (d *Decoder) unmarshalInt32(size, offset uint, result reflect.Value) (uint, error) { if size > 4 { - return 0, newInvalidDatabaseError( + return 0, mmdberrors.NewInvalidDatabaseError( "the MaxMind DB file's data section contains bad data (int32 size of %v)", size, ) @@ -491,10 +527,10 @@ func (d *decoder) unmarshalInt32(size, offset uint, result reflect.Value) (uint, return newOffset, nil } } - return newOffset, newUnmarshalTypeError(value, result.Type()) + return newOffset, mmdberrors.NewUnmarshalTypeError(value, result.Type()) } -func (d *decoder) unmarshalMap( +func (d *Decoder) unmarshalMap( size uint, offset uint, result reflect.Value, @@ -503,7 +539,7 @@ func (d *decoder) unmarshalMap( result = indirect(result) switch result.Kind() { default: - return 0, newUnmarshalTypeStrError("map", result.Type()) + return 0, mmdberrors.NewUnmarshalTypeStrError("map", result.Type()) case reflect.Struct: return d.decodeStruct(size, offset, result, depth) case reflect.Map: @@ -515,11 +551,11 @@ func (d *decoder) unmarshalMap( result.Set(rv) return newOffset, err } - return 0, newUnmarshalTypeStrError("map", result.Type()) + return 0, mmdberrors.NewUnmarshalTypeStrError("map", result.Type()) } } -func (d *decoder) unmarshalPointer( +func (d *Decoder) unmarshalPointer( size, offset uint, result reflect.Value, depth int, @@ -532,7 +568,7 @@ func (d *decoder) unmarshalPointer( return newOffset, err } -func (d *decoder) unmarshalSlice( +func (d *Decoder) unmarshalSlice( size uint, offset uint, result reflect.Value, @@ 
-550,11 +586,14 @@ func (d *decoder) unmarshalSlice( return newOffset, err } } - return 0, newUnmarshalTypeStrError("array", result.Type()) + return 0, mmdberrors.NewUnmarshalTypeStrError("array", result.Type()) } -func (d *decoder) unmarshalString(size, offset uint, result reflect.Value) (uint, error) { - value, newOffset := d.decodeString(size, offset) +func (d *Decoder) unmarshalString(size, offset uint, result reflect.Value) (uint, error) { + value, newOffset, err := d.decodeString(size, offset) + if err != nil { + return 0, err + } switch result.Kind() { case reflect.String: @@ -566,16 +605,16 @@ func (d *decoder) unmarshalString(size, offset uint, result reflect.Value) (uint return newOffset, nil } } - return newOffset, newUnmarshalTypeError(value, result.Type()) + return newOffset, mmdberrors.NewUnmarshalTypeError(value, result.Type()) } -func (d *decoder) unmarshalUint( +func (d *Decoder) unmarshalUint( size, offset uint, result reflect.Value, uintType uint, ) (uint, error) { if size > uintType/8 { - return 0, newInvalidDatabaseError( + return 0, mmdberrors.NewInvalidDatabaseError( "the MaxMind DB file's data section contains bad data (uint%v size of %v)", uintType, size, @@ -607,14 +646,14 @@ func (d *decoder) unmarshalUint( return newOffset, nil } } - return newOffset, newUnmarshalTypeError(value, result.Type()) + return newOffset, mmdberrors.NewUnmarshalTypeError(value, result.Type()) } var bigIntType = reflect.TypeOf(big.Int{}) -func (d *decoder) unmarshalUint128(size, offset uint, result reflect.Value) (uint, error) { +func (d *Decoder) unmarshalUint128(size, offset uint, result reflect.Value) (uint, error) { if size > 16 { - return 0, newInvalidDatabaseError( + return 0, mmdberrors.NewInvalidDatabaseError( "the MaxMind DB file's data section contains bad data (uint128 size of %v)", size, ) @@ -633,33 +672,33 @@ func (d *decoder) unmarshalUint128(size, offset uint, result reflect.Value) (uin return newOffset, nil } } - return newOffset, 
newUnmarshalTypeError(value, result.Type()) + return newOffset, mmdberrors.NewUnmarshalTypeError(value, result.Type()) } func decodeBool(size, offset uint) (bool, uint) { return size != 0, offset } -func (d *decoder) decodeBytes(size, offset uint) ([]byte, uint) { +func (d *Decoder) decodeBytes(size, offset uint) ([]byte, uint) { newOffset := offset + size bytes := make([]byte, size) copy(bytes, d.buffer[offset:newOffset]) return bytes, newOffset } -func (d *decoder) decodeFloat64(size, offset uint) (float64, uint) { +func (d *Decoder) decodeFloat64(size, offset uint) (float64, uint) { newOffset := offset + size bits := binary.BigEndian.Uint64(d.buffer[offset:newOffset]) return math.Float64frombits(bits), newOffset } -func (d *decoder) decodeFloat32(size, offset uint) (float32, uint) { +func (d *Decoder) decodeFloat32(size, offset uint) (float32, uint) { newOffset := offset + size bits := binary.BigEndian.Uint32(d.buffer[offset:newOffset]) return math.Float32frombits(bits), newOffset } -func (d *decoder) decodeInt(size, offset uint) (int, uint) { +func (d *Decoder) decodeInt(size, offset uint) (int, uint) { newOffset := offset + size var val int32 for _, b := range d.buffer[offset:newOffset] { @@ -668,7 +707,7 @@ func (d *decoder) decodeInt(size, offset uint) (int, uint) { return int(val), newOffset } -func (d *decoder) decodeMap( +func (d *Decoder) decodeMap( size uint, offset uint, result reflect.Value, @@ -707,7 +746,7 @@ func (d *decoder) decodeMap( return offset, nil } -func (d *decoder) decodeMapToDeserializer( +func (d *Decoder) decodeMapToDeserializer( size uint, offset uint, dser deserializer, @@ -736,14 +775,14 @@ func (d *decoder) decodeMapToDeserializer( return offset, nil } -func (d *decoder) decodePointer( +func (d *Decoder) decodePointer( size uint, offset uint, ) (uint, uint, error) { pointerSize := ((size >> 3) & 0x3) + 1 newOffset := offset + pointerSize if newOffset > uint(len(d.buffer)) { - return 0, 0, newOffsetError() + return 0, 0, 
mmdberrors.NewOffsetError() } pointerBytes := d.buffer[offset:newOffset] var prefix uint @@ -771,7 +810,7 @@ func (d *decoder) decodePointer( return pointer, newOffset, nil } -func (d *decoder) decodeSlice( +func (d *Decoder) decodeSlice( size uint, offset uint, result reflect.Value, @@ -788,7 +827,7 @@ func (d *decoder) decodeSlice( return offset, nil } -func (d *decoder) decodeSliceToDeserializer( +func (d *Decoder) decodeSliceToDeserializer( size uint, offset uint, dser deserializer, @@ -811,12 +850,28 @@ func (d *decoder) decodeSliceToDeserializer( return offset, nil } -func (d *decoder) decodeString(size, offset uint) (string, uint) { +func (d *Decoder) decodeString(size, offset uint) (string, uint, error) { newOffset := offset + size - return string(d.buffer[offset:newOffset]), newOffset + if d.cache == nil { + return string(d.buffer[offset:newOffset]), newOffset, nil + } + + v, ok := d.cache.Load(offset) + if ok { + if s, ok := v.(string); ok { + return s, newOffset, nil + } + return "", 0, mmdberrors.NewCacheTypeStrError(v, "string") + } + + s := string(d.buffer[offset:newOffset]) + + d.cache.Store(offset, s) + + return s, newOffset, nil } -func (d *decoder) decodeStruct( +func (d *Decoder) decodeStruct( size uint, offset uint, result reflect.Value, @@ -899,7 +954,7 @@ func cachedFields(result reflect.Value) *fieldsType { return fields } -func (d *decoder) decodeUint(size, offset uint) (uint64, uint) { +func (d *Decoder) decodeUint(size, offset uint) (uint64, uint) { newOffset := offset + size bytes := d.buffer[offset:newOffset] @@ -910,7 +965,7 @@ func (d *decoder) decodeUint(size, offset uint) (uint64, uint) { return val, newOffset } -func (d *decoder) decodeUint128(size, offset uint) (*big.Int, uint) { +func (d *Decoder) decodeUint128(size, offset uint) (*big.Int, uint) { newOffset := offset + size val := new(big.Int) val.SetBytes(d.buffer[offset:newOffset]) @@ -930,7 +985,7 @@ func uintFromBytes(prefix uint, uintBytes []byte) uint { // can take 
advantage of https://github.com/golang/go/issues/3512 to avoid // copying the bytes when decoding a struct. Previously, we achieved this by // using unsafe. -func (d *decoder) decodeKey(offset uint) ([]byte, uint, error) { +func (d *Decoder) decodeKey(offset uint) ([]byte, uint, error) { typeNum, size, dataOffset, err := d.decodeCtrlData(offset) if err != nil { return nil, 0, err @@ -944,11 +999,14 @@ func (d *decoder) decodeKey(offset uint) ([]byte, uint, error) { return key, ptrOffset, err } if typeNum != _String { - return nil, 0, newInvalidDatabaseError("unexpected type when decoding string: %v", typeNum) + return nil, 0, mmdberrors.NewInvalidDatabaseError( + "unexpected type when decoding string: %v", + typeNum, + ) } newOffset := dataOffset + size if newOffset > uint(len(d.buffer)) { - return nil, 0, newOffsetError() + return nil, 0, mmdberrors.NewOffsetError() } return d.buffer[dataOffset:newOffset], newOffset, nil } @@ -956,7 +1014,7 @@ func (d *decoder) decodeKey(offset uint) ([]byte, uint, error) { // This function is used to skip ahead to the next value without decoding // the one at the offset passed in. The size bits have different meanings for // different data types. 
-func (d *decoder) nextValueOffset(offset, numberToSkip uint) (uint, error) { +func (d *Decoder) nextValueOffset(offset, numberToSkip uint) (uint, error) { if numberToSkip == 0 { return offset, nil } diff --git a/decoder_test.go b/internal/decoder/decoder_test.go similarity index 96% rename from decoder_test.go rename to internal/decoder/decoder_test.go index a7aba68..3563676 100644 --- a/decoder_test.go +++ b/internal/decoder/decoder_test.go @@ -1,9 +1,10 @@ -package maxminddb +package decoder import ( "encoding/hex" "math/big" "os" + "path/filepath" "reflect" "strings" "testing" @@ -207,7 +208,7 @@ func validateDecoding(t *testing.T, tests map[string]any) { for inputStr, expected := range tests { inputBytes, err := hex.DecodeString(inputStr) require.NoError(t, err) - d := decoder{buffer: inputBytes} + d := Decoder{buffer: inputBytes} var result any _, err = d.decode(0, reflect.ValueOf(&result), 0) @@ -223,7 +224,7 @@ func validateDecoding(t *testing.T, tests map[string]any) { func TestPointers(t *testing.T) { bytes, err := os.ReadFile(testFile("maps-with-pointers.raw")) require.NoError(t, err) - d := decoder{buffer: bytes} + d := Decoder{buffer: bytes} expected := map[uint]map[string]string{ 0: {"long_key": "long_value1"}, @@ -243,3 +244,7 @@ func TestPointers(t *testing.T) { } } } + +func testFile(file string) string { + return filepath.Join("..", "..", "test-data", "test-data", file) +} diff --git a/deserializer.go b/internal/decoder/deserializer.go similarity index 98% rename from deserializer.go rename to internal/decoder/deserializer.go index c6dd68d..0411af9 100644 --- a/deserializer.go +++ b/internal/decoder/deserializer.go @@ -1,4 +1,4 @@ -package maxminddb +package decoder import "math/big" diff --git a/internal/decoder/verifier.go b/internal/decoder/verifier.go new file mode 100644 index 0000000..30627c2 --- /dev/null +++ b/internal/decoder/verifier.go @@ -0,0 +1,63 @@ +package decoder + +import ( + "reflect" + + 
"github.com/oschwald/maxminddb-golang/v2/internal/mmdberrors" +) + +func (d *Decoder) VerifyDataSection(offsets map[uint]bool) error { + pointerCount := len(offsets) + + var offset uint + bufferLen := uint(len(d.buffer)) + for offset < bufferLen { + var data any + rv := reflect.ValueOf(&data) + newOffset, err := d.decode(offset, rv, 0) + if err != nil { + return mmdberrors.NewInvalidDatabaseError( + "received decoding error (%v) at offset of %v", + err, + offset, + ) + } + if newOffset <= offset { + return mmdberrors.NewInvalidDatabaseError( + "data section offset unexpectedly went from %v to %v", + offset, + newOffset, + ) + } + + pointer := offset + + if _, ok := offsets[pointer]; !ok { + return mmdberrors.NewInvalidDatabaseError( + "found data (%v) at %v that the search tree does not point to", + data, + pointer, + ) + } + delete(offsets, pointer) + + offset = newOffset + } + + if offset != bufferLen { + return mmdberrors.NewInvalidDatabaseError( + "unexpected data at the end of the data section (last offset: %v, end: %v)", + offset, + bufferLen, + ) + } + + if len(offsets) != 0 { + return mmdberrors.NewInvalidDatabaseError( + "found %v pointers (of %v) in the search tree that we did not see in the data section", + len(offsets), + pointerCount, + ) + } + return nil +} diff --git a/internal/mmdberrors/errors.go b/internal/mmdberrors/errors.go new file mode 100644 index 0000000..7e93325 --- /dev/null +++ b/internal/mmdberrors/errors.go @@ -0,0 +1,62 @@ +package mmdberrors + +import ( + "fmt" + "reflect" +) + +// InvalidDatabaseError is returned when the database contains invalid data +// and cannot be parsed. 
+type InvalidDatabaseError struct { + message string +} + +func NewOffsetError() InvalidDatabaseError { + return InvalidDatabaseError{"unexpected end of database"} +} + +func NewInvalidDatabaseError(format string, args ...any) InvalidDatabaseError { + return InvalidDatabaseError{fmt.Sprintf(format, args...)} +} + +func (e InvalidDatabaseError) Error() string { + return e.message +} + +type CacheTypeError struct { + Type string + Value any +} + +func NewCacheTypeStrError(value any, expType string) CacheTypeError { + return CacheTypeError{ + Type: expType, + Value: value, + } +} + +func (e CacheTypeError) Error() string { + return fmt.Sprintf("maxminddb: expected %s type in cache but found %T", e.Type, e.Value) +} + +// UnmarshalTypeError is returned when the value in the database cannot be +// assigned to the specified data type. +type UnmarshalTypeError struct { + Type reflect.Type + Value string +} + +func NewUnmarshalTypeStrError(value string, rType reflect.Type) UnmarshalTypeError { + return UnmarshalTypeError{ + Type: rType, + Value: value, + } +} + +func NewUnmarshalTypeError(value any, rType reflect.Type) UnmarshalTypeError { + return NewUnmarshalTypeStrError(fmt.Sprintf("%v (%T)", value, value), rType) +} + +func (e UnmarshalTypeError) Error() string { + return fmt.Sprintf("maxminddb: cannot unmarshal %s into type %s", e.Value, e.Type) +} diff --git a/reader.go b/reader.go index ae18794..3513e19 100644 --- a/reader.go +++ b/reader.go @@ -8,8 +8,10 @@ import ( "io" "net/netip" "os" - "reflect" "runtime" + + "github.com/oschwald/maxminddb-golang/v2/internal/decoder" + "github.com/oschwald/maxminddb-golang/v2/internal/mmdberrors" ) const dataSectionSeparatorSize = 16 @@ -24,7 +26,7 @@ var metadataStartMarker = []byte("\xAB\xCD\xEFMaxMind.com") type Reader struct { nodeReader nodeReader buffer []byte - decoder decoder + decoder decoder.Decoder Metadata Metadata ipv4Start uint ipv4StartBitDepth int @@ -48,13 +50,25 @@ type Metadata struct { RecordSize uint 
`maxminddb:"record_size"` } -// Open takes a string path to a MaxMind DB file and returns a Reader -// structure or an error. The database file is opened using a memory map -// on supported platforms. On platforms without memory map support, such +type readerOptions struct { + cache decoder.Cacher +} + +type ReaderOption func(*readerOptions) + +func Cache(cache decoder.Cacher) ReaderOption { + return func(o *readerOptions) { + o.cache = cache + } +} + +// Open takes a string path to a MaxMind DB file and any options. It returns a +// Reader structure or an error. The database file is opened using a memory +// map on supported platforms. On platforms without memory map support, such // as WebAssembly or Google App Engine, or if the memory map attempt fails // due to lack of support from the filesystem, the database is loaded into memory. // Use the Close method on the Reader object to return the resources to the system. -func Open(file string) (*Reader, error) { +func Open(file string, options ...ReaderOption) (*Reader, error) { mapFile, err := os.Open(file) if err != nil { return nil, err @@ -86,12 +100,12 @@ func Open(file string) (*Reader, error) { if err != nil { return nil, err } - return FromBytes(data) + return FromBytes(data, options...) } return nil, err } - reader, err := FromBytes(data) + reader, err := FromBytes(data, options...) if err != nil { _ = munmap(data) return nil, err @@ -120,22 +134,28 @@ func (r *Reader) Close() error { return err } -// FromBytes takes a byte slice corresponding to a MaxMind DB file and returns -// a Reader structure or an error. -func FromBytes(buffer []byte) (*Reader, error) { +// FromBytes takes a byte slice corresponding to a MaxMind DB file and any +// options. It returns a Reader structure or an error. 
+func FromBytes(buffer []byte, options ...ReaderOption) (*Reader, error) { + opts := &readerOptions{} + for _, option := range options { + option(opts) + } + metadataStart := bytes.LastIndex(buffer, metadataStartMarker) if metadataStart == -1 { - return nil, newInvalidDatabaseError("error opening database: invalid MaxMind DB file") + return nil, mmdberrors.NewInvalidDatabaseError( + "error opening database: invalid MaxMind DB file", + ) } metadataStart += len(metadataStartMarker) - metadataDecoder := decoder{buffer: buffer[metadataStart:]} + metadataDecoder := decoder.New(buffer[metadataStart:], nil) var metadata Metadata - rvMetadata := reflect.ValueOf(&metadata) - _, err := metadataDecoder.decode(0, rvMetadata, 0) + err := metadataDecoder.Decode(0, &metadata) if err != nil { return nil, err } @@ -144,11 +164,12 @@ func FromBytes(buffer []byte) (*Reader, error) { dataSectionStart := searchTreeSize + dataSectionSeparatorSize dataSectionEnd := uint(metadataStart - len(metadataStartMarker)) if dataSectionStart > dataSectionEnd { - return nil, newInvalidDatabaseError("the MaxMind DB contains invalid metadata") - } - d := decoder{ - buffer: buffer[searchTreeSize+dataSectionSeparatorSize : metadataStart-len(metadataStartMarker)], + return nil, mmdberrors.NewInvalidDatabaseError("the MaxMind DB contains invalid metadata") } + d := decoder.New( + buffer[searchTreeSize+dataSectionSeparatorSize:metadataStart-len(metadataStartMarker)], + opts.cache, + ) nodeBuffer := buffer[:searchTreeSize] var nodeReader nodeReader @@ -160,7 +181,10 @@ func FromBytes(buffer []byte) (*Reader, error) { case 32: nodeReader = nodeReader32{buffer: nodeBuffer} default: - return nil, newInvalidDatabaseError("unknown record size: %d", metadata.RecordSize) + return nil, mmdberrors.NewInvalidDatabaseError( + "unknown record size: %d", + metadata.RecordSize, + ) } reader := &Reader{ @@ -255,7 +279,7 @@ func (r *Reader) lookupPointer(ip netip.Addr) (uint, int, error) { return node, prefixLength, nil } 
- return 0, prefixLength, newInvalidDatabaseError("invalid node in search tree") + return 0, prefixLength, mmdberrors.NewInvalidDatabaseError("invalid node in search tree") } func (r *Reader) traverseTree(ip netip.Addr, node uint, stopBit int) (uint, int) { @@ -286,7 +310,7 @@ func (r *Reader) resolveDataPointer(pointer uint) (uintptr, error) { resolved := uintptr(pointer - r.Metadata.NodeCount - dataSectionSeparatorSize) if resolved >= uintptr(len(r.buffer)) { - return 0, newInvalidDatabaseError("the MaxMind DB file's search tree is corrupt") + return 0, mmdberrors.NewInvalidDatabaseError("the MaxMind DB file's search tree is corrupt") } return resolved, nil } diff --git a/reader_test.go b/reader_test.go index d6c8cf8..440c96d 100644 --- a/reader_test.go +++ b/reader_test.go @@ -14,6 +14,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/oschwald/maxminddb-golang/v2/internal/mmdberrors" ) func TestReader(t *testing.T) { @@ -647,7 +649,7 @@ func TestBrokenDoubleDatabase(t *testing.T) { var result any err = reader.Lookup(netip.MustParseAddr("2001:220::")).Decode(&result) - expected := newInvalidDatabaseError( + expected := mmdberrors.NewInvalidDatabaseError( "the MaxMind DB file's data section contains bad data (float 64 size of 2)", ) require.ErrorAs(t, err, &expected) @@ -657,7 +659,7 @@ func TestBrokenDoubleDatabase(t *testing.T) { func TestInvalidNodeCountDatabase(t *testing.T) { _, err := Open(testFile("GeoIP2-City-Test-Invalid-Node-Count.mmdb")) - expected := newInvalidDatabaseError("the MaxMind DB contains invalid metadata") + expected := mmdberrors.NewInvalidDatabaseError("the MaxMind DB contains invalid metadata") assert.Equal(t, expected, err) } @@ -929,6 +931,44 @@ func BenchmarkCityLookup(b *testing.B) { var result fullCity s := make(net.IP, 4) + + b.ResetTimer() + + for range b.N { + ip := randomIPv4Address(r, s) + err = db.Lookup(ip).Decode(&result) + if err != nil { + b.Error(err) + } + } + 
require.NoError(b, db.Close(), "error on close") +} + +type cache struct { + m map[uint]any +} + +func (c *cache) Load(key uint) (any, bool) { + v, ok := c.m[key] + return v, ok +} + +func (c *cache) Store(key uint, value any) { + c.m[key] = value +} + +func BenchmarkCityLookupWithCache(b *testing.B) { + db, err := Open("GeoLite2-City.mmdb", Cache(&cache{m: map[uint]any{}})) + require.NoError(b, err) + + //nolint:gosec // this is a test + r := rand.New(rand.NewSource(time.Now().UnixNano())) + var result fullCity + + s := make(net.IP, 4) + + b.ResetTimer() + for range b.N { ip := randomIPv4Address(r, s) err = db.Lookup(ip).Decode(&result) @@ -947,6 +987,9 @@ func BenchmarkCityLookupOnly(b *testing.B) { r := rand.New(rand.NewSource(time.Now().UnixNano())) s := make(net.IP, 4) + + b.ResetTimer() + for range b.N { ip := randomIPv4Address(r, s) result := db.Lookup(ip) diff --git a/result.go b/result.go index 7562b2b..6462990 100644 --- a/result.go +++ b/result.go @@ -1,10 +1,10 @@ package maxminddb import ( - "errors" "math" "net/netip" - "reflect" + + "github.com/oschwald/maxminddb-golang/v2/internal/decoder" ) const notFound uint = math.MaxUint @@ -12,7 +12,7 @@ const notFound uint = math.MaxUint type Result struct { ip netip.Addr err error - decoder decoder + decoder decoder.Decoder offset uint prefixLen uint8 } @@ -35,18 +35,8 @@ func (r Result) Decode(v any) error { if r.offset == notFound { return nil } - rv := reflect.ValueOf(v) - if rv.Kind() != reflect.Ptr || rv.IsNil() { - return errors.New("result param must be a pointer") - } - if dser, ok := v.(deserializer); ok { - _, err := r.decoder.decodeToDeserializer(r.offset, dser, 0, false) - return err - } - - _, err := r.decoder.decode(r.offset, rv, 0) - return err + return r.decoder.Decode(r.offset, v) } // DecodePath unmarshals a value from data section into v, following the @@ -89,11 +79,7 @@ func (r Result) DecodePath(v any, path ...any) error { if r.offset == notFound { return nil } - rv := reflect.ValueOf(v) 
- if rv.Kind() != reflect.Ptr || rv.IsNil() { - return errors.New("result param must be a pointer") - } - return r.decoder.decodePath(r.offset, path, rv) + return r.decoder.DecodePath(r.offset, path, v) } // Err provides a way to check whether there was an error during the lookup diff --git a/traverse.go b/traverse.go index b9a6acd..39ba3dd 100644 --- a/traverse.go +++ b/traverse.go @@ -5,6 +5,8 @@ import ( // comment to prevent gofumpt from randomly moving iter. "iter" "net/netip" + + "github.com/oschwald/maxminddb-golang/v2/internal/mmdberrors" ) // Internal structure used to keep track of nodes we still need to visit. @@ -30,14 +32,18 @@ type NetworksOption func(*networkOptions) // IncludeAliasedNetworks is an option for Networks and NetworksWithin // that makes them iterate over aliases of the IPv4 subtree in an IPv6 // database, e.g., ::ffff:0:0/96, 2001::/32, and 2002::/16. -func IncludeAliasedNetworks(networks *networkOptions) { - networks.includeAliasedNetworks = true +func IncludeAliasedNetworks() NetworksOption { + return func(networks *networkOptions) { + networks.includeAliasedNetworks = true + } } // IncludeNetworksWithoutData is an option for Networks and NetworksWithin // that makes them include networks without any data in the iteration. 
-func IncludeNetworksWithoutData(networks *networkOptions) { - networks.includeEmptyNetworks = true +func IncludeNetworksWithoutData() NetworksOption { + return func(networks *networkOptions) { + networks.includeEmptyNetworks = true + } } // Networks returns an iterator that can be used to traverse the networks in @@ -166,7 +172,7 @@ func (r *Reader) NetworksWithin(prefix netip.Prefix, options ...NetworksOption) ip: displayAddr, prefixLen: uint8(node.bit), } - res.err = newInvalidDatabaseError( + res.err = mmdberrors.NewInvalidDatabaseError( "invalid search tree at %s", res.Prefix()) yield(res) diff --git a/traverse_test.go b/traverse_test.go index 5fbbb88..5340978 100644 --- a/traverse_test.go +++ b/traverse_test.go @@ -227,7 +227,7 @@ var tests = []networkTest{ "2002:101:110::/44", "2002:101:120::/48", }, - Options: []NetworksOption{IncludeAliasedNetworks}, + Options: []NetworksOption{IncludeAliasedNetworks()}, }, { Network: "::/0", @@ -281,7 +281,7 @@ var tests = []networkTest{ "1.64.0.0/10", "1.128.0.0/9", }, - Options: []NetworksOption{IncludeNetworksWithoutData}, + Options: []NetworksOption{IncludeNetworksWithoutData()}, }, { Network: "1.1.1.16/28", diff --git a/verifier.go b/verifier.go index 335cb1b..0c9f393 100644 --- a/verifier.go +++ b/verifier.go @@ -1,8 +1,9 @@ package maxminddb import ( - "reflect" "runtime" + + "github.com/oschwald/maxminddb-golang/v2/internal/mmdberrors" ) type verifier struct { @@ -96,7 +97,7 @@ func (v *verifier) verifyDatabase() error { return err } - return v.verifyDataSection(offsets) + return v.reader.decoder.VerifyDataSection(offsets) } func (v *verifier) verifySearchTree() (map[uint]bool, error) { @@ -118,66 +119,11 @@ func (v *verifier) verifyDataSectionSeparator() error { for _, b := range separator { if b != 0 { - return newInvalidDatabaseError("unexpected byte in data separator: %v", separator) - } - } - return nil -} - -func (v *verifier) verifyDataSection(offsets map[uint]bool) error { - pointerCount := len(offsets) - 
- decoder := v.reader.decoder - - var offset uint - bufferLen := uint(len(decoder.buffer)) - for offset < bufferLen { - var data any - rv := reflect.ValueOf(&data) - newOffset, err := decoder.decode(offset, rv, 0) - if err != nil { - return newInvalidDatabaseError( - "received decoding error (%v) at offset of %v", - err, - offset, - ) - } - if newOffset <= offset { - return newInvalidDatabaseError( - "data section offset unexpectedly went from %v to %v", - offset, - newOffset, + return mmdberrors.NewInvalidDatabaseError( + "unexpected byte in data separator: %v", + separator, ) } - - pointer := offset - - if _, ok := offsets[pointer]; !ok { - return newInvalidDatabaseError( - "found data (%v) at %v that the search tree does not point to", - data, - pointer, - ) - } - delete(offsets, pointer) - - offset = newOffset - } - - if offset != bufferLen { - return newInvalidDatabaseError( - "unexpected data at the end of the data section (last offset: %v, end: %v)", - offset, - bufferLen, - ) - } - - if len(offsets) != 0 { - return newInvalidDatabaseError( - "found %v pointers (of %v) in the search tree that we did not see in the data section", - len(offsets), - pointerCount, - ) } return nil } @@ -187,7 +133,7 @@ func testError( expected any, actual any, ) error { - return newInvalidDatabaseError( + return mmdberrors.NewInvalidDatabaseError( "%v - Expected: %v Actual: %v", field, expected,