diff --git a/README.md b/README.md index 0a5eeb2..49728ef 100644 --- a/README.md +++ b/README.md @@ -178,3 +178,145 @@ using the following options. 3. length, max_length: The length of the string - if not specified we use the terminator to find the end of the string. This can also be a lambda to derive the length from another field. + + +### References + +When dereferencing a struct member we receive the basic type contained +in the struct. + +Consider the following struct: + +```json +[ + ["TestStruct", 0, [ + ["Field1", 2, "uint8"], + ["Field2", 0, Value, { + value: "x=>x.Field1 + 1" + }] + ]] +] +``` + +In VQL, accessing the field will produce a uint8 type, so `Field2` +will be calculated by adding 1 to the uint8 content of Field1. + +While this makes it easy and intuitive to use, sometimes we need to +calculate some metadata over the struct field instead of its literal +value. For example metadata such as its starting offset, ending offset +etc. + +Suppose we wanted to add another field, immediately following Field1: + +```json +[ + ["TestStruct", 0, [ + ["Field1", 2, "uint8"], + ["Next", "x=>x.Field1.EndOf", "uint16"], + ]] +] +``` + +This is not going to work, because `x.Field1` is a `uint8` integer +type and it does not have an `EndOf` method. + +This is where references come in. We can obtain a struct field +reference by using the @ character when accessing the field. A +reference is a wrapper around the field which provides metadata about +it. + +```json +[ + ["TestStruct", 0, [ + ["Field1", 2, "uint8"], + ["Next", "x=>x.`@Field1`.RelEndOf", "uint16"], + ]] +] +``` + +Note that the field name must be enclosed in backticks for VQL to +identify the @ as part of the field name. + +Accessing a struct field with a name beginning with @ will return a +reference to the field instead of the field itself. + +The reference has many useful methods: + +- *SizeOf*, *Size*: These return the size of the field in bytes. 
+- *StartOf*, *Start*, *OffsetOf*: These return the byte offset of the + beginning of the field. + + NOTE that this offset is absolute - i.e. this offset will be + relative to the beginning of the file. + +- *RelOffset*: The relative offset of the field within the struct. + + In the following, the Next and Next2 fields are equivalent: + +```json +[ + ["TestStruct", 0, [ + ["Field1", 2, "uint8"], + ["Next", "x=>x.`@Field1`.StartOf + 4 - x.OffsetOf", "uint16"], + ["Next2", "x=>x.`@Field1`.RelOffset + 4", "uint16"], + ]] +] +``` + +- *EndOf*, *End*: These return the end of the field in absolute bytes, + relative to the file. + + In the following, the Next and Next2 fields are equivalent and are + both located directly after `Field1`: + +```json +[ + ["TestStruct", 0, [ + ["Field1", 2, "uint8"], + ["Next", "x=>x.`@Field1`.EndOf - x.OffsetOf", "uint16"], + ["Next2", "x=>x.`@Field1`.RelOffset + x.`@Field1`.SizeOf", "uint16"], + ]] +] +``` + +- *RelEndOf*: Like *EndOf* but relative to the start of the struct. + + In particular notice that the profile struct syntax requires + specifying the offset of a field relative to the struct. Therefore + this field is especially useful to specify the field should be + located immediately after the previous field. + +```json +[ + ["TestStruct", 0, [ + ["Field1", 2, "uint8"], + ["Next", "x=>x.`@Field1`.RelEndOf", "uint16"], + ]] +] +``` + + This also works for fields with variable size. For example consider + a null terminated string followed immediately with an integer. + +```json +[ + ["TestStruct", 0, [ + ["Field1", 0, String], + ["Next", "x=>x.`@Field1`.RelEndOf", "uint32"], + ]] +] +``` + +- *Value*: It is possible to dereference the reference to obtain the + real value. 
+ +```json +[ + ["TestStruct", 0, [ + ["Field1", 2, "uint8"], + ["Field2", 0, Value, { + value: "x=>x.`@Field1`.Value + 1" + }] + ]] +] +``` diff --git a/fixtures/TestStructParser.golden b/fixtures/TestStructParser.golden index 0e4ccd7..a624a8c 100644 --- a/fixtures/TestStructParser.golden +++ b/fixtures/TestStructParser.golden @@ -1,20 +1,36 @@ { - "Field1": 3, + "Field1": 5, "Field2": { - "SecondField1": 7 + "SecondField1": 9 }, + "StringField": "hello", "X": { - "Field1": 3, + "Field1": 5, "Field2": { - "SecondField1": 7 + "SecondField1": 9 }, - "Field3": 578437695752307201, + "StringField": "hello", + "Field3": 1084818905618843912, "Field4": { - "SecondField1": 6 - } + "SecondField1": 10 + }, + "OffsetOfField3": 7, + "SizeOfField3": 8, + "OffsetOfField2": 6, + "RelOffsetField2": 4, + "SizeOfField2": 5, + "StructOffset": 2, + "StringFieldSize": 12 }, - "Field3": 578437695752307201, + "Field3": 1084818905618843912, "Field4": { - "SecondField1": 6 - } + "SecondField1": 10 + }, + "OffsetOfField3": 7, + "SizeOfField3": 8, + "OffsetOfField2": 6, + "RelOffsetField2": 4, + "SizeOfField2": 5, + "StructOffset": 2, + "StringFieldSize": 12 } \ No newline at end of file diff --git a/parser.go b/parser.go index 17fd999..185d3d3 100644 --- a/parser.go +++ b/parser.go @@ -17,10 +17,17 @@ type Parser interface { New(profile *Profile, options *ordereddict.Dict) (Parser, error) } +// Used by parsers or wrappers who have fixed size type Sizer interface { Size() int } +// Applies on a parser which needs to instantiate to figure out the +// size. i.e. the size depends on the data read (e.g. a string). +type InstanceSizer interface { + InstanceSize(scope vfilter.Scope, reader io.ReaderAt, offset int64) int +} + // Allows psuedo elements to reveal their own value. 
type Valuer interface { Value() interface{} diff --git a/parser_test.go b/parser_test.go index 81db5bc..cf824d7 100644 --- a/parser_test.go +++ b/parser_test.go @@ -44,6 +44,9 @@ var ( // offset 95 - E58E26 -> 624485 0xe5, 0x8e, 0x26, + + // offset 96 - UTF16 string + 0x68, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, } ) @@ -87,9 +90,35 @@ func TestStructParser(t *testing.T) { ["TestStruct", "x => x.Field1 + 5", [ ["Field1", 2, "uint8"], ["Field2", 4, "Second"], + ["StringField", 96, String, { + length: 20, + encoding: "utf16", + }], ["X", 0, "Value", {"value": "x=>x"}], - ["Field3", 0, "unsigned long long"], - ["Field4", "x => x.Field1", "Second"] + ["Field3", 5, "unsigned long long"], + ["Field4", "x => x.Field1", "Second"], + ["OffsetOfField3", 0, Value, { + ` + "value: 'x=>x.`@Field3`.OffsetOf', " + ` + }], + ["SizeOfField3", 0, Value, { + ` + "value: 'x=>x.`@Field3`.SizeOf', " + ` + }], + ["OffsetOfField2", 0, Value, { + ` + "value: 'x=>x.`@Field2`.OffsetOf', " + ` + }], + ["RelOffsetField2", 0, Value, { + ` + "value: 'x=>x.`@Field2`.RelOffset', " + ` + }], + ["SizeOfField2", 0, Value, { + ` + "value: 'x=>x.`@Field2`.SizeOf', " + ` + }], + ["StructOffset", 0, Value, { + value: "x=>x.OffsetOf", + }], + ["StringFieldSize", 0, Value, { + ` + "value: 'x=>x.`@StringField`.SizeOf', " + ` + }], + ]], ["Second", 5, [ @@ -103,19 +132,19 @@ func TestStructParser(t *testing.T) { // Parse TestStruct over the reader reader := bytes.NewReader(sample) - obj, err := profile.Parse(scope, "TestStruct", reader, 0) + obj, err := profile.Parse(scope, "TestStruct", reader, 2) assert.NoError(t, err) // Field1 is at offset 2 has value 0x03 - assert.Equal(t, uint64(3), Associative(scope, obj, "Field1")) + assert.Equal(t, uint64(5), Associative(scope, obj, "Field1")) // Object size is calculated as x.Field1 + 5 ... 8 - assert.Equal(t, 8, SizeOf(obj)) + assert.Equal(t, 10, SizeOf(obj)) // Field4's offset is calculated as x=>x.Field1 // i.e. 3. 
SecondField1 has a relative offset of 2, therefore // absolute offset of 3 + 2 = 5 -> value = 0x06 - assert.Equal(t, uint64(6), Associative(scope, obj, "Field4.SecondField1")) + assert.Equal(t, uint64(10), Associative(scope, obj, "Field4.SecondField1")) serialized, err := json.MarshalIndent(obj, "", " ") assert.NoError(t, err) diff --git a/protocol.go b/protocol.go index 89f0f59..e46edb3 100644 --- a/protocol.go +++ b/protocol.go @@ -34,7 +34,7 @@ func (self StructAssociative) Associative(scope vfilter.Scope, // A Struct definition overrides default fields - this way a // struct may define a field called "Offset" and it will be - // honored but if not defined we retur the default offset. + // honored but if not defined we return the default offset. if lhs.HasField(rhs) { return lhs.Get(rhs) } @@ -178,3 +178,65 @@ func (self ArrayIterator) Iterate( return output_chan } + +type StructFieldReferenceAssociative struct{} + +func (self StructFieldReferenceAssociative) Applicable(a vfilter.Any, b vfilter.Any) bool { + switch a.(type) { + case StructFieldReference, *StructFieldReference: + _, ok := b.(string) + if ok { + return true + } + } + return false +} + +func (self StructFieldReferenceAssociative) Associative(scope vfilter.Scope, + a vfilter.Any, b vfilter.Any) (vfilter.Any, bool) { + lhs, ok := a.(*StructFieldReference) + if !ok { + return vfilter.Null{}, false + } + + rhs, ok := b.(string) + if !ok { + return vfilter.Null{}, false + } + + switch rhs { + case "SizeOf", "Size": + return lhs.Size(), true + + case "StartOf", "Start", "OffsetOf": + return lhs.Start(), true + + case "RelOffset": + return lhs.RelOffset(), true + + case "RelEndOf": + return lhs.RelOffset() + int64(lhs.Size()), true + + case "EndOf", "End": + return lhs.End(), true + + case "Value": + return lhs.Value(), true + + default: + return nil, false + } +} + +func (self StructFieldReferenceAssociative) GetMembers(scope vfilter.Scope, a vfilter.Any) []string { + return nil +} + +func GetProtocols() 
[]vfilter.Any { + return []vfilter.Any{ + &StructAssociative{}, + &ArrayAssociative{}, + &ArrayIterator{}, + &StructFieldReferenceAssociative{}, + } +} diff --git a/reference.go b/reference.go new file mode 100644 index 0000000..1ee823a --- /dev/null +++ b/reference.go @@ -0,0 +1,46 @@ +package vtypes + +import ( + "encoding/json" + "io" + + "www.velocidex.com/golang/vfilter" +) + +// A reference is a wrapper around a struct member which can contain +// metadata about it. + +type StructFieldReference struct { + // Offset to the start of the struct + offset int64 + reader io.ReaderAt + scope vfilter.Scope + field string + + parser *ParseAtOffset +} + +// The offset within the struct +func (self *StructFieldReference) RelOffset() int64 { + return self.parser.getOffset(self.scope) +} + +func (self *StructFieldReference) Start() int64 { + return self.offset + self.parser.getOffset(self.scope) +} + +func (self *StructFieldReference) Size() int { + return self.parser.Size(self.scope, self.reader, self.offset) +} + +func (self *StructFieldReference) End() int64 { + return self.Start() + int64(self.Size()) +} + +func (self *StructFieldReference) Value() interface{} { + return self.parser.Parse(self.scope, self.reader, self.offset) +} + +func (self *StructFieldReference) MarshalJSON() ([]byte, error) { + return json.Marshal(self.Value()) +} diff --git a/scope.go b/scope.go index 442c4a8..67a1d79 100644 --- a/scope.go +++ b/scope.go @@ -4,9 +4,7 @@ import "www.velocidex.com/golang/vfilter" func MakeScope() vfilter.Scope { result := vfilter.NewScope() - result.AddProtocolImpl( - &StructAssociative{}, &ArrayAssociative{}, &ArrayIterator{}, - ) + result.AddProtocolImpl(GetProtocols()...) 
return result } diff --git a/string.go b/string.go index c89ef5b..4381378 100644 --- a/string.go +++ b/string.go @@ -26,6 +26,8 @@ type StringParserOptions struct { TermExpression *vfilter.Lambda `vfilter:"optional,field=term_exp,doc=A Terminator expression"` Encoding string `vfilter:"optional,field=encoding,doc=The encoding to use, can be utf8 or utf16"` Bytes bool `vfilter:"optional,field=byte_string,doc=Terminating string (can be an expression)"` + + utf16 bool } type StringParser struct { @@ -44,6 +46,14 @@ func (self *StringParser) New(profile *Profile, options *ordereddict.Dict) (Pars result.options.MaxLength = 1024 } + switch result.options.Encoding { + case "utf8", "": + case "utf16": + result.options.utf16 = true + default: + return nil, fmt.Errorf("StringParser: encoding can only be utf8 or utf16") + } + if result.options.TermHex != nil { term, err := hex.DecodeString(*result.options.TermHex) if err != nil { @@ -56,9 +66,66 @@ func (self *StringParser) New(profile *Profile, options *ordereddict.Dict) (Pars return result, nil } +func (self *StringParser) InstanceSize( + scope vfilter.Scope, + reader io.ReaderAt, offset int64) int { + + // The length of the string we are allowed to read. + result_len := self.getCount(scope) + + buf := make([]byte, result_len) + + n, _ := reader.ReadAt(buf, offset) + result := buf[:n] + + // If a terminator is specified read up to that. + term := defaultTerm + + // if lamda term_exp configured evaluate and add as a standard + // term + if self.options.TermExpression != nil { + term = EvalLambdaAsString( + self.options.TermExpression, scope) + } + + if self.options.Term != nil { + term = *self.options.Term + } + + // We need to bisect the read buffer by the terminator. + var term_bytes []byte + step := 1 + + if self.options.utf16 { + term_bytes = UTF16Encode(term) + step = 2 + + } else { + term_bytes = []byte(term) + } + + // Truncate to the right place by trying to find the + // term_bytes. 
Note that UTF16 comparisons must be aligned to 2 + // bytes. + if len(term_bytes) > 0 { + for i := 0; i < len(result); i += step { + if bytes.HasPrefix(result[i:], term_bytes) { + // Include the terminator in the size as it is + // technically part of the string. + return i + len(term_bytes) + } + } + } + + // Does not include the terminator + return len(result) +} + func (self *StringParser) getCount(scope vfilter.Scope) int64 { var result int64 = 1024 + // If length is not specified, we read 1kb and look for the + // terminator. if self.options.Length != nil { result = *self.options.Length } @@ -76,8 +143,19 @@ func (self *StringParser) getCount(scope vfilter.Scope) int64 { } func (self *StringParser) Parse( - scope vfilter.Scope, - reader io.ReaderAt, offset int64) interface{} { + scope vfilter.Scope, reader io.ReaderAt, offset int64) interface{} { + + result := self._Parse(scope, reader, offset) + if self.options.Bytes { + return result + } + + return string(result) + +} + +func (self *StringParser) _Parse( + scope vfilter.Scope, reader io.ReaderAt, offset int64) []byte { result_len := self.getCount(scope) @@ -86,21 +164,6 @@ func (self *StringParser) Parse( n, _ := reader.ReadAt(buf, offset) result := buf[:n] - // If encoding is specified, convert from utf16 - if self.options.Encoding == "utf16" { - order := binary.LittleEndian - u16s := []uint16{} - - for i, j := 0, len(result); i < j; i += 2 { - if len(result) < i+2 { - break - } - u16s = append(u16s, order.Uint16(result[i:])) - } - - result = []byte(string(utf16.Decode(u16s))) - } - // If a terminator is specified read up to that. term := defaultTerm @@ -115,16 +178,46 @@ func (self *StringParser) Parse( term = *self.options.Term } - if term != "" { - idx := bytes.Index(result, []byte(term)) - if idx >= 0 { - result = result[:idx] + // We need to bisect the read buffer by the terminator. 
+ var term_bytes []byte + step := 1 + + if self.options.utf16 { + term_bytes = UTF16Encode(term) + step = 2 + + } else { + term_bytes = []byte(term) + } + + // Truncate to the right place by trying to find the + // term_bytes. Note that UTF16 comparisons must be aligned to 2 + // bytes. + if len(term_bytes) > 0 { + for i := 0; i < len(result); i += step { + if bytes.HasPrefix(result[i:], term_bytes) { + result = result[:i] + break + } + } } } - if self.options.Bytes { - return result + if self.options.utf16 { + return []byte(UTF16Decode(result)) } - return string(result) + return result +} + +func UTF16Encode(in string) []byte { + buf := bytes.NewBuffer(nil) + ints := utf16.Encode([]rune(in)) + binary.Write(buf, binary.LittleEndian, &ints) + return buf.Bytes() +} + +func UTF16Decode(in []byte) string { + ints := make([]uint16, len(in)/2) + binary.Read(bytes.NewReader([]byte(in)), binary.LittleEndian, &ints) + return string(utf16.Decode(ints)) } diff --git a/struct.go b/struct.go index feeeb49..8151360 100644 --- a/struct.go +++ b/struct.go @@ -15,7 +15,7 @@ type StructParser struct { size_expression *vfilter.Lambda // Maintain the order of the fields. - fields map[string]Parser + fields map[string]*ParseAtOffset field_names []string } @@ -64,7 +64,7 @@ func NewStructParser(type_name string, size int) *StructParser { result := &StructParser{ type_name: type_name, size: size, - fields: make(map[string]Parser), + fields: make(map[string]*ParseAtOffset), } return result @@ -94,6 +94,25 @@ func (self *ParseAtOffset) getOffset(scope vfilter.Scope) int64 { return EvalLambdaAsInt64(self.offset_expression, scope) } +// Getting a field size may require actually parsing it since the size +// may be calculated. 
+func (self *ParseAtOffset) Size( + scope vfilter.Scope, reader io.ReaderAt, offset int64) int { + element_size := SizeOf(self.parser) + if element_size != 0 { + return element_size + } + + field_offset := self.getOffset(scope) + element_size = InstanceSizeOf(self.parser, scope, reader, offset+field_offset) + if element_size != 0 { + return element_size + } + + element := self.Parse(scope, reader, offset) + return SizeOf(element) +} + // NOTE: offset is the offset to the start of the struct. func (self *ParseAtOffset) Parse(scope vfilter.Scope, reader io.ReaderAt, offset int64) interface{} { @@ -129,8 +148,9 @@ func (self *StructObject) Start() int64 { return self.offset } -func (self *StructObject) HasField(name string) bool { - return self.parser.HasField(name) +func (self *StructObject) HasField(field string) bool { + pure_field := strings.TrimPrefix(field, "@") + return self.parser.HasField(pure_field) } func (self *StructObject) TypeName() string { @@ -151,6 +171,26 @@ func (self *StructObject) Get(field string) (interface{}, bool) { return hit, true } + // User wants a reference + pure_field := strings.TrimPrefix(field, "@") + if field != pure_field { + parser, pres := self.parser.fields[pure_field] + if !pres { + return vfilter.Null{}, false + } + + return &StructFieldReference{ + // Offset to the start of the struct + offset: self.offset, + reader: self.reader, + scope: self.scope, + field: field, + + // The field parser + parser: parser, + }, true + } + parser, pres := self.parser.fields[field] if !pres { return vfilter.Null{}, false diff --git a/utils.go b/utils.go index c6b0536..0ebebcb 100644 --- a/utils.go +++ b/utils.go @@ -2,6 +2,7 @@ package vtypes import ( "context" + "io" "reflect" "strings" @@ -66,9 +67,26 @@ func to_int64(x interface{}) (int64, bool) { // Some helpers func SizeOf(obj interface{}) int { - sizer, ok := obj.(Sizer) - if ok { - return sizer.Size() + switch t := obj.(type) { + case Sizer: + return t.Size() + + // Built in types + 
case string: + return len(t) + case []byte: + return len(t) + default: + return 0 + } +} + +func InstanceSizeOf(parser Parser, + scope vfilter.Scope, reader io.ReaderAt, offset int64) int { + + switch t := parser.(type) { + case InstanceSizer: + return t.InstanceSize(scope, reader, offset) } return 0 }