From bbbeeef0009071ed5c0e9b2061ab4d090f568e02 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 21 Feb 2022 17:02:37 -0500 Subject: [PATCH 1/9] Extract ascii2der into a library package --- {cmd/ascii2der => ascii2der}/encoder.go | 19 ++- {cmd/ascii2der => ascii2der}/encoder_test.go | 2 +- {cmd/ascii2der => ascii2der}/scanner.go | 134 ++++++++----------- {cmd/ascii2der => ascii2der}/scanner_test.go | 8 +- {cmd/ascii2der => ascii2der}/values.go | 2 +- {cmd/ascii2der => ascii2der}/values_test.go | 2 +- cmd/ascii2der/main.go | 4 +- 7 files changed, 86 insertions(+), 85 deletions(-) rename {cmd/ascii2der => ascii2der}/encoder.go (88%) rename {cmd/ascii2der => ascii2der}/encoder_test.go (99%) rename {cmd/ascii2der => ascii2der}/scanner.go (76%) rename {cmd/ascii2der => ascii2der}/scanner_test.go (99%) rename {cmd/ascii2der => ascii2der}/values.go (99%) rename {cmd/ascii2der => ascii2der}/values_test.go (99%) diff --git a/cmd/ascii2der/encoder.go b/ascii2der/encoder.go similarity index 88% rename from cmd/ascii2der/encoder.go rename to ascii2der/encoder.go index 0d84559..6b9f787 100644 --- a/cmd/ascii2der/encoder.go +++ b/ascii2der/encoder.go @@ -12,15 +12,32 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "errors" "fmt" + "unicode/utf16" "github.com/google/der-ascii/internal" ) +func appendUTF16(dst []byte, r rune) []byte { + if r <= 0xffff { + // Note this logic intentionally tolerates unpaired surrogates. + return append(dst, byte(r>>8), byte(r)) + } + + r1, r2 := utf16.EncodeRune(r) + dst = append(dst, byte(r1>>8), byte(r1)) + dst = append(dst, byte(r2>>8), byte(r2)) + return dst +} + +func appendUTF32(dst []byte, r rune) []byte { + return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) +} + func appendBase128(dst []byte, value uint32) []byte { dst, err := appendBase128WithLength(dst, value, 0) if err != nil { diff --git a/cmd/ascii2der/encoder_test.go b/ascii2der/encoder_test.go similarity index 99% rename from cmd/ascii2der/encoder_test.go rename to ascii2der/encoder_test.go index 5faa8ba..8c964da 100644 --- a/cmd/ascii2der/encoder_test.go +++ b/ascii2der/encoder_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "bytes" diff --git a/cmd/ascii2der/scanner.go b/ascii2der/scanner.go similarity index 76% rename from cmd/ascii2der/scanner.go rename to ascii2der/scanner.go index f4c49f2..feeacee 100644 --- a/cmd/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "encoding/hex" @@ -21,14 +21,13 @@ import ( "regexp" "strconv" "strings" - "unicode/utf16" "unicode/utf8" "github.com/google/der-ascii/internal" ) -// A position describes a location in the input stream. -type position struct { +// A Position describes a location in the input stream. +type Position struct { Offset int // offset, starting at 0 Line int // line number, starting at 1 Column int // column number, starting at 1 (byte count) @@ -46,14 +45,14 @@ const ( tokenEOF ) -// A parseError is an error during parsing DER ASCII. -type parseError struct { - Pos position +// A ParseError is an error during parsing DER ASCII. 
+type ParseError struct { + Pos Position Err error } -func (t *parseError) Error() string { - return fmt.Sprintf("line %d: %s", t.Pos.Line, t.Err) +func (e *ParseError) Error() string { + return fmt.Sprintf("line %d: %s", e.Pos.Line, e.Err) } // A token is a token in a DER ASCII file. @@ -64,7 +63,7 @@ type token struct { // bytes. Value []byte // Pos is the position of the first byte of the token. - Pos position + Pos Position // Length, for a tokenLongForm token, is the number of bytes to use to // encode the length, not including the initial one. Length int @@ -75,19 +74,19 @@ var ( regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) ) -type scanner struct { +type Scanner struct { text string - pos position + pos Position } -func newScanner(text string) *scanner { - return &scanner{text: text, pos: position{Line: 1}} +func NewScanner(text string) *Scanner { + return &Scanner{text: text, pos: Position{Line: 1}} } -func (s *scanner) parseEscapeSequence() (rune, error) { +func (s *Scanner) parseEscapeSequence() (rune, error) { s.advance() // Skip the \. The caller is assumed to have validated it. if s.isEOF() { - return 0, &parseError{s.pos, errors.New("expected escape character")} + return 0, &ParseError{s.pos, errors.New("expected escape character")} } switch c := s.text[s.pos.Offset]; c { case 'n': @@ -99,48 +98,48 @@ func (s *scanner) parseEscapeSequence() (rune, error) { case 'x': s.advance() if s.pos.Offset+2 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} + return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+2]) if err != nil { - return 0, &parseError{s.pos, err} + return 0, &ParseError{s.pos, err} } s.advanceBytes(2) return rune(b[0]), nil case 'u': s.advance() if s.pos.Offset+4 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} + return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+4]) if err != nil { - return 0, &parseError{s.pos, err} + return 0, &ParseError{s.pos, err} } s.advanceBytes(4) return rune(b[0])<<8 | rune(b[1]), nil case 'U': s.advance() if s.pos.Offset+8 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} + return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+8]) if err != nil { - return 0, &parseError{s.pos, err} + return 0, &ParseError{s.pos, err} } s.advanceBytes(8) return rune(b[0])<<24 | rune(b[1])<<16 | rune(b[2])<<8 | rune(b[3]), nil default: - return 0, &parseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} + return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} } } -func (s *scanner) parseQuotedString() (token, error) { +func (s *Scanner) parseQuotedString() (token, error) { s.advance() // Skip the ". The caller is assumed to have validated it. start := s.pos var bytes []byte for { if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} + return token{}, &ParseError{start, errors.New("unmatched \"")} } switch c := s.text[s.pos.Offset]; c { case '"': @@ -154,7 +153,7 @@ func (s *scanner) parseQuotedString() (token, error) { } if r > 0xff { // TODO(davidben): Alternatively, should these encode as UTF-8? 
- return token{}, &parseError{escapeStart, errors.New("illegal escape for quoted string")} + return token{}, &ParseError{escapeStart, errors.New("illegal escape for quoted string")} } bytes = append(bytes, byte(r)) default: @@ -164,26 +163,14 @@ func (s *scanner) parseQuotedString() (token, error) { } } -func appendUTF16(b []byte, r rune) []byte { - if r <= 0xffff { - // Note this logic intentionally tolerates unpaired surrogates. - return append(b, byte(r>>8), byte(r)) - } - - r1, r2 := utf16.EncodeRune(r) - b = append(b, byte(r1>>8), byte(r1)) - b = append(b, byte(r2>>8), byte(r2)) - return b -} - -func (s *scanner) parseUTF16String() (token, error) { +func (s *Scanner) parseUTF16String() (token, error) { s.advance() // Skip the u. The caller is assumed to have validated it. s.advance() // Skip the ". The caller is assumed to have validated it. start := s.pos var bytes []byte for { if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} + return token{}, &ParseError{start, errors.New("unmatched \"")} } switch c := s.text[s.pos.Offset]; c { case '"': @@ -201,7 +188,7 @@ func (s *scanner) parseUTF16String() (token, error) { // legitimate replacement charaacter in the input. The documentation // says errors return (RuneError, 0) or (RuneError, 1). if r == utf8.RuneError && n <= 1 { - return token{}, &parseError{s.pos, errors.New("invalid UTF-8")} + return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} } s.advanceBytes(n) bytes = appendUTF16(bytes, r) @@ -209,18 +196,14 @@ func (s *scanner) parseUTF16String() (token, error) { } } -func appendUTF32(b []byte, r rune) []byte { - return append(b, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) -} - -func (s *scanner) parseUTF32String() (token, error) { +func (s *Scanner) parseUTF32String() (token, error) { s.advance() // Skip the U. The caller is assumed to have validated it. s.advance() // Skip the ". The caller is assumed to have validated it. start := s.pos var bytes []byte for { if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} + return token{}, &ParseError{start, errors.New("unmatched \"")} } switch c := s.text[s.pos.Offset]; c { case '"': @@ -238,7 +221,7 @@ func (s *scanner) parseUTF32String() (token, error) { // legitimate replacement charaacter in the input. The documentation // says errors return (RuneError, 0) or (RuneError, 1). if r == utf8.RuneError && n <= 1 { - return token{}, &parseError{s.pos, errors.New("invalid UTF-8")} + return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} } s.advanceBytes(n) bytes = appendUTF32(bytes, r) @@ -246,7 +229,7 @@ func (s *scanner) parseUTF32String() (token, error) { } } -func (s *scanner) Next() (token, error) { +func (s *Scanner) next() (token, error) { again: if s.isEOF() { return token{Kind: tokenEOF, Pos: s.pos}, nil @@ -290,7 +273,7 @@ again: s.advance() // Skip the `. bitStr, ok := s.consumeUpTo('`') if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched `")} + return token{}, &ParseError{s.pos, errors.New("unmatched `")} } // The leading byte is the number of "extra" bits at the end. 
@@ -309,7 +292,7 @@ again: bitCount++ case '|': if sawPipe { - return token{}, &parseError{s.pos, errors.New("duplicate |")} + return token{}, &ParseError{s.pos, errors.New("duplicate |")} } // bitsRemaining is the number of bits remaining in the output that haven't @@ -317,13 +300,13 @@ again: bitsRemaining := (len(value)-1)*8 - bitCount inputRemaining := len(bitStr) - i - 1 if inputRemaining > bitsRemaining { - return token{}, &parseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)} + return token{}, &ParseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)} } sawPipe = true value[0] = byte(bitsRemaining) default: - return token{}, &parseError{s.pos, fmt.Errorf("unexpected rune %q", r)} + return token{}, &ParseError{s.pos, fmt.Errorf("unexpected rune %q", r)} } } if !sawPipe { @@ -335,26 +318,26 @@ again: s.advance() hexStr, ok := s.consumeUpTo('`') if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched `")} + return token{}, &ParseError{s.pos, errors.New("unmatched `")} } bytes, err := hex.DecodeString(hexStr) if err != nil { - return token{}, &parseError{s.pos, err} + return token{}, &ParseError{s.pos, err} } return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil case '[': s.advance() tagStr, ok := s.consumeUpTo(']') if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched [")} + return token{}, &ParseError{s.pos, errors.New("unmatched [")} } tag, err := decodeTagString(tagStr) if err != nil { - return token{}, &parseError{s.pos, err} + return token{}, &ParseError{s.pos, err} } value, err := appendTag(nil, tag) if err != nil { - return token{}, &parseError{s.pos, err} + return token{}, &ParseError{s.pos, err} } return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil } @@ -381,7 +364,7 @@ loop: value, err := appendTag(nil, tag) if err != nil { // This is impossible; built-in tags always encode. 
- return token{}, &parseError{s.pos, err} + return token{}, &ParseError{s.pos, err} } return token{Kind: tokenBytes, Value: value, Pos: start}, nil } @@ -389,7 +372,7 @@ loop: if regexpInteger.MatchString(symbol) { value, err := strconv.ParseInt(symbol, 10, 64) if err != nil { - return token{}, &parseError{start, err} + return token{}, &ParseError{start, err} } return token{Kind: tokenBytes, Value: appendInteger(nil, value), Pos: s.pos}, nil } @@ -400,7 +383,7 @@ loop: for _, s := range oidStr { u, err := strconv.ParseUint(s, 10, 32) if err != nil { - return token{}, &parseError{start, err} + return token{}, &ParseError{start, err} } oid = append(oid, uint32(u)) } @@ -426,7 +409,7 @@ loop: if isLongFormOverride(symbol) { l, err := decodeLongFormOverride(symbol) if err != nil { - return token{}, &parseError{start, err} + return token{}, &ParseError{start, err} } return token{Kind: tokenLongForm, Length: l}, nil } @@ -434,11 +417,11 @@ loop: return token{}, fmt.Errorf("unrecognized symbol %q", symbol) } -func (s *scanner) isEOF() bool { +func (s *Scanner) isEOF() bool { return s.pos.Offset >= len(s.text) } -func (s *scanner) advance() { +func (s *Scanner) advance() { if !s.isEOF() { if s.text[s.pos.Offset] == '\n' { s.pos.Line++ @@ -450,13 +433,13 @@ func (s *scanner) advance() { } } -func (s *scanner) advanceBytes(n int) { +func (s *Scanner) advanceBytes(n int) { for i := 0; i < n; i++ { s.advance() } } -func (s *scanner) consumeUpTo(b byte) (string, bool) { +func (s *Scanner) consumeUpTo(b byte) (string, bool) { start := s.pos.Offset for !s.isEOF() { if s.text[s.pos.Offset] == b { @@ -469,22 +452,22 @@ func (s *scanner) consumeUpTo(b byte) (string, bool) { return "", false } -func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) { +func (s *Scanner) exec(leftCurly *token) ([]byte, error) { var out []byte var lengthModifier *token for { - token, err := scanner.Next() + token, err := s.next() if err != nil { return nil, err } if lengthModifier != nil && token.Kind != tokenLeftCurly { - return nil, &parseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} + return nil, &ParseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} } switch token.Kind { case tokenBytes: out = append(out, token.Value...) case tokenLeftCurly: - child, err := asciiToDERImpl(scanner, &token) + child, err := s.exec(&token) if err != nil { return nil, err } @@ -504,7 +487,7 @@ func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) { out, err = appendLength(out, len(child), lengthOverride) if err != nil { // appendLength may fail if the lengthModifier was incompatible. - return nil, &parseError{lengthModifier.Pos, err} + return nil, &ParseError{lengthModifier.Pos, err} } out = append(out, child...) 
lengthModifier = nil @@ -512,21 +495,20 @@ func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) { if leftCurly != nil { return out, nil } - return nil, &parseError{token.Pos, errors.New("unmatched '}'")} + return nil, &ParseError{token.Pos, errors.New("unmatched '}'")} case tokenLongForm, tokenIndefinite: lengthModifier = &token case tokenEOF: if leftCurly == nil { return out, nil } - return nil, &parseError{leftCurly.Pos, errors.New("unmatched '{'")} + return nil, &ParseError{leftCurly.Pos, errors.New("unmatched '{'")} default: panic(token) } } } -func asciiToDER(input string) ([]byte, error) { - scanner := newScanner(input) - return asciiToDERImpl(scanner, nil) +func (s *Scanner) Exec() ([]byte, error) { + return s.exec(nil) } diff --git a/cmd/ascii2der/scanner_test.go b/ascii2der/scanner_test.go similarity index 99% rename from cmd/ascii2der/scanner_test.go rename to ascii2der/scanner_test.go index cac14d2..ff0fe85 100644 --- a/cmd/ascii2der/scanner_test.go +++ b/ascii2der/scanner_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "bytes" @@ -383,9 +383,9 @@ indefinite long-form:2`, } func scanAll(in string) (tokens []token, ok bool) { - scanner := newScanner(in) + scanner := NewScanner(in) for { - token, err := scanner.Next() + token, err := scanner.next() if err != nil { return } @@ -446,7 +446,7 @@ var asciiToDERTests = []struct { func TestASCIIToDER(t *testing.T) { for i, tt := range asciiToDERTests { - out, err := asciiToDER(tt.in) + out, err := NewScanner(tt.in).Exec() ok := err == nil if !tt.ok { if ok { diff --git a/cmd/ascii2der/values.go b/ascii2der/values.go similarity index 99% rename from cmd/ascii2der/values.go rename to ascii2der/values.go index 03e82dc..d8406e0 100644 --- a/cmd/ascii2der/values.go +++ b/ascii2der/values.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "errors" diff --git a/cmd/ascii2der/values_test.go b/ascii2der/values_test.go similarity index 99% rename from cmd/ascii2der/values_test.go rename to ascii2der/values_test.go index 17d6a9c..ea74213 100644 --- a/cmd/ascii2der/values_test.go +++ b/ascii2der/values_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
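For orientation, the public surface extracted in this first patch boils down to NewScanner and Exec, with *ParseError carrying position data. A minimal out-of-tree consumer, sketched here using only identifiers that the diff above introduces (the sample input and the error handling are illustrative, not taken from the patch), might look like:

package main

import (
	"fmt"
	"os"

	"github.com/google/der-ascii/ascii2der"
)

func main() {
	der, err := ascii2der.NewScanner("SEQUENCE { INTEGER { 42 } }").Exec()
	if err != nil {
		// Parse failures are usually *ParseError values that carry the
		// position of the offending token.
		if perr, ok := err.(*ascii2der.ParseError); ok {
			fmt.Fprintf(os.Stderr, "line %d: %s\n", perr.Pos.Line, perr.Err)
		} else {
			fmt.Fprintln(os.Stderr, err)
		}
		os.Exit(1)
	}
	fmt.Printf("%x\n", der) // prints 300302012a
}
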
-package main +package ascii2der import ( "testing" diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go index ca5c73d..f379968 100644 --- a/cmd/ascii2der/main.go +++ b/cmd/ascii2der/main.go @@ -20,6 +20,8 @@ import ( "fmt" "io/ioutil" "os" + + "github.com/google/der-ascii/ascii2der" ) var inPath = flag.String("i", "", "input file to use (defaults to stdin)") @@ -52,7 +54,7 @@ func main() { os.Exit(1) } - outBytes, err := asciiToDER(string(inBytes)) + outBytes, err := ascii2der.NewScanner(string(inBytes)).Exec() if err != nil { fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err) os.Exit(1) From 9c05123218977c1940725c4384be07e0c2d904e6 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 21 Feb 2022 17:31:28 -0500 Subject: [PATCH 2/9] Add doc comments to remaning functions in encoder.go --- ascii2der/encoder.go | 37 +++++++++++++++++++++++++++++++------ ascii2der/encoder_test.go | 10 +++++----- ascii2der/scanner.go | 6 +++--- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/ascii2der/encoder.go b/ascii2der/encoder.go index 6b9f787..87993dc 100644 --- a/ascii2der/encoder.go +++ b/ascii2der/encoder.go @@ -22,9 +22,12 @@ import ( "github.com/google/der-ascii/internal" ) +// appendUTF16 marshals r using UTF-16 and appends the result to dst, returning +// the updated slice. +// +// This logic intentionally tolerates unpaired surrogates. func appendUTF16(dst []byte, r rune) []byte { if r <= 0xffff { - // Note this logic intentionally tolerates unpaired surrogates. return append(dst, byte(r>>8), byte(r)) } @@ -34,10 +37,20 @@ func appendUTF16(dst []byte, r rune) []byte { return dst } +// appendUTF16 marshals r using UTF-32 and appends the result to dst, returning +// the updated slice. +// +// In other words, this function writes r as an integer in big-endian order. func appendUTF32(dst []byte, r rune) []byte { return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) } +// appendBase128 marshals an integer in base 128, a varint format used by OIDs +// and long-form tag numbers, and appends the result to dst, returning the +// updated slice. +// +// This function is the same as appendBase128WithLength with length set to zero, +// which cannot fail. func appendBase128(dst []byte, value uint32) []byte { dst, err := appendBase128WithLength(dst, value, 0) if err != nil { @@ -47,6 +60,11 @@ func appendBase128(dst []byte, value uint32) []byte { return dst } +// appendBase128 marshals an integer in base 128, a varint format used by OIDs +// and long-form tag numbers, and appends the result to dst, returning the +// updated slice. +// +// If length is zero, the minimal length is chosen. func appendBase128WithLength(dst []byte, value uint32, length int) ([]byte, error) { // Count how many bytes are needed. var l int @@ -137,18 +155,25 @@ func appendInteger(dst []byte, value int64) []byte { return dst } -func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, bool) { +// appendObjectIdentifier marshals the given array of integers as an OID. +func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, error) { // Validate the input before anything is written. 
- if len(value) < 2 || value[0] > 2 || (value[0] < 2 && value[1] > 39) { - return dst, false + if len(value) < 2 { + return dst, errors.New("OIDs must have at least two arcs") + } + if value[0] > 2 { + return dst, fmt.Errorf("first arc of an OID must be one of 0, 1, or 2; got %d", value[0]) + } + if value[0] < 2 && value[1] > 39 { + return dst, fmt.Errorf("second arc of an OID must be at most 39; got %d", value[1]) } if value[0]*40+value[1] < value[1] { - return dst, false + return dst, errors.New("first two arcs overflowed") } dst = appendBase128(dst, value[0]*40+value[1]) for _, v := range value[2:] { dst = appendBase128(dst, v) } - return dst, true + return dst, nil } diff --git a/ascii2der/encoder_test.go b/ascii2der/encoder_test.go index 8c964da..f0472b8 100644 --- a/ascii2der/encoder_test.go +++ b/ascii2der/encoder_test.go @@ -162,21 +162,21 @@ var appendObjectIdentifierTests = []struct { func TestAppendObjectIdentifier(t *testing.T) { for i, tt := range appendObjectIdentifierTests { - dst, ok := appendObjectIdentifier(nil, tt.value) + dst, err := appendObjectIdentifier(nil, tt.value) if !tt.ok { - if ok { + if err == nil { t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value) } else if len(dst) != 0 { t.Errorf("%d. appendObjectIdentifier did not preserve input.", i) } } else if !bytes.Equal(dst, tt.encoded) { - t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, wanted %v.", i, tt.value, dst, tt.encoded) + t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, %v, wanted %v.", i, tt.value, dst, err, tt.encoded) } dst = []byte{0} - dst, ok = appendObjectIdentifier(dst, tt.value) + dst, err = appendObjectIdentifier(dst, tt.value) if !tt.ok { - if ok { + if err == nil { t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value) } else if !bytes.Equal(dst, []byte{0}) { t.Errorf("%d. appendObjectIdentifier did not preserve input.", i) diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go index feeacee..a195bac 100644 --- a/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -387,9 +387,9 @@ loop: } oid = append(oid, uint32(u)) } - der, ok := appendObjectIdentifier(nil, oid) - if !ok { - return token{}, errors.New("invalid OID") + der, err := appendObjectIdentifier(nil, oid) + if err != nil { + return token{}, &ParseError{start, err} } return token{Kind: tokenBytes, Value: der, Pos: s.pos}, nil } From 6f3533b174171da8ab082f82dfc44f85ec739e96 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 21 Feb 2022 23:02:50 -0500 Subject: [PATCH 3/9] Document scanner.go, including an example --- ascii2der/encoder.go | 8 +- ascii2der/examples_test.go | 31 ++++++ ascii2der/scanner.go | 219 +++++++++++++++++++++++++------------ 3 files changed, 186 insertions(+), 72 deletions(-) create mode 100644 ascii2der/examples_test.go diff --git a/ascii2der/encoder.go b/ascii2der/encoder.go index 87993dc..a0b04d5 100644 --- a/ascii2der/encoder.go +++ b/ascii2der/encoder.go @@ -24,7 +24,7 @@ import ( // appendUTF16 marshals r using UTF-16 and appends the result to dst, returning // the updated slice. -// +// // This logic intentionally tolerates unpaired surrogates. func appendUTF16(dst []byte, r rune) []byte { if r <= 0xffff { @@ -39,7 +39,7 @@ func appendUTF16(dst []byte, r rune) []byte { // appendUTF16 marshals r using UTF-32 and appends the result to dst, returning // the updated slice. -// +// // In other words, this function writes r as an integer in big-endian order. 
func appendUTF32(dst []byte, r rune) []byte { return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) @@ -48,7 +48,7 @@ func appendUTF32(dst []byte, r rune) []byte { // appendBase128 marshals an integer in base 128, a varint format used by OIDs // and long-form tag numbers, and appends the result to dst, returning the // updated slice. -// +// // This function is the same as appendBase128WithLength with length set to zero, // which cannot fail. func appendBase128(dst []byte, value uint32) []byte { @@ -63,7 +63,7 @@ func appendBase128(dst []byte, value uint32) []byte { // appendBase128 marshals an integer in base 128, a varint format used by OIDs // and long-form tag numbers, and appends the result to dst, returning the // updated slice. -// +// // If length is zero, the minimal length is chosen. func appendBase128WithLength(dst []byte, value uint32, length int) ([]byte, error) { // Count how many bytes are needed. diff --git a/ascii2der/examples_test.go b/ascii2der/examples_test.go new file mode 100644 index 0000000..e3fbbc9 --- /dev/null +++ b/ascii2der/examples_test.go @@ -0,0 +1,31 @@ +// Copyright 2015 The DER ASCII Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ascii2der + +import ( + "fmt" +) + +func Example() { + scanner := NewScanner(` + SEQUENCE { + INTEGER { "totally an integer" } + } +`) + + der, _ := scanner.Exec() + fmt.Printf("%x\n", der) + // Output: 30140212746f74616c6c7920616e20696e7465676572 +} diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go index a195bac..eb13caa 100644 --- a/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -12,6 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +// package ascii2der implements the DER-ASCII language described in +// https://github.com/google/der-ascii/blob/master/language.txt. +// +// The Scanner type can be used to parse DER-ASCII files and output byte blobs +// that may or may not be valid DER. package ascii2der import ( @@ -27,10 +32,22 @@ import ( ) // A Position describes a location in the input stream. +// +// The zero-value Position represents the first byte of an anonymous input file. type Position struct { - Offset int // offset, starting at 0 - Line int // line number, starting at 1 - Column int // column number, starting at 1 (byte count) + Offset int // Byte offset. + Line int // Line number (zero-indexed). + Column int // Column number (zero-indexed byte, not rune, count). + File string // Optional file name for pretty-printing. +} + +// String converts a Position to a string. +func (p Position) String() string { + file := p.File + if file == "" { + file = "" + } + return fmt.Sprintf("%s:%d:%d", file, p.Line+1, p.Column+1) } // A tokenKind is a kind of token. @@ -45,14 +62,26 @@ const ( tokenEOF ) -// A ParseError is an error during parsing DER ASCII. +// A ParseError may be produced while executing a DER ASCII file, wrapping +// another error along with a position. 
+// +// Errors produced by functions in this package my by type-asserted to +// ParseError to try and obtain the position at which the error occurred. type ParseError struct { Pos Position Err error } +// Error makes this type into an error type. func (e *ParseError) Error() string { - return fmt.Sprintf("line %d: %s", e.Pos.Line, e.Err) + return fmt.Sprintf("%s: %s", e.Pos, e.Err) +} + +// Unwrap extracts the inner wrapped error. +// +// See errors.Unwrap(). +func (e *ParseError) Unwrap() error { + return e.Err } // A token is a token in a DER ASCII file. @@ -74,21 +103,96 @@ var ( regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) ) +// A Scanner represents parsing state for a DER ASCII file. +// +// A zero-value Scanner is ready to begin parsing (given that Input is set to +// a valid value). However, it is recommended to use NewScanner to create a new +// Scanner, since it can pre-populate fields other than Input with default +// settings. type Scanner struct { - text string - pos Position + // Input is the input text being processed. + Input string + // Position is the current position at which parsing should + // resume. The Offset field is used for indexing into Input; the remaining + // fields are used for error-reporting. + pos Position +} + +// NewScanner creates a new scanner for parsing the given input. +func NewScanner(input string) *Scanner { + return &Scanner{Input: input} +} + +// SetFile sets the file path shown in this Scanner's error reports. +func (s *Scanner) SetFile(path string) { + s.pos.File = path +} + +// Exec consumes tokens until Input is exhausted, returning the resulting +// encoded maybe-DER. +func (s *Scanner) Exec() ([]byte, error) { + return s.exec(nil) +} + +// isEOF returns whether the cursor is past the end of the input string. +func (s *Scanner) isEOF() bool { + return s.pos.Offset >= len(s.Input) +} + +// advance advances the scanner's cursor one position. +// +// Unlike just s.pos.Offset++, this will not proceed beyond the end of the +// string, and will update the line and column information accordingly. +func (s *Scanner) advance() { + if !s.isEOF() { + if s.Input[s.pos.Offset] == '\n' { + s.pos.Line++ + s.pos.Column = 0 + } else { + s.pos.Column++ + } + s.pos.Offset++ + } } -func NewScanner(text string) *Scanner { - return &Scanner{text: text, pos: Position{Line: 1}} +// advanceBytes calls advance() n times. +func (s *Scanner) advanceBytes(n int) { + for i := 0; i < n; i++ { + s.advance() + } } +// consumeUpTo advances the cursor until the given byte is seen, returning all +// source bytes between the initial cursor position and excluding the given +// byte. +// +// If EOF is reached before the byte is seen, the function returns false. +func (s *Scanner) consumeUpTo(b byte) (string, bool) { + start := s.pos.Offset + for !s.isEOF() { + if s.Input[s.pos.Offset] == b { + ret := s.Input[start:s.pos.Offset] + s.advance() + return ret, true + } + s.advance() + } + return "", false +} + +// parseEscapeSequence parses a DER-ASCII escape sequence, returning the rune +// it escapes. +// +// Valid escapes are: +// \n \" \\ \xNN \uNNNN \UNNNNNNNN +// +// This function assumes that the scanner's cursor is currently on a \ rune. func (s *Scanner) parseEscapeSequence() (rune, error) { s.advance() // Skip the \. The caller is assumed to have validated it. 
if s.isEOF() { return 0, &ParseError{s.pos, errors.New("expected escape character")} } - switch c := s.text[s.pos.Offset]; c { + switch c := s.Input[s.pos.Offset]; c { case 'n': s.advance() return '\n', nil @@ -97,10 +201,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { return rune(c), nil case 'x': s.advance() - if s.pos.Offset+2 > len(s.text) { + if s.pos.Offset+2 > len(s.Input) { return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+2]) + b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+2]) if err != nil { return 0, &ParseError{s.pos, err} } @@ -108,10 +212,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { return rune(b[0]), nil case 'u': s.advance() - if s.pos.Offset+4 > len(s.text) { + if s.pos.Offset+4 > len(s.Input) { return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+4]) + b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+4]) if err != nil { return 0, &ParseError{s.pos, err} } @@ -119,10 +223,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { return rune(b[0])<<8 | rune(b[1]), nil case 'U': s.advance() - if s.pos.Offset+8 > len(s.text) { + if s.pos.Offset+8 > len(s.Input) { return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+8]) + b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+8]) if err != nil { return 0, &ParseError{s.pos, err} } @@ -133,6 +237,9 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { } } +// parseQuotedString parses a UTF-8 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a " rune. func (s *Scanner) parseQuotedString() (token, error) { s.advance() // Skip the ". The caller is assumed to have validated it. start := s.pos @@ -141,7 +248,7 @@ func (s *Scanner) parseQuotedString() (token, error) { if s.isEOF() { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.text[s.pos.Offset]; c { + switch c := s.Input[s.pos.Offset]; c { case '"': s.advance() return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil @@ -163,6 +270,10 @@ func (s *Scanner) parseQuotedString() (token, error) { } } +// parseUTF16String parses a UTF-16 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a u followed +// by a " rune. func (s *Scanner) parseUTF16String() (token, error) { s.advance() // Skip the u. The caller is assumed to have validated it. s.advance() // Skip the ". The caller is assumed to have validated it. @@ -172,7 +283,7 @@ func (s *Scanner) parseUTF16String() (token, error) { if s.isEOF() { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.text[s.pos.Offset]; c { + switch c := s.Input[s.pos.Offset]; c { case '"': s.advance() return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil @@ -183,9 +294,9 @@ func (s *Scanner) parseUTF16String() (token, error) { } bytes = appendUTF16(bytes, r) default: - r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:]) + r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:]) // Note DecodeRuneInString may return utf8.RuneError if there is a - // legitimate replacement charaacter in the input. The documentation + // legitimate replacement character in the input. The documentation // says errors return (RuneError, 0) or (RuneError, 1). 
if r == utf8.RuneError && n <= 1 { return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} @@ -196,6 +307,10 @@ func (s *Scanner) parseUTF16String() (token, error) { } } +// parseUTF32String parses a UTF-32 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a U followed +// by a " rune. func (s *Scanner) parseUTF32String() (token, error) { s.advance() // Skip the U. The caller is assumed to have validated it. s.advance() // Skip the ". The caller is assumed to have validated it. @@ -205,7 +320,7 @@ func (s *Scanner) parseUTF32String() (token, error) { if s.isEOF() { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.text[s.pos.Offset]; c { + switch c := s.Input[s.pos.Offset]; c { case '"': s.advance() return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil @@ -216,7 +331,7 @@ func (s *Scanner) parseUTF32String() (token, error) { } bytes = appendUTF32(bytes, r) default: - r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:]) + r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:]) // Note DecodeRuneInString may return utf8.RuneError if there is a // legitimate replacement charaacter in the input. The documentation // says errors return (RuneError, 0) or (RuneError, 1). @@ -229,13 +344,14 @@ func (s *Scanner) parseUTF32String() (token, error) { } } +// next lexes the next token. func (s *Scanner) next() (token, error) { again: if s.isEOF() { return token{Kind: tokenEOF, Pos: s.pos}, nil } - switch s.text[s.pos.Offset] { + switch s.Input[s.pos.Offset] { case ' ', '\t', '\n', '\r': // Skip whitespace. s.advance() @@ -244,7 +360,7 @@ again: // Skip to the end of the comment. s.advance() for !s.isEOF() { - wasNewline := s.text[s.pos.Offset] == '\n' + wasNewline := s.Input[s.pos.Offset] == '\n' s.advance() if wasNewline { break @@ -260,15 +376,15 @@ again: case '"': return s.parseQuotedString() case 'u': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' { + if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' { return s.parseUTF16String() } case 'U': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' { + if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' { return s.parseUTF32String() } case 'b': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '`' { + if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '`' { s.advance() // Skip the b. s.advance() // Skip the `. bitStr, ok := s.consumeUpTo('`') @@ -348,7 +464,7 @@ again: s.advance() loop: for !s.isEOF() { - switch s.text[s.pos.Offset] { + switch s.Input[s.pos.Offset] { case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#': break loop default: @@ -356,7 +472,7 @@ loop: } } - symbol := s.text[start.Offset:s.pos.Offset] + symbol := s.Input[start.Offset:s.pos.Offset] // See if it is a tag. 
tag, ok := internal.TagByName(symbol) @@ -417,41 +533,12 @@ loop: return token{}, fmt.Errorf("unrecognized symbol %q", symbol) } -func (s *Scanner) isEOF() bool { - return s.pos.Offset >= len(s.text) -} - -func (s *Scanner) advance() { - if !s.isEOF() { - if s.text[s.pos.Offset] == '\n' { - s.pos.Line++ - s.pos.Column = 0 - } else { - s.pos.Column++ - } - s.pos.Offset++ - } -} - -func (s *Scanner) advanceBytes(n int) { - for i := 0; i < n; i++ { - s.advance() - } -} - -func (s *Scanner) consumeUpTo(b byte) (string, bool) { - start := s.pos.Offset - for !s.isEOF() { - if s.text[s.pos.Offset] == b { - ret := s.text[start:s.pos.Offset] - s.advance() - return ret, true - } - s.advance() - } - return "", false -} - +// exec is the main parser loop. +// +// The leftCurly argument, it not nil, represents the { that began the +// length-prefixed block we're currently executing. Because we need to encode +// the full extent of the contents of a {} before emitting the length prefix, +// this function calls itself with a non-nil leftCurly to encode it. func (s *Scanner) exec(leftCurly *token) ([]byte, error) { var out []byte var lengthModifier *token @@ -508,7 +595,3 @@ func (s *Scanner) exec(leftCurly *token) ([]byte, error) { } } } - -func (s *Scanner) Exec() ([]byte, error) { - return s.exec(nil) -} From d5d85a8d8ce195b3010b431b17e96bad0cd535e5 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 21 Feb 2022 23:58:18 -0500 Subject: [PATCH 4/9] Simplify the base parsing helpers in scanner.go --- ascii2der/scanner.go | 176 +++++++++++++++++++++---------------------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go index eb13caa..b0c0d81 100644 --- a/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -134,17 +134,18 @@ func (s *Scanner) Exec() ([]byte, error) { return s.exec(nil) } -// isEOF returns whether the cursor is past the end of the input string. -func (s *Scanner) isEOF() bool { - return s.pos.Offset >= len(s.Input) +// isEOF returns whether the cursor is at least n bytes ahead of the end of the +// input. +func (s *Scanner) isEOF(n int) bool { + return s.pos.Offset+n >= len(s.Input) } -// advance advances the scanner's cursor one position. +// advance advances the scanner's cursor n positions. // -// Unlike just s.pos.Offset++, this will not proceed beyond the end of the +// Unlike just s.pos.Offset += n, this will not proceed beyond the end of the // string, and will update the line and column information accordingly. -func (s *Scanner) advance() { - if !s.isEOF() { +func (s *Scanner) advance(n int) { + for i := 0; i < n && !s.isEOF(0); i++ { if s.Input[s.pos.Offset] == '\n' { s.pos.Line++ s.pos.Column = 0 @@ -155,27 +156,30 @@ func (s *Scanner) advance() { } } -// advanceBytes calls advance() n times. -func (s *Scanner) advanceBytes(n int) { - for i := 0; i < n; i++ { - s.advance() +// consume advances exactly n times and returns all source bytes between the +// initial cursor position and excluding the final cursor position. +// +// If EOF is reached before all n bytes are consumed, the function returns +// false. 
+func (s *Scanner) consume(n int) (string, bool) { + start := s.pos.Offset + s.advance(n) + if s.pos.Offset-start != n { + return "", false } + + return s.Input[start:s.pos.Offset], true } -// consumeUpTo advances the cursor until the given byte is seen, returning all +// consumeUntil advances the cursor until the given byte is seen, returning all // source bytes between the initial cursor position and excluding the given -// byte. +// byte. This function will advance past the searched-for byte. // // If EOF is reached before the byte is seen, the function returns false. -func (s *Scanner) consumeUpTo(b byte) (string, bool) { - start := s.pos.Offset - for !s.isEOF() { - if s.Input[s.pos.Offset] == b { - ret := s.Input[start:s.pos.Offset] - s.advance() - return ret, true - } - s.advance() +func (s *Scanner) consumeUntil(b byte) (string, bool) { + if i := strings.IndexByte(s.Input[s.pos.Offset:], b); i != -1 { + text, _ := s.consume(i + 1) + return text[:i], true } return "", false } @@ -188,50 +192,47 @@ func (s *Scanner) consumeUpTo(b byte) (string, bool) { // // This function assumes that the scanner's cursor is currently on a \ rune. func (s *Scanner) parseEscapeSequence() (rune, error) { - s.advance() // Skip the \. The caller is assumed to have validated it. - if s.isEOF() { + s.advance(1) // Skip the \. The caller is assumed to have validated it. + if s.isEOF(0) { return 0, &ParseError{s.pos, errors.New("expected escape character")} } + switch c := s.Input[s.pos.Offset]; c { case 'n': - s.advance() + s.advance(1) return '\n', nil case '"', '\\': - s.advance() + s.advance(1) return rune(c), nil - case 'x': - s.advance() - if s.pos.Offset+2 > len(s.Input) { - return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+2]) - if err != nil { - return 0, &ParseError{s.pos, err} + case 'x', 'u', 'U': + s.advance(1) + + var digits int + switch c { + case 'x': + digits = 2 + case 'u': + digits = 4 + case 'U': + digits = 8 } - s.advanceBytes(2) - return rune(b[0]), nil - case 'u': - s.advance() - if s.pos.Offset+4 > len(s.Input) { + + hexes, ok := s.consume(digits) + if !ok { return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } - b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+4]) + + bytes, err := hex.DecodeString(hexes) if err != nil { return 0, &ParseError{s.pos, err} } - s.advanceBytes(4) - return rune(b[0])<<8 | rune(b[1]), nil - case 'U': - s.advance() - if s.pos.Offset+8 > len(s.Input) { - return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+8]) - if err != nil { - return 0, &ParseError{s.pos, err} + + var r rune + for _, b := range bytes { + r <<= 8 + r |= rune(b) } - s.advanceBytes(8) - return rune(b[0])<<24 | rune(b[1])<<16 | rune(b[2])<<8 | rune(b[3]), nil + return r, nil default: return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} } @@ -241,16 +242,16 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { // // This function assumes that the scanner's cursor is currently on a " rune. func (s *Scanner) parseQuotedString() (token, error) { - s.advance() // Skip the ". The caller is assumed to have validated it. + s.advance(1) // Skip the ". The caller is assumed to have validated it. 
start := s.pos var bytes []byte for { - if s.isEOF() { + if s.isEOF(0) { return token{}, &ParseError{start, errors.New("unmatched \"")} } switch c := s.Input[s.pos.Offset]; c { case '"': - s.advance() + s.advance(1) return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil case '\\': escapeStart := s.pos @@ -264,7 +265,7 @@ func (s *Scanner) parseQuotedString() (token, error) { } bytes = append(bytes, byte(r)) default: - s.advance() + s.advance(1) bytes = append(bytes, c) } } @@ -275,17 +276,17 @@ func (s *Scanner) parseQuotedString() (token, error) { // This function assumes that the scanner's cursor is currently on a u followed // by a " rune. func (s *Scanner) parseUTF16String() (token, error) { - s.advance() // Skip the u. The caller is assumed to have validated it. - s.advance() // Skip the ". The caller is assumed to have validated it. + s.advance(2) // Skip the u". The caller is assumed to have validated it. start := s.pos var bytes []byte for { - if s.isEOF() { + if s.isEOF(0) { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.Input[s.pos.Offset]; c { + + switch s.Input[s.pos.Offset] { case '"': - s.advance() + s.advance(1) return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil case '\\': r, err := s.parseEscapeSequence() @@ -301,7 +302,7 @@ func (s *Scanner) parseUTF16String() (token, error) { if r == utf8.RuneError && n <= 1 { return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} } - s.advanceBytes(n) + s.advance(n) bytes = appendUTF16(bytes, r) } } @@ -312,17 +313,17 @@ func (s *Scanner) parseUTF16String() (token, error) { // This function assumes that the scanner's cursor is currently on a U followed // by a " rune. func (s *Scanner) parseUTF32String() (token, error) { - s.advance() // Skip the U. The caller is assumed to have validated it. - s.advance() // Skip the ". The caller is assumed to have validated it. + s.advance(2) // Skip the U". The caller is assumed to have validated it. start := s.pos var bytes []byte for { - if s.isEOF() { + if s.isEOF(0) { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.Input[s.pos.Offset]; c { + + switch s.Input[s.pos.Offset] { case '"': - s.advance() + s.advance(1) return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil case '\\': r, err := s.parseEscapeSequence() @@ -338,7 +339,7 @@ func (s *Scanner) parseUTF32String() (token, error) { if r == utf8.RuneError && n <= 1 { return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} } - s.advanceBytes(n) + s.advance(n) bytes = appendUTF32(bytes, r) } } @@ -347,47 +348,46 @@ func (s *Scanner) parseUTF32String() (token, error) { // next lexes the next token. func (s *Scanner) next() (token, error) { again: - if s.isEOF() { + if s.isEOF(0) { return token{Kind: tokenEOF, Pos: s.pos}, nil } switch s.Input[s.pos.Offset] { case ' ', '\t', '\n', '\r': // Skip whitespace. - s.advance() + s.advance(1) goto again case '#': // Skip to the end of the comment. 
- s.advance() - for !s.isEOF() { + s.advance(1) + for !s.isEOF(0) { wasNewline := s.Input[s.pos.Offset] == '\n' - s.advance() + s.advance(1) if wasNewline { break } } goto again case '{': - s.advance() + s.advance(1) return token{Kind: tokenLeftCurly, Pos: s.pos}, nil case '}': - s.advance() + s.advance(1) return token{Kind: tokenRightCurly, Pos: s.pos}, nil case '"': return s.parseQuotedString() case 'u': - if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' { + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' { return s.parseUTF16String() } case 'U': - if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' { + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' { return s.parseUTF32String() } case 'b': - if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '`' { - s.advance() // Skip the b. - s.advance() // Skip the `. - bitStr, ok := s.consumeUpTo('`') + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '`' { + s.advance(2) // Skip the b`. + bitStr, ok := s.consumeUntil('`') if !ok { return token{}, &ParseError{s.pos, errors.New("unmatched `")} } @@ -431,8 +431,8 @@ again: return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil } case '`': - s.advance() - hexStr, ok := s.consumeUpTo('`') + s.advance(1) + hexStr, ok := s.consumeUntil('`') if !ok { return token{}, &ParseError{s.pos, errors.New("unmatched `")} } @@ -442,8 +442,8 @@ again: } return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil case '[': - s.advance() - tagStr, ok := s.consumeUpTo(']') + s.advance(1) + tagStr, ok := s.consumeUntil(']') if !ok { return token{}, &ParseError{s.pos, errors.New("unmatched [")} } @@ -461,14 +461,14 @@ again: // Normal token. Consume up to the next whitespace character, symbol, or // EOF. start := s.pos - s.advance() + s.advance(1) loop: - for !s.isEOF() { + for !s.isEOF(0) { switch s.Input[s.pos.Offset] { case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#': break loop default: - s.advance() + s.advance(1) } } From 51662df30b524148a7995dc787599fa8d65e586c Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Tue, 22 Feb 2022 00:07:22 -0500 Subject: [PATCH 5/9] Make cmd/ascii2der use the correct file name for errors --- cmd/ascii2der/main.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go index f379968..b6749b7 100644 --- a/cmd/ascii2der/main.go +++ b/cmd/ascii2der/main.go @@ -54,7 +54,10 @@ func main() { os.Exit(1) } - outBytes, err := ascii2der.NewScanner(string(inBytes)).Exec() + scanner := ascii2der.NewScanner(string(inBytes)) + scanner.SetFile(*inPath) + + outBytes, err := scanner.Exec() if err != nil { fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err) os.Exit(1) From 74f3c2d741549206a0402753ceb7821acf80e04c Mon Sep 17 00:00:00 2001 From: Thore Goebel Date: Tue, 18 Jan 2022 18:51:11 +0100 Subject: [PATCH 6/9] Update installation instruction for go 1.17 Fixes #20 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9a9a8d4..a5bbb0a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ structures and malformed variants of them. It provides two tools, `ascii2der` and `der2ascii`, to convert DER ASCII to a byte string and vice versa. To install them, run: - go get github.com/google/der-ascii/cmd/... 
+ go install github.com/google/der-ascii/cmd/...@latest These tools may be used to create test inputs by taking an existing DER or BER structure, disassembling it with `der2ascii` into DER ASCII, making From dfb22d3a321378d83cf84f84b9dba12e95017a84 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 14 Jun 2021 19:55:38 -0400 Subject: [PATCH 7/9] Add function call syntax for builtin functions This commit paves the way to adding crypto primitives to the surface language, such as sign(), for painlessly generating signed data, esp. for testing X.509 parsers. --- ascii2der/builtins.go | 69 ++++++++++++++++++++ ascii2der/scanner.go | 133 ++++++++++++++++++++++++++++++++------ ascii2der/scanner_test.go | 36 +++++++++-- language.txt | 32 ++++++++- 4 files changed, 243 insertions(+), 27 deletions(-) create mode 100644 ascii2der/builtins.go diff --git a/ascii2der/builtins.go b/ascii2der/builtins.go new file mode 100644 index 0000000..eba5037 --- /dev/null +++ b/ascii2der/builtins.go @@ -0,0 +1,69 @@ +// Copyright 2015 The DER ASCII Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ascii2der + +import ( + "errors" + "fmt" +) + +// setDefaultBuiltins adds the default builtins to the given Scanner's builtin +// function table. +// +// Some builtins may capture the Scanner pointer if they operate on scanner +// state, such as variables. +func setDefaultBuiltins(scanner *Scanner) { + // NOTE: If adding a builtin, remember to document it in language.txt! + scanner.Builtins = map[string]Builtin{ + // define(var, val) sets var = val in the scanner's variable table. + // Variables may be redefined. Expands to the empty string. + "define": func(args [][]byte) ([]byte, error) { + if len(args) != 2 { + return nil, errors.New("expected two arguments to define()") + } + + if scanner.Vars == nil { + scanner.Vars = make(map[string][]byte) + } + scanner.Vars[string(args[0])] = args[1] + + return nil, nil + }, + + // var(var) expands to whatever var is set to in the scanner's variable table. + // Error if var is not defined. + // + // var(var, default) behaves similarly, except expands to default if var is + // not defined. 
+ "var": func(args [][]byte) ([]byte, error) { + switch len(args) { + case 1: + val, ok := scanner.Vars[string(args[0])] + if !ok { + return nil, fmt.Errorf("var() with undefined name: %q", string(args[0])) + } + return val, nil + case 2: + val, ok := scanner.Vars[string(args[0])] + if !ok { + return args[1], nil + } + return val, nil + default: + return nil, errors.New("expected one or two arguments to var()") + } + }, + } +} diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go index b0c0d81..0f49caf 100644 --- a/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -59,6 +59,10 @@ const ( tokenRightCurly tokenIndefinite tokenLongForm + tokenComma + tokenLeftParen + tokenRightParen + tokenWord tokenEOF ) @@ -103,6 +107,8 @@ var ( regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) ) +type Builtin func(args [][]byte) ([]byte, error) + // A Scanner represents parsing state for a DER ASCII file. // // A zero-value Scanner is ready to begin parsing (given that Input is set to @@ -112,6 +118,15 @@ var ( type Scanner struct { // Input is the input text being processed. Input string + // Builtins is a table of builtin functions that can be called with the usual + // function call syntax in a DER ASCII file. NewScanner will return a Scanner + // with a pre-populated table consisting of those functions defined in + // language.txt, but users may add or remove whatever functions they wish. + Builtins map[string]Builtin + // Vars is a table of variables that builtins can use to store and retrieve + // state, such as via the define() and var() builtins. + Vars map[string][]byte + // Position is the current position at which parsing should // resume. The Offset field is used for indexing into Input; the remaining // fields are used for error-reporting. @@ -120,7 +135,9 @@ type Scanner struct { // NewScanner creates a new scanner for parsing the given input. func NewScanner(input string) *Scanner { - return &Scanner{Input: input} + s := &Scanner{Input: input} + setDefaultBuiltins(s) + return s } // SetFile sets the file path shown in this Scanner's error reports. @@ -131,7 +148,8 @@ func (s *Scanner) SetFile(path string) { // Exec consumes tokens until Input is exhausted, returning the resulting // encoded maybe-DER. func (s *Scanner) Exec() ([]byte, error) { - return s.exec(nil) + enc, _, err := s.exec(nil) + return enc, err } // isEOF returns whether the cursor is at least n bytes ahead of the end of the @@ -374,6 +392,15 @@ again: case '}': s.advance(1) return token{Kind: tokenRightCurly, Pos: s.pos}, nil + case ',': + s.advance(1) + return token{Kind: tokenComma, Pos: s.pos}, nil + case '(': + s.advance(1) + return token{Kind: tokenLeftParen, Pos: s.pos}, nil + case ')': + s.advance(1) + return token{Kind: tokenRightParen, Pos: s.pos}, nil case '"': return s.parseQuotedString() case 'u': @@ -465,7 +492,7 @@ again: loop: for !s.isEOF(0) { switch s.Input[s.pos.Offset] { - case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#': + case ' ', '\t', '\n', '\r', ',', '(', ')', '{', '}', '[', ']', '`', '"', '#': break loop default: s.advance(1) @@ -530,33 +557,44 @@ loop: return token{Kind: tokenLongForm, Length: l}, nil } - return token{}, fmt.Errorf("unrecognized symbol %q", symbol) + return token{Kind: tokenWord, Value: []byte(symbol), Pos: s.pos}, nil } // exec is the main parser loop. // -// The leftCurly argument, it not nil, represents the { that began the -// length-prefixed block we're currently executing. 
Because we need to encode -// the full extent of the contents of a {} before emitting the length prefix, -// this function calls itself with a non-nil leftCurly to encode it. -func (s *Scanner) exec(leftCurly *token) ([]byte, error) { +// Because we need to consume all of the tokens between delimiters (e.g. for +// computing the length of the contents of {} or counting arguments in ()), this +// function needs to recurse into itself; the left parameter, when non-nil, +// refers to the left delimiter that triggered the recursion. +// +// This function returns when: it sees an EOF; it sees a comma; it sees the +// matching right-delimiter to left. It returns the encoded contents of the the +// recognized tokens and all of the tokens that were recognized, including +// the token that ended parsing. +func (s *Scanner) exec(left *token) ([]byte, []token, error) { var out []byte + var tokens []token var lengthModifier *token + var word *token for { token, err := s.next() if err != nil { - return nil, err + return nil, nil, err } + tokens = append(tokens, token) if lengthModifier != nil && token.Kind != tokenLeftCurly { - return nil, &ParseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} + return nil, nil, &ParseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} + } + if word != nil && token.Kind != tokenLeftParen { + return nil, nil, &ParseError{word.Pos, fmt.Errorf("unrecognized symbol %q", string(token.Value))} } switch token.Kind { case tokenBytes: out = append(out, token.Value...) case tokenLeftCurly: - child, err := s.exec(&token) + child, _, err := s.exec(&token) if err != nil { - return nil, err + return nil, nil, err } var lengthOverride int if lengthModifier != nil { @@ -574,24 +612,79 @@ func (s *Scanner) exec(leftCurly *token) ([]byte, error) { out, err = appendLength(out, len(child), lengthOverride) if err != nil { // appendLength may fail if the lengthModifier was incompatible. - return nil, &ParseError{lengthModifier.Pos, err} + return nil, tokens, &ParseError{lengthModifier.Pos, err} } out = append(out, child...) lengthModifier = nil + case tokenLeftParen: + if word == nil { + return nil, tokens, &ParseError{token.Pos, errors.New("missing function name")} + } + var args [][]byte + argLoop: + for { + arg, prev, err := s.exec(&token) + if err != nil { + return nil, tokens, err + } + args = append(args, arg) + lastToken := prev[len(prev)-1] + switch lastToken.Kind { + case tokenComma: + if len(prev) < 2 { + return nil, nil, &ParseError{lastToken.Pos, errors.New("function arguments cannot be empty")} + } + case tokenRightParen: + if len(prev) < 2 { + // Actually foo(), so the argument list is nil. + args = nil + } + break argLoop + default: + return nil, nil, &ParseError{lastToken.Pos, errors.New("expected ',' or ')'")} + } + } + bytes, err := s.executeBuiltin(string(word.Value), args) + if err != nil { + return nil, nil, err + } + word = nil + out = append(out, bytes...) 
 		case tokenRightCurly:
-			if leftCurly != nil {
-				return out, nil
+			if left != nil && left.Kind == tokenLeftCurly {
+				return out, tokens, nil
 			}
-			return nil, &ParseError{token.Pos, errors.New("unmatched '}'")}
+			return nil, nil, &ParseError{token.Pos, errors.New("unmatched '}'")}
+		case tokenRightParen:
+			if left != nil && left.Kind == tokenLeftParen {
+				return out, tokens, nil
+			}
+			return nil, nil, &ParseError{token.Pos, errors.New("unmatched ')'")}
 		case tokenLongForm, tokenIndefinite:
 			lengthModifier = &token
+		case tokenComma:
+			return out, tokens, nil
+		case tokenWord:
+			word = &token
 		case tokenEOF:
-			if leftCurly == nil {
-				return out, nil
+			if left == nil {
+				return out, tokens, nil
+			} else if left.Kind == tokenLeftCurly {
+				return nil, nil, &ParseError{left.Pos, errors.New("unmatched '{'")}
+			} else {
+				return nil, nil, &ParseError{left.Pos, errors.New("unmatched '('")}
 			}
-			return nil, &ParseError{leftCurly.Pos, errors.New("unmatched '{'")}
 		default:
 			panic(token)
 		}
 	}
 }
+
+func (s *Scanner) executeBuiltin(name string, args [][]byte) ([]byte, error) {
+	builtin, ok := s.Builtins[name]
+	if !ok {
+		return nil, fmt.Errorf("unrecognized builtin %q", name)
+	}
+
+	return builtin(args)
+}
diff --git a/ascii2der/scanner_test.go b/ascii2der/scanner_test.go
index ff0fe85..e3059ce 100644
--- a/ascii2der/scanner_test.go
+++ b/ascii2der/scanner_test.go
@@ -94,11 +94,6 @@ indefinite long-form:2`,
 		},
 		true,
 	},
-	// Garbage tokens.
-	{"SEQUENC", nil, false},
-	{"1...2", nil, false},
-	{"true", nil, false},
-	{"false", nil, false},
 	// Unmatched [.
 	{"[SEQUENCE", nil, false},
 	// Unmatched ".
@@ -426,9 +421,18 @@ var asciiToDERTests = []struct {
 	ok bool
 }{
 	{"SEQUENCE { INTEGER { 42 } INTEGER { 1 } }", []byte{0x30, 0x06, 0x02, 0x01, 0x2a, 0x02, 0x01, 0x01}, true},
+	// Garbage words.
+	{"SEQUENC", nil, false},
+	{"1...2", nil, false},
+	{"true", nil, false},
+	{"false", nil, false},
 	// Mismatched curlies.
 	{"{", nil, false},
 	{"}", nil, false},
+	{"(", nil, false},
+	{")", nil, false},
+	{"({)}", nil, false},
+	{"{(})", nil, false},
 	// Invalid token.
 	{"BOGUS", nil, false},
 	// Length overrides.
@@ -442,6 +446,28 @@ var asciiToDERTests = []struct {
 	// Too long of length modifiers.
 	{"[long-form:1 99999]", nil, false},
 	{"SEQUENCE long-form:1 { `" + strings.Repeat("a", 1024) + "` }", nil, false},
+	// Function call without function name.
+	{"()", nil, false},
+	// Unknown function.
+	{"bogus()", nil, false},
+	// Basic variable usage.
+	{`define("foo", 42) var("foo") var("foo")`, []byte{42, 42}, true},
+	{
+		`
+		define(42, 42) var(42)
+		define(42, "a") var(42)
+		`,
+		[]byte{42, byte('a')},
+		true,
+	},
+	{`var("missing")`, nil, false},
+	{`var("missing", 42)`, []byte{42}, true},
+	// Empty parens -> zero args.
+	// TODO(mcyoung): if we ever add a zero-argument function, use it in this
+	// test, instead.
+	{"var()", nil, false},
+	// Empty token streams are not valid arguments.
+	{"define(, 42)", nil, false},
 }
 
 func TestASCIIToDER(t *testing.T) {
diff --git a/language.txt b/language.txt
index 1b4f774..cb30775 100644
--- a/language.txt
+++ b/language.txt
@@ -151,7 +151,7 @@ FALSE
 [PRIVATE 2]
 [UNIVERSAL 16] # This is a SEQUENCE.
 [UNIVERSAL 2 PRIMITIVE] # This is an INTEGER.
-[long-form:2 UNIVERSAL 2 PRIMTIVE] # This is `1f0002` instead of `02`.
+[long-form:2 UNIVERSAL 2 PRIMITIVE] # This is `1f0002` instead of `02`.
 
 # As a shorthand, one may write type names from ASN.1, replacing spaces with
 # underscore. These specify tag, number, and the constructed bit. 
The
@@ -236,7 +236,6 @@
 INTEGER long-form:1 { 5 }
 INTEGER { `00ff` }
 }
-
 # Examples.
 
 # These primitives may be combined with raw byte strings to produce other
@@ -278,6 +277,35 @@
 SEQUENCE
 `aabbcc`
 INTEGER { 2 }
+
+# DER ASCII provides a selection of builtin functions for building more
+# complex DER structures. Builtins are spelled builtin(arg1, arg2, ...), where
+# each argument is an arbitrary but non-empty sequence of DER ASCII tokens.
+# Tokens are fully expanded (but not emitted) before the builtin executes.
+# Builtin calls expand to a byte string just like any other token.
+#
+# There is explicitly no support for user-defined functions.
+#
+# The syntax and output of anything using builtins is subject to change, and
+# doesn't have the same stability expectations as the rest of DER ASCII.
+
+# define(var, value) defines a variable with the given value. The name of the
+# variable may be an arbitrary byte string. Variables may be redefined at any
+# point. define() always expands to an empty byte string.
+define("payload", SEQUENCE {
+  INTEGER { 1 }
+  OCTET_STRING { "hello, world" }
+})
+define(`ffff`, "payload")
+
+# var(var) expands to a variable previously defined with define(). The main use
+# of var() is to factor out a complex structure that will be repeated many
+# times, such as complex issuer and subject fields in a self-signed X.509 cert.
+#
+# It is an error to access a var() that has not been previously defined, unless
+# a default is supplied: var(var, default) expands to default when the variable
+# is undefined.
+var("payload")
+var(var(`ffff`)) # Same as above, since var(`ffff`) expands to "payload".
+
 # Disassembler.
 
 # Although the conversion from DER ASCII to a byte string is well-defined, the
From aae1383cef4060406e9414c398ed8c3410e72010 Mon Sep 17 00:00:00 2001
From: Miguel Young de la Sota
Date: Mon, 28 Jun 2021 23:03:37 -0400
Subject: [PATCH 8/9] Add -d command-line flag for pre-defining variables

Compare the -D flag in Clang
---
 ascii2der/scanner_test.go |  2 +-
 cmd/ascii2der/main.go     | 88 ++++++++++++++++++++++++++++++++------
 2 files changed, 74 insertions(+), 16 deletions(-)

diff --git a/ascii2der/scanner_test.go b/ascii2der/scanner_test.go
index e3059ce..a3fd0b8 100644
--- a/ascii2der/scanner_test.go
+++ b/ascii2der/scanner_test.go
@@ -426,7 +426,7 @@ var asciiToDERTests = []struct {
 	{"1...2", nil, false},
 	{"true", nil, false},
 	{"false", nil, false},
-	// Mismatched curlies.
+	// Mismatched brackets.
 	{"{", nil, false},
 	{"}", nil, false},
 	{"(", nil, false},
diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go
index b6749b7..a32083b 100644
--- a/cmd/ascii2der/main.go
+++ b/cmd/ascii2der/main.go
@@ -20,14 +20,72 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
+	"strings"
 
 	"github.com/google/der-ascii/ascii2der"
 )
 
+// pairs conforms to flag.Value. Each time Set() is called, it collects another
+// k=v pair into itself.
+type pairs map[string]string
+
+func (p pairs) String() string {
+	return ""
+}
+
+func (p pairs) Set(pair string) error {
+	if pair == "" || p == nil {
+		return nil
+	}
+
+	split := strings.SplitN(pair, "=", 2)
+	if len(split) != 2 {
+		return fmt.Errorf("missing \"=\": %q", pair)
+	}
+
+	p[split[0]] = split[1]
+	return nil
+}
+
+var defines = make(map[string]string)
+var fileDefines = make(map[string]string)
+
+func init() {
+	flag.Var(pairs(defines), "d",
+		`pair of the form a=b; define("a", "b") is inserted at the start of the input`+
+			"\nmay occur multiple times")
+	flag.Var(pairs(fileDefines), "df",
+		`like -d, except the second value is interpreted as a binary file to read`+
+			"\nmay occur multiple times")
+}
+
 var inPath = flag.String("i", "", "input file to use (defaults to stdin)")
 var outPath = flag.String("o", "", "output file to use (defaults to stdout)")
 var pemType = flag.String("pem", "", "if provided, format the output as a PEM block with this type")
 
+func readAll(path string) []byte {
+	var file *os.File
+	if path == "" {
+		file = os.Stdin
+	} else {
+		var err error
+		file, err = os.Open(path)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Error opening %s: %s\n", path, err)
+			os.Exit(1)
+		}
+		defer file.Close()
+	}
+
+	buf, err := ioutil.ReadAll(file)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading %s: %s\n", path, err)
+		os.Exit(1)
+	}
+
+	return buf
+}
+
 func main() {
 	flag.Parse()
 
@@ -37,26 +95,26 @@ func main() {
 		os.Exit(1)
 	}
 
-	inFile := os.Stdin
-	if *inPath != "" {
-		var err error
-		inFile, err = os.Open(*inPath)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error opening %s: %s\n", *inPath, err)
+	inBytes := readAll(*inPath)
+	scanner := ascii2der.NewScanner(string(inBytes))
+	scanner.SetFile(*inPath)
+
+	scanner.Vars = make(map[string][]byte)
+	for k, v := range defines {
+		if _, ok := scanner.Vars[k]; ok {
+			fmt.Fprintf(os.Stderr, "Error: tried to define %q with flags twice\n", k)
 			os.Exit(1)
 		}
-		defer inFile.Close()
+		scanner.Vars[k] = []byte(v)
 	}
-
-	inBytes, err := ioutil.ReadAll(inFile)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error reading input: %s\n", err)
-		os.Exit(1)
+	for k, v := range fileDefines {
+		if _, ok := scanner.Vars[k]; ok {
+			fmt.Fprintf(os.Stderr, "Error: tried to define %q with flags twice\n", k)
+			os.Exit(1)
+		}
+		scanner.Vars[k] = readAll(v)
 	}
 
-	scanner := ascii2der.NewScanner(string(inBytes))
-	scanner.SetFile(*inPath)
-
 	outBytes, err := scanner.Exec()
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err)

From 2f4fea53b4a0e58c1b473982c86d7c3d00ea0a4a Mon Sep 17 00:00:00 2001
From: Miguel Young de la Sota
Date: Thu, 1 Jul 2021 22:02:07 -0400
Subject: [PATCH 9/9] Add sign() builtin for emitting digital signatures

---
 ascii2der/builtins.go      |  96 ++++++++++++++++++++++++
 language.txt               |   4 +
 samples/cert_with_sign.txt | 147 +++++++++++++++++++++++++++++++++++++
 samples/certificates.md    |  57 +++++++-------
 4 files changed, 276 insertions(+), 28 deletions(-)
 create mode 100644 samples/cert_with_sign.txt

diff --git a/ascii2der/builtins.go b/ascii2der/builtins.go
index eba5037..a3d7f06 100644
--- a/ascii2der/builtins.go
+++ b/ascii2der/builtins.go
@@ -15,8 +15,14 @@
 package ascii2der
 
 import (
+	"crypto"
+	"crypto/ecdsa"
+	"crypto/ed25519"
+	"crypto/rand"
+	"crypto/rsa"
+	"crypto/x509"
 	"errors"
 	"fmt"
+	"reflect"
 )
 
 // setDefaultBuiltins adds the default builtins to the given Scanner's builtin
@@ -65,5 +71,95 @@ func setDefaultBuiltins(scanner *Scanner) {
 			return nil, errors.New("expected one or two arguments to var()")
 		}
 	},
+
+	// sign(algorithm, 
key, message) expands into a digital signature for message
+	// using the given algorithm and key. key must be a private key in PKCS #8
+	// format.
+	//
+	// The supported algorithm strings are:
+	//  - "RSA_PKCS1_SHA1", "RSA_PKCS1_SHA256", "RSA_PKCS1_SHA384",
+	//    "RSA_PKCS1_SHA512", for RSA-SSA with the specified hash function.
+	//  - "ECDSA_SHA256", "ECDSA_SHA384", "ECDSA_SHA512", for ECDSA with the
+	//    specified hash function.
+	//  - "Ed25519" for itself.
+	"sign": func(args [][]byte) ([]byte, error) {
+		if len(args) != 3 {
+			return nil, errors.New("expected three arguments to sign()")
+		}
+
+		pk8, err := x509.ParsePKCS8PrivateKey(args[1])
+		if err != nil {
+			return nil, err
+		}
+
+		var signer crypto.Signer
+		var hash crypto.Hash
+		switch string(args[0]) {
+		case "RSA_PKCS1_SHA1":
+			key, ok := pk8.(*rsa.PrivateKey)
+			if !ok {
+				return nil, fmt.Errorf("expected RSA key, got %v", reflect.TypeOf(pk8))
+			}
+			signer = key
+			hash = crypto.SHA1
+		case "RSA_PKCS1_SHA256":
+			key, ok := pk8.(*rsa.PrivateKey)
+			if !ok {
+				return nil, fmt.Errorf("expected RSA key, got %v", reflect.TypeOf(pk8))
+			}
+			signer = key
+			hash = crypto.SHA256
+		case "RSA_PKCS1_SHA384":
+			key, ok := pk8.(*rsa.PrivateKey)
+			if !ok {
+				return nil, fmt.Errorf("expected RSA key, got %v", reflect.TypeOf(pk8))
+			}
+			signer = key
+			hash = crypto.SHA384
+		case "RSA_PKCS1_SHA512":
+			key, ok := pk8.(*rsa.PrivateKey)
+			if !ok {
+				return nil, fmt.Errorf("expected RSA key, got %v", reflect.TypeOf(pk8))
+			}
+			signer = key
+			hash = crypto.SHA512
+		case "ECDSA_SHA256":
+			key, ok := pk8.(*ecdsa.PrivateKey)
+			if !ok {
+				return nil, fmt.Errorf("expected ECDSA key, got %v", reflect.TypeOf(pk8))
+			}
+			signer = key
+			hash = crypto.SHA256
+		case "ECDSA_SHA384":
+			key, ok := pk8.(*ecdsa.PrivateKey)
+			if !ok {
+				return nil, fmt.Errorf("expected ECDSA key, got %v", reflect.TypeOf(pk8))
+			}
+			signer = key
+			hash = crypto.SHA384
+		case "ECDSA_SHA512":
+			key, ok := pk8.(*ecdsa.PrivateKey)
+			if !ok {
+				return nil, fmt.Errorf("expected ECDSA key, got %v", reflect.TypeOf(pk8))
+			}
+			signer = key
+			hash = crypto.SHA512
+		case "Ed25519":
+			key, ok := pk8.(ed25519.PrivateKey)
+			if !ok {
+				return nil, fmt.Errorf("expected Ed25519 key, got %v", reflect.TypeOf(pk8))
+			}
+			signer = key
+		default:
+			return nil, fmt.Errorf("unknown algorithm %q", string(args[0]))
+		}
+
+		digest := args[2]
+		if hash > 0 {
+			h := hash.New()
+			h.Write(digest)
+			digest = h.Sum(nil)
+		}
+
+		// rand.Reader is only consumed by schemes that need randomness;
+		// RSA PKCS #1 v1.5 output remains deterministic.
+		return signer.Sign(rand.Reader, digest, hash)
+	},
+	}
+}
diff --git a/language.txt b/language.txt
index cb30775..8cb9849 100644
--- a/language.txt
+++ b/language.txt
@@ -305,6 +305,10 @@ define(`ffff`, "payload")
 var("payload")
 var(var(`ffff`)) # Same as above, since var(`ffff`) expands to "payload".
 
+# sign(algo, key, message) expands to a digital signature for message,
+# using the given algorithm string (e.g. "ECDSA_SHA256") and private key. The
+# supported key formats and algorithms can be found in
+# ascii2der/builtins.go.
 
 # Disassembler.
 
 # Although the conversion from DER ASCII to a byte string is well-defined, the
diff --git a/samples/cert_with_sign.txt b/samples/cert_with_sign.txt
new file mode 100644
index 0000000..7a7731d
--- /dev/null
+++ b/samples/cert_with_sign.txt
@@ -0,0 +1,147 @@
+# This is the same certificate as cert.txt, but with the signature generated
+# using sign().
+#
+# ascii2der will assemble both files to equal byte strings, because RSA-SSA
+# happens to be deterministic.
+
+# Our private key, in PKCS #8 form.
+define("my_key", SEQUENCE { + INTEGER { 0 } + SEQUENCE { + # rsaEncryption + OBJECT_IDENTIFIER { 1.2.840.113549.1.1.1 } + NULL {} + } + OCTET_STRING { + SEQUENCE { + INTEGER { 0 } + INTEGER { `00d82bc8a632e462ff4df3d0ad598b45a7bdf147bf09587b22bd35ae97258694a080c0b41f7691674631d01084b7221e70239172c8e96d793a8577800fc4951675c54a714cc8633fa3f2639c2a4f9afacbc1716e288528a0271e651cae07d55b6f2d43ed2b90b18caf246daee9173a05c1bfb81cae653b1b58c2d9aed6aa6788f1` } + INTEGER { 65537 } + INTEGER { `008072d3d15de033aafc88e9f0778ab8230a4c7a935b5c461ec84b43a8f0555daf599227f5a220983b2f92309e8bab2c66f9db8d5730cd2a01ca18cdf1909ffe2d791c40960c5161ea0b743f9cf43270a1c33666bdab55fc55c24f48fe5a618d07f8247a12a31972cb7234dbe9d45854948266156e38b2dc6d6215d74820809401` } + INTEGER { `00f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491` } + INTEGER { `00e57341d15469ec0bb5d389a0f0ada58a18d73776d9e69ef134049a918e475d4bea46f12d0b2468c972fc33a739a6bcdada8019376a0c466048d98278a2a49e61` } + INTEGER { `0be99d8f0650e540b9b191e9cf96f74881b902e32ed169ffd8a1776c3f3e80f0ac765aa14615713e1549f250a20fe4ee48c4e0c6176162fc7842a0dd64d640d1` } + INTEGER { `00e4d74e168bdd5499dd4fcc5d228ddda35ce111254d7010a7ba5cb91860d1d64007b99782783168fd39dc455c0c48bae47fb5f0f06ea92d6b8c5cbb1ebbfff921` } + INTEGER { `00bef4572c74da6ba545cd36a288ef12685b07577950c973ad32b0690798dd9a86568231ef0765bd0a49fbb03aac3c1f94dadc97d23a03750132ba230408363ca1` } + } + } +}) + +# The "to be signed" portion of our cert. +define("tbs_cert", SEQUENCE { + [0] { + INTEGER { 2 } + } + INTEGER { `00fbb04c2eab109b0c` } + SEQUENCE { + # sha1WithRSAEncryption + OBJECT_IDENTIFIER { 1.2.840.113549.1.1.5 } + NULL {} + } + SEQUENCE { + SET { + SEQUENCE { + # countryName + OBJECT_IDENTIFIER { 2.5.4.6 } + PrintableString { "AU" } + } + } + SET { + SEQUENCE { + # stateOrProvinceName + OBJECT_IDENTIFIER { 2.5.4.8 } + UTF8String { "Some-State" } + } + } + SET { + SEQUENCE { + # organizationName + OBJECT_IDENTIFIER { 2.5.4.10 } + UTF8String { "Internet Widgits Pty Ltd" } + } + } + } + SEQUENCE { + UTCTime { "140423205040Z" } + UTCTime { "170422205040Z" } + } + SEQUENCE { + SET { + SEQUENCE { + # countryName + OBJECT_IDENTIFIER { 2.5.4.6 } + PrintableString { "AU" } + } + } + SET { + SEQUENCE { + # stateOrProvinceName + OBJECT_IDENTIFIER { 2.5.4.8 } + UTF8String { "Some-State" } + } + } + SET { + SEQUENCE { + # organizationName + OBJECT_IDENTIFIER { 2.5.4.10 } + UTF8String { "Internet Widgits Pty Ltd" } + } + } + } + SEQUENCE { + SEQUENCE { + # rsaEncryption + OBJECT_IDENTIFIER { 1.2.840.113549.1.1.1 } + NULL {} + } + BIT_STRING { + `00` + SEQUENCE { + INTEGER { `00d82bc8a632e462ff4df3d0ad598b45a7bdf147bf09587b22bd35ae97258694a080c0b41f7691674631d01084b7221e70239172c8e96d793a8577800fc4951675c54a714cc8633fa3f2639c2a4f9afacbc1716e288528a0271e651cae07d55b6f2d43ed2b90b18caf246daee9173a05c1bfb81cae653b1b58c2d9aed6aa6788f1` } + INTEGER { 65537 } + } + } + } + [3] { + SEQUENCE { + SEQUENCE { + # subjectKeyIdentifier + OBJECT_IDENTIFIER { 2.5.29.14 } + OCTET_STRING { + OCTET_STRING { `8b75d5accb08be0e1f65b7fa56be6ca775da85af` } + } + } + SEQUENCE { + # authorityKeyIdentifier + OBJECT_IDENTIFIER { 2.5.29.35 } + OCTET_STRING { + SEQUENCE { + [0 PRIMITIVE] { `8b75d5accb08be0e1f65b7fa56be6ca775da85af` } + } + } + } + SEQUENCE { + # basicConstraints + OBJECT_IDENTIFIER { 2.5.29.19 } + OCTET_STRING { + SEQUENCE { + BOOLEAN { TRUE } + } + } + } + } + } +}) + +SEQUENCE { + var("tbs_cert") + SEQUENCE { + # sha1WithRSAEncryption + 
OBJECT_IDENTIFIER { 1.2.840.113549.1.1.5 }
+    NULL {}
+  }
+  BIT_STRING {
+    `00`
+    sign("RSA_PKCS1_SHA1", var("my_key"), var("tbs_cert"))
+  }
+}
diff --git a/samples/certificates.md b/samples/certificates.md
index e25e756..009bc11 100644
--- a/samples/certificates.md
+++ b/samples/certificates.md
@@ -2,8 +2,8 @@
 
 Modifying and creating X.509 certificates is more involved than modifying a
 normal DER structure if one wishes to keep the signature valid. This document
-provides instructions for fixing up a modified test certificate's signature if
-the issuer's private key is available. (For a non-test certificate, this is the
+provides instructions for using the `sign()` builtin to generate the signature
+on demand using the private key. (For a non-test certificate, this is the
 CA's private key and is presumably unavailable.)
 
 X.509 certificates are specified in [RFC 5280](https://tools.ietf.org/html/rfc5280).
@@ -17,31 +17,32 @@ The basic top-level structure is:
 The `tbsCertificate` is a large structure with the contents of the certificate.
 This includes the subject, issuer, public key, etc. The `signatureAlgorithm`
 specifies the signature algorithm and parameters. Finally, the `signatureValue`
-is the signature itself, created from the issuer's private key. This is the
-field that must be fixed once the `tbsCertificate` is modified.
-
-The signature is computed over the serialized `tbsCertificate`, so, using a
-text editor, copy the `tbsCertificate` value into its own file, `tbs-cert.txt`.
-Now sign that with the issuing private key. If using OpenSSL's command-line
-tool, here is a sample command:
-
-    ascii2der -i tbs-cert.txt | openssl dgst -sha256 -sign issuer_key.pem | \
-        xxd -p -c 9999 > signature.txt
-
-For other options, replace `-sha256` with a different digest or pass `-sigopt`.
-See [OpenSSL's documentation](https://www.openssl.org/docs/man1.1.1/man1/dgst.html)
-for details. Note that, for a valid certificate, the signature parameters
-should match the `signatureAlgorithm` field. If using different signing
-parameters, update it and the copy in the `tbsCertificate`.
-
-Finally, in a text editor, replace the signature with the new one. X.509
-defines certificates as BIT STRINGs, but every signature algorithm uses byte
-strings, so include a leading zero to specify that no bits should be removed
-from the end:
-
-    BIT_STRING {
-      `00`  # No unused bits.
-      `INSERT SIGNATURE HERE`
+is the signature itself, created from the issuer's private key. We can express
+this relationship using a variable and `sign()`:
+
+    define("tbs_cert", SEQUENCE {
+      [0] { INTEGER { 2 } }
+      # Other X.509-ey goodness.
+    })
+
+    SEQUENCE {
+      # Splat in the actual tbsCertificate.
+      var("tbs_cert")
+
+      # This is the signatureAlgorithm.
+      SEQUENCE {
+        # id-Ed25519
+        OBJECT_IDENTIFIER { 1.3.101.112 }
+      }
+
+      # This is the signatureValue.
+      BIT_STRING {
+        `00`  # No unused bits.
+        sign("Ed25519", var("my_key"), var("tbs_cert"))
+      }
     }
 
-Finally, use `ascii2der` to convert the certificate to DER.
+The variable `"my_key"` would be defined elsewhere in the file, or potentially
+injected using the `-df` flag.
+
+See `cert_with_sign.txt` for a complete example.
\ No newline at end of file
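
As a usage sketch (not itself part of the patches above): the pre-defined
variables that the -d and -df flags supply on the command line can also be set
directly through the ascii2der package that these patches introduce. The paths
cert.txt and key.pk8 below are hypothetical; only NewScanner, SetFile, Vars,
and Exec from the patches are assumed.

package main

import (
	"fmt"
	"io/ioutil"
	"os"

	"github.com/google/der-ascii/ascii2der"
)

func main() {
	// Roughly equivalent to: ascii2der -i cert.txt -df my_key=key.pk8
	input, err := ioutil.ReadFile("cert.txt") // hypothetical DER ASCII input
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	key, err := ioutil.ReadFile("key.pk8") // hypothetical PKCS #8 key file
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	scanner := ascii2der.NewScanner(string(input))
	scanner.SetFile("cert.txt")

	// Pre-define "my_key", just as the -df flag does, so that var("my_key")
	// resolves inside the input without a define() appearing in the file.
	scanner.Vars = map[string][]byte{"my_key": key}

	der, err := scanner.Exec()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	os.Stdout.Write(der)
}

Pre-populating Scanner.Vars this way mirrors what cmd/ascii2der/main.go does
for each -d and -df flag before calling Exec.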