diff --git a/parser.go b/parser.go index 6ae246e..c2422c8 100644 --- a/parser.go +++ b/parser.go @@ -10,6 +10,8 @@ import ( "text/scanner" ) +var InvalidCSSError = errors.New("invalid CSS") + //go:generate stringer -type=tokenType type tokenType int @@ -23,6 +25,8 @@ const ( tokenSelector tokenStyleSeparator tokenStatementEnd + tokenCommentStart + tokenCommentEnd ) type tokenEntry struct { @@ -32,12 +36,14 @@ type tokenEntry struct { func newTokenType(typ string) tokenType { types := map[string]tokenType{ - "{": tokenBlockStart, - "}": tokenBlockEnd, - ":": tokenStyleSeparator, - ";": tokenStatementEnd, - ".": tokenSelector, - "#": tokenSelector, + "{": tokenBlockStart, + "}": tokenBlockEnd, + ":": tokenStyleSeparator, + ";": tokenStatementEnd, + ".": tokenSelector, + "#": tokenSelector, + "/*": tokenCommentStart, + "*/": tokenCommentEnd, } result, ok := types[typ] @@ -124,12 +130,14 @@ func buildList(r io.Reader) *list.List { func parse(l *list.List) (map[Rule]map[string]string, error) { var ( // Information about the current block that is parsed. - rule []string + rule = make([]string, 1) style string value string selector string - isBlock bool + isBlock bool + isValue bool + isComment bool // Parsed styles. 
css = make(map[Rule]map[string]string) @@ -142,39 +150,77 @@ func parse(l *list.List) (map[Rule]map[string]string, error) { for e := l.Front(); e != nil; e = l.Front() { token := e.Value.(tokenEntry) l.Remove(e) - // fmt.Printf("typ: %s, value: %q, prevToken: %v\n", token.typ(), token.value, prevToken) + + // handle comment - we continue after this because we don't want to override prevToken + switch token.typ() { + case tokenCommentStart: + isComment = true + continue + case tokenCommentEnd: + // handle standalone endComment token + if !isComment { + return css, fmt.Errorf("line %d: unexpected end of comment: %w", token.pos.Line, InvalidCSSError) + } + + isComment = false + continue + } + + if isComment { // skip everything regardless what it is if processing in comment mode + continue + } + switch token.typ() { case tokenValue: switch prevToken { case tokenFirstToken, tokenBlockEnd: - rule = append(rule, token.value) + rule[len(rule)-1] += token.value case tokenSelector: - rule = append(rule, selector+token.value) - case tokenBlockStart, tokenStatementEnd: + // if not empty - we already added a part of a rule and this is a descendant selector for that rule + if rule[len(rule)-1] != "" { + rule[len(rule)-1] += " " + } + + rule[len(rule)-1] += selector + token.value + case tokenBlockStart, tokenStatementEnd: // { or ; style = token.value case tokenStyleSeparator: + if isValue { // multiple separators without ; + return css, fmt.Errorf("line %d: multiple style names before value: %w", token.pos.Line, InvalidCSSError) + } + + isValue = true value = token.value case tokenValue: - rule = append(rule, token.value) + if !isBlock { // descendant selector + rule[len(rule)-1] += " " + token.value + } else { // technically, this could mean we put multiple style values. + if !isValue { // want to parse multiple style names? denied. 
+ return css, fmt.Errorf("line %d: expected only one name before value: %w", token.pos.Line, InvalidCSSError) + } + + value += " " + token.value + } default: - return css, fmt.Errorf("line %d: invalid syntax", token.pos.Line) + return css, fmt.Errorf("line %d: invalid syntax: %w", token.pos.Line, InvalidCSSError) } case tokenSelector: selector = token.value case tokenBlockStart: if prevToken != tokenValue { - return css, fmt.Errorf("line %d: block is missing rule identifier", token.pos.Line) + return css, fmt.Errorf("line %d: block is missing rule identifier: %w", token.pos.Line, InvalidCSSError) } isBlock = true + isValue = false case tokenStatementEnd: - // fmt.Printf("prevToken: %v, style: %v, value: %v\n", prevToken, style, value) if prevToken != tokenValue || style == "" || value == "" { - return css, fmt.Errorf("line %d: expected style before semicolon", token.pos.Line) + return css, fmt.Errorf("line %d: expected style before semicolon: %w", token.pos.Line, InvalidCSSError) } styles[style] = value + isValue = false case tokenBlockEnd: if !isBlock { - return css, fmt.Errorf("line %d: rule block ends without a beginning", token.pos.Line) + return css, fmt.Errorf("line %d: rule block ends without a beginning: %w", token.pos.Line, InvalidCSSError) } for i := range rule { @@ -196,7 +242,7 @@ func parse(l *list.List) (map[Rule]map[string]string, error) { styles = map[string]string{} style, value = "", "" isBlock = false - rule = make([]string, 0) + rule = make([]string, 1) } prevToken = token.typ() } diff --git a/parser_test.go b/parser_test.go index 16ad22a..9cdd687 100644 --- a/parser_test.go +++ b/parser_test.go @@ -38,6 +38,19 @@ rule1 { background-repeat: repeat-x; }` + ex6 := `rule1 descendant { + style1:value1; +}` + + ex7 := `rule1 { + /* this is a comment */ + style: value; +}` + + ex8 := `.rule1 #rule2 { + style: value; +}` + cases := []struct { name string CSS string @@ -75,6 +88,21 @@ rule1 { "background-repeat": "repeat-x", }, }}, + {"Descendant 
selector", ex6, map[Rule]map[string]string{ + "rule1 descendant": { + "style1": "value1", + }, + }}, + {"Comment in rule", ex7, map[Rule]map[string]string{ + "rule1": { + "style": "value", + }, + }}, + {"Selector with descendant ID and Class", ex8, map[Rule]map[string]string{ + ".rule1 #rule2": { + "style": "value", + }, + }}, } for _, tt := range cases { @@ -109,7 +137,12 @@ rule { style1: value1; style2:; }` - _ = ex3 + + ex5 := ` +body { + style1:value1; + */ +}` cases := []struct { name string @@ -117,9 +150,9 @@ rule { }{ {"Missing rule", ex1}, {"Missing style", ex2}, - // TODO: this hsould not crash - //{"Statement Missing Semicolon", ex3}, + {"Statement Missing Semicolon", ex3}, {"BlockEndsWithoutBeginning", ex4}, + {"Unexpected end of comment", ex5}, } for _, tt := range cases { @@ -131,50 +164,6 @@ rule { } } -func TestParseSelectors(t *testing.T) { - ex1 := `.rule { - style1: value1; - style2: value2; -} -#rule1 sad asd { - style3: value3; - style4: value4; -}` - - css, err := Unmarshal([]byte(ex1)) - if err != nil { - t.Fatal(err) - } - if _, ok := css[".rule"]; !ok { - t.Fatal("Missing '.rule' rule") - } - if _, ok := css["#rule1"]; !ok { - t.Fatal("Missing '.rule' rule") - } -} - -func TestParseSelectorGroup(t *testing.T) { - ex1 := `.rule1 #rule2 rule3 { - style1: value1; - style2: value2; -}` - - css, err := Unmarshal([]byte(ex1)) - if err != nil { - t.Fatal(err) - } - - if _, ok := css[".rule1"]; !ok { - t.Fatal("Missing '.rule1' rule") - } - if _, ok := css["#rule2"]; !ok { - t.Fatal("Missing '#rule2' rule") - } - if _, ok := css["rule3"]; !ok { - t.Fatal("Missing '.rule3' rule") - } -} - func BenchmarkParser(b *testing.B) { ex1 := "" for i := 0; i < 100; i++ { diff --git a/tokentype_string.go b/tokentype_string.go index c6840e4..3f9e953 100644 --- a/tokentype_string.go +++ b/tokentype_string.go @@ -16,11 +16,13 @@ func _() { _ = x[tokenSelector-4] _ = x[tokenStyleSeparator-5] _ = x[tokenStatementEnd-6] + _ = x[tokenCommentStart-7] + _ = 
x[tokenCommentEnd-8] } -const _tokenType_name = "tokenFirstTokentokenBlockStarttokenBlockEndtokenRuleNametokenValuetokenSelectortokenStyleSeparatortokenStatementEnd" +const _tokenType_name = "tokenFirstTokentokenBlockStarttokenBlockEndtokenRuleNametokenValuetokenSelectortokenStyleSeparatortokenStatementEndtokenCommentStarttokenCommentEnd" -var _tokenType_index = [...]uint8{0, 15, 30, 43, 56, 66, 79, 98, 115} +var _tokenType_index = [...]uint8{0, 15, 30, 43, 56, 66, 79, 98, 115, 132, 147} func (i tokenType) String() string { i -= -1