diff --git a/README.md b/README.md index 25e4895f66..b618cf1d4f 100644 --- a/README.md +++ b/README.md @@ -415,7 +415,7 @@ Flags: -h, --help help for yq -I, --indent int sets indent level for output (default 2) -i, --inplace update the file in place of first file given. - -p, --input-format string [auto|a|yaml|y|json|j|kyaml|ky|props|p|csv|c|tsv|t|xml|x|base64|uri|toml|hcl|h|lua|l|ini|i] parse format for input. (default "auto") + -p, --input-format string [auto|a|yaml|y|json|j|json5|j5|kyaml|ky|props|p|csv|c|tsv|t|xml|x|base64|uri|toml|hcl|h|lua|l|ini|i] parse format for input. (default "auto") --lua-globals output keys as top-level global variables --lua-prefix string prefix (default "return ") --lua-suffix string suffix (default ";\n") @@ -424,7 +424,7 @@ Flags: -N, --no-doc Don't print document separators (---) -0, --nul-output Use NUL char to separate values. If unwrap scalar is also set, fail if unwrapped scalar contains NUL char. -n, --null-input Don't read input, simply evaluate the expression given. Useful for creating docs from scratch. - -o, --output-format string [auto|a|yaml|y|json|j|kyaml|ky|props|p|csv|c|tsv|t|xml|x|base64|uri|toml|hcl|h|shell|s|lua|l|ini|i] output format type. (default "auto") + -o, --output-format string [auto|a|yaml|y|json|j|json5|j5|kyaml|ky|props|p|csv|c|tsv|t|xml|x|base64|uri|toml|hcl|h|shell|s|lua|l|ini|i] output format type. (default "auto") -P, --prettyPrint pretty print, shorthand for '... style = ""' --properties-array-brackets use [x] in array paths (e.g. 
for SpringBoot) --properties-separator string separator to use between keys and values (default " = ") diff --git a/acceptance_tests/inputs-format-auto.sh b/acceptance_tests/inputs-format-auto.sh index 3c241d650c..b2b207428d 100755 --- a/acceptance_tests/inputs-format-auto.sh +++ b/acceptance_tests/inputs-format-auto.sh @@ -5,6 +5,7 @@ setUp() { rm test*.toml 2>/dev/null || true rm test*.tfstate 2>/dev/null || true rm test*.json 2>/dev/null || true + rm test*.json5 2>/dev/null || true rm test*.properties 2>/dev/null || true rm test*.csv 2>/dev/null || true rm test*.tsv 2>/dev/null || true @@ -31,6 +32,29 @@ EOM assertEquals "$expected" "$X" } +testInputJson5() { + cat >test.json5 <<'EOL' +{ /* hello */ + mike: { things: "cool", }, +} +EOL + + read -r -d '' expected << EOM +// hello +{ + "mike": { + "things": "cool" + } +} +EOM + + X=$(./yq test.json5) + assertEquals "$expected" "$X" + + X=$(./yq ea test.json5) + assertEquals "$expected" "$X" +} + testInputToml() { cat >test.toml <test.json5 <<'EOL' +{ + // comment + mike: { things: "cool", }, +} +EOL + + read -r -d '' expected << EOM +# comment +mike: + things: cool +EOM + + X=$(./yq test.json5 -oy) + assertEquals "$expected" "$X" + + X=$(./yq ea test.json5 -oy) + assertEquals "$expected" "$X" +} + testInputProperties() { cat >test.properties </dev/null || true rm test*.xml 2>/dev/null || true rm test*.tf 2>/dev/null || true + rm test*.json5 2>/dev/null || true + rm test*.kyaml 2>/dev/null || true } testInputProperties() { @@ -185,6 +187,40 @@ EOM assertEquals "$expected" "$X" } +testInputJson5MultilineBlockComments() { + cat >test.json5 <<'EOL' +{ + /* + multiline + block comment + */ + first: 1, + second/* inline block */: 2, + third: /* before value */ 3, + fourth: [1, /* between elements */ 2,], +} +EOL + + read -r -d '' expected <<'EOM' +# multiline +# block comment +first: 1 +second: 2 # inline block +third: 3 +# before value +fourth: + - 1 + # between elements + - 2 +EOM + + X=$(./yq e -p=json5 test.json5) + 
assertEquals "$expected" "$X" + + X=$(./yq ea -p=json5 test.json5) + assertEquals "$expected" "$X" +} + diff --git a/acceptance_tests/output-format.sh b/acceptance_tests/output-format.sh index 02a150c9ba..58c8d99b0b 100755 --- a/acceptance_tests/output-format.sh +++ b/acceptance_tests/output-format.sh @@ -48,6 +48,28 @@ EOM assertEquals "$expected" "$X" } +testOutputJson5() { + cat >test.yml <test.yml <= len(p.input) +} + +func (p *json5Parser) peek() rune { + if p.eof() { + return 0 + } + return p.input[p.pos] +} + +func (p *json5Parser) peekNext() rune { + i := p.pos + 1 + if i >= len(p.input) { + return 0 + } + return p.input[i] +} + +func (p *json5Parser) next() rune { + if p.eof() { + return 0 + } + r := p.input[p.pos] + p.pos++ + if r == '\n' { + p.line++ + p.col = 1 + } else { + p.col++ + } + return r +} + +func (p *json5Parser) errorf(format string, args ...interface{}) error { + return fmt.Errorf("json5: %s at line %d, column %d", fmt.Sprintf(format, args...), p.line, p.col) +} + +func (p *json5Parser) skipWhitespaceAndComments() error { + sawNewline := false + + for !p.eof() { + r := p.peek() + if unicode.IsSpace(r) { + if r == '\n' || r == '\r' || r == '\u2028' || r == '\u2029' { + sawNewline = true + } + p.next() + continue + } + if r == '/' && p.peekNext() == '/' { + startsOnNewLine := sawNewline + p.next() // / + p.next() // / + var sb strings.Builder + for !p.eof() && p.peek() != '\n' { + sb.WriteRune(p.next()) + } + p.pendingComments = append(p.pendingComments, json5Comment{ + text: strings.TrimSpace(sb.String()), + startsOnNewLine: startsOnNewLine, + }) + continue + } + if r == '/' && p.peekNext() == '*' { + startsOnNewLine := sawNewline + p.next() // / + p.next() // * + var sb strings.Builder + for { + if p.eof() { + return p.errorf("unterminated block comment") + } + if p.peek() == '*' && p.peekNext() == '/' { + p.next() + p.next() + break + } + sb.WriteRune(p.next()) + } + normalised := normaliseJSON5BlockComment(sb.String()) + if 
strings.Contains(normalised, "\n") { + sawNewline = true + } + p.pendingComments = append(p.pendingComments, json5Comment{ + text: normalised, + startsOnNewLine: startsOnNewLine, + }) + continue + } + break + } + return nil +} + +func (p *json5Parser) takePendingComments() []json5Comment { + if len(p.pendingComments) == 0 { + return nil + } + comments := p.pendingComments + p.pendingComments = nil + return comments +} + +func commentsToText(comments []json5Comment) string { + if len(comments) == 0 { + return "" + } + parts := make([]string, 0, len(comments)) + for _, c := range comments { + if strings.TrimSpace(c.text) == "" { + continue + } + parts = append(parts, c.text) + } + return strings.Join(parts, "\n") +} + +func normaliseJSON5BlockComment(content string) string { + content = strings.ReplaceAll(content, "\r\n", "\n") + content = strings.Trim(content, "\n\r\t ") + if content == "" { + return "" + } + + lines := strings.Split(content, "\n") + for i, line := range lines { + line = strings.TrimLeft(line, " \t") + if strings.HasPrefix(line, "*") { + line = strings.TrimPrefix(line, "*") + line = strings.TrimLeft(line, " \t") + } + lines[i] = line + } + return strings.TrimSpace(strings.Join(lines, "\n")) +} + +func (p *json5Parser) parseValue() (*CandidateNode, error) { + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + leading := commentsToText(p.takePendingComments()) + if p.eof() { + return nil, io.EOF + } + + var node *CandidateNode + var err error + + switch r := p.peek(); r { + case '{': + node, err = p.parseObject() + case '[': + node, err = p.parseArray() + case '"', '\'': + s, err := p.parseString() + if err != nil { + return nil, err + } + node = createScalarNode(s, s) + case '-', '+', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + node, err = p.parseNumber() + default: + if isIdentifierStart(r) { + ident, err := p.parseIdentifier() + if err != nil { + return nil, err + } + switch ident { + case "true": + node = 
createScalarNode(true, "true") + case "false": + node = createScalarNode(false, "false") + case "null": + node = createScalarNode(nil, "null") + case "Infinity": + node = &CandidateNode{Kind: ScalarNode, Tag: "!!float", Value: "+Inf"} + case "NaN": + node = &CandidateNode{Kind: ScalarNode, Tag: "!!float", Value: "NaN"} + default: + return nil, p.errorf("unexpected identifier %q", ident) + } + } else { + return nil, p.errorf("unexpected character %q", r) + } + } + + if err != nil { + return nil, err + } + if node == nil { + return nil, p.errorf("invalid value") + } + if leading != "" { + if node.HeadComment != "" { + node.HeadComment = leading + "\n" + node.HeadComment + } else { + node.HeadComment = leading + } + } + return node, nil +} + +func (p *json5Parser) parseObject() (*CandidateNode, error) { + if p.next() != '{' { + return nil, p.errorf("expected '{'") + } + + node := &CandidateNode{Kind: MappingNode, Tag: "!!map"} + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + node.HeadComment = commentsToText(p.takePendingComments()) + if p.peek() == '}' { + p.next() + return node, nil + } + + for { + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + pendingBeforeKey := commentsToText(p.takePendingComments()) + + key, err := p.parseObjectKey() + if err != nil { + return nil, err + } + + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + pendingAfterKey := commentsToText(p.takePendingComments()) + if p.next() != ':' { + return nil, p.errorf("expected ':' after object key") + } + + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + pendingAfterColon := commentsToText(p.takePendingComments()) + + value, err := p.parseValue() + if err != nil { + return nil, err + } + + childKey := node.CreateChild() + childKey.IsMapKey = true + childKey.Value = key + childKey.Kind = ScalarNode + childKey.Tag = "!!str" + childKey.HeadComment = pendingBeforeKey + 
childKey.LineComment = pendingAfterKey + + if pendingAfterColon != "" { + if value.HeadComment != "" { + value.HeadComment = pendingAfterColon + "\n" + value.HeadComment + } else { + value.HeadComment = pendingAfterColon + } + } + + value.Parent = node + value.Key = childKey + node.Content = append(node.Content, childKey, value) + + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + value.LineComment = commentsToText(p.takePendingComments()) + + switch p.peek() { + case ',': + p.next() + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + if p.peek() == '}' { + p.next() + return node, nil + } + case '}': + p.next() + return node, nil + default: + return nil, p.errorf("expected ',' or '}' after object entry") + } + } +} + +func (p *json5Parser) parseObjectKey() (string, error) { + if err := p.skipWhitespaceAndComments(); err != nil { + return "", err + } + + switch p.peek() { + case '"', '\'': + return p.parseString() + default: + r := p.peek() + if !isIdentifierStart(r) && (r != '\\' || p.peekNext() != 'u') { + return "", p.errorf("expected object key") + } + return p.parseIdentifier() + } +} + +func (p *json5Parser) parseArray() (*CandidateNode, error) { + if p.next() != '[' { + return nil, p.errorf("expected '['") + } + + node := &CandidateNode{Kind: SequenceNode, Tag: "!!seq"} + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + node.HeadComment = commentsToText(p.takePendingComments()) + if p.peek() == ']' { + p.next() + return node, nil + } + + index := 0 + for { + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + pendingBeforeElement := commentsToText(p.takePendingComments()) + + value, err := p.parseValue() + if err != nil { + return nil, err + } + if pendingBeforeElement != "" { + if value.HeadComment != "" { + value.HeadComment = pendingBeforeElement + "\n" + value.HeadComment + } else { + value.HeadComment = pendingBeforeElement + } + } + + childKey 
:= node.CreateChild() + childKey.Kind = ScalarNode + childKey.Tag = "!!int" + childKey.Value = fmt.Sprintf("%v", index) + childKey.IsMapKey = true + + value.Parent = node + value.Key = childKey + node.Content = append(node.Content, value) + index++ + + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + value.LineComment = commentsToText(p.takePendingComments()) + + switch p.peek() { + case ',': + p.next() + if err := p.skipWhitespaceAndComments(); err != nil { + return nil, err + } + if p.peek() == ']' { + p.next() + return node, nil + } + case ']': + p.next() + return node, nil + default: + return nil, p.errorf("expected ',' or ']' after array element") + } + } +} + +func (p *json5Parser) parseString() (string, error) { + quote := p.next() + var sb strings.Builder + + for { + if p.eof() { + return "", p.errorf("unterminated string") + } + + r := p.next() + if r == quote { + return sb.String(), nil + } + + if r == '\n' || r == '\r' || r == '\u2028' || r == '\u2029' { + return "", p.errorf("unterminated string") + } + + if r != '\\' { + sb.WriteRune(r) + continue + } + + if p.eof() { + return "", p.errorf("unterminated escape sequence") + } + + esc := p.next() + + if esc == '\n' || esc == '\u2028' || esc == '\u2029' { + continue + } + if esc == '\r' { + if p.peek() == '\n' { + p.next() + } + continue + } + + switch esc { + case '\\', '/', '"', '\'': + sb.WriteRune(esc) + case 'b': + sb.WriteByte('\b') + case 'f': + sb.WriteByte('\f') + case 'n': + sb.WriteByte('\n') + case 'r': + sb.WriteByte('\r') + case 't': + sb.WriteByte('\t') + case 'v': + sb.WriteByte('\v') + case '0': + if isDigit(p.peek()) { + return "", p.errorf("invalid escape sequence \\0 followed by digit") + } + sb.WriteByte(0) + case 'x': + r, err := p.parseHexEscape(2) + if err != nil { + return "", err + } + sb.WriteRune(r) + case 'u': + r, err := p.parseUnicodeEscape() + if err != nil { + return "", err + } + sb.WriteRune(r) + default: + sb.WriteRune(esc) + } + } +} + 
+// parseHexEscape reads exactly length hexadecimal digits from the input and
+// returns the value they encode.
+//
+// It returns an "invalid hex escape" error when fewer than length runes
+// remain, or when one of the runes read is not a hexadecimal digit.
+func (p *json5Parser) parseHexEscape(length int) (rune, error) {
+	if p.pos+length > len(p.input) {
+		return 0, p.errorf("invalid hex escape")
+	}
+	var value rune
+	for i := 0; i < length; i++ {
+		d := p.next()
+		h, ok := hexDigitValue(d)
+		if !ok {
+			return 0, p.errorf("invalid hex escape")
+		}
+		value = (value << 4) | h
+	}
+	return value, nil
+}
+
+// parseUnicodeEscape decodes the four hex digits of a \uXXXX escape. The
+// caller must already have consumed the backslash and the 'u'.
+//
+// When the escape is a UTF-16 surrogate and is immediately followed by a
+// second \uXXXX escape that completes a valid high/low pair, both escapes are
+// consumed and the combined code point is returned. In every other case only
+// the first escape is consumed and its value is returned unchanged, so an
+// unpaired surrogate never swallows the escape that follows it.
+func (p *json5Parser) parseUnicodeEscape() (rune, error) {
+	r, err := p.parseHexEscape(4)
+	if err != nil {
+		return 0, err
+	}
+
+	if utf16.IsSurrogate(r) {
+		// Remember where we are so the look-ahead can be undone.
+		savedPos, savedLine, savedCol := p.pos, p.line, p.col
+		if p.peek() == '\\' && p.peekNext() == 'u' {
+			p.next()
+			p.next()
+			if r2, err := p.parseHexEscape(4); err == nil {
+				// DecodeRune yields U+FFFD unless (r, r2) is a valid
+				// high/low pair; a valid pair never decodes to U+FFFD.
+				if combined := utf16.DecodeRune(r, r2); combined != unicode.ReplacementChar {
+					return combined, nil
+				}
+			}
+		}
+		p.pos, p.line, p.col = savedPos, savedLine, savedCol
+	}
+
+	return r, nil
+}
+
+// parseNumber parses a JSON5 numeric literal at the current position and
+// returns it as a scalar node.
+//
+// Accepted forms, each with an optional leading '+' or '-':
+//   - Infinity (tagged !!float, value "+Inf" or "-Inf")
+//   - NaN (tagged !!float, value "NaN"; the sign is irrelevant)
+//   - 0x / 0X hexadecimal integers (tagged !!int, literal text kept)
+//   - decimal literals with optional fraction and exponent, including a
+//     leading or trailing decimal point (tagged !!int or !!float)
+//
+// An explicit '+' is never kept in the node value. Decimal literals are
+// validated with strconv, so an integer that does not fit in 64 bits is
+// reported as an error.
+func (p *json5Parser) parseNumber() (*CandidateNode, error) {
+	startPos := p.pos
+
+	sign := rune(0)
+	if p.peek() == '+' || p.peek() == '-' {
+		sign = p.next()
+	}
+
+	// A sign followed by an identifier can only be Infinity or NaN.
+	if isIdentifierStart(p.peek()) {
+		ident, err := p.parseIdentifier()
+		if err != nil {
+			return nil, err
+		}
+		switch ident {
+		case "Infinity":
+			if sign == '-' {
+				return &CandidateNode{Kind: ScalarNode, Tag: "!!float", Value: "-Inf"}, nil
+			}
+			return &CandidateNode{Kind: ScalarNode, Tag: "!!float", Value: "+Inf"}, nil
+		case "NaN":
+			return &CandidateNode{Kind: ScalarNode, Tag: "!!float", Value: "NaN"}, nil
+		default:
+			return nil, p.errorf("invalid number")
+		}
+	}
+
+	posAfterSign := p.pos
+
+	isFloat := false
+	if p.peek() == '0' && (p.peekNext() == 'x' || p.peekNext() == 'X') {
+		p.next()
+		p.next()
+		if !isHexDigit(p.peek()) {
+			return nil, p.errorf("invalid hex number")
+		}
+		for isHexDigit(p.peek()) {
+			p.next()
+		}
+		// Keep the literal text; re-attach only a minus sign, an explicit
+		// plus sign is dropped.
+		lit := string(p.input[posAfterSign:p.pos])
+		if sign == '-' {
+			lit = "-" + lit
+		}
+		return &CandidateNode{Kind: ScalarNode, Tag: "!!int", Value: lit}, nil
+	}
+
+	if p.peek() == '.' {
+		// Leading decimal point, e.g. ".5": at least one digit must follow.
+		isFloat = true
+		p.next()
+		if !isDigit(p.peek()) {
+			return nil, p.errorf("invalid number")
+		}
+		for isDigit(p.peek()) {
+			p.next()
+		}
+	} else {
+		if !isDigit(p.peek()) {
+			return nil, p.errorf("invalid number")
+		}
+		for isDigit(p.peek()) {
+			p.next()
+		}
+		if p.peek() == '.' {
+			// Trailing decimal point, e.g. "5.", is allowed: digits are optional here.
+			isFloat = true
+			p.next()
+			for isDigit(p.peek()) {
+				p.next()
+			}
+		}
+	}
+
+	if p.peek() == 'e' || p.peek() == 'E' {
+		isFloat = true
+		p.next()
+		if p.peek() == '+' || p.peek() == '-' {
+			p.next()
+		}
+		if !isDigit(p.peek()) {
+			return nil, p.errorf("invalid number exponent")
+		}
+		for isDigit(p.peek()) {
+			p.next()
+		}
+	}
+
+	lit := string(p.input[startPos:p.pos])
+
+	// Drop an explicit plus sign so the stored value is in canonical form.
+	lit = strings.TrimPrefix(lit, "+")
+
+	if isFloat {
+		if _, err := strconv.ParseFloat(lit, 64); err != nil {
+			return nil, p.errorf("invalid float number %q", lit)
+		}
+		return &CandidateNode{Kind: ScalarNode, Tag: "!!float", Value: lit}, nil
+	}
+
+	if _, err := strconv.ParseInt(lit, 10, 64); err != nil {
+		return nil, p.errorf("invalid integer %q", lit)
+	}
+	return &CandidateNode{Kind: ScalarNode, Tag: "!!int", Value: lit}, nil
+}
+
+// parseIdentifier reads an identifier at the current position and returns its
+// text with any \uXXXX escapes decoded.
+//
+// The first rune must satisfy isIdentifierStart and every following rune
+// isIdentifierPart; the same rules are applied to decoded escapes. Reading
+// stops, without error, at the first rune that cannot continue the identifier.
+func (p *json5Parser) parseIdentifier() (string, error) {
+	var sb strings.Builder
+
+	r := p.peek()
+	if !isIdentifierStart(r) && (r != '\\' || p.peekNext() != 'u') {
+		return "", p.errorf("expected identifier")
+	}
+
+	for !p.eof() {
+		r := p.peek()
+		if r == '\\' && p.peekNext() == 'u' {
+			p.next()
+			p.next()
+			decoded, err := p.parseUnicodeEscape()
+			if err != nil {
+				return "", err
+			}
+			// sb.Len() == 0 means nothing has been accepted yet, so the
+			// decoded rune is in start position.
+			if sb.Len() == 0 {
+				if !isIdentifierStart(decoded) {
+					return "", p.errorf("invalid identifier start")
+				}
+			} else if !isIdentifierPart(decoded) {
+				return "", p.errorf("invalid identifier part")
+			}
+			sb.WriteRune(decoded)
+			continue
+		}
+
+		if sb.Len() == 0 {
+			if !isIdentifierStart(r) {
+				break
+			}
+		} else if !isIdentifierPart(r) {
+			break
+		}
+
+		sb.WriteRune(p.next())
+	}
+
+	return sb.String(), nil
+}
+
+// isDigit reports whether r is an ASCII decimal digit.
+func isDigit(r rune) bool {
+	return r >= '0' && r <= '9'
+}
+
+// isHexDigit reports whether r is an ASCII hexadecimal digit of either case.
+func isHexDigit(r rune) bool {
+	return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')
+}
+
+// hexDigitValue returns the numeric value (0-15) of the hexadecimal digit r.
+// The boolean is false when r is not a hexadecimal digit.
+func hexDigitValue(r rune) (rune, bool) {
+	switch {
+	case r >= '0' && r <= '9':
+		return r - '0', true
+	case r >= 'a' && r <= 'f':
+		return r - 'a' + 10, true
+	case r >= 'A' && r <= 'F':
+		return r - 'A' + 10, true
+	default:
+		return 0, false
+	}
+}
+
+// isIdentifierStart reports whether r may begin an identifier: '$', '_' or
+// any Unicode letter.
+func isIdentifierStart(r rune) bool {
+	return r == '$' || r == '_' || unicode.IsLetter(r)
+}
+
+// isIdentifierPart reports whether r may appear after the first rune of an
+// identifier: anything accepted by isIdentifierStart, or a Unicode digit.
+func isIdentifierPart(r rune) bool {
+	return isIdentifierStart(r) || unicode.IsDigit(r)
+}
diff --git a/pkg/yqlib/decoder_test.go b/pkg/yqlib/decoder_test.go
index d71e8e8c31..f1105a2c0d 100644
--- a/pkg/yqlib/decoder_test.go
+++ b/pkg/yqlib/decoder_test.go
@@ -17,6 +17,8 @@ type formatScenario struct {
 	skipDoc bool
 	scenarioType string
 	expectedError string
+	// expectedErrorContains allows less brittle error assertions (e.g. where line/column may change).
+	expectedErrorContains string
 }
 
 func processFormatScenario(s formatScenario, decoder Decoder, encoder Encoder) (string, error) {
diff --git a/pkg/yqlib/doc/usage/headers/json5.md b/pkg/yqlib/doc/usage/headers/json5.md
new file mode 100644
index 0000000000..d6fd1aeb74
--- /dev/null
+++ b/pkg/yqlib/doc/usage/headers/json5.md
@@ -0,0 +1,5 @@
+# JSON5
+
+JSON5 support in `yq` lets you parse JSON5 files (comments, trailing commas, single quotes, unquoted keys, hex numbers, `Infinity`, `NaN`) and convert them to other formats like YAML, or output JSON5.
+
+Note: when converting JSON5 to YAML (or other formats), comments may move slightly because formats like YAML don't always have a distinct representation for certain JSON5 comment placements (e.g. `/* foo */ { ... }` vs `{ /* foo */ ... }`). When converting JSON5 back to JSON5, `yq` keeps comments as close as possible to their original location.
diff --git a/pkg/yqlib/doc/usage/json5.md b/pkg/yqlib/doc/usage/json5.md new file mode 100644 index 0000000000..10928978c3 --- /dev/null +++ b/pkg/yqlib/doc/usage/json5.md @@ -0,0 +1,29 @@ +# JSON5 + +JSON5 support in `yq` lets you parse JSON5 files (comments, trailing commas, single quotes, unquoted keys, hex numbers, `Infinity`, `NaN`) and convert them to other formats like YAML, or output JSON5. + +Note: when converting JSON5 to YAML (or other formats), comments may move slightly because formats like YAML don't always have a distinct representation for certain JSON5 comment placements (e.g. `/* foo */ { ... }` vs `{ /* foo */ ... }`). When converting JSON5 back to JSON5, `yq` keeps comments as close as possible to their original location. + +## Parse json5: comments, trailing commas, single quotes +Given a sample.json5 file of: +```json5 +{ + // comment + unquoted: 'single quoted', + trailing: [1, 2,], +} + +``` +then +```bash +yq -P -p=json5 '.' sample.json5 +``` +will output +```yaml +# comment +unquoted: single quoted +trailing: + - 1 + - 2 +``` + diff --git a/pkg/yqlib/encoder_json5.go b/pkg/yqlib/encoder_json5.go new file mode 100644 index 0000000000..2ffe014b05 --- /dev/null +++ b/pkg/yqlib/encoder_json5.go @@ -0,0 +1,347 @@ +//go:build !yq_nojson5 + +package yqlib + +import ( + "bytes" + "io" + "math" + "strconv" + "strings" + + "github.com/goccy/go-json" +) + +type json5Encoder struct { + prefs JsonPreferences + indentString string +} + +func NewJSON5Encoder(prefs JsonPreferences) Encoder { + indentString := "" + for i := 0; i < prefs.Indent; i++ { + indentString += " " + } + return &json5Encoder{prefs: prefs, indentString: indentString} +} + +func (je *json5Encoder) CanHandleAliases() bool { + return false +} + +func (je *json5Encoder) PrintDocumentSeparator(_ io.Writer) error { + return nil +} + +func (je *json5Encoder) PrintLeadingContent(_ io.Writer, _ string) error { + return nil +} + +func (je *json5Encoder) Encode(writer io.Writer, node 
*CandidateNode) error { + if node.Kind == ScalarNode && je.prefs.UnwrapScalar { + return writeString(writer, node.Value+"\n") + } + + destination := writer + tempBuffer := bytes.NewBuffer(nil) + if je.prefs.ColorsEnabled { + destination = tempBuffer + } + + if err := writeJSON5CommentBlock(destination, node.HeadComment, je.indentString, 0); err != nil { + return err + } + if err := encodeJSON5Node(destination, node, je.indentString, 0); err != nil { + return err + } + if err := writeJSON5InlineComment(destination, node.LineComment, true); err != nil { + return err + } + if err := writeString(destination, "\n"); err != nil { + return err + } + if err := writeJSON5CommentBlock(destination, node.FootComment, je.indentString, 0); err != nil { + return err + } + + if je.prefs.ColorsEnabled { + return colorizeAndPrint(tempBuffer.Bytes(), writer) + } + return nil +} + +func encodeJSON5Node(writer io.Writer, node *CandidateNode, indentString string, depth int) error { + if node == nil { + return writeString(writer, "null") + } + + switch node.Kind { + case AliasNode: + return encodeJSON5Node(writer, node.Alias, indentString, depth) + case ScalarNode: + return writeString(writer, json5ScalarString(node)) + case SequenceNode: + return encodeJSON5Sequence(writer, node, indentString, depth) + case MappingNode: + return encodeJSON5Mapping(writer, node, indentString, depth) + default: + return writeString(writer, "null") + } +} + +func encodeJSON5Sequence(writer io.Writer, node *CandidateNode, indentString string, depth int) error { + if err := writeString(writer, "["); err != nil { + return err + } + if len(node.Content) == 0 { + return writeString(writer, "]") + } + + pretty := indentString != "" + if pretty { + if err := writeString(writer, "\n"); err != nil { + return err + } + } + + for i, child := range node.Content { + if pretty { + if err := writeJSON5CommentBlock(writer, child.HeadComment, indentString, depth+1); err != nil { + return err + } + if err := 
writeString(writer, strings.Repeat(indentString, depth+1)); err != nil { + return err + } + } + if err := encodeJSON5Node(writer, child, indentString, depth+1); err != nil { + return err + } + if err := writeJSON5InlineComment(writer, child.LineComment, true); err != nil { + return err + } + if i != len(node.Content)-1 { + if err := writeString(writer, ","); err != nil { + return err + } + } + if pretty { + if err := writeString(writer, "\n"); err != nil { + return err + } + } + } + + if pretty { + if err := writeString(writer, strings.Repeat(indentString, depth)); err != nil { + return err + } + } + return writeString(writer, "]") +} + +func encodeJSON5Mapping(writer io.Writer, node *CandidateNode, indentString string, depth int) error { + if err := writeString(writer, "{"); err != nil { + return err + } + if len(node.Content) == 0 { + return writeString(writer, "}") + } + + pretty := indentString != "" + if pretty { + if err := writeString(writer, "\n"); err != nil { + return err + } + } + + for i := 0; i < len(node.Content); i += 2 { + keyNode := node.Content[i] + valueNode := node.Content[i+1] + + keyBytes, err := json.Marshal(keyNode.Value) + if err != nil { + return err + } + + if pretty { + if err := writeJSON5CommentBlock(writer, keyNode.HeadComment, indentString, depth+1); err != nil { + return err + } + if err := writeString(writer, strings.Repeat(indentString, depth+1)); err != nil { + return err + } + } + if err := writeString(writer, string(keyBytes)); err != nil { + return err + } + if err := writeJSON5InlineComment(writer, keyNode.LineComment, true); err != nil { + return err + } + if err := writeString(writer, ":"); err != nil { + return err + } + if pretty && strings.TrimSpace(valueNode.HeadComment) != "" && strings.Contains(valueNode.HeadComment, "\n") { + if err := writeString(writer, "\n"); err != nil { + return err + } + if err := writeJSON5CommentBlock(writer, valueNode.HeadComment, indentString, depth+1); err != nil { + return err + } + if 
err := writeString(writer, strings.Repeat(indentString, depth+1)); err != nil { + return err + } + } else { + if pretty { + if err := writeString(writer, " "); err != nil { + return err + } + } + if err := writeJSON5InlineComment(writer, valueNode.HeadComment, false); err != nil { + return err + } + if strings.TrimSpace(valueNode.HeadComment) != "" { + if err := writeString(writer, " "); err != nil { + return err + } + } + } + + if err := encodeJSON5Node(writer, valueNode, indentString, depth+1); err != nil { + return err + } + if err := writeJSON5InlineComment(writer, valueNode.LineComment, true); err != nil { + return err + } + if i != len(node.Content)-2 { + if err := writeString(writer, ","); err != nil { + return err + } + } + if pretty { + if err := writeString(writer, "\n"); err != nil { + return err + } + } + } + + if pretty { + if err := writeString(writer, strings.Repeat(indentString, depth)); err != nil { + return err + } + } + return writeString(writer, "}") +} + +func json5ScalarString(node *CandidateNode) string { + tag := node.guessTagFromCustomType() + + switch tag { + case "!!null": + return "null" + case "!!bool": + if isTruthyNode(node) { + return "true" + } + return "false" + case "!!int": + value, err := node.GetValueRep() + if err != nil { + return "null" + } + intBytes, err := json.Marshal(value) + if err != nil { + return "null" + } + return string(intBytes) + case "!!float": + return json5FloatString(node.Value) + case "!!str": + stringBytes, err := json.Marshal(node.Value) + if err != nil { + return "null" + } + return string(stringBytes) + default: + stringBytes, err := json.Marshal(node.Value) + if err != nil { + return "null" + } + return string(stringBytes) + } +} + +func json5FloatString(value string) string { + switch strings.ToLower(value) { + case ".inf", "+.inf", "inf", "+inf", "+infinity", "infinity": + return "Infinity" + case "-.inf", "-inf", "-infinity": + return "-Infinity" + case ".nan", "nan": + return "NaN" + } + + parsed, 
err := strconv.ParseFloat(value, 64) + if err != nil { + return "null" + } + + if math.IsNaN(parsed) { + return "NaN" + } + if math.IsInf(parsed, 1) { + return "Infinity" + } + if math.IsInf(parsed, -1) { + return "-Infinity" + } + + floatBytes, err := json.Marshal(parsed) + if err != nil { + return "null" + } + return string(floatBytes) +} + +func writeJSON5CommentBlock(writer io.Writer, comment string, indentString string, depth int) error { + comment = strings.ReplaceAll(comment, "\r\n", "\n") + comment = strings.TrimSpace(comment) + if comment == "" { + return nil + } + + indent := strings.Repeat(indentString, depth) + lines := strings.Split(comment, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" { + continue + } + line = strings.TrimSpace(strings.TrimPrefix(line, "#")) + line = strings.TrimSpace(strings.TrimPrefix(line, "//")) + if err := writeString(writer, indent); err != nil { + return err + } + if err := writeString(writer, "// "+line+"\n"); err != nil { + return err + } + } + return nil +} + +func writeJSON5InlineComment(writer io.Writer, comment string, forceLeadingSpace bool) error { + comment = strings.ReplaceAll(comment, "\r\n", "\n") + comment = strings.TrimSpace(comment) + if comment == "" { + return nil + } + comment = strings.TrimSpace(strings.TrimPrefix(comment, "#")) + comment = strings.TrimSpace(strings.TrimPrefix(comment, "//")) + comment = strings.ReplaceAll(comment, "\n", " ") + comment = strings.Join(strings.Fields(comment), " ") + + prefix := "" + if forceLeadingSpace { + prefix = " " + } + return writeString(writer, prefix+"/* "+comment+" */") +} diff --git a/pkg/yqlib/encoder_json5_test.go b/pkg/yqlib/encoder_json5_test.go new file mode 100644 index 0000000000..e2bd4891ee --- /dev/null +++ b/pkg/yqlib/encoder_json5_test.go @@ -0,0 +1,171 @@ +//go:build !yq_nojson5 + +package yqlib + +import ( + "bytes" + "testing" +) + +func TestJSON5EncoderPrintDocumentSeparatorIsNoop(t *testing.T) { + prefs := 
ConfiguredJSONPreferences.Copy() + var buf bytes.Buffer + + if err := NewJSON5Encoder(prefs).PrintDocumentSeparator(&buf); err != nil { + t.Fatalf("PrintDocumentSeparator returned error: %v", err) + } + if got := buf.String(); got != "" { + t.Fatalf("expected no output, got %q", got) + } +} + +func TestJSON5EncoderUnwrapScalar(t *testing.T) { + prefs := ConfiguredJSONPreferences.Copy() + prefs.UnwrapScalar = true + + node := createStringScalarNode("hi") + + var buf bytes.Buffer + if err := NewJSON5Encoder(prefs).Encode(&buf, node); err != nil { + t.Fatalf("Encode returned error: %v", err) + } + + if got, want := buf.String(), "hi\n"; got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func TestEncodeJSON5NodeNilAndUnknownKind(t *testing.T) { + var buf bytes.Buffer + if err := encodeJSON5Node(&buf, nil, " ", 0); err != nil { + t.Fatalf("encodeJSON5Node(nil) returned error: %v", err) + } + if got, want := buf.String(), "null"; got != want { + t.Fatalf("got %q, want %q", got, want) + } + + buf.Reset() + if err := encodeJSON5Node(&buf, &CandidateNode{}, " ", 0); err != nil { + t.Fatalf("encodeJSON5Node(unknown kind) returned error: %v", err) + } + if got, want := buf.String(), "null"; got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func TestJSON5ScalarAndFloatFormatting(t *testing.T) { + if got, want := json5FloatString(".inf"), "Infinity"; got != want { + t.Fatalf("got %q, want %q", got, want) + } + if got, want := json5FloatString("1e309"), "null"; got != want { + t.Fatalf("got %q, want %q", got, want) + } + if got, want := json5FloatString(".nan"), "NaN"; got != want { + t.Fatalf("got %q, want %q", got, want) + } + if got, want := json5FloatString("definitely-not-a-float"), "null"; got != want { + t.Fatalf("got %q, want %q", got, want) + } + + badInt := &CandidateNode{Kind: ScalarNode, Tag: "!!int", Value: "nope"} + if got, want := json5ScalarString(badInt), "null"; got != want { + t.Fatalf("got %q, want %q", got, want) + } + + 
unknownTag := &CandidateNode{Kind: ScalarNode, Tag: "!!unknown", Value: "hi"} + if got, want := json5ScalarString(unknownTag), "\"hi\""; got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func TestJSON5EncoderEncodeSequenceMappingAndComments(t *testing.T) { + prefs := ConfiguredJSONPreferences.Copy() + prefs.Indent = 2 + prefs.UnwrapScalar = false + prefs.ColorsEnabled = false + + aliasTarget := createScalarNode(3, "3") + seq := &CandidateNode{ + Kind: SequenceNode, + HeadComment: "# root head\n\n// root second\n", + LineComment: "# root inline", + FootComment: "# foot", + Content: []*CandidateNode{ + { + Kind: ScalarNode, + Tag: "!!int", + Value: "1", + HeadComment: "# first", + LineComment: "// lc1", + }, + { + Kind: AliasNode, + Alias: aliasTarget, + LineComment: "# alias", + }, + { + Kind: MappingNode, + HeadComment: "# map head", + LineComment: "# map lc", + Content: []*CandidateNode{ + { + Kind: ScalarNode, + Tag: "!!str", + Value: "a", + HeadComment: "# key a", + LineComment: "# key inline", + IsMapKey: true, + }, + { + Kind: ScalarNode, + Tag: "!!float", + Value: ".inf", + HeadComment: "# before a line1\n# before a line2", + LineComment: "# value inline", + }, + { + Kind: ScalarNode, + Tag: "!!str", + Value: "b", + IsMapKey: true, + }, + { + Kind: ScalarNode, + Tag: "!!bool", + Value: "true", + HeadComment: "# before b", + }, + }, + }, + createStringScalarNode("hi"), + }, + } + + var buf bytes.Buffer + if err := NewJSON5Encoder(prefs).Encode(&buf, seq); err != nil { + t.Fatalf("Encode returned error: %v", err) + } + + const expected = `// root head +// root second +[ + // first + 1 /* lc1 */, + 3 /* alias */, + // map head + { + // key a + "a" /* key inline */: + // before a line1 + // before a line2 + Infinity /* value inline */, + "b": /* before b */ true + } /* map lc */, + "hi" +] /* root inline */ +// foot +` + if got := buf.String(); got != expected { + t.Fatalf("unexpected output:\n%s", got) + } +} diff --git a/pkg/yqlib/format.go 
b/pkg/yqlib/format.go index 4cf456f719..b63e40262f 100644 --- a/pkg/yqlib/format.go +++ b/pkg/yqlib/format.go @@ -33,6 +33,11 @@ var JSONFormat = &Format{"json", []string{"j"}, func() Decoder { return NewJSONDecoder() }, } +var JSON5Format = &Format{"json5", []string{"j5"}, + func() Encoder { return NewJSON5Encoder(ConfiguredJSONPreferences) }, + func() Decoder { return NewJSON5Decoder() }, +} + var PropertiesFormat = &Format{"props", []string{"p", "properties"}, func() Encoder { return NewPropertiesEncoder(ConfiguredPropertiesPreferences) }, func() Decoder { return NewPropertiesDecoder() }, @@ -97,6 +102,7 @@ var Formats = []*Format{ YamlFormat, KYamlFormat, JSONFormat, + JSON5Format, PropertiesFormat, CSVFormat, TSVFormat, diff --git a/pkg/yqlib/json5_test.go b/pkg/yqlib/json5_test.go new file mode 100644 index 0000000000..acb23d02f5 --- /dev/null +++ b/pkg/yqlib/json5_test.go @@ -0,0 +1,321 @@ +//go:build !yq_nojson5 + +package yqlib + +import ( + "bufio" + "fmt" + "strings" + "testing" + + "github.com/mikefarah/yq/v4/test" +) + +var json5Scenarios = []formatScenario{ + { + description: "Parse json5: comments, trailing commas, single quotes", + input: `{ + // comment + unquoted: 'single quoted', + trailing: [1, 2,], +} +`, + expected: `# comment +unquoted: single quoted +trailing: + - 1 + - 2 +`, + scenarioType: "decode", + }, + { + description: "Parse json5: multiline block comments", + skipDoc: true, + scenarioType: "decode", + input: `{ + /* + multiline + block comment + */ + first: 1, + second/* inline block */: 2, + third: /* before value */ 3, + fourth: [1, /* between elements */ 2,], +} +`, + expected: `# multiline +# block comment +first: 1 +second: 2 # inline block +third: 3 +# before value +fourth: + - 1 + # between elements + - 2 +`, + }, + { + description: "Roundtrip json5: preserve comment placement", + skipDoc: true, + scenarioType: "roundtrip", + indent: 2, + input: `{ + a/*k*/:/*v*/1/*after*/, + b: 2 // end +}`, + expected: `{ + "a" /* k */: /* v 
*/ 1 /* after */, + "b": 2 /* end */ +} +`, + }, + { + description: "Roundtrip json5: hex, Infinity, NaN", + skipDoc: true, + input: `{hex: 0x10, posInf: Infinity, negInf: -Infinity, not: NaN,}`, + expected: "{\"hex\":16,\"posInf\":Infinity,\"negInf\":-Infinity,\"not\":NaN}\n", + indent: 0, + scenarioType: "roundtrip", + }, + { + description: "bad json5", + skipDoc: true, + input: `{a: 1,]`, + expectedError: `bad file 'sample.yml': json5: expected object key at line 1, column 7`, + scenarioType: "decode-error", + subdescription: "json5 supports more syntax than json, but it still needs to be well-formed.", + }, + { + description: "Parse json5: block comments normalisation (stars, tabs, trailing spaces)", + skipDoc: true, + scenarioType: "decode", + input: "{\n /*\n\t * hello \t \n\t ** world\t\n */\n a: 1,\n}\n", + expected: "# hello \t \n# * world\na: 1\n", + subdescription: "Block comments are normalised and emitted as YAML comments.", + }, + { + description: "Parse json5: scalars true/false/null with leading whitespace", + skipDoc: true, + scenarioType: "decode", + input: " \n\ttrue\n", + expected: "true\n", + }, + { + description: "Parse json5: scalar false", + skipDoc: true, + scenarioType: "decode", + input: "false", + expected: "false\n", + }, + { + description: "Parse json5: scalar null with extra newlines", + skipDoc: true, + scenarioType: "decode", + input: "\n\nnull\n\n", + expected: "null\n", + }, + { + description: "Parse json5: object trailing comma", + skipDoc: true, + scenarioType: "decode", + input: "{a: 1,}\n", + expected: "a: 1\n", + }, + { + description: "Parse json5: array trailing comma", + skipDoc: true, + scenarioType: "decode", + input: "[1, 2,]\n", + expected: "- 1\n- 2\n", + }, + { + description: "Parse json5: unicode escapes in strings", + skipDoc: true, + scenarioType: "decode", + input: "{s: \"\\u0041\"}\n", + expected: "s: A\n", + }, + { + description: "Parse json5: UTF-16 surrogate pair unicode escape in string", + skipDoc: true, + 
scenarioType: "decode", + input: "{s: \"\\uD83D\\uDE00\"}\n", + expected: "s: \"\\U0001F600\"\n", + }, + { + description: "Parse json5: invalid unicode escape in string (lowercase g)", + skipDoc: true, + scenarioType: "decode-error", + input: "{s: \"\\u00ag\"}\n", + expectedErrorContains: "invalid hex escape", + }, + { + description: "Parse json5: invalid float (overflow)", + skipDoc: true, + scenarioType: "decode-error", + input: "{a: 1e309}\n", + expectedErrorContains: "invalid float number", + }, + { + description: "Parse json5: invalid float exponent", + skipDoc: true, + scenarioType: "decode-error", + input: "{a: 1e+}\n", + expectedErrorContains: "invalid number exponent", + }, + { + description: "Parse json5: invalid number with multiple periods", + skipDoc: true, + scenarioType: "decode-error", + input: "{ip: 127.0.0.1}\n", + expectedErrorContains: "expected ',' or '}' after object entry", + }, + { + description: "Parse json5: extra colon after key", + skipDoc: true, + scenarioType: "decode-error", + input: "{a:: 1}\n", + expectedErrorContains: "unexpected character ':'", + }, + { + description: "Parse json5: trailing comma with extra closing brace", + skipDoc: true, + scenarioType: "decode-error", + input: "{a: 1,}}\n", + expectedErrorContains: "unexpected character '}'", + }, + { + description: "Parse json5: unterminated block comment at EOF", + skipDoc: true, + scenarioType: "decode-error", + input: "{/* unterminated", + expectedErrorContains: "unterminated block comment", + }, + { + description: "Parse json5: unterminated escape sequence at EOF", + skipDoc: true, + scenarioType: "decode-error", + input: "\"abc\\", + expectedErrorContains: "unterminated escape sequence", + }, + { + description: "Parse json5: unterminated unicode escape", + skipDoc: true, + scenarioType: "decode-error", + input: "{s: \"\\u12\"}\n", + expectedErrorContains: "invalid hex escape", + }, + { + description: "Parse json5: bad hex digits in string escape", + skipDoc: true, + 
scenarioType: "decode-error", + input: "{s: \"\\x0G\"}\n", + expectedErrorContains: "invalid hex escape", + }, + { + description: "Parse json5: bad hex digits in string escape (lowercase g)", + skipDoc: true, + scenarioType: "decode-error", + input: "{s: \"\\xag\"}\n", + expectedErrorContains: "invalid hex escape", + }, + { + description: "Parse json5: bad hex digits in hex number", + skipDoc: true, + scenarioType: "decode-error", + input: "{a: 0xG}\n", + expectedErrorContains: "invalid hex number", + }, + { + description: "Parse json5: bad hex digits in hex number (lowercase g)", + skipDoc: true, + scenarioType: "decode-error", + input: "{a: 0xg}\n", + expectedErrorContains: "invalid hex number", + }, + { + description: "Parse json5: invalid identifier start via unicode escape", + skipDoc: true, + scenarioType: "decode-error", + input: "{\\u0031abc: 1}\n", + expectedErrorContains: "invalid identifier start", + }, + { + description: "Parse json5: invalid identifier part via unicode escape", + skipDoc: true, + scenarioType: "decode-error", + input: "{a\\u002D: 1}\n", + expectedErrorContains: "invalid identifier part", + }, + { + description: "Parse json5: invalid unicode escape in identifier", + skipDoc: true, + scenarioType: "decode-error", + input: "{a\\u00ag: 1}\n", + expectedErrorContains: "invalid hex escape", + }, +} + +func testJSON5Scenario(t *testing.T, s formatScenario) { + prefs := ConfiguredJSONPreferences.Copy() + prefs.Indent = s.indent + prefs.UnwrapScalar = false + + switch s.scenarioType { + case "encode": + test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewJSON5Encoder(prefs)), s.description) + case "decode": + test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewJSON5Decoder(), NewYamlEncoder(ConfiguredYamlPreferences)), s.description) + case "roundtrip": + test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewJSON5Decoder(), 
NewJSON5Encoder(prefs)), s.description) + case "decode-error": + result, err := processFormatScenario(s, NewJSON5Decoder(), NewJSON5Encoder(prefs)) + if err == nil { + t.Errorf("Expected error '%v' but it worked: %v", s.expectedError, result) + } else { + if s.expectedErrorContains != "" { + if !strings.Contains(err.Error(), s.expectedErrorContains) { + t.Errorf("%s: expected error containing %q, got %q", s.description, s.expectedErrorContains, err.Error()) + } + } else { + test.AssertResultComplexWithContext(t, s.expectedError, err.Error(), s.description) + } + } + default: + panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType)) + } +} + +func documentJSON5Scenario(_ *testing.T, w *bufio.Writer, i interface{}) { + s := i.(formatScenario) + if s.skipDoc { + return + } + + writeOrPanic(w, fmt.Sprintf("## %v\n", s.description)) + + if s.subdescription != "" { + writeOrPanic(w, s.subdescription) + writeOrPanic(w, "\n\n") + } + + writeOrPanic(w, "Given a sample.json5 file of:\n") + writeOrPanic(w, fmt.Sprintf("```json5\n%v\n```\n", s.input)) + writeOrPanic(w, "then\n") + writeOrPanic(w, "```bash\nyq -P -p=json5 '.' 
sample.json5\n```\n") + writeOrPanic(w, "will output\n") + writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", mustProcessFormatScenario(s, NewJSON5Decoder(), NewYamlEncoder(ConfiguredYamlPreferences)))) +} + +func TestJSON5Scenarios(t *testing.T) { + for _, tt := range json5Scenarios { + testJSON5Scenario(t, tt) + } + + genericScenarios := make([]interface{}, len(json5Scenarios)) + for i, s := range json5Scenarios { + genericScenarios[i] = s + } + documentScenarios(t, "usage", "json5", genericScenarios, documentJSON5Scenario) +} diff --git a/pkg/yqlib/no_json5.go b/pkg/yqlib/no_json5.go new file mode 100644 index 0000000000..5e1034da56 --- /dev/null +++ b/pkg/yqlib/no_json5.go @@ -0,0 +1,11 @@ +//go:build yq_nojson5 + +package yqlib + +func NewJSON5Decoder() Decoder { + return nil +} + +func NewJSON5Encoder(prefs JsonPreferences) Encoder { + return nil +} diff --git a/project-words.txt b/project-words.txt index 759f0dd557..d8f2ef2703 100644 --- a/project-words.txt +++ b/project-words.txt @@ -293,3 +293,4 @@ buildvcs behaviour GOFLAGS gocache +json5 diff --git a/scripts/build-small-yq.sh b/scripts/build-small-yq.sh index 5f6877aa7d..a4b361cb00 100755 --- a/scripts/build-small-yq.sh +++ b/scripts/build-small-yq.sh @@ -1,2 +1,2 @@ #!/bin/bash -go build -tags "yq_nolua yq_noini yq_notoml yq_noxml yq_nojson yq_nohcl yq_nokyaml" -ldflags "-s -w" . +go build -tags "yq_nolua yq_noini yq_notoml yq_noxml yq_nojson yq_nohcl yq_nokyaml yq_nojson5" -ldflags "-s -w" . diff --git a/scripts/build-tinygo-yq.sh b/scripts/build-tinygo-yq.sh index dfd9c9f083..12515cb9a2 100755 --- a/scripts/build-tinygo-yq.sh +++ b/scripts/build-tinygo-yq.sh @@ -1,4 +1,4 @@ #!/bin/bash # Currently, the `yq_nojson` feature must be enabled when using TinyGo. -tinygo build -no-debug -tags "yq_nolua yq_noini yq_notoml yq_noxml yq_nojson yq_nocsv yq_nobase64 yq_nouri yq_noprops yq_nosh yq_noshell yq_nohcl yq_nokyaml" . 
+tinygo build -no-debug -tags "yq_nolua yq_noini yq_notoml yq_noxml yq_nojson yq_nocsv yq_nobase64 yq_nouri yq_noprops yq_nosh yq_noshell yq_nohcl yq_nokyaml yq_nojson5" .