VictoriaMetrics/vendor/github.com/aws/smithy-go/encoding/xml/escape.go

138 lines
3.0 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Copied and modified from Go 1.14 stdlib's encoding/xml
package xml
import (
"unicode/utf8"
)
// Copied from Go 1.14 stdlib's encoding/xml
var (
escQuot = []byte(""") // shorter than """
escApos = []byte("'") // shorter than "'"
escAmp = []byte("&")
escLT = []byte("<")
escGT = []byte(">")
escTab = []byte("	")
escNL = []byte("
")
escCR = []byte("
")
escFFFD = []byte("\uFFFD") // Unicode replacement character
// Additional Escapes
escNextLine = []byte("…")
escLS = []byte("
")
)
// Decide whether the given rune is in the XML Character Range, per
// the Char production of https://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters.
func isInCharacterRange(r rune) (inrange bool) {
return r == 0x09 ||
r == 0x0A ||
r == 0x0D ||
r >= 0x20 && r <= 0xD7FF ||
r >= 0xE000 && r <= 0xFFFD ||
r >= 0x10000 && r <= 0x10FFFF
}
// TODO: When do we need to escape the string?
// Based on encoding/xml escapeString from the Go Standard Library.
// https://golang.org/src/encoding/xml/xml.go
func escapeString(e writer, s string) {
var esc []byte
last := 0
for i := 0; i < len(s); {
r, width := utf8.DecodeRuneInString(s[i:])
i += width
switch r {
case '"':
esc = escQuot
case '\'':
esc = escApos
case '&':
esc = escAmp
case '<':
esc = escLT
case '>':
esc = escGT
case '\t':
esc = escTab
case '\n':
esc = escNL
case '\r':
esc = escCR
case '\u0085':
// Not escaped by stdlib
esc = escNextLine
case '\u2028':
// Not escaped by stdlib
esc = escLS
default:
if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
esc = escFFFD
break
}
continue
}
e.WriteString(s[last : i-width])
e.Write(esc)
last = i
}
e.WriteString(s[last:])
}
// escapeText writes to w the properly escaped XML equivalent
// of the plain text data s. If escapeNewline is true, newline
// characters will be escaped.
//
// Based on encoding/xml escapeText from the Go Standard Library.
// https://golang.org/src/encoding/xml/xml.go
func escapeText(e writer, s []byte) {
var esc []byte
last := 0
for i := 0; i < len(s); {
r, width := utf8.DecodeRune(s[i:])
i += width
switch r {
case '"':
esc = escQuot
case '\'':
esc = escApos
case '&':
esc = escAmp
case '<':
esc = escLT
case '>':
esc = escGT
case '\t':
esc = escTab
case '\n':
// This always escapes newline, which is different than stdlib's optional
// escape of new line.
esc = escNL
case '\r':
esc = escCR
case '\u0085':
// Not escaped by stdlib
esc = escNextLine
case '\u2028':
// Not escaped by stdlib
esc = escLS
default:
if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
esc = escFFFD
break
}
continue
}
e.Write(s[last : i-width])
e.Write(esc)
last = i
}
e.Write(s[last:])
}