字符串拼接测试
下面通过基准测试比较 Go 中几种字符串拼接方式的效率,测试代码如下:
package play
import (
"bytes"
"fmt"
"strings"
"testing"
)
// Shared inputs for the benchmarks in this file.
const (
// hello and world are the two short operands joined by the small-string benchmarks.
hello = "hello"
world = "world"
// num is how many pieces the long-string benchmarks append.
num = 100
// longHello is the piece appended num times by the long-string benchmarks.
longHello = "hellohellohellohellohellohellohellohellohellohello"
)
// plusLeft and plusRight hold the operands of BenchmarkStringWithPlus in
// package-level variables. hello and world are constants, so the expression
// hello + "," + world is itself a constant expression that is evaluated at
// compile time; reading the operands from variables makes the concatenation
// happen at run time, inside the timed loop.
var plusLeft, plusRight = hello, world

// plusSink receives the final result so the concatenation has an observable
// effect and cannot be discarded as dead code.
var plusSink string

// BenchmarkStringWithPlus measures joining two short strings and a separator
// with the + operator.
func BenchmarkStringWithPlus(b *testing.B) {
	var s string
	for i := 0; i < b.N; i++ {
		s = plusLeft + "," + plusRight
	}
	plusSink = s
}
// BenchmarkStringWithSprintf measures joining hello and world with a comma
// through fmt.Sprintf.
func BenchmarkStringWithSprintf(b *testing.B) {
	const layout = "%s,%s"
	for n := b.N; n > 0; n-- {
		_ = fmt.Sprintf(layout, hello, world)
	}
}
// BenchmarkStringWithJoin measures joining hello and world with a comma
// through strings.Join. The two-element slice is rebuilt on every iteration,
// so its construction is part of the measured work.
func BenchmarkStringWithJoin(b *testing.B) {
	for n := b.N; n > 0; n-- {
		parts := []string{hello, world}
		_ = strings.Join(parts, ",")
	}
}
// BenchmarkStringWithBuffer measures joining hello and world with a comma by
// writing the three pieces into a fresh bytes.Buffer on every iteration.
func BenchmarkStringWithBuffer(b *testing.B) {
	for n := b.N; n > 0; n-- {
		var buf bytes.Buffer
		buf.WriteString(hello)
		buf.WriteString(",")
		buf.WriteString(world)
		_ = buf.String()
	}
}
// BenchmarkStringWithBuilder measures joining hello and world with a comma by
// writing the three pieces into a fresh strings.Builder on every iteration.
func BenchmarkStringWithBuilder(b *testing.B) {
	for n := b.N; n > 0; n-- {
		var sb strings.Builder
		sb.WriteString(hello)
		sb.WriteString(",")
		sb.WriteString(world)
		_ = sb.String()
	}
}
// BenchmarkLongStringWithPlus measures building a long string by appending
// longHello num times with +=, starting from an empty string each iteration.
func BenchmarkLongStringWithPlus(b *testing.B) {
	for n := b.N; n > 0; n-- {
		var acc string
		for k := 0; k < num; k++ {
			acc += longHello
		}
	}
}
// BenchmarkLongStringWithSprintf measures building a long string by
// re-formatting the accumulated value plus longHello through fmt.Sprintf,
// num times per iteration.
func BenchmarkLongStringWithSprintf(b *testing.B) {
	for n := b.N; n > 0; n-- {
		var acc string
		for k := 0; k < num; k++ {
			acc = fmt.Sprintf("%s%s", acc, longHello)
		}
	}
}
// joinList is the input of BenchmarkLongStringWithJoin: num copies of
// longHello, built once at package initialisation so that constructing the
// slice is not part of the measured work.
var joinList []string

// init fills joinList with num copies of longHello.
func init() {
	joinList = make([]string, num)
	for idx := range joinList {
		joinList[idx] = longHello
	}
}
// BenchmarkLongStringWithJoin measures concatenating the pre-built joinList
// with strings.Join and an empty separator.
func BenchmarkLongStringWithJoin(b *testing.B) {
	const sep = ""
	for n := b.N; n > 0; n-- {
		_ = strings.Join(joinList, sep)
	}
}
// BenchmarkLongStringWithBuffer measures building a long string by writing
// longHello num times into a fresh bytes.Buffer and converting it to a string.
func BenchmarkLongStringWithBuffer(b *testing.B) {
	for n := b.N; n > 0; n-- {
		var buf bytes.Buffer
		for k := 0; k < num; k++ {
			buf.WriteString(longHello)
		}
		_ = buf.String()
	}
}
// BenchmarkLongStringWithBuilder measures building a long string by writing
// longHello num times into a fresh strings.Builder that starts with no
// reserved capacity. strings.Builder requires Go 1.10 or newer.
func BenchmarkLongStringWithBuilder(b *testing.B) {
	for n := b.N; n > 0; n-- {
		var sb strings.Builder
		for k := 0; k < num; k++ {
			sb.WriteString(longHello)
		}
		_ = sb.String()
	}
}
// BenchmarkLongStringWithBuilderGrow measures building a long string by
// writing longHello num times into a fresh strings.Builder whose capacity is
// reserved up front with Grow.
func BenchmarkLongStringWithBuilderGrow(b *testing.B) {
	// total is the exact number of bytes written per iteration.
	const total = len(longHello) * num
	for n := b.N; n > 0; n-- {
		var sb strings.Builder
		sb.Grow(total)
		for k := 0; k < num; k++ {
			sb.WriteString(longHello)
		}
		_ = sb.String()
	}
}
测试结果
$ go test -benchmem -bench=. -v
goos: darwin
goarch: amd64
pkg: github.com/haozibi/play
BenchmarkStringWithPlus-4 2000000000 0.34 ns/op 0 B/op 0 allocs/op
BenchmarkStringWithSprintf-4 10000000 126 ns/op 16 B/op 1 allocs/op
BenchmarkStringWithJoin-4 30000000 58.6 ns/op 16 B/op 1 allocs/op
BenchmarkStringWithBuffer-4 20000000 67.6 ns/op 64 B/op 1 allocs/op
BenchmarkStringWithBuilder-4 20000000 67.6 ns/op 24 B/op 2 allocs/op
BenchmarkLongStringWithPlus-4 20000 60128 ns/op 270784 B/op 99 allocs/op
BenchmarkLongStringWithSprintf-4 20000 75893 ns/op 272558 B/op 199 allocs/op
BenchmarkLongStringWithJoin-4 1000000 2118 ns/op 5376 B/op 1 allocs/op
BenchmarkLongStringWithBuffer-4 200000 5768 ns/op 21024 B/op 8 allocs/op
BenchmarkLongStringWithBuilder-4 200000 5605 ns/op 21184 B/op 10 allocs/op
BenchmarkLongStringWithBuilderGrow-4 1000000 1567 ns/op 5376 B/op 1 allocs/op
PASS
ok github.com/haozibi/play 17.105s
-benchmem 参数可以输出每次操作分配内存的次数(allocs/op),以及每次操作分配的字节数(B/op)。
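测试发现小文本用 "+" 拼接看起来最快,但这里参与拼接的都是常量,表达式在编译期就已经被折叠成一个字符串(所以才会出现 0.34 ns/op、0 allocs/op),这个数字并不代表运行时拼接的真实开销。即便如此,少量短字符串用 "+" 拼接仍然是合适的;"fmt" 方式性能不是很好,不推荐在对性能敏感的代码中使用。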
对于大文本推荐使用 “strings.Builder” + “Builder.Grow”
Go 1.10 新增了 strings.Builder,在只需要拼接出字符串的场景下可以用它替代 bytes.Buffer。
// A Builder is used to efficiently build a string using Write methods.
// It minimizes memory copying. The zero value is ready to use.
// Do not copy a non-zero Builder.
type Builder struct {
addr *Builder // of receiver, to detect copies by value
buf []byte // bytes appended so far by the Write methods
}
// WriteString appends the contents of s to b's buffer.
// It returns the length of s and a nil error.
func (b *Builder) WriteString(s string) (int, error) {
b.copyCheck()
// append reallocates b.buf when its capacity is too small for s.
b.buf = append(b.buf, s...)
return len(s), nil
}
由于 append 函数的特性,当底层 slice 容量不足时会重新申请更大的内存并拷贝已有内容,写入次数多时会拖慢速度。如果事先知道大致的总长度,可以使用 Builder.Grow() 方法预先分配容量,避免反复重新申请内存;从上面的测试结果看,设置过 Grow 的效果显著。
ps: 看了 func Join(a []string, sep string) string 发现其内部也是用 strings.Builder 进行处理。
// Join concatenates the elements of a to create a single string. The separator string
// sep is placed between elements in the resulting string.
func Join(a []string, sep string) string {
// Zero or one element needs neither a separator nor a buffer.
switch len(a) {
case 0:
return ""
case 1:
return a[0]
}
// n is the exact byte length of the result: every separator plus every element.
n := len(sep) * (len(a) - 1)
for i := 0; i < len(a); i++ {
n += len(a[i])
}
// Reserve n bytes up front, then write the elements with sep between them.
var b Builder
b.Grow(n)
b.WriteString(a[0])
for _, s := range a[1:] {
b.WriteString(sep)
b.WriteString(s)
}
return b.String()
}
strings.Builder 源码
Builder 提供了 4 个方法将数据写入 builder:
func (b *Builder) Write(p []byte) (int, error)
func (b *Builder) WriteByte(c byte) error
func (b *Builder) WriteRune(r rune) (int, error)
func (b *Builder) WriteString(s string) (int, error)
Builder 底层通过 []byte 来存储数据
// A Builder is used to efficiently build a string using Write methods.
// It minimizes memory copying. The zero value is ready to use.
// Do not copy a non-zero Builder.
type Builder struct {
addr *Builder // of receiver, to detect copies by value
buf []byte // bytes appended so far by the Write methods
}
WriteString
以 func (b *Builder) WriteString(s string) (int, error) 为例,当开发者调用 WriteString 方法时,数据会被追加到其内部的 slice(具体为 []byte)中。根据 append 函数的特性,如果达到了 slice 的容量(capacity)限制,就会分配一个更大的新 slice(容量较小时大致翻倍,容量较大时增长比例会变小),然后把老 slice 上的内容拷贝到新 slice 上。当 slice 长度很大时,这个操作会很消耗资源,甚至引起内存问题。
// WriteString appends the contents of s to b's buffer.
// It returns the length of s and a nil error.
func (b *Builder) WriteString(s string) (int, error) {
// Panics if this Builder was copied by value after its first use.
b.copyCheck()
// append reallocates b.buf when its capacity is too small for s.
b.buf = append(b.buf, s...)
return len(s), nil
}
Grow
为了解决 append 的问题,Builder 提供了 Grow 方法预设足够大的容量。只有当 slice 的剩余空间(cap - len)不足以再写入 n 个字节时才会扩容,扩容后的容量是 2*cap(b.buf)+n。func (b *Builder) grow(n int) 会先创建一个新的 slice,然后通过内置函数 copy 把旧的内容拷贝过去。
注意 n 的单位是字节而不是字符:由于 UTF-8 编码的原因,WriteString、WriteRune 写入的一个字符可能不止一个字节。
// Grow grows b's capacity, if necessary, to guarantee space for
// another n bytes. After Grow(n), at least n bytes can be written to b
// without another allocation. If n is negative, Grow panics.
func (b *Builder) Grow(n int) {
b.copyCheck()
if n < 0 {
panic("strings.Builder.Grow: negative count")
}
// Reallocate only when the spare capacity (cap - len) is smaller than n.
if cap(b.buf)-len(b.buf) < n {
b.grow(n)
}
}
// grow copies the buffer to a new, larger buffer so that there are at least n
// bytes of capacity beyond len(b.buf).
func (b *Builder) grow(n int) {
// The new buffer keeps the current length; its capacity is 2*cap(b.buf)+n.
buf := make([]byte, len(b.buf), 2*cap(b.buf)+n)
// Carry the existing bytes over, then switch to the new buffer.
copy(buf, b.buf)
b.buf = buf
}
String
为了节省内存分配,String 通过 unsafe.Pointer 做纯指针转换,直接将 buf []byte 转换为 string 类型,避免了内存重新分配和数据拷贝。
// String returns the accumulated string.
func (b *Builder) String() string {
// Reads the []byte variable through a *string pointer, so the returned
// string refers to buf's existing bytes instead of a copy.
return *(*string)(unsafe.Pointer(&b.buf))
}
copyCheck
当你试图拷贝一个已经写入过数据的 strings.Builder 并继续写入时,程序会 panic。
var b1 strings.Builder
b1.WriteString("ABC")
b2 := b1 // by-value copy of a Builder that has already been written to
b2.WriteString("DEF")
// panic: strings: illegal use of non-zero Builder copied by value
strings.Builder 内部通过 slice 来保存和管理内容。slice 内部则是通过一个指针指向实际保存内容的数组。当我们拷贝了 builder 以后,同样也拷贝了其 slice 的指针。但是它仍然指向同一个旧的数组。
当你对源 builder 或者拷贝后的 builder 写入的时候,问题就产生了。另一个 builder 指向的数组内容也被改变了。这就是为什么 strings.Builder 不允许拷贝的原因。
// copyCheck records the receiver's address in b.addr on first use and panics
// when a later call finds b.addr pointing at a different Builder, which is
// the case after a non-zero Builder has been copied by value.
func (b *Builder) copyCheck() {
if b.addr == nil {
// This hack works around a failing of Go's escape analysis
// that was causing b to escape and be heap allocated.
// See issue 23382.
// TODO: once issue 7921 is fixed, this should be reverted to
// just "b.addr = b".
b.addr = (*Builder)(noescape(unsafe.Pointer(b)))
} else if b.addr != b {
panic("strings: illegal use of non-zero Builder copied by value")
}
}
copyCheck 在第一次被调用时把当前 Builder 的地址记录到 b.addr 中;如果之后 Builder 被按值拷贝,副本的 b.addr 仍然指向原来的 Builder,与副本自身的地址 b 不相同,于是触发 panic(代码中的 noescape 只是为了绕开 Go 逃逸分析的一个缺陷,避免 b 被分配到堆上)。copyCheck 只在下面 5 个方法中被调用:
- Grow(n int)
- Write(p []byte)
- WriteByte(c byte)
- WriteRune(r rune)
- WriteString(s string)
所以下面的代码是可行的
// Len, String and Reset can still be called on a copied Builder.
var b1 strings.Builder
b1.WriteString("ABC")
b2 := b1 // by-value copy of a Builder that has already been written to
fmt.Println(b2.Len()) // 3
fmt.Println(b2.String()) // ABC
b2.Reset() // NOTE(review): presumably clears the recorded address so b2 can be written again — Reset is not shown here, confirm
b2.WriteString("DEF")
fmt.Println(b2.String()) // DEF
并发
和 bytes.Buffer 一样,strings.Builder 也不是并发安全的,不支持多个 goroutine 同时读写。
io.Writer
strings.Builder 通过 Write(p []byte) (n int, err error) 方法实现了 io.Writer 接口。所以,我们多了很多使用它的情形:
- io.Copy(dst Writer, src Reader) (written int64, err error)
- bufio.NewWriter(w io.Writer) *Writer
- fmt.Fprint(w io.Writer, a ...interface{}) (n int, err error)
- func (r *http.Request) Write(w io.Writer) error
其他使用 io.Writer 的库