Blame - src/bytes/bytes.go - platform/prebuilts/go/darwin-x86

blob: 9af177fa882d7e17376649395721e183332211e3 [file] [log] [blame]

Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	1	// Copyright 2009 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	// Package bytes implements functions for the manipulation of byte slices.
				6	// It is analogous to the facilities of the strings package.
				7	package bytes
				8
				9	import (
				10	"unicode"
				11	"unicode/utf8"
				12	)
				13
				14	func equalPortable(a, b []byte) bool {
				15	if len(a) != len(b) {
				16	return false
				17	}
				18	for i, c := range a {
				19	if c != b[i] {
				20	return false
				21	}
				22	}
				23	return true
				24	}
				25
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	26	// explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	27	// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
				28	func explode(s []byte, n int) [][]byte {
				29	if n <= 0 {
				30	n = len(s)
				31	}
				32	a := make([][]byte, n)
				33	var size int
				34	na := 0
				35	for len(s) > 0 {
				36	if na+1 >= n {
				37	a[na] = s
				38	na++
				39	break
				40	}
				41	_, size = utf8.DecodeRune(s)
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	42	a[na] = s[0:size:size]
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	43	s = s[size:]
				44	na++
				45	}
				46	return a[0:na]
				47	}
				48
Dan Willemsen	d279748	2017-07-26 13:13:13 -0700	[diff] [blame]	49	// countGeneric actually implements Count
				50	func countGeneric(s, sep []byte) int {
				51	// special case
				52	if len(sep) == 0 {
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	53	return utf8.RuneCount(s) + 1
				54	}
Dan Willemsen	d279748	2017-07-26 13:13:13 -0700	[diff] [blame]	55	n := 0
				56	for {
				57	i := Index(s, sep)
				58	if i == -1 {
				59	return n
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	60	}
Dan Willemsen	d279748	2017-07-26 13:13:13 -0700	[diff] [blame]	61	n++
				62	s = s[i+len(sep):]
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	63	}
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	64	}
				65
				66	// Contains reports whether subslice is within b.
				67	func Contains(b, subslice []byte) bool {
				68	return Index(b, subslice) != -1
				69	}
				70
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	71	// ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	72	func ContainsAny(b []byte, chars string) bool {
				73	return IndexAny(b, chars) >= 0
				74	}
				75
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	76	// ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	77	func ContainsRune(b []byte, r rune) bool {
				78	return IndexRune(b, r) >= 0
				79	}
				80
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	81	func indexBytePortable(s []byte, c byte) int {
				82	for i, b := range s {
				83	if b == c {
				84	return i
				85	}
				86	}
				87	return -1
				88	}
				89
				90	// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
				91	func LastIndex(s, sep []byte) int {
				92	n := len(sep)
				93	if n == 0 {
				94	return len(s)
				95	}
				96	c := sep[0]
				97	for i := len(s) - n; i >= 0; i-- {
				98	if s[i] == c && (n == 1 \|\| Equal(s[i:i+n], sep)) {
				99	return i
				100	}
				101	}
				102	return -1
				103	}
				104
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	105	// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
				106	func LastIndexByte(s []byte, c byte) int {
				107	for i := len(s) - 1; i >= 0; i-- {
				108	if s[i] == c {
				109	return i
				110	}
				111	}
				112	return -1
				113	}
				114
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	115	// IndexRune interprets s as a sequence of UTF-8-encoded code points.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	116	// It returns the byte index of the first occurrence in s of the given rune.
				117	// It returns -1 if rune is not present in s.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	118	// If r is utf8.RuneError, it returns the first instance of any
				119	// invalid UTF-8 byte sequence.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	120	func IndexRune(s []byte, r rune) int {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	121	switch {
				122	case 0 <= r && r < utf8.RuneSelf:
				123	return IndexByte(s, byte(r))
				124	case r == utf8.RuneError:
				125	for i := 0; i < len(s); {
				126	r1, n := utf8.DecodeRune(s[i:])
				127	if r1 == utf8.RuneError {
				128	return i
				129	}
				130	i += n
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	131	}
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	132	return -1
				133	case !utf8.ValidRune(r):
				134	return -1
				135	default:
				136	var b [utf8.UTFMax]byte
				137	n := utf8.EncodeRune(b[:], r)
				138	return Index(s, b[:n])
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	139	}
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	140	}
				141
				142	// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
				143	// It returns the byte index of the first occurrence in s of any of the Unicode
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	144	// code points in chars. It returns -1 if chars is empty or if there is no code
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	145	// point in common.
				146	func IndexAny(s []byte, chars string) int {
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	147	if chars == "" {
				148	// Avoid scanning all of s.
				149	return -1
				150	}
				151	if len(s) > 8 {
				152	if as, isASCII := makeASCIISet(chars); isASCII {
				153	for i, c := range s {
				154	if as.contains(c) {
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	155	return i
				156	}
				157	}
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	158	return -1
				159	}
				160	}
				161	var width int
				162	for i := 0; i < len(s); i += width {
				163	r := rune(s[i])
				164	if r < utf8.RuneSelf {
				165	width = 1
				166	} else {
				167	r, width = utf8.DecodeRune(s[i:])
				168	}
				169	for _, ch := range chars {
				170	if r == ch {
				171	return i
				172	}
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	173	}
				174	}
				175	return -1
				176	}
				177
				178	// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	179	// points. It returns the byte index of the last occurrence in s of any of
				180	// the Unicode code points in chars. It returns -1 if chars is empty or if
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	181	// there is no code point in common.
				182	func LastIndexAny(s []byte, chars string) int {
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	183	if chars == "" {
				184	// Avoid scanning all of s.
				185	return -1
				186	}
				187	if len(s) > 8 {
				188	if as, isASCII := makeASCIISet(chars); isASCII {
				189	for i := len(s) - 1; i >= 0; i-- {
				190	if as.contains(s[i]) {
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	191	return i
				192	}
				193	}
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	194	return -1
				195	}
				196	}
				197	for i := len(s); i > 0; {
				198	r, size := utf8.DecodeLastRune(s[:i])
				199	i -= size
				200	for _, c := range chars {
				201	if r == c {
				202	return i
				203	}
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	204	}
				205	}
				206	return -1
				207	}
				208
				209	// Generic split: splits after each instance of sep,
				210	// including sepSave bytes of sep in the subslices.
				211	func genSplit(s, sep []byte, sepSave, n int) [][]byte {
				212	if n == 0 {
				213	return nil
				214	}
				215	if len(sep) == 0 {
				216	return explode(s, n)
				217	}
				218	if n < 0 {
				219	n = Count(s, sep) + 1
				220	}
Dan Willemsen	d279748	2017-07-26 13:13:13 -0700	[diff] [blame]	221
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	222	a := make([][]byte, n)
Dan Willemsen	d279748	2017-07-26 13:13:13 -0700	[diff] [blame]	223	n--
				224	i := 0
				225	for i < n {
				226	m := Index(s, sep)
				227	if m < 0 {
				228	break
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	229	}
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	230	a[i] = s[: m+sepSave : m+sepSave]
Dan Willemsen	d279748	2017-07-26 13:13:13 -0700	[diff] [blame]	231	s = s[m+len(sep):]
				232	i++
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	233	}
Dan Willemsen	d279748	2017-07-26 13:13:13 -0700	[diff] [blame]	234	a[i] = s
				235	return a[:i+1]
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	236	}
				237
				238	// SplitN slices s into subslices separated by sep and returns a slice of
				239	// the subslices between those separators.
				240	// If sep is empty, SplitN splits after each UTF-8 sequence.
				241	// The count determines the number of subslices to return:
				242	// n > 0: at most n subslices; the last subslice will be the unsplit remainder.
				243	// n == 0: the result is nil (zero subslices)
				244	// n < 0: all subslices
				245	func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
				246
				247	// SplitAfterN slices s into subslices after each instance of sep and
				248	// returns a slice of those subslices.
				249	// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
				250	// The count determines the number of subslices to return:
				251	// n > 0: at most n subslices; the last subslice will be the unsplit remainder.
				252	// n == 0: the result is nil (zero subslices)
				253	// n < 0: all subslices
				254	func SplitAfterN(s, sep []byte, n int) [][]byte {
				255	return genSplit(s, sep, len(sep), n)
				256	}
				257
				258	// Split slices s into all subslices separated by sep and returns a slice of
				259	// the subslices between those separators.
				260	// If sep is empty, Split splits after each UTF-8 sequence.
				261	// It is equivalent to SplitN with a count of -1.
				262	func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
				263
				264	// SplitAfter slices s into all subslices after each instance of sep and
				265	// returns a slice of those subslices.
				266	// If sep is empty, SplitAfter splits after each UTF-8 sequence.
				267	// It is equivalent to SplitAfterN with a count of -1.
				268	func SplitAfter(s, sep []byte) [][]byte {
				269	return genSplit(s, sep, len(sep), -1)
				270	}
				271
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	272	var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
				273
				274	// Fields interprets s as a sequence of UTF-8-encoded code points.
				275	// It splits the slice s around each instance of one or more consecutive white space
				276	// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
				277	// empty slice if s contains only white space.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	278	func Fields(s []byte) [][]byte {
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	279	// First count the fields.
				280	// This is an exact count if s is ASCII, otherwise it is an approximation.
				281	n := 0
				282	wasSpace := 1
				283	// setBits is used to track which bits are set in the bytes of s.
				284	setBits := uint8(0)
				285	for i := 0; i < len(s); i++ {
				286	r := s[i]
				287	setBits \|= r
				288	isSpace := int(asciiSpace[r])
				289	n += wasSpace & ^isSpace
				290	wasSpace = isSpace
				291	}
				292
				293	if setBits >= utf8.RuneSelf {
				294	// Some runes in the input slice are not ASCII.
				295	return FieldsFunc(s, unicode.IsSpace)
				296	}
				297
				298	// ASCII fast path
				299	a := make([][]byte, n)
				300	na := 0
				301	fieldStart := 0
				302	i := 0
				303	// Skip spaces in the front of the input.
				304	for i < len(s) && asciiSpace[s[i]] != 0 {
				305	i++
				306	}
				307	fieldStart = i
				308	for i < len(s) {
				309	if asciiSpace[s[i]] == 0 {
				310	i++
				311	continue
				312	}
				313	a[na] = s[fieldStart:i:i]
				314	na++
				315	i++
				316	// Skip spaces in between fields.
				317	for i < len(s) && asciiSpace[s[i]] != 0 {
				318	i++
				319	}
				320	fieldStart = i
				321	}
				322	if fieldStart < len(s) { // Last field might end at EOF.
				323	a[na] = s[fieldStart:len(s):len(s)]
				324	}
				325	return a
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	326	}
				327
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	328	// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	329	// It splits the slice s at each run of code points c satisfying f(c) and
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	330	// returns a slice of subslices of s. If all code points in s satisfy f(c), or
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	331	// len(s) == 0, an empty slice is returned.
				332	// FieldsFunc makes no guarantees about the order in which it calls f(c).
				333	// If f does not return consistent results for a given c, FieldsFunc may crash.
				334	func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	335	// A span is used to record a slice of s of the form s[start:end].
				336	// The start index is inclusive and the end index is exclusive.
				337	type span struct {
				338	start int
				339	end int
				340	}
				341	spans := make([]span, 0, 32)
				342
				343	// Find the field start and end indices.
				344	wasField := false
				345	fromIndex := 0
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	346	for i := 0; i < len(s); {
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	347	size := 1
				348	r := rune(s[i])
				349	if r >= utf8.RuneSelf {
				350	r, size = utf8.DecodeRune(s[i:])
				351	}
				352	if f(r) {
				353	if wasField {
				354	spans = append(spans, span{start: fromIndex, end: i})
				355	wasField = false
				356	}
				357	} else {
				358	if !wasField {
				359	fromIndex = i
				360	wasField = true
				361	}
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	362	}
				363	i += size
				364	}
				365
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	366	// Last field might end at EOF.
				367	if wasField {
				368	spans = append(spans, span{fromIndex, len(s)})
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	369	}
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	370
				371	// Create subslices from recorded field indices.
				372	a := make([][]byte, len(spans))
				373	for i, span := range spans {
				374	a[i] = s[span.start:span.end:span.end]
				375	}
				376
				377	return a
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	378	}
				379
				380	// Join concatenates the elements of s to create a new byte slice. The separator
				381	// sep is placed between elements in the resulting slice.
				382	func Join(s [][]byte, sep []byte) []byte {
				383	if len(s) == 0 {
				384	return []byte{}
				385	}
				386	if len(s) == 1 {
				387	// Just return a copy.
				388	return append([]byte(nil), s[0]...)
				389	}
				390	n := len(sep) * (len(s) - 1)
				391	for _, v := range s {
				392	n += len(v)
				393	}
				394
				395	b := make([]byte, n)
				396	bp := copy(b, s[0])
				397	for _, v := range s[1:] {
				398	bp += copy(b[bp:], sep)
				399	bp += copy(b[bp:], v)
				400	}
				401	return b
				402	}
				403
				404	// HasPrefix tests whether the byte slice s begins with prefix.
				405	func HasPrefix(s, prefix []byte) bool {
				406	return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
				407	}
				408
				409	// HasSuffix tests whether the byte slice s ends with suffix.
				410	func HasSuffix(s, suffix []byte) bool {
				411	return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
				412	}
				413
				414	// Map returns a copy of the byte slice s with all its characters modified
				415	// according to the mapping function. If mapping returns a negative value, the character is
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	416	// dropped from the byte slice with no replacement. The characters in s and the
				417	// output are interpreted as UTF-8-encoded code points.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	418	func Map(mapping func(r rune) rune, s []byte) []byte {
				419	// In the worst case, the slice can grow when mapped, making
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	420	// things unpleasant. But it's so rare we barge in assuming it's
				421	// fine. It could also shrink but that falls out naturally.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	422	maxbytes := len(s) // length of b
				423	nbytes := 0 // number of bytes encoded in b
				424	b := make([]byte, maxbytes)
				425	for i := 0; i < len(s); {
				426	wid := 1
				427	r := rune(s[i])
				428	if r >= utf8.RuneSelf {
				429	r, wid = utf8.DecodeRune(s[i:])
				430	}
				431	r = mapping(r)
				432	if r >= 0 {
				433	rl := utf8.RuneLen(r)
				434	if rl < 0 {
				435	rl = len(string(utf8.RuneError))
				436	}
				437	if nbytes+rl > maxbytes {
				438	// Grow the buffer.
				439	maxbytes = maxbytes*2 + utf8.UTFMax
				440	nb := make([]byte, maxbytes)
				441	copy(nb, b[0:nbytes])
				442	b = nb
				443	}
				444	nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
				445	}
				446	i += wid
				447	}
				448	return b[0:nbytes]
				449	}
				450
				451	// Repeat returns a new byte slice consisting of count copies of b.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	452	//
				453	// It panics if count is negative or if
				454	// the result of (len(b) * count) overflows.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	455	func Repeat(b []byte, count int) []byte {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	456	// Since we cannot return an error on overflow,
				457	// we should panic if the repeat will generate
				458	// an overflow.
				459	// See Issue golang.org/issue/16237.
				460	if count < 0 {
				461	panic("bytes: negative Repeat count")
				462	} else if count > 0 && len(b)*count/count != len(b) {
				463	panic("bytes: Repeat count causes overflow")
				464	}
				465
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	466	nb := make([]byte, len(b)*count)
				467	bp := copy(nb, b)
				468	for bp < len(nb) {
				469	copy(nb[bp:], nb[:bp])
				470	bp *= 2
				471	}
				472	return nb
				473	}
				474
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	475	// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	476	func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
				477
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	478	// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	479	func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
				480
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	481	// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	482	func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
				483
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	484	// ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	485	// upper case, giving priority to the special casing rules.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	486	func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
				487	return Map(func(r rune) rune { return c.ToUpper(r) }, s)
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	488	}
				489
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	490	// ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	491	// lower case, giving priority to the special casing rules.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	492	func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
				493	return Map(func(r rune) rune { return c.ToLower(r) }, s)
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	494	}
				495
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	496	// ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	497	// title case, giving priority to the special casing rules.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	498	func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
				499	return Map(func(r rune) rune { return c.ToTitle(r) }, s)
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	500	}
				501
				502	// isSeparator reports whether the rune could mark a word boundary.
				503	// TODO: update when package unicode captures more of the properties.
				504	func isSeparator(r rune) bool {
				505	// ASCII alphanumerics and underscore are not separators
				506	if r <= 0x7F {
				507	switch {
				508	case '0' <= r && r <= '9':
				509	return false
				510	case 'a' <= r && r <= 'z':
				511	return false
				512	case 'A' <= r && r <= 'Z':
				513	return false
				514	case r == '_':
				515	return false
				516	}
				517	return true
				518	}
				519	// Letters and digits are not separators
				520	if unicode.IsLetter(r) \|\| unicode.IsDigit(r) {
				521	return false
				522	}
				523	// Otherwise, all we can do for now is treat spaces as separators.
				524	return unicode.IsSpace(r)
				525	}
				526
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	527	// Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
				528	// words mapped to their title case.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	529	//
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	530	// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	531	func Title(s []byte) []byte {
				532	// Use a closure here to remember state.
				533	// Hackish but effective. Depends on Map scanning in order and calling
				534	// the closure once per rune.
				535	prev := ' '
				536	return Map(
				537	func(r rune) rune {
				538	if isSeparator(prev) {
				539	prev = r
				540	return unicode.ToTitle(r)
				541	}
				542	prev = r
				543	return r
				544	},
				545	s)
				546	}
				547
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	548	// TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
				549	// all leading UTF-8-encoded code points c that satisfy f(c).
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	550	func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
				551	i := indexFunc(s, f, false)
				552	if i == -1 {
				553	return nil
				554	}
				555	return s[i:]
				556	}
				557
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	558	// TrimRightFunc returns a subslice of s by slicing off all trailing
				559	// UTF-8-encoded code points c that satisfy f(c).
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	560	func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
				561	i := lastIndexFunc(s, f, false)
				562	if i >= 0 && s[i] >= utf8.RuneSelf {
				563	_, wid := utf8.DecodeRune(s[i:])
				564	i += wid
				565	} else {
				566	i++
				567	}
				568	return s[0:i]
				569	}
				570
				571	// TrimFunc returns a subslice of s by slicing off all leading and trailing
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	572	// UTF-8-encoded code points c that satisfy f(c).
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	573	func TrimFunc(s []byte, f func(r rune) bool) []byte {
				574	return TrimRightFunc(TrimLeftFunc(s, f), f)
				575	}
				576
				577	// TrimPrefix returns s without the provided leading prefix string.
				578	// If s doesn't start with prefix, s is returned unchanged.
				579	func TrimPrefix(s, prefix []byte) []byte {
				580	if HasPrefix(s, prefix) {
				581	return s[len(prefix):]
				582	}
				583	return s
				584	}
				585
				586	// TrimSuffix returns s without the provided trailing suffix string.
				587	// If s doesn't end with suffix, s is returned unchanged.
				588	func TrimSuffix(s, suffix []byte) []byte {
				589	if HasSuffix(s, suffix) {
				590	return s[:len(s)-len(suffix)]
				591	}
				592	return s
				593	}
				594
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	595	// IndexFunc interprets s as a sequence of UTF-8-encoded code points.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	596	// It returns the byte index in s of the first Unicode
				597	// code point satisfying f(c), or -1 if none do.
				598	func IndexFunc(s []byte, f func(r rune) bool) int {
				599	return indexFunc(s, f, true)
				600	}
				601
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	602	// LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	603	// It returns the byte index in s of the last Unicode
				604	// code point satisfying f(c), or -1 if none do.
				605	func LastIndexFunc(s []byte, f func(r rune) bool) int {
				606	return lastIndexFunc(s, f, true)
				607	}
				608
				609	// indexFunc is the same as IndexFunc except that if
				610	// truth==false, the sense of the predicate function is
				611	// inverted.
				612	func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
				613	start := 0
				614	for start < len(s) {
				615	wid := 1
				616	r := rune(s[start])
				617	if r >= utf8.RuneSelf {
				618	r, wid = utf8.DecodeRune(s[start:])
				619	}
				620	if f(r) == truth {
				621	return start
				622	}
				623	start += wid
				624	}
				625	return -1
				626	}
				627
				628	// lastIndexFunc is the same as LastIndexFunc except that if
				629	// truth==false, the sense of the predicate function is
				630	// inverted.
				631	func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
				632	for i := len(s); i > 0; {
				633	r, size := rune(s[i-1]), 1
				634	if r >= utf8.RuneSelf {
				635	r, size = utf8.DecodeLastRune(s[0:i])
				636	}
				637	i -= size
				638	if f(r) == truth {
				639	return i
				640	}
				641	}
				642	return -1
				643	}
				644
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	645	// asciiSet is a 32-byte value, where each bit represents the presence of a
				646	// given ASCII character in the set. The 128-bits of the lower 16 bytes,
				647	// starting with the least-significant bit of the lowest word to the
				648	// most-significant bit of the highest word, map to the full range of all
				649	// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
				650	// ensuring that any non-ASCII character will be reported as not in the set.
				651	type asciiSet [8]uint32
				652
				653	// makeASCIISet creates a set of ASCII characters and reports whether all
				654	// characters in chars are ASCII.
				655	func makeASCIISet(chars string) (as asciiSet, ok bool) {
				656	for i := 0; i < len(chars); i++ {
				657	c := chars[i]
				658	if c >= utf8.RuneSelf {
				659	return as, false
				660	}
				661	as[c>>5] \|= 1 << uint(c&31)
				662	}
				663	return as, true
				664	}
				665
				666	// contains reports whether c is inside the set.
				667	func (as *asciiSet) contains(c byte) bool {
				668	return (as[c>>5] & (1 << uint(c&31))) != 0
				669	}
				670
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	671	func makeCutsetFunc(cutset string) func(r rune) bool {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	672	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
				673	return func(r rune) bool {
				674	return r == rune(cutset[0])
				675	}
				676	}
				677	if as, isASCII := makeASCIISet(cutset); isASCII {
				678	return func(r rune) bool {
				679	return r < utf8.RuneSelf && as.contains(byte(r))
				680	}
				681	}
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	682	return func(r rune) bool {
				683	for _, c := range cutset {
				684	if c == r {
				685	return true
				686	}
				687	}
				688	return false
				689	}
				690	}
				691
				692	// Trim returns a subslice of s by slicing off all leading and
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	693	// trailing UTF-8-encoded code points contained in cutset.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	694	func Trim(s []byte, cutset string) []byte {
				695	return TrimFunc(s, makeCutsetFunc(cutset))
				696	}
				697
				698	// TrimLeft returns a subslice of s by slicing off all leading
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	699	// UTF-8-encoded code points contained in cutset.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	700	func TrimLeft(s []byte, cutset string) []byte {
				701	return TrimLeftFunc(s, makeCutsetFunc(cutset))
				702	}
				703
				704	// TrimRight returns a subslice of s by slicing off all trailing
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	705	// UTF-8-encoded code points that are contained in cutset.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	706	func TrimRight(s []byte, cutset string) []byte {
				707	return TrimRightFunc(s, makeCutsetFunc(cutset))
				708	}
				709
				710	// TrimSpace returns a subslice of s by slicing off all leading and
				711	// trailing white space, as defined by Unicode.
				712	func TrimSpace(s []byte) []byte {
				713	return TrimFunc(s, unicode.IsSpace)
				714	}
				715
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	716	// Runes interprets s as a sequence of UTF-8-encoded code points.
				717	// It returns a slice of runes (Unicode code points) equivalent to s.
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	718	func Runes(s []byte) []rune {
				719	t := make([]rune, utf8.RuneCount(s))
				720	i := 0
				721	for len(s) > 0 {
				722	r, l := utf8.DecodeRune(s)
				723	t[i] = r
				724	i++
				725	s = s[l:]
				726	}
				727	return t
				728	}
				729
				730	// Replace returns a copy of the slice s with the first n
				731	// non-overlapping instances of old replaced by new.
				732	// If old is empty, it matches at the beginning of the slice
				733	// and after each UTF-8 sequence, yielding up to k+1 replacements
				734	// for a k-rune slice.
				735	// If n < 0, there is no limit on the number of replacements.
				736	func Replace(s, old, new []byte, n int) []byte {
				737	m := 0
				738	if n != 0 {
				739	// Compute number of replacements.
				740	m = Count(s, old)
				741	}
				742	if m == 0 {
				743	// Just return a copy.
				744	return append([]byte(nil), s...)
				745	}
				746	if n < 0 \|\| m < n {
				747	n = m
				748	}
				749
				750	// Apply replacements to buffer.
				751	t := make([]byte, len(s)+n*(len(new)-len(old)))
				752	w := 0
				753	start := 0
				754	for i := 0; i < n; i++ {
				755	j := start
				756	if len(old) == 0 {
				757	if i > 0 {
				758	_, wid := utf8.DecodeRune(s[start:])
				759	j += wid
				760	}
				761	} else {
				762	j += Index(s[start:], old)
				763	}
				764	w += copy(t[w:], s[start:j])
				765	w += copy(t[w:], new)
				766	start = j + len(old)
				767	}
				768	w += copy(t[w:], s[start:])
				769	return t[0:w]
				770	}
				771
				772	// EqualFold reports whether s and t, interpreted as UTF-8 strings,
				773	// are equal under Unicode case-folding.
				774	func EqualFold(s, t []byte) bool {
				775	for len(s) != 0 && len(t) != 0 {
				776	// Extract first rune from each.
				777	var sr, tr rune
				778	if s[0] < utf8.RuneSelf {
				779	sr, s = rune(s[0]), s[1:]
				780	} else {
				781	r, size := utf8.DecodeRune(s)
				782	sr, s = r, s[size:]
				783	}
				784	if t[0] < utf8.RuneSelf {
				785	tr, t = rune(t[0]), t[1:]
				786	} else {
				787	r, size := utf8.DecodeRune(t)
				788	tr, t = r, t[size:]
				789	}
				790
				791	// If they match, keep going; if not, return false.
				792
				793	// Easy case.
				794	if tr == sr {
				795	continue
				796	}
				797
				798	// Make sr < tr to simplify what follows.
				799	if tr < sr {
				800	tr, sr = sr, tr
				801	}
				802	// Fast check for ASCII.
				803	if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
				804	// ASCII, and sr is upper case. tr must be lower case.
				805	if tr == sr+'a'-'A' {
				806	continue
				807	}
				808	return false
				809	}
				810
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	811	// General case. SimpleFold(x) returns the next equivalent rune > x
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	812	// or wraps around to smaller values.
				813	r := unicode.SimpleFold(sr)
				814	for r != sr && r < tr {
				815	r = unicode.SimpleFold(r)
				816	}
				817	if r == tr {
				818	continue
				819	}
				820	return false
				821	}
				822
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	823	// One string is empty. Are both?
Brent Austin	ba3052e	2015-04-21 16:08:23 -0700	[diff] [blame]	824	return len(s) == len(t)
				825	}
Dan Willemsen	a322328	2018-02-27 19:41:43 -0800	[diff] [blame^]	826
				827	func indexRabinKarp(s, sep []byte) int {
				828	// Rabin-Karp search
				829	hashsep, pow := hashStr(sep)
				830	n := len(sep)
				831	var h uint32
				832	for i := 0; i < n; i++ {
				833	h = h*primeRK + uint32(s[i])
				834	}
				835	if h == hashsep && Equal(s[:n], sep) {
				836	return 0
				837	}
				838	for i := n; i < len(s); {
				839	h *= primeRK
				840	h += uint32(s[i])
				841	h -= pow * uint32(s[i-n])
				842	i++
				843	if h == hashsep && Equal(s[i-n:i], sep) {
				844	return i - n
				845	}
				846	}
				847	return -1
				848	}
				849
				850	// primeRK is the prime base used in Rabin-Karp algorithm.
				851	const primeRK = 16777619
				852
				853	// hashStr returns the hash and the appropriate multiplicative
				854	// factor for use in Rabin-Karp algorithm.
				855	func hashStr(sep []byte) (uint32, uint32) {
				856	hash := uint32(0)
				857	for i := 0; i < len(sep); i++ {
				858	hash = hash*primeRK + uint32(sep[i])
				859	}
				860	var pow, sq uint32 = 1, primeRK
				861	for i := len(sep); i > 0; i >>= 1 {
				862	if i&1 != 0 {
				863	pow *= sq
				864	}
				865	sq *= sq
				866	}
				867	return hash, pow
				868	}