
package com.taobao.sketch.util;

import java.io.UnsupportedEncodingException;

import java.nio.ByteBuffer;

import java.nio.CharBuffer;

import java.nio.charset.Charset;


/**
 * ArrayUtil: a collection of helper operations on byte arrays
 * (search, replace, append and copy).
 *
 * Created Date: 2010-12-27 16:17:23
 *
 * @author junshan
 */
public class ArrayUtil {

    /**
     * Replaces every non-overlapping occurrence of {@code search} found at or after
     * {@code startIndex} with {@code replace}. Replaced bytes are never re-scanned.
     *
     * @param org        the source array
     * @param search     the byte sequence to look for; when empty nothing is replaced
     * @param replace    the byte sequence written in place of each occurrence
     * @param startIndex index at which the search starts
     * @return a new array containing the replacements, or {@code org} itself when
     *         there is no occurrence
     * @throws UnsupportedEncodingException never thrown by this implementation; the
     *         clause is kept so that existing callers keep compiling
     */
    public static byte[] arrayReplace(byte[] org, byte[] search, byte[] replace, int startIndex)
            throws UnsupportedEncodingException {
        if (search.length == 0) {
            // An empty pattern has no occurrence to replace.
            return org;
        }
        // The failure table depends only on the pattern, so build it once.
        KMPMatcher matcher = new KMPMatcher();
        matcher.computeFailure4Byte(search);

        byte[] current = org;
        int from = startIndex;
        while (true) {
            int index = matcher.indexOf(current, from);
            if (index == -1) {
                return current;
            }
            byte[] replaced = new byte[current.length + replace.length - search.length];
            int tailStart = index + search.length;
            System.arraycopy(current, 0, replaced, 0, index);
            System.arraycopy(replace, 0, replaced, index, replace.length);
            System.arraycopy(current, tailStart, replaced, index + replace.length,
                    current.length - tailStart);
            current = replaced;
            // Resume right after the bytes that were just written.
            from = index + replace.length;
        }
    }

    /**
     * Concatenates two arrays.
     *
     * @param org the leading bytes
     * @param to  the bytes appended after {@code org}
     * @return a new array holding {@code org} followed by {@code to}
     */
    public static byte[] append(byte[] org, byte[] to) {
        byte[] newByte = new byte[org.length + to.length];
        System.arraycopy(org, 0, newByte, 0, org.length);
        System.arraycopy(to, 0, newByte, org.length, to.length);
        return newByte;
    }

    /**
     * Appends a single byte to an array.
     *
     * @param org the leading bytes
     * @param to  the byte appended after {@code org}
     * @return a new array holding {@code org} followed by {@code to}
     */
    public static byte[] append(byte[] org, byte to) {
        byte[] newByte = new byte[org.length + 1];
        System.arraycopy(org, 0, newByte, 0, org.length);
        newByte[org.length] = to;
        return newByte;
    }

    /**
     * Copies all of {@code append} into {@code org}, in place, starting at {@code from}.
     *
     * @param org    the destination array; it is modified
     * @param from   index in {@code org} at which the first byte is written
     * @param append the bytes to copy
     */
    public static void append(byte[] org, int from, byte[] append) {
        System.arraycopy(append, 0, org, from, append.length);
    }

    /**
     * Copies the range {@code [from, to)} of {@code original} into a new array of
     * length {@code to - from}. Positions beyond the end of {@code original} are
     * left as zero.
     *
     * @param original the source array
     * @param from     first index to copy (inclusive)
     * @param to       last index to copy (exclusive); may exceed {@code original.length}
     * @return the copied range
     * @throws IllegalArgumentException if {@code from > to}
     */
    public static byte[] copyOfRange(byte[] original, int from, int to) {
        int newLength = to - from;
        if (newLength < 0) {
            throw new IllegalArgumentException(from + " > " + to);
        }
        byte[] copy = new byte[newLength];
        System.arraycopy(original, from, copy, 0,
                Math.min(original.length - from, newLength));
        return copy;
    }

    /**
     * Encodes characters with the named charset.
     *
     * @param encode name of the charset to use
     * @param chars  the characters to encode
     * @return exactly the encoded bytes
     */
    public static byte[] char2byte(String encode, char... chars) {
        Charset cs = Charset.forName(encode);
        CharBuffer cb = CharBuffer.allocate(chars.length);
        cb.put(chars);
        cb.flip();
        ByteBuffer bb = cs.encode(cb);
        // The backing array may be larger than the encoded content, so copy only
        // the bytes between position and limit instead of returning bb.array().
        byte[] encoded = new byte[bb.remaining()];
        bb.get(encoded);
        return encoded;
    }

    /**
     * Finds the first occurrence of {@code search} in {@code org}.
     *
     * @param org    the array to search in
     * @param search the byte sequence to look for
     * @return start index of the first occurrence, or -1 when there is none
     */
    public static int indexOf(byte[] org, byte[] search) {
        return indexOf(org, search, 0);
    }

    /**
     * Finds the first occurrence of {@code search} in {@code org} at or after
     * {@code startIndex}.
     *
     * @param org        the array to search in
     * @param search     the byte sequence to look for
     * @param startIndex index at which the search starts
     * @return start index of the first occurrence, or -1 when there is none
     *         (including when {@code search} is empty)
     */
    public static int indexOf(byte[] org, byte[] search, int startIndex) {
        KMPMatcher kmpMatcher = new KMPMatcher();
        kmpMatcher.computeFailure4Byte(search);
        return kmpMatcher.indexOf(org, startIndex);
    }

    /**
     * Finds the last occurrence of {@code search} in {@code org}, scanning from index 0.
     *
     * @param org    the array to search in
     * @param search the byte sequence to look for
     * @return start index of the last occurrence found, or -1 when there is none
     * @see KMPMatcher#lastIndexOf(byte[], int)
     */
    public static int lastIndexOf(byte[] org, byte[] search) {
        return lastIndexOf(org, search, 0);
    }

    /**
     * Finds the last occurrence of {@code search} in {@code org}, scanning from
     * {@code fromIndex} and wrapping around to the start of the array.
     *
     * @param org       the array to search in
     * @param search    the byte sequence to look for
     * @param fromIndex index at which the scan starts
     * @return start index of the last occurrence found, or -1 when there is none
     * @see KMPMatcher#lastIndexOf(byte[], int)
     */
    public static int lastIndexOf(byte[] org, byte[] search, int fromIndex) {
        KMPMatcher kmpMatcher = new KMPMatcher();
        kmpMatcher.computeFailure4Byte(search);
        return kmpMatcher.lastIndexOf(org, fromIndex);
    }

    /**
     * Brute-force first-occurrence search, used only as the timing baseline in
     * {@link #main(String[])}.
     */
    private static int naiveIndexOf(byte[] org, byte[] search) {
        for (int start = 0; start + search.length <= org.length; start++) {
            int k = 0;
            while (k < search.length && org[start + k] == search[k]) {
                k++;
            }
            if (k == search.length) {
                return start;
            }
        }
        return -1;
    }

    /**
     * Knuth-Morris-Pratt matcher over byte arrays. Call
     * {@link #computeFailure4Byte(byte[])} before any of the search methods.
     *
     * Created on 2011-1-3
     */
    static class KMPMatcher {

        /** failure[i] = length of the longest proper prefix of the pattern that is also a suffix of pattern[0..i]. */
        private int[] failure;

        /** The pattern currently being searched for. */
        private byte[] bytePattern;

        /**
         * Finds the first occurrence of the pattern at or after {@code startIndex}.
         *
         * @param text       the array to search in
         * @param startIndex index at which the scan starts
         * @return start index of the first occurrence, or -1 when the pattern or the
         *         text is empty, {@code startIndex > text.length}, or nothing matches
         */
        public int indexOf(byte[] text, int startIndex) {
            if (cannotMatch(text, startIndex)) {
                return -1;
            }
            int matched = 0;
            for (int i = startIndex; i < text.length; i++) {
                matched = advance(matched, text[i]);
                if (matched == bytePattern.length) {
                    return i - bytePattern.length + 1;
                }
            }
            return -1;
        }

        /**
         * Finds the last occurrence of the pattern, wrapping around to the start of
         * the text once the end has been reached.
         *
         * <p>The scan runs from {@code startIndex} to the end of {@code text}. Each
         * match is remembered and matching restarts at the first pattern byte, so an
         * occurrence overlapping a reported one is not detected. A match that leaves
         * no room for another complete pattern before the end of the text is
         * returned immediately. Otherwise, when {@code startIndex} is not 0, the scan
         * continues over {@code [0, startIndex)} and a match found there replaces the
         * one remembered from the first pass.
         *
         * @param text       the array to search in
         * @param startIndex index at which the scan starts
         * @return start index of the last match found in that scan order, or -1
         */
        public int lastIndexOf(byte[] text, int startIndex) {
            if (cannotMatch(text, startIndex)) {
                return -1;
            }
            int matchPoint = -1;
            int matched = 0;
            int end = text.length;
            for (int i = startIndex; i < end; i++) {
                matched = advance(matched, text[i]);
                if (matched == bytePattern.length) {
                    matchPoint = i - bytePattern.length + 1;
                    if ((text.length - i) > bytePattern.length) {
                        matched = 0;
                        continue;
                    }
                    return matchPoint;
                }
                // Started in the middle and hit the end: scan [0, startIndex) next.
                if (startIndex != 0 && i + 1 == end) {
                    end = startIndex;
                    i = -1;
                    startIndex = 0;
                    // Drop the partial match; otherwise bytes at the end and at the
                    // start of the text could combine into a negative match index.
                    matched = 0;
                }
            }
            return matchPoint;
        }

        /**
         * Same as {@link #lastIndexOf(byte[], int)} but without wrapping around:
         * only {@code [startIndex, text.length)} is scanned.
         *
         * @param text       the array to search in
         * @param startIndex index at which the scan starts
         * @return start index of the last match found, or -1
         */
        public int lastIndexOfWithNoLoop(byte[] text, int startIndex) {
            if (cannotMatch(text, startIndex)) {
                return -1;
            }
            int matchPoint = -1;
            int matched = 0;
            for (int i = startIndex; i < text.length; i++) {
                matched = advance(matched, text[i]);
                if (matched == bytePattern.length) {
                    matchPoint = i - bytePattern.length + 1;
                    if ((text.length - i) > bytePattern.length) {
                        matched = 0;
                        continue;
                    }
                    return matchPoint;
                }
            }
            return matchPoint;
        }

        /**
         * Stores the pattern and builds its failure table.
         *
         * @param patternStr the byte sequence later searches will look for
         */
        public void computeFailure4Byte(byte[] patternStr) {
            bytePattern = patternStr;
            int len = bytePattern.length;
            failure = new int[len];
            int matched = 0;
            for (int i = 1; i < len; i++) {
                matched = advance(matched, bytePattern[i]);
                failure[i] = matched;
            }
        }

        /** Returns true when no search can succeed for the given text and start index. */
        private boolean cannotMatch(byte[] text, int startIndex) {
            return bytePattern.length == 0 || text.length == 0 || startIndex > text.length;
        }

        /**
         * One KMP step: given that {@code matched} pattern bytes are currently
         * matched, returns how many are matched after consuming {@code next}.
         * {@code matched} must be smaller than the pattern length.
         */
        private int advance(int matched, byte next) {
            while (matched > 0 && bytePattern[matched] != next) {
                matched = failure[matched - 1];
            }
            if (bytePattern[matched] == next) {
                matched++;
            }
            return matched;
        }
    }

    /**
     * Ad-hoc micro-benchmark comparing the KMP search with a brute-force scan.
     * The original baseline was the third-party
     * {@code com.alibaba.common.lang.ArrayUtil.indexOf}; a local brute-force scan
     * is used instead so that this class depends only on the JDK.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            byte[] org = "kadeadedcfdededghkk".getBytes("GBK");
            byte[] search = "kk".getBytes("GBK");
            final int rounds = 10000;
            long t1 = 0;
            long t2 = 0;
            int f1 = 0;
            int f2 = 0;
            for (int i = 0; i < rounds; i++) {
                long s1 = System.nanoTime();
                f1 = indexOf(org, search, 0);
                long s2 = System.nanoTime();
                f2 = naiveIndexOf(org, search);
                long s3 = System.nanoTime();
                t1 = t1 + (s2 - s1);
                t2 = t2 + (s3 - s2);
            }
            System.out.println("kmp=" + t1 / rounds + ",naive=" + t2 / rounds);
            System.out.println("f1=" + f1 + ",f2=" + f2);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }
}

在 Stream 流和 byte[] 中查找(搜索)指定字符串

这里重点看两个 Search 扩展方法:一个是对 Stream 类型的扩展,另一个是对 byte[] 类型的扩展。


-- 常用扩展代码,需要这部分代码的支持!

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.IO;

using System.Drawing;

namespace Ims.Bll
{
    /// <summary>
    /// Extension methods that convert between Stream, string and byte[],
    /// and search a Stream or byte[] for a byte sequence.
    /// </summary>
    public static class StreamExtend
    {
        #region Stream extensions

        /// <summary>
        /// Reads the whole stream into a byte array and leaves the stream
        /// positioned at its beginning.
        /// </summary>
        /// <param name="stream">A seekable stream.</param>
        /// <returns>The content of the stream.</returns>
        public static byte[] ToByteArray(this Stream stream)
        {
            byte[] bytes = new byte[stream.Length];
            // Always read from the start; an earlier Search may have moved the position.
            stream.Seek(0, SeekOrigin.Begin);
            // Stream.Read may return fewer bytes than requested, so loop until the buffer is full.
            int offset = 0;
            while (offset < bytes.Length)
            {
                int read = stream.Read(bytes, offset, bytes.Length - offset);
                if (read <= 0)
                    break;
                offset += read;
            }
            // Reset the position to the beginning of the stream.
            stream.Seek(0, SeekOrigin.Begin);
            return bytes;
        }

        /// <summary>
        /// Creates an Image (a Bitmap) from the stream.
        /// </summary>
        /// <param name="stream">Stream containing the image data.</param>
        /// <returns>The image.</returns>
        public static Image ToImage(this Stream stream)
        {
            Image img = new Bitmap(stream);
            return img;
        }

        /// <summary>
        /// Converts the stream content to a string using Encoding.Default.
        /// </summary>
        /// <param name="stream">A seekable stream.</param>
        /// <returns>The decoded text.</returns>
        public static string ToStr(this Stream stream)
        {
            return System.Text.Encoding.Default.GetString(stream.ToByteArray());
        }

        /// <summary>
        /// Searches the stream for the given byte sequence.
        /// The stream position is changed by this call.
        /// </summary>
        /// <param name="stream">The stream to search.</param>
        /// <param name="beginPosition">Position at which the search starts.</param>
        /// <param name="key">The byte sequence to look for.</param>
        /// <returns>
        /// The position of the first occurrence of <paramref name="key"/> at or after
        /// <paramref name="beginPosition"/>, or -1 if it does not occur.
        /// </returns>
        public static long Search(this Stream stream, long beginPosition, byte[] key)
        {
            if (stream == null || stream.Length <= beginPosition)
                return -1;
            if (key == null || stream.Length < key.Length)
                return -1;

            long i = -1;
            long j = -1;
            int currentByte = int.MinValue;
            for (i = beginPosition; i < stream.Length; i++)
            {
                // Not enough bytes left for a complete match.
                if (stream.Length < key.Length + i)
                    break;

                stream.Seek(i, SeekOrigin.Begin);
                for (j = 0; j < key.Length; j++)
                {
                    currentByte = stream.ReadByte();
                    if (currentByte != key[j])
                        break;
                }
                if (j == key.Length)
                    return i;

                // ReadByte returned -1: end of stream.
                if (currentByte == -1)
                    break;
            }
            return -1;
        }

        #endregion

        #region byte[] extensions

        /// <summary>
        /// Wraps the byte array in a MemoryStream positioned at its beginning.
        /// </summary>
        /// <param name="arr">The bytes to expose as a stream.</param>
        /// <returns>The stream.</returns>
        public static Stream ToStream(this byte[] arr)
        {
            Stream stream = new MemoryStream(arr);
            // Reset the position to the beginning of the stream.
            stream.Seek(0, SeekOrigin.Begin);
            return stream;
        }

        /// <summary>
        /// Creates an Image from the byte array.
        /// </summary>
        /// <param name="arr">The image data.</param>
        /// <returns>The image.</returns>
        public static Image ToImage(this byte[] arr)
        {
            return Image.FromStream(arr.ToStream());
        }

        /// <summary>
        /// Converts the byte array to a string using Encoding.Default.
        /// </summary>
        /// <param name="arr">The bytes to decode.</param>
        /// <returns>The decoded text.</returns>
        public static string ToStr(this byte[] arr)
        {
            return System.Text.Encoding.Default.GetString(arr);
        }

        /// <summary>
        /// Searches the byte array for the given byte sequence.
        /// </summary>
        /// <param name="arr">The array to search.</param>
        /// <param name="beginPosition">Index at which the search starts.</param>
        /// <param name="key">The byte sequence to look for.</param>
        /// <returns>
        /// The index of the first occurrence of <paramref name="key"/> at or after
        /// <paramref name="beginPosition"/>, or -1 if it does not occur.
        /// </returns>
        public static int Search(this byte[] arr, int beginPosition, byte[] key)
        {
            if (arr == null || arr.Length <= beginPosition)
                return -1;
            if (key == null || arr.Length < key.Length)
                return -1;

            int i = -1;
            int j = -1;
            for (i = beginPosition; i < arr.Length; i++)
            {
                // Not enough bytes left for a complete match.
                if (arr.Length < key.Length + i)
                    break;

                for (j = 0; j < key.Length; j++)
                {
                    if (arr[i + j] != key[j])
                        break;
                }
                if (j == key.Length)
                    return i;
            }
            return -1;
        }

        #endregion

        #region string extensions

        /// <summary>
        /// Converts the string to a byte array using Encoding.Default.
        /// </summary>
        /// <param name="str">The text to encode.</param>
        /// <returns>The encoded bytes.</returns>
        public static byte[] ToByteArray(this string str)
        {
            return System.Text.Encoding.Default.GetBytes(str);
        }

        /// <summary>
        /// Converts the string to a MemoryStream positioned at its beginning.
        /// </summary>
        /// <param name="str">The text to expose as a stream.</param>
        /// <returns>The stream.</returns>
        public static Stream ToStream(this string str)
        {
            Stream stream = new MemoryStream(str.ToByteArray());
            // Reset the position to the beginning of the stream.
            stream.Seek(0, SeekOrigin.Begin);
            return stream;
        }

        #endregion
    }
}

-- 测试脚本

// Sample data: thirteen characters, "0".."9" followed by "111".
byte[] arr = "0123456789111".ToByteArray();

// Search keys; the trailing comment is the expected offset of the first occurrence in arr.
byte[] key1 = "123".ToByteArray(),   // expected 1
       key2 = "678".ToByteArray(),   // expected 6
       key3 = "911".ToByteArray(),   // expected 9
       key4 = "111".ToByteArray();   // expected 10

// Search through the Stream extension.
Stream sm = arr.ToStream();
long index1 = sm.Search(0, key1),
     index2 = sm.Search(0, key2),
     index3 = sm.Search(0, key3),
     index4 = sm.Search(0, key4);

// Search through the byte[] extension; the results should equal the stream results.
long index10 = arr.Search(0, key1),
     index20 = arr.Search(0, key2),
     index30 = arr.Search(0, key3),
     index40 = arr.Search(0, key4);

