Two days ago, after posting what I thought was a pretty solid SQLCLR string splitting method, I received a comment telling me about a big thread on SQLServerCentral dedicated to the question of how best to split strings. So I jumped in, and went back and forth, and back and forth, and back... and forth...
Many, many messages and several revisions and re-revisions later, I am happy to present the newer, better, more bug-free SQLCLR string splitting function. Not only is it faster than the previous one I posted, but it also handles multi-character delimiters, ignores consecutive delimiters as well as pre and post delimiters, and best of all, it won't get itself stuck in an infinite loop in certain cases (always a really great stability feature).
This version walks the SqlChars character array rather than using the IndexOf method on the string, which we found to be a somewhat faster technique--and it's certainly the most scalable and memory efficient method I can imagine.
Special thanks to SQLServerCentral member Florian Reischl, who was the main person keeping the thread going with me during the last couple of days, and who managed to re-write my versions and eke out even better performance by modifying the algorithms. Great stuff--it was definitely the most fun I've had on a technical forum in quite a long time.
But without further ado, the code:
using System;
using System.Collections;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
public partial class UserDefinedFunctions
{
[Microsoft.SqlServer.Server.SqlFunction(
FillRowMethodName = "FillRow_Multi",
TableDefinition = "item nvarchar(4000)"
)
]
public static IEnumerator SplitString_Multi(
[SqlFacet(MaxSize = -1)]
SqlChars Input,
[SqlFacet(MaxSize = 255)]
SqlChars Delimiter
)
{
return (
(Input.IsNull || Delimiter.IsNull) ?
new SplitStringMulti(new char[0], new char[0]) :
new SplitStringMulti(Input.Value, Delimiter.Value));
}
public static void FillRow_Multi(object obj, out SqlString item)
{
item = new SqlString((string)obj);
}
public class SplitStringMulti : IEnumerator
{
public SplitStringMulti(char[] TheString, char[] Delimiter)
{
theString = TheString;
stringLen = TheString.Length;
delimiter = Delimiter;
delimiterLen = (byte)(Delimiter.Length);
isSingleCharDelim = (delimiterLen == 1);
lastPos = 0;
nextPos = delimiterLen * -1;
}
#region IEnumerator Members
public object Current
{
get
{
return new string(theString, lastPos, nextPos - lastPos);
}
}
public bool MoveNext()
{
if (nextPos >= stringLen)
return false;
else
{
lastPos = nextPos + delimiterLen;
for (int i = lastPos; i < stringLen; i++)
{
bool matches = true;
//Optimize for single-character delimiters
if (isSingleCharDelim)
{
if (theString[i] != delimiter[0])
matches = false;
}
else
{
for (byte j = 0; j < delimiterLen; j++)
{
if (((i + j) >= stringLen) || (theString[i + j] != delimiter[j]))
{
matches = false;
break;
}
}
}
if (matches)
{
nextPos = i;
//Deal with consecutive delimiters
if ((nextPos - lastPos) > 0)
return true;
else
{
i += (delimiterLen-1);
lastPos += delimiterLen;
}
}
}
lastPos = nextPos + delimiterLen;
nextPos = stringLen;
if ((nextPos - lastPos) > 0)
return true;
else
return false;
}
}
public void Reset()
{
lastPos = 0;
nextPos = delimiterLen * -1;
}
#endregion
private int lastPos;
private int nextPos;
private readonly char[] theString;
private readonly char[] delimiter;
private readonly int stringLen;
private readonly byte delimiterLen;
private readonly bool isSingleCharDelim;
}
};
Enjoy!