After reading Adam Machanic’s comment to my previous post, I started to wonder if what I saw was all due to the fact that SQL Server blindly chooses to allocate at least one extent in response to each call to WriteToServer(), perhaps in its zeal to achieve minimal logging.
Such an algorithm sounds a bit crazy, or even dumb. But anything is possible.
To check if this is indeed the case, I wrote a simple test program below with most of the parameters hard coded. The only exception is the payload, i.e. the number of rows that WriteToServer() copies to the server. BTW, the target database needs to be in either bulk_logged or simple recovery mode.
using System;
using System.Collections;
using System.IO;
using System.Data;
using System.Data.SqlClient;
class Loader
{
static void Main(string[] args)
{
int payload = 0;
try
{
payload = int.Parse(args[0]);
}
catch
{
Console.WriteLine("***Err: The argument must be an integer.");
Environment.Exit(-1);
}
int row_counter = 0;
int write_2_server_counter = 0;
int extent_counter = 0;
using (SqlConnection bcp_conn = new SqlConnection("server=myServer;database=myDB;Integrated Security=SSPI;"),
conn = new SqlConnection("server=myServer;database=myDB;Integrated Security=SSPI;"))
{
try
{
bcp_conn.Open();
conn.Open();
try
{
SqlCommand cmd = new SqlCommand();
cmd.Connection = conn;
cmd.CommandType = CommandType.Text;
cmd.CommandText = "if object_id('dbo.junk') is not NULL " +
" drop table junk";
cmd.ExecuteNonQuery();
cmd.CommandText = "CREATE TABLE dbo.junk( " +
" c_id int NOT NULL, " +
" c_data char(950) NOT NULL)";
cmd.ExecuteNonQuery();
}
catch(Exception e)
{
Console.WriteLine(e.ToString());
Environment.Exit(-1);
}
DataTable dt = new DataTable();
dt.MinimumCapacity = 20000;
DataColumn c_id = new DataColumn();
c_id.DataType = System.Type.GetType("System.Int32");
c_id.ColumnName = "c_id";
dt.Columns.Add(c_id);
DataColumn c_data = new DataColumn();
c_data.DataType = System.Type.GetType("System.String");
c_data.ColumnName = "c_data";
dt.Columns.Add(c_data);
using (SqlBulkCopy bcp = new SqlBulkCopy(bcp_conn, SqlBulkCopyOptions.TableLock, null))
{
bcp.DestinationTableName = "dbo.junk";
bcp.BatchSize = 20000;
bcp.BulkCopyTimeout = 30000000;
for (int i = 1; i <= 1000; i++)
{
row_counter++;
DataRow row = dt.NewRow();
row["c_id"] = i;
row["c_data"] = "12345667890abcdefgh";
dt.Rows.Add(row);
if (row_counter % payload == 0)
{
bcp.WriteToServer(dt);
write_2_server_counter++;
extent_counter = GetExtentCount(conn);
Console.WriteLine("{0},{1}", write_2_server_counter, extent_counter);
dt.Rows.Clear();
}
}
if (dt.Rows.Count > 0)
{
bcp.WriteToServer(dt);
dt.Rows.Clear();
}
} // using
}
catch(Exception e)
{
Console.WriteLine(e.ToString());
Environment.Exit(-1);
}
}
} // Main()
static int GetExtentCount(SqlConnection conn)
{
int extent_count = 0;
try
{
SqlCommand cmd = new SqlCommand();
cmd.Connection = conn;
cmd.CommandType = CommandType.Text;
cmd.CommandText = "select cast(u.total_pages/8 as int) " +
" from sys.partitions p join sys.allocation_units u " +
" on p.partition_id = u.container_id " +
" where p.object_id = object_id('junk')";
extent_count = (int) cmd.ExecuteScalar();
}
catch(Exception e)
{
Console.WriteLine(e.ToString());
Environment.Exit(-1);
}
return extent_count;
}
} // class Loader
If you run this program (and I have run this against a SQL Server 2008 instance), you’ll find that as long as the payload parameter value you supply on the command line is 64 or less, SQL Server will allocate a new extent for every call to WriteToServer() no matter how little data is being written to the server and no matter how much free space is already allocated to the table
This behavior is utterly striking!!!
If the payload parameter value is 65, the amount of data no longer fits on one extent, and understandably SQL Server needs to allocate—and does allocate--two extents for each WriteToServer().
I’m all for reducing the amount of transaction logging whenever appropriate. Furthermore, if this space cost is the result of my explicitly configuring something to favor minimal logging, I’m willing to live with the consequence. But in this test nowhere did I say that I wanted minimal logging at any cost.
One may argue that this is an edge case because people are not expected to--or should not--call WriteToServer() with a small payload. But regardless, at least the potential space implication of doing so should have been clearly made known so that people can explicitly protect their code from falling into it. And in the software business, it’s often the edge cases that trip people up.