// Lookup table of two-character, zero-padded decimal strings for 0..59,
// indexed by value. Date/time fields (month, day, hour, minute, second) can be
// formatted with a table lookup instead of a numeric conversion.
// The pointees are const: they are string literals and must never be written to.
const char* sLeadingZeroIntegerValues[] = {
    "00","01","02","03","04","05","06","07","08","09",
    "10","11","12","13","14","15","16","17","18","19",
    "20","21","22","23","24","25","26","27","28","29",
    "30","31","32","33","34","35","36","37","38","39",
    "40","41","42","43","44","45","46","47","48","49",
    "50","51","52","53","54","55","56","57","58","59"};
// Scratch buffer pre-filled with a "YYYY-MM-DD HH:MM:SS" shaped template
// (19 characters plus the terminating NUL). The dashes, space and colons are
// fixed; the X positions are placeholders for date-formatting code to
// overwrite in place.
char date_buf[20] = "XXXX-XX-XX XX:XX:XX";
// Delimiters used in the flattened table string. Each one is a single
// non-printable ASCII control character (codes 1 through 5).
const string DataTable::SECTION_DELIMITER("\1");
const string DataTable::ROW_DELIMITER("\2");
const string DataTable::COL_DELIMITER("\3");
const string DataTable::TABLE_DELIMITER("\4");
const string DataTable::NAME_VALUE_DELIMITER("\5");
// Flattens an ADO recordset into `ostrm` as one delimited string.
//
// Output layout when the recordset is not at EndOfFile on entry:
//   <name> COL <name> ...  SECTION
//   <type> COL <type> ...  SECTION
//   <cell> COL <cell> ... [ROW <cell> COL <cell> ...]*
// followed by DataTable::FINAL_DELIMITER.
// When the recordset is already at EndOfFile, only FINAL_DELIMITER is written
// (no column names or types).
//
// pRs   - recordset to read; it is advanced with MoveNext() until EndOfFile.
// ostrm - stream that receives the flattened text.
//
// Cell formatting:
//   VT_BSTR                       -> low byte of every UTF-16 code unit
//                                    (the data is expected to be ASCII only)
//   VT_I4 / VT_UI1 / VT_I2 / VT_BOOL -> decimal text of (int)value
//   VT_DATE                       -> "YYYY-MM-DD HH:MM:SS"
//   VT_EMPTY / VT_NULL            -> DataTableNullInteger for Integer columns,
//                                    "0" for Bool columns, nothing otherwise
//   any other variant type        -> nothing
void CDbInterface::LoadRecordsetIntoStreamAsBigString(
    _RecordsetPtr& pRs, ostringstream& ostrm)
{
    DataTable::TypeList types;
    if (!pRs->EndOfFile)
    {
        ADODB::FieldsPtr pFields = pRs->Fields;
        const long nColumns = pFields->GetCount();

        // Column names
        for (long col = 0L; col < nColumns; col++)
        {
            if (col != 0) ostrm << DataTable::COL_DELIMITER;
            const _bstr_t bstrName = pFields->GetItem(col)->Name;
            const char* const pszName = bstrName;
            // An empty _bstr_t converts to a NULL pointer; streaming NULL
            // would put the stream into a failed state.
            if (pszName != NULL) ostrm << pszName;
        }
        ostrm << DataTable::SECTION_DELIMITER;

        // Column types
        for (long col = 0L; col < nColumns; col++)
        {
            const ADODB::DataTypeEnum dbType = pFields->GetItem(col)->GetType();
            if (col != 0) ostrm << DataTable::COL_DELIMITER;
            // Default for ADO types that are not mapped below, so that an
            // indeterminate value is never streamed or stored.
            DataTableDataType type = DataTableDataType::String;
            if (dbType == ADODB::DataTypeEnum::adInteger)
            {
                type = DataTableDataType::Integer;
            }
            else if (dbType == ADODB::DataTypeEnum::adUnsignedTinyInt)
            {
                type = DataTableDataType::Bool;
            }
            else if (dbType == ADODB::DataTypeEnum::adDate)
            {
                type = DataTableDataType::Date;
            }
            ostrm << type;
            types.push_back(type);
        }
        ostrm << DataTable::SECTION_DELIMITER;

        // Load the rows and columns
        char numBuf[80];
        // Local date template: separators stay fixed, digits are overwritten
        // per cell. Being local, it is not shared between concurrent calls.
        char dateText[] = "0000-00-00 00:00:00";
        ::SYSTEMTIME sysTime;
        _variant_t var;
        bool firstRow = true;
        while (!pRs->EndOfFile)
        {
            if (!firstRow)
                ostrm << DataTable::ROW_DELIMITER;
            firstRow = false;

            for (long col = 0L; col < nColumns; col++)
            {
                if (col != 0)
                    ostrm << DataTable::COL_DELIMITER;
                var = pFields->GetItem(col)->GetValue();
                const VARTYPE vt = V_VT(&var);

                if (vt == VT_BSTR)
                {
                    // Narrow by keeping the low byte of each UTF-16 code unit;
                    // no intermediate ANSI string is allocated.
                    // SysStringLen reads the full length prefix and returns 0
                    // for a NULL BSTR.
                    const BSTR bstr = V_BSTR(&var);
                    const UINT nChars = ::SysStringLen(bstr);
                    for (UINT c = 0; c < nChars; ++c)
                    {
                        ostrm << static_cast<char>(bstr[c]);
                    }
                }
                else if (vt == VT_I4 || vt == VT_UI1 || vt == VT_I2 || vt == VT_BOOL)
                {
                    ostrm << itoa(((int)var), numBuf, 10);
                }
                else if (vt == VT_DATE)
                {
                    // On conversion failure sysTime is not valid, so the cell
                    // is left empty instead of formatting stale data.
                    if (::VariantTimeToSystemTime(var, &sysTime))
                    {
                        // Zero-pad the year by hand so years below 1000 still
                        // occupy exactly four characters.
                        const unsigned year = sysTime.wYear;
                        dateText[0] = static_cast<char>('0' + (year / 1000) % 10);
                        dateText[1] = static_cast<char>('0' + (year / 100) % 10);
                        dateText[2] = static_cast<char>('0' + (year / 10) % 10);
                        dateText[3] = static_cast<char>('0' + year % 10);
                        memcpy(dateText + 5, sLeadingZeroIntegerValues[sysTime.wMonth], 2);
                        memcpy(dateText + 8, sLeadingZeroIntegerValues[sysTime.wDay], 2);
                        memcpy(dateText + 11, sLeadingZeroIntegerValues[sysTime.wHour], 2);
                        memcpy(dateText + 14, sLeadingZeroIntegerValues[sysTime.wMinute], 2);
                        memcpy(dateText + 17, sLeadingZeroIntegerValues[sysTime.wSecond], 2);
                        ostrm << dateText;
                    }
                }
                else if (vt == VT_EMPTY || vt == VT_NULL)
                {
                    // Null cells: String and Date columns stay empty.
                    if (types[col] == DataTableDataType::Integer)
                    {
                        ostrm << DataTableNullInteger;
                    }
                    else if (types[col] == DataTableDataType::Bool)
                    {
                        ostrm << "0";
                    }
                }
            }
            pRs->MoveNext();
        }
    }
    ostrm << DataTable::FINAL_DELIMITER;
}
Refactorings
No refactoring yet !
Adam
November 18, 2008, November 18, 2008 06:19, permalink
I don't know if it's any faster, but methinks this is much easier to read.
I assume you have identified this block as the bottleneck in your application? The string and integer cases do not seem particularly problematic to me, so I will assume that the call to VariantTimeToSystemTime is the problem. If you have a lot of duplicate dates, perhaps caching the results would yield an improvement?
Also, you don't need to check the type each time. Each column is going to be of the same type on every row iteration. The elimination of the conditional logic should increase the performance somewhat. I'll leave the implementation of that to the reader, although it is relatively trivial.
// Per-cell conversion written as a single switch on the variant type.
// Intended as a drop-in for the START HERE / END HERE region: it relies on the
// surrounding locals var, ostrm and sysTime.
switch (V_VT(&var)) {
case VT_BSTR:
    // Keep the low byte of each UTF-16 code unit (ASCII-only data).
    // A NULL BSTR is treated as an empty string.
    if (var.bstrVal != NULL) {
        for (const wchar_t *ch = var.bstrVal; *ch; ++ch) {
            ostrm << (char)*ch;
        }
    }
    break;
case VT_I4:
case VT_UI1:
case VT_I2:
case VT_BOOL:
    ostrm << (int)var;
    break;
case VT_DATE:
    // Emits year-month-day hour:minute:second with the same separators as
    // the "XXXX-XX-XX XX:XX:XX" template used by the original code.
    ::VariantTimeToSystemTime(var, &sysTime);
    ostrm << sysTime.wYear << '-';
    ostrm << sLeadingZeroIntegerValues[sysTime.wMonth] << '-';
    ostrm << sLeadingZeroIntegerValues[sysTime.wDay] << ' ';
    ostrm << sLeadingZeroIntegerValues[sysTime.wHour] << ':';
    ostrm << sLeadingZeroIntegerValues[sysTime.wMinute] << ':';
    ostrm << sLeadingZeroIntegerValues[sysTime.wSecond];
    break;
}
No6
February 15, 2009, February 15, 2009 14:43, permalink
There is a GetString() method on the recordset which takes parameters for the delimiters but doesn't allow a fine degree of control over formatting.
Can we make the code between "START HERE" and "END HERE" faster? The code is iterating through the rows and columns of an ADODB Recordset, using Microsoft's generated wrapper classes. I'm streaming it into a buffer as a big flattened string. Can we make the flattening go faster? Notice there is already a trick for turning the BSTRs into ANSI strings (I'm only dealing with ASCII chars). Anything else?