23da7be57867a7eb54b583983c89b375

Can we make the code between "START HERE" and "END HERE" faster? The code is iterating through the rows and columns of an ADODB Recordset, using Microsoft's generated wrapper classes. I'm streaming it into a buffer as a big flattened string. Can we make the flattening go faster? Notice there is already a trick for turning the BSTRs into ANSI strings (I'm only dealing with ASCII chars). Anything else?

// Lookup table of two-digit, zero-padded decimal strings for 0..59, indexed by
// value. Used to format date/time fields without a per-field integer-to-string
// conversion. Elements point at string literals, so they are pointer-to-const:
// writing through them would modify literal storage.
const char* sLeadingZeroIntegerValues[] = {
"00","01","02","03","04","05","06","07","08","09",
"10","11","12","13","14","15","16","17","18","19",
"20","21","22","23","24","25","26","27","28","29",
"30","31","32","33","34","35","36","37","38","39",
"40","41","42","43","44","45","46","47","48","49",
"50","51","52","53","54","55","56","57","58","59"};

// Scratch template for a formatted timestamp: the 'X' runs are placeholders
// that get overwritten in place, while the '-', ' ' and ':' separators stay put.
// The array size is deduced from the initializer: 19 characters plus the NUL.
char date_buf[] = "XXXX-XX-XX XX:XX:XX";

// Delimiters used in the flattened output. Each one is a single
// non-printable ASCII control character (codes 1 through 5).
const string DataTable::SECTION_DELIMITER("\1");
const string DataTable::ROW_DELIMITER("\2");
const string DataTable::COL_DELIMITER("\3");
const string DataTable::TABLE_DELIMITER("\4");
const string DataTable::NAME_VALUE_DELIMITER("\5");

/// Flattens an ADODB recordset into `ostrm` as one delimited string.
///
/// When the recordset is not already at end-of-file the output is:
///   column names  (separated by DataTable::COL_DELIMITER)
///   DataTable::SECTION_DELIMITER
///   column types  (separated by DataTable::COL_DELIMITER)
///   DataTable::SECTION_DELIMITER
///   rows (separated by DataTable::ROW_DELIMITER), each row being its cells
///   separated by DataTable::COL_DELIMITER.
/// DataTable::FINAL_DELIMITER is appended last in every case, including an
/// empty recordset.
///
/// Cell formatting:
///   - VT_BSTR: each UTF-16 code unit is narrowed to its low byte (only
///     correct for ASCII data).
///   - VT_I4 / VT_UI1 / VT_I2 / VT_BOOL: base-10 integer text.
///   - VT_DATE: "YYYY-MM-DD HH:MM:SS".
///   - VT_EMPTY / VT_NULL: a per-column-type placeholder.
///   - any other variant type: nothing is written for the cell.
///
/// @param pRs   Recordset to read. It is advanced with MoveNext() until
///              EndOfFile is true.
/// @param ostrm Stream that receives the flattened text.
void CDbInterface::LoadRecordsetIntoStreamAsBigString(
	_RecordsetPtr& pRs,  ostringstream& ostrm)
{
	DataTable::TypeList types;

	if (!pRs->EndOfFile)
	{
		// Fetch the Fields collection once; every loop below reuses it instead
		// of going back through pRs->Fields.
		ADODB::FieldsPtr pFields = pRs->Fields;
		const long nColumns = pFields->GetCount();

		// Column names
		for (long i = 0L; i < nColumns; i++)
		{
			if (i != 0) ostrm << DataTable::COL_DELIMITER;

			_bstr_t name = pFields->GetItem(i)->Name;
			const char* ansiName = (const char*) name;
			// Streaming a null char* would put the stream into a failed state.
			if (ansiName != 0) ostrm << ansiName;
		}

		ostrm << DataTable::SECTION_DELIMITER;

		// Column types
		for (long i = 0L; i < nColumns; i++)
		{
			if (i != 0) ostrm << DataTable::COL_DELIMITER;

			// Fallback for ADO types that are not mapped below, so that an
			// indeterminate value is never streamed or stored.
			// NOTE(review): String is chosen because its NULL placeholder is
			// empty text - confirm this is the desired default.
			DataTableDataType type = DataTableDataType::String;

			switch (pFields->GetItem(i)->GetType())
			{
			case ADODB::DataTypeEnum::adVarWChar:
			case ADODB::DataTypeEnum::adWChar:
				type = DataTableDataType::String;
				break;
			case ADODB::DataTypeEnum::adInteger:
				type = DataTableDataType::Integer;
				break;
			case ADODB::DataTypeEnum::adUnsignedTinyInt:
				type = DataTableDataType::Bool;
				break;
			case ADODB::DataTypeEnum::adDate:
				type = DataTableDataType::Date;
				break;
			default:
				break;
			}

			ostrm << type;
			types.push_back(type);
		}

		ostrm << DataTable::SECTION_DELIMITER;

		// Load the rows and columns
		char numBuf[80];                           // itoa scratch space
		char narrow[256];                          // staging area for narrowed BSTR text
		char stamp[20] = "0000-00-00 00:00:00";    // separators are written once, digits per cell
		::SYSTEMTIME sysTime;
		_variant_t var;
		bool firstRow = true;

		while (!pRs->EndOfFile)
		{
			if (!firstRow) ostrm << DataTable::ROW_DELIMITER;
			firstRow = false;

			for (long i = 0L; i < nColumns; i++)
			{
				// For every column in every row...  <<<<<<<<<<  START HERE
				if (i != 0) ostrm << DataTable::COL_DELIMITER;

				var = pFields->GetItem(i)->GetValue();

				switch (V_VT(&var))
				{
				case VT_BSTR:
				{
					// SysStringLen reads the full length prefix and returns 0
					// for a null BSTR. Reading the prefix through a char*
					// only sees its low byte, which breaks on long strings.
					const wchar_t* wide = V_BSTR(&var);
					const UINT nChars = ::SysStringLen(V_BSTR(&var));

					// Narrow into a stack buffer and hand the stream whole
					// chunks instead of one formatted insert per character.
					UINT done = 0;
					while (done < nChars)
					{
						UINT chunk = nChars - done;
						if (chunk > sizeof(narrow)) chunk = sizeof(narrow);
						for (UINT k = 0; k < chunk; ++k)
						{
							narrow[k] = static_cast<char>(wide[done + k]);
						}
						ostrm.write(narrow, chunk);
						done += chunk;
					}
					break;
				}

				// Integer-like variants: read the union member directly; the
				// variant type is already known, so no conversion is needed.
				case VT_I4:
					ostrm << itoa(static_cast<int>(V_I4(&var)), numBuf, 10);
					break;
				case VT_UI1:
					ostrm << itoa(static_cast<int>(V_UI1(&var)), numBuf, 10);
					break;
				case VT_I2:
					ostrm << itoa(static_cast<int>(V_I2(&var)), numBuf, 10);
					break;
				case VT_BOOL:
					ostrm << itoa(static_cast<int>(V_BOOL(&var)), numBuf, 10);
					break;

				case VT_DATE:
					// Nothing is written if the conversion fails, so the
					// lookup table is never indexed with unset fields.
					if (::VariantTimeToSystemTime(V_DATE(&var), &sysTime))
					{
						// Always four year digits, zero-padded, so short
						// years cannot drag a NUL into the text.
						const unsigned year = sysTime.wYear;
						stamp[0] = static_cast<char>('0' + (year / 1000) % 10);
						stamp[1] = static_cast<char>('0' + (year / 100) % 10);
						stamp[2] = static_cast<char>('0' + (year / 10) % 10);
						stamp[3] = static_cast<char>('0' + year % 10);
						memcpy(stamp + 5, sLeadingZeroIntegerValues[sysTime.wMonth], 2);
						memcpy(stamp + 8, sLeadingZeroIntegerValues[sysTime.wDay], 2);
						memcpy(stamp + 11, sLeadingZeroIntegerValues[sysTime.wHour], 2);
						memcpy(stamp + 14, sLeadingZeroIntegerValues[sysTime.wMinute], 2);
						memcpy(stamp + 17, sLeadingZeroIntegerValues[sysTime.wSecond], 2);
						ostrm.write(stamp, 19);
					}
					break;

				// End of the part we're trying to make faster...  <<<<<<<<<<  END HERE

				case VT_EMPTY:
				case VT_NULL:
					// NULL cell: String and Date columns contribute no text.
					if (types[i] == DataTableDataType::Integer)
					{
						ostrm << DataTableNullInteger;
					}
					else if (types[i] == DataTableDataType::Bool)
					{
						ostrm << "0";
					}
					break;

				default:
					// Variant types not listed above produce an empty cell.
					break;
				}
			}

			pRs->MoveNext();
		}
	}

	ostrm << DataTable::FINAL_DELIMITER;
}

Refactorings

No refactoring yet !

A8d3f35baafdaea851914b17dae9e1fc

Adam

November 18, 2008, November 18, 2008 06:19, permalink

No rating. Login to rate!

I don't know if it's any faster, but methinks this is much easier to read.

I assume you have identified this block as the bottleneck in your application? The string and integer cases do not seem particularly problematic to me, so I will assume that the call to VariantTimeToSystemTime is the problem. If you have a lot of duplicate dates, perhaps caching the results would yield an improvement?

Also, you don't need to check the type each time. Apart from NULL cells (which arrive as VT_NULL or VT_EMPTY), each column is going to be of the same type on every row iteration. The elimination of the conditional logic should increase the performance somewhat. I'll leave the implementation of that to the reader, although it is relatively trivial.

// Switch-based version of the per-cell formatting: one dispatch on the
// variant type instead of a chain of if/else comparisons.
// Uses `var`, `ostrm` and `sysTime` from the enclosing loop.
switch (V_VT(&var)) {
    case VT_BSTR:
        // Emit the low byte of every UTF-16 unit up to the terminator
        // (ASCII-only data assumed). A null BSTR is an empty string.
        if (var.bstrVal) {
            for (const char *text = (const char *)var.bstrVal; *text; text += 2) {
                ostrm << *text;
            }
        }

        break;

    case VT_I4:   // 32-bit integers belong with the other integer-like types
    case VT_UI1:
    case VT_I2:
    case VT_BOOL:
        ostrm << (int)var;
        break;

    case VT_DATE:
        ::VariantTimeToSystemTime(var, &sysTime);

        // Keep the "YYYY-MM-DD HH:MM:SS" separators of the original output.
        ostrm << sysTime.wYear;
        ostrm << '-' << sLeadingZeroIntegerValues[sysTime.wMonth];
        ostrm << '-' << sLeadingZeroIntegerValues[sysTime.wDay];
        ostrm << ' ' << sLeadingZeroIntegerValues[sysTime.wHour];
        ostrm << ':' << sLeadingZeroIntegerValues[sysTime.wMinute];
        ostrm << ':' << sLeadingZeroIntegerValues[sysTime.wSecond];
        break;
}
D41d8cd98f00b204e9800998ecf8427e

No6

February 15, 2009, February 15, 2009 14:43, permalink

No rating. Login to rate!

There is a GetString() method on the recordset which takes parameters for the delimiters but doesn't allow a fine degree of control over formatting.

Your refactoring





Format Copy from initial code

or Cancel