namespace MyUtilities {
using System;
using System.Diagnostics;
using System.Linq;
/// <summary>
/// Contains utility classes for working with numbers
/// </summary>
public static class Numbers {
/// <summary>
/// Contains utility methods for working with 32-bit signed integers
/// </summary>
public static class Int32 {
/// <summary>
/// The maximum length of the (base-10) string representation of a
/// value stored in a 32-bit integer
/// </summary>
private const int Int32MaxLength = 10;
/// <summary>
/// The (base-10) string representation of the maximum positive value a
/// 32-bit signed integer can hold
/// </summary>
private const string Int32Max = "2147483647";
/// <summary>
/// The (base-10) string representation of the minimum negative value a
/// 32-bit signed integer can hold, excluding the sign.
/// </summary>
private const string Int32Min = "2147483648";
/// <summary>
/// Parses a simple (base-10) string representation of a 32-bit
/// signed integer.
/// </summary>
/// <param name="str">The string to be parsed</param>
/// <returns>
/// A 32-bit signed integer equivalent to the number contained in
/// <paramref name="str"/>
/// </returns>
/// <exception cref="ArgumentNullException">
/// if <paramref name="str"/> is <c>null</c></exception>
/// <exception cref="OverflowException">
/// if the number represented by the specified substring is too
/// large to be stored in a 32-bit signed integer</exception>
/// <exception cref="FormatException">
/// if the specified substring contains any characters other than
/// ASCII digits ('0' - '9')
/// </exception>
/// <remarks>
/// This method provides a facility to parse simple 32-bit signed
/// integers quickly. "Simple" is used to mean that the number
/// consists of solely ASCII digits ('0' - '9'), without any
/// localization-specific strings. In other words, a number format
/// that the compiler would accept as a (base-10) 32-bit signed
/// integer.
/// <para>
/// Checking overflow is performed before checking format errors,
/// which may result in an <see cref="OverflowException"/> being
/// thrown when actually a <see cref="FormatException"/> should be
/// thrown.</para>
/// <para>
/// This method is equivalent to
/// <c><see cref="ParseSimple(string, int)"/>(str, 0)</c></para>
/// </remarks>
/// <example><code>
/// string str = "10";
/// int age = Numbers.Int32.ParseSimple(str);
/// </code></example>
/// <seealso cref="ParseSimple(string, int)"/>
/// <seealso cref="ParseTrusted(string)"/>
public static int ParseSimple(string str) {
return ParseSimple(str, 0);
}
/// <summary>
/// Parses a simple (base-10) string representation of a 32-bit
/// signed integer from a substring of the input.
/// </summary>
/// <param name="str">The string to be parsed</param>
/// <param name="start">The starting index of the substring to be parsed</param>
/// <returns>
/// A 32-bit signed integer equivalent to the number contained in
/// the specified substring of <paramref name="str"/>
/// </returns>
/// <exception cref="ArgumentNullException">
/// if <paramref name="str"/> is <c>null</c></exception>
/// <exception cref="ArgumentOutOfRangeException">
/// if <paramref name="start"/> is less than zero</exception>
/// <exception cref="OverflowException">
/// if the number represented by the specified substring is too
/// large to be stored in a 32-bit signed integer</exception>
/// <exception cref="FormatException">
/// if the specified substring contains any characters other than
/// ASCII digits ('0' - '9')
/// </exception>
/// <remarks>
/// This method provides a facility to parse simple 32-bit signed
/// integers quickly. "Simple" is used to mean that the number
/// consists of solely ASCII digits ('0' - '9'), without any
/// localization-specific strings. In other words, a number format
/// that the compiler would accept as a (base-10) 32-bit signed
/// integer.
/// <para>
/// Checking overflow is performed before checking format errors,
/// which may result in an <see cref="OverflowException"/> being
/// thrown when actually a <see cref="FormatException"/> should be
/// thrown.</para>
/// <para>
/// This method is equivalent to
/// <c><see cref="ParseSimple(string, int, int)"/>(str, start, str.Length)</c></para>
/// </remarks>
/// <example><code>
/// string str = "Ben is 10";
/// int age = Numbers.Int32.ParseSimple(str, 8);
/// </code></example>
/// <seealso cref="ParseSimple(string, int, int)"/>
/// <seealso cref="ParseTrusted(string, int)"/>
public static int ParseSimple(string str, int start) {
return ParseSimple(str, start, str.Length);
}
/// <summary>
/// Parses a simple (base-10) string representation of a 32-bit
/// signed integer from a substring of the input.
/// </summary>
/// <param name="str">The string to be parsed</param>
/// <param name="start">The starting index of the substring to be parsed</param>
/// <param name="end">
/// The exclusive ending index of the substring to be parsed</param>
/// <returns>
/// A 32-bit signed integer equivalent to the number contained in
/// the specified substring of <paramref name="str"/>
/// </returns>
/// <exception cref="ArgumentNullException">
/// if <paramref name="str"/> is <c>null</c></exception>
/// <exception cref="ArgumentOutOfRangeException">
/// if either <paramref name="start"/> or <paramref name="end"/> is
/// less than zero, if <paramref name="start"/> is greater than or
/// equal to <paramref name="end"/>, or if <paramref name="end"/>
/// is greater than <c>str.Length</c>
/// </exception>
/// <exception cref="OverflowException">
/// if the number represented by the specified substring is too
/// large to be stored in a 32-bit signed integer</exception>
/// <exception cref="FormatException">
/// if the specified substring contains any characters other than
/// ASCII digits ('0' - '9')
/// </exception>
/// <remarks>
/// This method provides a facility to parse simple 32-bit signed
/// integers quickly. "Simple" is used to mean that the number
/// consists of solely ASCII digits ('0' - '9'), without any
/// localization-specific strings. In other words, a number format
/// that the compiler would accept as a (base-10) 32-bit signed
/// integer.
/// <para>
/// The <paramref name="end"/> parameter is the exclusive index of
/// the end of the substring, i.e. after the last valid index for
/// the substring. For example, it could be <c>str.Length</c>.
/// </para>
/// <para>Checking overflow is performed before checking format
/// errors, which may result in an <see cref="OverflowException"/>
/// being thrown when actually a <see cref="FormatException"/>
/// should be thrown.</para>
/// </remarks>
/// <example><code>
/// string str = "Ben is 10 years old";
/// int age = Numbers.Int32.ParseSimple(str, 8, 10);
/// </code></example>
/// <seealso cref="ParseTrusted(string, int, int)"/>
public static int ParseSimple(string str, int start, int end) {
if (str == null) throw new ArgumentNullException("str");
if (start < 0) throw new ArgumentOutOfRangeException("start");
if (end > str.Length) throw new ArgumentOutOfRangeException("end");
if (start >= end) throw new ArgumentOutOfRangeException(null, "start >= end");
// handle signs
bool positive = true; // default when no sign is present
char firstChar = str[start];
if (firstChar == '-') { ++start; positive = false; }
else if (firstChar == '+') { ++start; }
// check for input consisting of a sign only
if (start == end) throw new FormatException();
// check for overflow
int length = end - start;
if (length > Int32MaxLength)
throw new OverflowException();
if (length == Int32MaxLength) {
string maxValue = positive ? Int32Max : Int32Min;
if (String.CompareOrdinal(str, start, maxValue, 0, length) > 0)
throw new OverflowException();
}
// parse input
int result = 0;
for (; start < end; ++start) {
char c = str[start];
if (c >= '0' && c <= '9')
result = unchecked(10 * result + (c - '0'));
else
throw new FormatException();
}
return (positive ? result : -result);
}
/// <summary>
/// Parses a simple (base-10) string representation of a 32-bit
/// signed integer from a trusted input string.
/// </summary>
/// <param name="str">The string to be parsed</param>
/// <returns>
/// A 32-bit signed integer equivalent to the number contained in
/// the <paramref name="str"/>
/// </returns>
/// <exception cref="NullReferenceException">
/// if <paramref name="str"/> is <c>null</c></exception>
/// <remarks>
/// This method provides a facility to parse trusted 32-bit signed
/// integers quickly. "Trusted" means that the number is "simple"
/// (see <see cref="ParseSimple(string)"/>), and the string is
/// known to contain a valid number. This method should only be
/// used when the input is fully trusted, because no validation
/// whatsoever will be performed. No checking will be made to
/// assure that the string only contains digits, or to check for
/// overflow, or even for correctness of parameters. It is up to
/// the caller to make sure that the parameters passed to this
/// method are absolutely correct. If these preconditions cannot be
/// verified, then use <see cref="ParseSimple(string, int, int)"/>
/// instead.
/// <para>
/// No check for overflow is performed. Overflow is allowed
/// silently.
/// </para>
/// <para>
/// This method is equivalent to
/// <c><see cref="ParseTrusted(string, int)"/>(str, 0)</c></para>
/// </remarks>
/// <example><code>
/// string str = "10";
/// int age = Numbers.Int32.ParseTrusted(str);
/// </code></example>
/// <seealso cref="ParseSimple(string, int)"/>
/// <seealso cref="ParseTrusted(string, int, int)"/>
public static int ParseTrusted(string str) {
return ParseTrusted(str, 0);
}
/// <summary>
/// Parses a simple (base-10) string representation of a 32-bit
/// signed integer from a substring of trusted input.
/// </summary>
/// <param name="str">The string to be parsed</param>
/// <param name="start">The starting index of the substring to be parsed</param>
/// <returns>
/// A 32-bit signed integer equivalent to the number contained in
/// the specified substring of <paramref name="str"/>
/// </returns>
/// <exception cref="NullReferenceException">
/// if <paramref name="str"/> is <c>null</c></exception>
/// <exception cref="IndexOutOfRangeException">
/// if <paramref name="start"/> is less than zero</c></exception>
/// <remarks>
/// This method provides a facility to parse trusted 32-bit signed
/// integers quickly. "Trusted" means that the number is "simple"
/// (see <see cref="ParseSimple(string)"/>), and the string is
/// known to contain a valid number. This method should only be
/// used when the input is fully trusted, because no validation
/// whatsoever will be performed. No checking will be made to
/// assure that the string only contains digits, or to check for
/// overflow, or even for correctness of parameters. It is up to
/// the caller to make sure that the parameters passed to this
/// method are absolutely correct. If these preconditions cannot be
/// verified, then use <see cref="ParseSimple(string, int, int)"/>
/// instead.
/// <para>
/// No check for overflow is performed. Overflow is allowed
/// silently.
/// </para>
/// <para>
/// This method is equivalent to
/// <c><see cref="ParseTrusted(string, int, int)"/>(str, start, str.Length)</c></para>
/// </remarks>
/// <example><code>
/// string str = "Ben is 10";
/// int age = Numbers.Int32.ParseTrusted(str, 8);
/// </code></example>
/// <seealso cref="ParseSimple(string, int)"/>
/// <seealso cref="ParseTrusted(string, int, int)"/>
public static int ParseTrusted(string str, int start) {
return ParseTrusted(str, start, str.Length);
}
/// <summary>
/// Parses a simple (base-10) string representation of a 32-bit
/// signed integer from a substring of trusted input.
/// </summary>
/// <param name="str">The string to be parsed</param>
/// <param name="start">The starting index of the substring to be parsed</param>
/// <param name="end">
/// The exclusive ending index of the substring to be parsed</param>
/// <returns>
/// A 32-bit signed integer equivalent to the number contained in
/// the specified substring of <paramref name="str"/>
/// </returns>
/// <exception cref="NullReferenceException">
/// if <paramref name="str"/> is <c>null</c></exception>
/// <exception cref="IndexOutOfRangeException">
/// if <paramref name="start"/> is less than zero, or if
/// <paramref name="end"/> is greater than <c>str.Length</c></exception>
/// <remarks>
/// This method provides a facility to parse trusted 32-bit signed
/// integers quickly. "Trusted" means that the number is "simple"
/// (see <see cref="ParseSimple(string)"/>), and the string is
/// known to contain a valid number. This method should only be
/// used when the input is fully trusted, because no validation
/// whatsoever will be performed. No checking will be made to
/// assure that the string only contains digits, or to check for
/// overflow, or even for correctness of parameters. It is up to
/// the caller to make sure that the parameters passed to this
/// method are absolutely correct. If these preconditions cannot be
/// verified, then use <see cref="ParseSimple(string, int, int)"/>
/// instead.
/// <para>
/// The <paramref name="end"/> parameter is the exclusive index of
/// the end of the substring, i.e. after the last valid index for
/// the substring. For example, it could be <c>str.Length</c>.
/// </para>
/// <para>
/// No check for overflow is performed. Overflow is allowed
/// silently.
/// </para>
/// </remarks>
/// <example><code>
/// string str = "Ben is 10 years old";
/// int age = Numbers.Int32.ParseTrusted(str, 8, 10);
/// </code></example>
/// <seealso cref="ParseSimple(string, int, int)"/>
public static int ParseTrusted(string str, int start, int end) {
// handle signs
bool positive = true; // default when no sign is present
char firstChar = str[start];
if (firstChar == '-') { ++start; positive = false; }
else if (firstChar == '+') { ++start; }
// check parameters
Debug.Assert(end <= str.Length, "end > str.Length");
Debug.Assert(start < end, "start >= end (including sign)");
// check format
Debug.Assert(str.Substring(start, end - start).All(c => c >= '0' && c <= '9'),
(positive ? "" : "-") + str.Substring(start, end - start) + " is not a valid Int32");
// check for overflow
Debug.Assert(end - start < Int32MaxLength
|| (end - start == Int32MaxLength
&& String.CompareOrdinal(str, start, (positive ? Int32Max : Int32Min), 0, Int32MaxLength) <= 0),
(positive ? "" : "-") + str.Substring(start, end - start) + " is too large for Int32");
// parse input
int result = 0;
for (; start < end; ++start) {
result = unchecked(10 * result + (str[start] - '0'));
}
return (positive ? result : -result);
}
}
}
}
Refactorings
No refactoring yet !
Rik Hemsley
January 7, 2009, January 07, 2009 11:51, permalink
How much faster is this than Int32.Parse? I'd like to see benchmarks if you have them as this could come in handy if it's significantly faster.
Hosam Aly
January 7, 2009, January 07, 2009 11:59, permalink
As per my measurements, ParseSimple is ~6.5 times faster, and ParseTrusted is ~11 times faster. Marc Gravell measured it as well (http://stackoverflow.com/questions/419952) and saw it ~10 times faster.
Hosam Aly
January 7, 2009, January 07, 2009 13:05, permalink
I have updated a few methods as per the suggestions I received on Stackoverflow.com (http://stackoverflow.com/questions/419952).
* Changed all "end" parameters to "length"
* Removed subtracting '0' from the loop and did it in one operation at the end
* Added methods for ParseTrustedPositive and ParseTrustedNegative
<b>Edit:</b> I have removed the documentation, since some people pointed out it was too distracting. The documentation is still available in the initial post above.
Here is the updated class (you can use diff to see changes):
namespace FBD.Utilities {
using System;
using System.Diagnostics;
using System.Linq;
public static class Numbers {
public static class Int32 {
#region Constants
private const int Int32MaxLength = 10;
private const string Int32Max = "2147483647";
private const string Int32Min = "2147483648";
private static readonly int[] zeroChars = new int[Int32MaxLength + 1];
static Int32() {
for (int i = 1; i < zeroChars.Length; ++i)
zeroChars[i] = '0' + 10 * zeroChars[i - 1];
}
#endregion
#region ParseSimple
public static int ParseSimple(string str) {
return ParseSimple(str, 0);
}
public static int ParseSimple(string str, int start) {
return ParseSimple(str, start, str.Length - start);
}
public static int ParseSimple(string str, int start, int length) {
if (str == null) throw new ArgumentNullException("str");
if (start < 0 || start >= str.Length) throw new ArgumentOutOfRangeException("start");
if (length <= 0 || start + length > str.Length) throw new ArgumentOutOfRangeException("length");
// handle signs
bool positive = true; // default when no sign is present
char firstChar = str[start];
if (firstChar == '-') { ++start; --length; positive = false; }
else if (firstChar == '+') { ++start; --length; }
// check for inputs consisting of a sign only
if (length == 0) throw new FormatException();
// check for overflow
if (length > Int32MaxLength)
throw new OverflowException();
if (length == Int32MaxLength) {
string maxValue = (positive ? Int32Max : Int32Min);
if (String.CompareOrdinal(str, start, maxValue, 0, length) > 0)
throw new OverflowException();
}
// parse input
int result = 0;
for (int end = start + length; start < end; ++start) {
char c = str[start];
if (c >= '0' && c <= '9')
result = unchecked(result * 10 + c); // subtracting '0' is performed after the loop
else
throw new FormatException();
}
result = unchecked(result - zeroChars[length]);
return (positive ? result : -result);
}
#endregion
#region ParseTrusted
public static int ParseTrusted(string str) {
return ParseTrusted(str, 0);
}
public static int ParseTrusted(string str, int start) {
return ParseTrusted(str, start, str.Length - start);
}
public static int ParseTrusted(string str, int start, int length) {
// handle signs
bool positive = true; // default when no sign is present
char firstChar = str[start];
if (firstChar == '-') { ++start; --length; positive = false; }
else if (firstChar == '+') { ++start; --length; }
// check parameters
Debug.Assert(start + length <= str.Length, "start + length > str.Length (including sign)");
// check format
Debug.Assert(str.Substring(start, length).All(c => c >= '0' && c <= '9'),
(positive ? "" : "-") + str.Substring(start, length) + " is not a valid Int32");
// check for overflow
Debug.Assert(length < Int32MaxLength
|| (length == Int32MaxLength
&& String.CompareOrdinal(str, start, (positive ? Int32Max : Int32Min), 0, Int32MaxLength) <= 0),
(positive ? "" : "-") + str.Substring(start, length - start) + " cannot fit in Int32");
// parse input
int result = 0;
for (int end = start + length; start < end; ++start) {
result = unchecked(result * 10 + str[start]); // subtracting '0' is performed after the loop
}
result = unchecked(result - zeroChars[length]);
return (positive ? result : -result);
}
#endregion
#region ParseTrustedPositive
public static int ParseTrustedPositive(string str) {
return ParseTrustedPositive(str, 0);
}
public static int ParseTrustedPositive(string str, int start) {
return ParseTrustedPositive(str, start, str.Length - start);
}
public static int ParseTrustedPositive(string str, int start, int length) {
if (str[start] == '+') { ++start; --length; }
// check parameters
Debug.Assert(start + length <= str.Length, "start + length > str.Length (including sign)");
// check format
Debug.Assert(str.Substring(start, length).All(c => c >= '0' && c <= '9'),
str.Substring(start, length) + " is not a valid Int32");
// check for overflow
Debug.Assert(length < Int32MaxLength
|| (length == Int32MaxLength
&& String.CompareOrdinal(str, start, Int32Max, 0, Int32MaxLength) <= 0),
str.Substring(start, length) + " cannot fit in Int32");
// parse input
int result = 0;
for (int end = start + length; start < end; ++start) {
result = unchecked(result * 10 + str[start]);
}
result = unchecked(result - zeroChars[length]);
return result;
}
#endregion
#region ParseTrustedNegative
public static int ParseTrustedNegative(string str) {
return ParseTrustedNegative(str, 0);
}
public static int ParseTrustedNegative(string str, int start) {
return ParseTrustedNegative(str, start, str.Length - start);
}
public static int ParseTrustedNegative(string str, int start, int length) {
Debug.Assert(str[start] == '-');
++start; --length; // skip the '-' sign
// check parameters
Debug.Assert(start >= 1, "start < 0");
Debug.Assert(start + length <= str.Length, "start + length > str.Length (including sign)");
// check format
Debug.Assert(str.Substring(start, length).All(c => c >= '0' && c <= '9'),
str.Substring(start, length) + " is not a valid Int32");
// check for overflow
Debug.Assert(length < Int32MaxLength
|| (length == Int32MaxLength
&& String.CompareOrdinal(str, start, Int32Min, 0, Int32MaxLength) <= 0),
"-" + str.Substring(start, length) + " cannot fit in Int32");
// parse input
int result = 0;
for (int end = start + length; start < end; ++start) {
result = unchecked(result * 10 + str[start]);
}
result = unchecked(result - zeroChars[length]);
return -result;
}
#endregion
}
}
}
ESV
January 7, 2009, January 07, 2009 20:03, permalink
How about appealing to ParseTrustedPostive from ParseTrustedNegative? Am I missing something?
public static int ParseTrustedNegative(string str, int start, int length) {
Debug.Assert(str[start] == '-');
++start;
--length; // skip the '-' sign
return -ParseTrustedPositive(str, start, length);
}
Hosam Aly
January 7, 2009, January 07, 2009 20:09, permalink
Well, this would cause a debugging assertion exception for the input "int.MinValue.ToString()", and I couldn't think of a way to work around it without changing method signature (or adding redirection to a private method, which may hurt performance, but I shall try to check it out).
I wrote a few parsing methods to use in place of Int32.Parse, as my requirements are much simpler (and performance is much more important). I would be thankful if you could advice me about anything you think needs enhancement.