F01cbd2238e2a78e4c43fa596f51d6a1

The code below is evil. It's a massive state machine that needs to be refactored to F#, WF, or anything, so long as it becomes more manageable.

/// <summary>
    /// Walks <paramref name="text"/> one character at a time and rewrites plain-text
    /// shorthand (quotes, dashes, arrows, ellipses, ordinals, <c>*emphasis*</c>,
    /// <c>`code`</c>, <c>{{pre}}</c>, <c>[references]</c>, ...) into HTML markup and entities.
    /// </summary>
    /// <param name="text">Initial text.</param>
    /// <param name="options">Conversion options. See <see cref="T:DmitriNesteruk.TypograFixImpl.ConversionOptions"/>.</param>
    /// <param name="timeoutMsec">The timeout value in milliseconds.</param>
    /// <param name="createReferences">Indicates whether the shared reference list should be
    /// re-created before processing. If in doubt, set to <c>true</c>.</param>
    /// <returns>The altered text, or an empty string when <paramref name="text"/> is empty.</returns>
    /// <remarks>
    /// The method is not re-entrant: on entry it resets the static members <c>context</c>,
    /// <c>backquoteOpen</c> and <c>emphasisOpen</c>, and it appends to the static
    /// <c>references</c> list. The only sanctioned re-entry is the recursive call made while
    /// flattening a <c>[reference]</c>, which saves and restores that state around the call.
    /// The timeout is checked at the bottom of the character loop, so branches that leave the
    /// iteration with <c>continue</c> skip the check for that character.
    /// </remarks>
    /// <exception cref="Exception">Will throw exception if source HTML is malformed.</exception>
    /// <exception cref="TimeoutException">Will throw if execution time exceeds timeout.</exception>
    private static string Transform([NotNull]string text, ConversionOptions options,
                                    [GreaterThan(0)]int timeoutMsec, bool createReferences)
    {
      int len = text.Length;
      if (len == 0)
        return string.Empty;

      Stopwatch st = new Stopwatch();
      st.Start();

      // Output is usually somewhat longer than the input because of entities and tags.
      HtmlBuilder hb = new HtmlBuilder((int)(len * 1.5 + 1));
      context = new Stack<string>();
      if (createReferences)
        references = new List<string>();
      backquoteOpen = emphasisOpen = false;
      // Tracks currently open straight quotes (' or ") so the matching one can be closed.
      Stack<char> quoteStack = new Stack<char>();
      for (int i = 0; i < len; ++i)
      {
        // NOTE(review): CannotReplace and InCode are defined outside this method; they are
        // presumably derived from the `context` stack - confirm against their definitions.
        switch (text[i])
        {
          case '{':
            #region { (possible {{ pre)
            if (CannotReplace || !options.DoubleCurlyToPre || i + 1 == len) goto default;
            if (text[i + 1] == '{')
            {
              hb.Append("<pre>");
              ++i; // consume the second brace
              context.Push("pre");
              break;
            }
            goto default;
            #endregion
          case '}':
            #region } (possible }} /pre)
            // only close a <pre> that was opened via {{
            if ((context.Count == 0 || context.Peek() != "pre") || !options.DoubleCurlyToPre || i + 1 == len) goto default;
            if (text[i + 1] == '}')
            {
              hb.Append("</pre>");
              ++i; // consume the second brace
              context.Pop();
              break;
            }
            goto default;
            #endregion
          case '~':
            #region ~ (approx)
            if (CannotReplace || !options.ConvertApprox) goto default;
            if (i + 1 < text.Length && text[i + 1] == '=')
            {
              hb.Append("&#8776;");
              ++i;
              break;
            }
            goto default;
            #endregion
          case '[':
            #region [ (possible reference)
            if (CannotReplace || options.ReferenceStyle == ReferenceStyle.None)
              goto default;
            // look for the first closing bracket after this one
            int idxClosing = text.IndexOf(']', i + 1);
            // no closing bracket, or an empty "[]": treat the bracket as ordinary text
            if (idxClosing == -1 || idxClosing == i + 1)
              goto default;
            // grab the content string
            string reference = text.Substring(i + 1, idxClosing - i - 1);
            // the list may never have been created when createReferences is false
            if (references == null)
              references = new List<string>();
            references.Add(reference);
            // The nested Transform call re-initialises the shared parser state, so it has
            // to be saved here and restored afterwards; otherwise an emphasis, backquote
            // or tag context opened before the reference is forgotten by the outer pass.
            Stack<string> savedContext = context;
            bool savedBackquoteOpen = backquoteOpen;
            bool savedEmphasisOpen = emphasisOpen;
            string flattened;
            try
            {
              // transform the reference text, then strip every tag to get a plain tooltip
              flattened =
                Regex.Replace(
                  Transform(reference, options, 5000, false),
                  @"<(.|\n)*?>", string.Empty);
            }
            finally
            {
              context = savedContext;
              backquoteOpen = savedBackquoteOpen;
              emphasisOpen = savedEmphasisOpen;
            }
            switch (options.ReferenceStyle)
            {
              default:
                hb.Append(string.Format("[<a href=\"#Reference{0}\" title=\"{1}\">{0}</a>]",
                                        references.Count, flattened));
                break;
              case ReferenceStyle.Superscript:
                hb.Append(string.Format("<sup><small><a href=\"#Reference{0}\" title=\"{1}\">{0}</a></small></sup>",
                                        references.Count, flattened));
                break;
            }
            i = idxClosing; // skip past the closing bracket
            break;
            #endregion
          case '`':
            #region `
            // a backquote inside code that was not opened by a backquote is literal
            if ((InCode && !backquoteOpen) || !options.BackquoteToCode)
              goto default;
            if (backquoteOpen)
            {
              hb.Append("</code>");
              context.Pop();
            }
            else
            {
              hb.Append("<code>");
              context.Push("code");
            }
            backquoteOpen = !backquoteOpen;
            break;
            #endregion
          case '*':
            #region * possible emphasis or bold
            if (InCode) goto default;
            if (emphasisOpen)
            {
              // closing marker: ** closes strong, * closes em
              if (i + 1 != len && text[i + 1] == '*')
              {
                hb.Append("</strong>");
                i++;
              }
              else
                hb.Append("</em>");
              emphasisOpen = !emphasisOpen;
              continue;
            }
            // should we open it? only at the start or after a non-alphanumeric character
            if (i == 0 || !char.IsLetterOrDigit(text[i - 1]))
            {
              if (i + 1 != len && text[i + 1] == '*')
              {
                hb.Append("<strong>");
                i++;
              }
              else
                hb.Append("<em>");
              emphasisOpen = true;
              continue;
            }
            goto default;
            #endregion
          case '<':
            #region <
            // if this is occuring between code tags, check to see if replacement needs to be done
            if (i + 1 != text.Length && text[i + 1] != '/' && InCode && options.EscapeLessGreaterInCode)
            {
              hb.Append("&lt;");
              continue;
            }
            // otherwise copy the tag through verbatim while tracking its name in `context`
            hb.Append('<');
            StringBuilder tb = new StringBuilder(); // accumulates the tag name
            while (++i < text.Length)
            {
              hb.Append(text[i]);
              if (text[i] == '\"')
              {
                // this (opening quote) has been appended
                // append everything until we meet a closing one
                // NOTE(review): an unterminated attribute quote runs past the end of the
                // text and throws here; kept because the documented contract is that
                // malformed HTML throws - confirm whether a friendlier error is wanted.
                if (i + 1 < text.Length)
                {
                  do
                  {
                    ++i;
                    hb.Append(text[i]);
                  } while (i < text.Length && text[i] != '\"');
                }
              }
              else if (char.IsWhiteSpace(text[i]))
              {
                // we're in this tag's context
                context.Push(tb.ToString());
              }
              else if (text[i] == '/')
              {
                tb = new StringBuilder();
                // Leave the current tag context. The stack is legitimately empty when a
                // tag is followed by /, e.g. <br/>, so an empty stack is not an error.
                if (context.Count > 0)
                  context.Pop();
                // move until we meet > and add it
                while (++i < text.Length)
                {
                  if (text[i] == '>')
                    goto default; // the default section emits the '>'
                  hb.Append(text[i]);
                }
              }
              else if (text[i] == '>')
              {
                // if closed, break this loop
                context.Push(tb.ToString());
                break;
              }
              else
                tb.Append(text[i]);
            }
            break;
            #endregion
          case '>':
            #region >
            // if this is occuring between code tags, check to see if replacement needs to be done
            if (i + 1 != text.Length && text[i + 1] != '/' && InCode && options.EscapeLessGreaterInCode)
            {
              hb.Append("&gt;");
              continue;
            }
            goto default;
            #endregion
          case '-':
            #region -
            if (CannotReplace) goto default;
            // if surrounded by spaces, it's an en dash
            if (i > 0 && i + 1 < text.Length && text[i - 1] == ' ' && text[i + 1] == ' ')
              hb.Append("&ndash;");
            // a double dash not followed by '&' is an em dash
            else if (i + 1 < text.Length && text[i + 1] == '-' &&
                     (i + 2 == text.Length || text[i + 2] != '&'))
            {
              hb.Append("&mdash;");
              ++i; // ignore the second dash
            }
            // "--&gt;" (an already-escaped "-->") is a right arrow
            else if (i + 5 < text.Length &&
                     text[i + 1] == '-' &&
                     text[i + 2] == '&' &&
                     text[i + 3] == 'g' &&
                     text[i + 4] == 't' &&
                     text[i + 5] == ';')
            {
              hb.Append("&rarr;");
              i += 5;
            }
            else goto default;
            break;
            #endregion
          case '=':
            #region =
            if (CannotReplace) goto default;
            // "==&gt;" (an already-escaped "==>") is a double right arrow
            if (i + 5 < len &&
                text[i + 1] == '=' &&
                text[i + 2] == '&' &&
                text[i + 3] == 'g' &&
                text[i + 4] == 't' &&
                text[i + 5] == ';')
            {
              hb.Append("&rArr;");
              i += 5;
              break;
            }
            goto default;
            #endregion
          case '(':
            #region (
            if (CannotReplace) goto default;
            // (c) and (r), case-insensitive
            if (i + 2 < text.Length && text[i + 2] == ')')
            {
              if (char.ToLower(text[i + 1]) == 'c')
              {
                hb.Append("&copy;");
                i += 2;
              }
              else if (char.ToLower(text[i + 1]) == 'r')
              {
                hb.Append("&reg;");
                i += 2;
              }
              else goto default;
            }
            // (tm), case-insensitive
            else if (i + 3 < text.Length && char.ToLower(text[i + 1]) == 't'
                     && char.ToLower(text[i + 2]) == 'm'
                     && char.ToLower(text[i + 3]) == ')')
            {
              hb.Append("&#8482;");
              i += 3;
            }
            else goto default;
            break;
            #endregion
          case '\'': // single quotes
            #region '
            if (CannotReplace) goto default;

            // a quote between two alphanumerics is an apostrophe: force closing
            if (i > 0 && i + 1 < text.Length
                && char.IsLetterOrDigit(text[i - 1])
                && char.IsLetterOrDigit(text[i + 1]))
            {
              hb.Append("&rsquo;"); // stack ignored
            }
            else if (quoteStack.Count > 0 && quoteStack.Peek() == '\'')
            {
              hb.Append("&rsquo;");
              quoteStack.Pop();
            }
            else
            {
              // there is no single quote on top of the quote stack
              // it's anyone's guess whether the quote is opening or closing
              if (i + 1 < text.Length && text[i + 1] == ' ')
                hb.Append("&rsquo;");
              else
                hb.Append("&lsquo;");
              quoteStack.Push('\'');
            }
            break;
            #endregion
          case '\"': // double quotes
            #region "
            if (CannotReplace) goto default;
            // if there is a quote on the stack, close it
            if (quoteStack.Count > 0 && quoteStack.Peek() == '\"')
            {
              if (options.AngledDoubleQuotes)
                hb.Append("»");
              else
                hb.Append("&rdquo;");
              quoteStack.Pop();
            }
            else
            {
              if (options.AngledDoubleQuotes)
                hb.Append("«");
              else
                hb.Append("&ldquo;");
              quoteStack.Push('\"');
            }
            break;
            #endregion
          case '.': // possible ellipsis
            #region .
            if (i + 2 < text.Length && text[i + 1] == '.' && text[i + 2] == '.')
            {
              if (i > 0 && i + 3 <= text.Length)
              {
                // the ellipsis only 'counts' if it does not coexist with other elements,
                // i.e. only spaces/tabs separate it from the surrounding line breaks;
                // in that case it becomes a vertical ellipsis
                bool isValidFront = true;
                bool isValidBack = true;
                for (int b = i - 1; b >= 0; --b)
                {
                  isValidBack &= (Environment.NewLine + " \t").Contains(text[b].ToString());
                  if (Environment.NewLine.Contains(text[b].ToString()) || !isValidBack) break;
                }
                for (int f = i + 3; f < len; ++f)
                {
                  isValidFront &= (Environment.NewLine + " \t").Contains(text[f].ToString());
                  if (Environment.NewLine.Contains(text[f].ToString()) || !isValidFront) break;
                }
                if (isValidFront && isValidBack)
                {
                  hb.Append("&#8942;");
                  i += 2;
                  continue;
                }
              }
              if (CannotReplace) goto default;
              hb.Append("&hellip;");
              i += 2;
            }
            else goto default;
            break;
            #endregion
          case 'x':
            #region x
            if (CannotReplace) goto default;
            // an 'x' after a digit and before a digit, whitespace, '<' or the end of the
            // text is a multiplication sign (e.g. 3x4, 3x )
            if (i > 0 && char.IsDigit(text[i - 1])
                &&
                ((
                   i + 1 < text.Length &&
                   (char.IsDigit(text[i + 1]) || char.IsWhiteSpace(text[i + 1]) || text[i + 1] == '<')
                 ) || (i + 1 == text.Length)))
            {
              hb.Append("&times;");
            }
            else goto default;
            break;
            #endregion
          case '&':
            #region &
            if (CannotReplace) goto default;
            // a bare ampersand (followed by a space or a tag) must be escaped
            if (i + 1 < text.Length &&
                (text[i + 1] == '<' || text[i + 1] == ' '))
            {
              hb.Append("&amp;");
            }
            else if (i + 5 < text.Length)
            {
              // "&lt;--" and "&lt;==" (already-escaped "<--" / "<==") are left arrows
              if (text[i + 1] == 'l' && text[i + 2] == 't' && text[i + 3] == ';')
              {
                if (text[i + 4] == '-' && text[i + 5] == '-')
                {
                  hb.Append("&larr;");
                  i += 5;
                }
                else if (text[i + 4] == '=' && text[i + 5] == '=')
                {
                  hb.Append("&lArr;");
                  i += 5;
                }
                else goto default;
              }
              else goto default;
            }
            else goto default;
            break;
            #endregion
          case '+':
            #region +
            if (CannotReplace || !options.ConvertPM || i + 1 == text.Length || text[i + 1] != '-')
              goto default;
            hb.Append("&plusmn;");
            i++;
            break;
            #endregion
          case '^':
            #region ^
            // only ^1, ^2 and ^3 have dedicated superscript entities
            if (CannotReplace || !options.ConvertSuper || i + 1 == text.Length || "123".IndexOf(text[i + 1]) == -1)
              goto default;
            hb.Append("&sup" + text[i + 1] + ";");
            i++;
            break;
            #endregion
          case '#':
            {
              #region # (diactrics)
              if (CannotReplace || options.DiactricsOptions == DiactricsOptions.None
                  || i == 0 || i + 1 == len)
                goto default;
              if (options.DiactricsOptions != DiactricsOptions.Swedish)
                goto default;

              // <letter>#<digit> selects an accented form of the letter just written
              char prev = text[i - 1];
              char digit = text[i + 1];
              char accented;
              if (prev == 'o' && digit == '2') accented = 'ö';
              else if (prev == 'O' && digit == '2') accented = 'Ö';
              else if (prev == 'a' && digit == '1') accented = 'å';
              else if (prev == 'A' && digit == '1') accented = 'Å';
              else if (prev == 'a' && digit == '2') accented = 'ä';
              else if (prev == 'A' && digit == '2') accented = 'Ä';
              else goto default; // not a known combination: keep the '#' as ordinary text

              // alter last character in buffer
              hb.Replace(prev, accented, hb.Length - 1, 1);
              ++i; // consume the digit
              continue;
              #endregion
            }
          case '@':
            #region @
            if (CannotReplace || !options.ObfuscateAt)
              goto default;
            hb.Append("/a&tau;/");
            break;
            #endregion
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9':
          case '0':
            {
              #region digits
              if (CannotReplace) goto default;
              // keep accumulating while digit (or thousands separator)
              int j;
              StringBuilder acc = new StringBuilder();
              for (j = i; j < text.Length; ++j)
              {
                if (char.IsDigit(text[j]) || text[j] == ',')
                  acc.Append(text[j]);
                else break;
              }
              // need two more characters after the number for an ordinal suffix
              if (j + 1 >= text.Length)
                goto default;
              string accStr = acc.ToString();
              char lastChar = accStr[accStr.Length - 1];
              // In every branch i ends on the last suffix character; the loop's ++i
              // then moves past it.
              if (lastChar == '1' && text[j] == 's' && text[j + 1] == 't')
              {
                hb.Append(accStr + "<sup>st</sup>");
                i += acc.Length + 1;
              }
              else if (lastChar == '2' && text[j] == 'n' && text[j + 1] == 'd')
              {
                hb.Append(accStr + "<sup>nd</sup>");
                i += acc.Length + 1;
              }
              else if (lastChar == '3' && text[j] == 'r' && text[j + 1] == 'd')
              {
                hb.Append(accStr + "<sup>rd</sup>");
                i += acc.Length + 1;
              }
              else if (text[j] == 't' && text[j + 1] == 'h')
              {
                hb.Append(accStr + "<sup>th</sup>");
                i += acc.Length + 1;
              }
              else goto default; // emits only the current digit; the rest is rescanned
              #endregion
            }
            break;

            #region reverse substitions - in case you paste from Word
          case '‘':
            if (CannotReplace) goto default;
            hb.Append("&lsquo;");
            break;

          case '’':
            if (CannotReplace) goto default;
            hb.Append("&rsquo;");
            break;

          case '“':
            if (CannotReplace) goto default;
            hb.Append("&ldquo;");
            break;

          case '”':
            if (CannotReplace) goto default;
            hb.Append("&rdquo;");
            break;

          case '…':
            if (CannotReplace) goto default;
            hb.Append("&hellip;");
            break;

          case '–':
            if (CannotReplace) goto default;
            hb.Append("&ndash;");
            break;
            #endregion

          default:
            // control characters are only allowed inside the no-fly zone (i.e., code, pre, script tags);
            // elsewhere they are dropped unless they are part of a line break
            if (CannotReplace
                || !char.IsControl(text[i])
                || Environment.NewLine.Contains(text[i].ToString()))
              hb.Append(text[i]);
            break;
        }
        if (st.ElapsedMilliseconds > timeoutMsec)
        {
          st.Stop();
          throw new TimeoutException();
        }
      }
      st.Stop();
      return hb.ToString();
    }

Refactorings

No refactorings yet!

B20991026b45ee0d77e9bbb2b6776097

Nik Radford

February 26, 2009, February 26, 2009 14:17, permalink

No rating. Login to rate!

Is this just trying to HTML-encode strings?

B20991026b45ee0d77e9bbb2b6776097

Sekhat

February 26, 2009, February 26, 2009 14:19, permalink

No rating. Login to rate!

Though I guess not, I just didn't want to read it all straight away.

Your refactoring





Format Copy from initial code

or Cancel