August 06, 2013

Sort Number Strings Naturally

Scenario/Problem:When you sort strings that contain numbers (such as the digits from 1 to 10), they are “computer sorted” (that is, 10 comes before 2), but you need them “naturally” sorted such that 10 comes in order after 9.
Solution:You must write your own string comparer to do the comparison correctly. You can either write a class that implements IComparer or a method that conforms to the delegate Comparison (see Chapter 15, “Delegates, Events, and Anonymous Methods,” which discusses delegates). Listing 7.3 provides an example of the first option.

using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace NaturalSort
{
    class Program
    {
        static void Main(string[] args)
        {
            string[] originals = new string[]
            {
                "Part 1", "Part 2", "Part 3", "Part 4", "Part 5",
                "Part 6", "Part 7", "Part 8", "Part 9", "Part 10",
                "Part 11", "Part 12", "Part 13", "Part 14", "Part 15",
                "Part 16", "Part 17", "Part 18", "Part 19", "Part 20"
            };

            Console.WriteLine("Naive sort:");
            List copy = new List(originals);
            copy.Sort();
            foreach (string s in copy)
            {
                Console.WriteLine("\t{0}", s);
            }

            Console.WriteLine();
            Console.WriteLine("Natural Sort:");
            copy = new List(originals);
            copy.Sort(new NaturalSorter());
            foreach (string s in copy)
            {
                Console.WriteLine("\t{0}", s);
            }
        }
    }

    class NaturalSorter : IComparer
    {
        //use a buffer for performance since we expect
        //the Compare method to be called a lot
        private char[] _splitBuffer = new char[256];

        public int Compare(string x, string y)
        {
            //first split each string into segments
            //of non-numbers and numbers
            IList a = SplitByNumbers(x);
            IList b = SplitByNumbers(y);

            int aInt, bInt;
            int numToCompare = (a.Count < b.Count) ? a.Count : b.Count;
            for (int i = 0; i < numToCompare; i++)
            {
                if (a[i].Equals(b[i]))
                    continue;

                bool aIsNumber = Int32.TryParse(a[i], out aInt);
                bool bIsNumber = Int32.TryParse(b[i], out bInt);
                bool bothNumbers = aIsNumber && bIsNumber;
                bool bothNotNumbers = !aIsNumber && !bIsNumber;
                //do an integer compare
                if (bothNumbers) return aInt.CompareTo(bInt);
                //do a string compare
                if (bothNotNumbers) return a[i].CompareTo(b[i]);
                //only one is a number, which are
                //by definition less than non-numbers
                if (aIsNumber) return -1;
                return 1;
            }
            //only get here if one string is empty
            return a.Count.CompareTo(b.Count);
        }

        private IList  SplitByNumbers(string val)
        {
            System.Diagnostics.Debug.Assert(val.Length <= 256);
            List list = new List();
            int current = 0;
            int dest = 0;
            while (current < val.Length)
            {
                //accumulate non-numbers
                while (current < val.Length &&
                       !char.IsDigit(val[current]))
                {
                    _splitBuffer[dest++] = val[current++];
                }
                if (dest > 0)
                {
                    list.Add(new string(_splitBuffer, 0, dest));
                    dest = 0;
                }
                //accumulate numbers
                while (current < val.Length &&
                       char.IsDigit(val[current]))
                {
                    _splitBuffer[dest++] = val[current++];
                }
                if (dest > 0)
                {
                    list.Add(new string(_splitBuffer, 0, dest));
                    dest = 0;
                }
            }
            return list;
        }
    }
}


References: http://my.safaribooksonline.com/book/programming/csharp/9780672331985/strings/ch07lev1sec12