Mar 19, 2016

Simple Performance Test of List<T> and HashSet<T> in C#

It's often said that the search performance of HashSet<T> is faster than of List<T> and this is  because HashSet<T> has a hash table. I surveyed the performance of the add, contains, remove and where methods of HashSet<T> and List<T>.


I ran the following simple tests.
Expected results were obtained. The power of hash lookups is attractive :)

I. Test

・List<T>
using System;
using System.Collections.Generic;
using System.Linq;
using System.Diagnostics;


namespace WhiteChocolate
{
    class Program
    {
        static void Main(string[] args)
        {
            Enumerable.Range(0, 10).ToList().ForEach(x =>
            {
                TestList(500000 * x);
            });
        }

        public static void TestList(int num)
        {
            var list = new List<string>();
           
            // 1. Add
            var stopWatch = Stopwatch.StartNew();
            Enumerable.Range(0, num).ToList().ForEach(x => list.Add(x.ToString()));
            stopWatch.Stop();
            Console.WriteLine(string.Format("List Add: {0} ms", stopWatch.ElapsedMilliseconds.ToString()));


            // 2. Contains
            var count = (num / 2).ToString();
            stopWatch = Stopwatch.StartNew();
            var result = list.Contains(count);
            stopWatch.Stop();
            Console.WriteLine(string.Format("List Contains: {0} ms", stopWatch.ElapsedMilliseconds.ToString()));


            // 3. Add (if not exists)
            count = (num * 2).ToString();
            stopWatch = Stopwatch.StartNew();
            if (!list.Contains(count)) list.Add(count);
            stopWatch.Stop();
            Console.WriteLine(string.Format("List Add (if not exists): {0} ms", stopWatch.ElapsedMilliseconds.ToString()));


            // 4. Remove (if exists)
            count = (num * 2).ToString();
            stopWatch = Stopwatch.StartNew();
            if (!list.Contains(count)) list.Remove(count);
            stopWatch.Stop();
            Console.WriteLine(string.Format("List Remove (if exists): {0} ms", stopWatch.ElapsedMilliseconds.ToString()));


            // 5. Where
            count = (num / 4).ToString();
            stopWatch = Stopwatch.StartNew();
            foreach (var one in list.Where(x => x == count)) Console.WriteLine(one);
            stopWatch.Stop();
            Console.WriteLine(string.Format("List Where: {0} ms", stopWatch.ElapsedMilliseconds.ToString()));
        }
    }
}

・HashSet<T>
using System;
using System.Collections.Generic;
using System.Linq;
using System.Diagnostics;


namespace StrawberryChocolate
{
    class Program
    {
        static void Main(string[] args)
        {
            Enumerable.Range(0, 10).ToList().ForEach(x =>
            {
                TestHashSet(500000 * x);
            });
        }

        public static void TestHashSet(int num)
        {
            var hashSet = new HashSet<string>();


            // 1. Add
            var stopWatch = Stopwatch.StartNew();
            Enumerable.Range(0, num).ToList().ForEach(x => hashSet.Add(x.ToString()));
            stopWatch.Stop();
            Console.WriteLine(string.Format("HashSet Add: {0} ms", stopWatch.ElapsedMilliseconds.ToString()));
           
            // 2. Contains
            var count = (num / 2).ToString();
            stopWatch = Stopwatch.StartNew();
            var result = hashSet.Contains(count);
            stopWatch.Stop();
            Console.WriteLine(string.Format("HashSet Contains: {0} ms", stopWatch.ElapsedMilliseconds.ToString()));


            // 3. Add (if not exists)
            count = (num * 2).ToString();
            stopWatch = Stopwatch.StartNew();
            if (!hashSet.Contains(count)) hashSet.Add(count);
            stopWatch.Stop();
            Console.WriteLine(string.Format("HashSet Add (if not exists): {0} ms", stopWatch.ElapsedMilliseconds.ToString()));
           
            // 4. Remove (if exists)
            count = (num * 2).ToString();
            stopWatch = Stopwatch.StartNew();
            if (!hashSet.Contains(count)) hashSet.Remove(count);
            stopWatch.Stop();
            Console.WriteLine(string.Format("HashSet Remove (if exists): {0} ms", stopWatch.ElapsedMilliseconds.ToString()));


            // 5. Where
            count = (num / 4).ToString();
            stopWatch = Stopwatch.StartNew();
            foreach (var one in hashSet.Where(x => x == count)) Console.WriteLine(one);
            stopWatch.Stop();
            Console.WriteLine(string.Format("HashSet Where: {0} ms", stopWatch.ElapsedMilliseconds.ToString()));
        }
    }
}


II. Result

1. Add
run the add method. add X string objects
X = 500000, 1000000, 1500000, 2000000, 2500000, 3000000, 3500000, 4000000, 4500000, 5000000


2. Contains
run the contains method against half the objects in a list of X string objects
X = 500000, 1000000, 1500000, 2000000, 2500000, 3000000, 3500000, 4000000, 4500000, 5000000


3. Add (If not exists)
run the contains method against all objects in a list of X string objects and then run the add method
X = 500000, 1000000, 1500000, 2000000, 2500000, 3000000, 3500000, 4000000, 4500000, 5000000


4. Remove (If exists)
run the contains method against a half objects in a list of X string objects and then run the remove method
X = 500000, 1000000, 1500000, 2000000, 2500000, 3000000, 3500000, 4000000, 4500000, 5000000


5. Where
run the where method against all objects in a list of X string objects (this method sounds like full scan in some way :))
X = 500000, 1000000, 1500000, 2000000, 2500000, 3000000, 3500000, 4000000, 4500000, 5000000