The C# battle of the SortedList, SortedDictionary, and List

Okay as part of my internship I have to deal with a huge number of text strings.

These strings come into the program unsorted and they must be sorted and each one must be unique. (i.e. no duplicates).

Now there are a few different ways to be able to store this data.

A SortedList, SortedDictionary, or two different forms of lists, the first where before each add you check to make sure that the data doesn’t already exist, and the second where you just add then sort then remove duplicates at a later time.

Two work out what one would be best I wrote a program to determine which form of storage operated the fastest on input. The results follow and then the code for how I did it.

As you can see the SortedDictionary worked the best, however at the early stages both the SortedList and Duplicate List gave it a run for it’s money.

Results

Sorted List Test:        00:00:00.1169883 Input:    5000    4000 List Size:    2838
Sorted Dictionary Test:  00:00:00.1339866 Input:    5000    4000 List Size:    2858
Unique List Test:        00:00:00.1119888 Input:    5000    4000 List Size:    2862
Duplicate List Test:     00:00:00.0239976 Input:    5000    4000 List Size:    2832
Sorted List Test:        00:00:01.3768623 Input:   50000   40000 List Size:   28516
Sorted Dictionary Test:  00:00:01.0678932 Input:   50000   40000 List Size:   28466
Unique List Test:        00:00:12.3097689 Input:   50000   40000 List Size:   28384
Duplicate List Test:     00:00:01.2058794 Input:   50000   40000 List Size:   28549
Sorted List Test:        00:01:36.9733017 Input:  500000  400000 List Size:  285367
Sorted Dictionary Test:  00:00:12.3307668 Input:  500000  400000 List Size:  285422
Duplicate List Test:     00:02:32.7467238 Input:  500000  400000 List Size:  285506
Sorted Dictionary Test:  00:02:37.8040000 Input: 5000000 4000000 List Size: 2854095

Code for Program.cs

using System;
using System.Collections.Generic;
using System.Text;

namespace SortTest
{
class Program
{
public static void Main()
{
// 5,000 item list
SortedListTest.run(5000,4000);
SortedDictionaryTest.run(5000, 4000);
UniqueListTest.run(5000, 4000);
DuplicateListTest.run(5000, 4000);

// 50,000 item list
SortedListTest.run(50000, 40000);
SortedDictionaryTest.run(50000, 40000);
UniqueListTest.run(50000, 40000);
DuplicateListTest.run(50000, 40000);

// 500,000 item list
SortedListTest.run(500000, 400000);
SortedDictionaryTest.run(500000, 400000);
//UniqueListTest.run(500000, 400000);
DuplicateListTest.run(500000, 400000);

// 5,000,000 item list
//SortedListTest.run(5000000, 4000000);
SortedDictionaryTest.run(5000000, 4000000);
//UniqueListTest.run(5000000, 4000000);
//DuplicateListTest.run(5000000, 4000000);

}
}
}

Code for SortedListTest.cs
using System;
using System.Collections.Generic;
using System.Text;

namespace SortTest
{
class SortedListTest
{
private static SortedList words = null;

public static void run(int loopSize, int multiplier)
{
words = new SortedList();
DateTime startTime = DateTime.Now;
Random rand = new Random();
int i;
for (int j = 0; j != loopSize; j++)
{
i = (int)(rand.NextDouble() * multiplier);
try
{
string s = i.ToString();
words.Add(s, s[0]);
}
catch (Exception e)
{
continue;
}
}
DateTime stopTime = DateTime.Now;
TimeSpan duration = stopTime - startTime;
Console.WriteLine("Sorted List Test: " + duration.ToString() + " Input: " + loopSize + " " + multiplier + " List Size: " + words.Count);
}
}
}

Code for SortedDictionaryTest.cs

using System;
using System.Collections.Generic;
using System.Text;

namespace SortTest
{
class SortedDictionaryTest
{
private static SortedDictionary words = null;

public static void run(int loopSize, int multiplier)
{
words = new SortedDictionary();
DateTime startTime = DateTime.Now;
Random rand = new Random();
int i;
for (int j = 0; j != loopSize; j++)
{
i = (int)(rand.NextDouble() * multiplier);
try
{
string s = i.ToString();
words.Add(s, s[0]);
}
catch (Exception e)
{
continue;
}
}
DateTime stopTime = DateTime.Now;
TimeSpan duration = stopTime - startTime;
Console.WriteLine("Sorted Dictionary Test: " + duration.ToString() + " Input: " + loopSize + " " + multiplier + " List Size: " + words.Count);
}
}
}

Code for UniqueListTest.cs

using System;
using System.Collections.Generic;
using System.Text;

namespace SortTest
{
class UniqueListTest
{
private static List words = null;

public static void run(int loopSize, int multiplier)
{
words = new List();
DateTime startTime = DateTime.Now;
Random rand = new Random();
int i;
for (int j = 0; j != loopSize; j++)
{
i = (int)(rand.NextDouble() * multiplier);
try
{
string s = i.ToString();
bool matchfound = false;
foreach (string w in words) // ignore duplicates
{

if (w == s)
{
matchfound = true;
break;
}
}
if (!matchfound) words.Add(s);
}
catch (Exception e)
{
continue;
}
}
words.Sort();
DateTime stopTime = DateTime.Now;
TimeSpan duration = stopTime - startTime;
Console.WriteLine("Unique List Test: " + duration.ToString() + " Input: " + loopSize + " " + multiplier + " List Size: " + words.Count);
}
}
}

Code for DuplicateListTest.cs

using System;
using System.Collections.Generic;
using System.Text;

namespace SortTest
{
class DuplicateListTest
{

private static List words = null;

public static void run(int loopSize, int multiplier)
{
words = new List();
DateTime startTime = DateTime.Now;
Random rand = new Random();
int i;
for (int j = 0; j != loopSize; j++)
{
i = (int)(rand.NextDouble() * multiplier);
try
{
string s = i.ToString();
words.Add(s);
}
catch (Exception e)
{
continue;
}
}
words.Sort();
string prev = null;
for(int counter = 0; counter < words.Count; counter++) {

if (prev == words[counter])
{
words.RemoveAt(counter);
counter--;
}
prev = words[counter];
}
DateTime stopTime = DateTime.Now;
TimeSpan duration = stopTime - startTime;
Console.WriteLine("Duplicate List Test: " + duration.ToString() + " Input: " + loopSize + " " + multiplier + " List Size: " + words.Count);
}
}
}

One Reply to “The C# battle of the SortedList, SortedDictionary, and List”

Comments are closed.