Similar Names Matching

This is the code that performs similar-name matching on the Tasmanian Court Lists pages. It uses the Phonix package, available on NuGet, to perform matching using the Double Metaphone algorithm. Some notes on its operation:

C# Source Code


/// <summary>
/// Compares every stored name against every other stored name and records
/// pairs whose leading components all sound alike according to Double Metaphone.
/// </summary>
/// <remarks>
/// Only names with at least <c>MinElementsToMatch</c> components take part.
/// For a pair, the first N components are compared position by position, where
/// N is the smaller of the two component counts; the pair is reported only when
/// all N positions are similar. Each pair is visited in both directions, so a
/// match is recorded once as (A, B) and once as (B, A).
/// </remarks>
void Main()
{
	const int MinElementsToMatch = 3;

	// Project to just the two columns needed before materialising.
	var names = Names
		.Select(n => new { Id = n.NameId, n.Name })
		.ToDictionary(n => n.Id, n => n.Name);

	// Parse each name once and discard names that are too short to ever match:
	// a pair is only checked when BOTH sides have at least MinElementsToMatch
	// components, so short names can be dropped up front.
	var candidates = names
		.Select(n => new { Id = n.Key, Name = n.Value, Components = ParseName(n.Value) })
		.Where(c => c.Components.Length >= MinElementsToMatch)
		.ToList();

	var compare = new DoubleMetaphone();
	foreach (var source in candidates)
	{
		foreach (var test in candidates.Where(c => c.Id != source.Id))
		{
			var componentsToCheck = Math.Min(source.Components.Length, test.Components.Length);

			// Every compared position must sound alike; stop at the first mismatch.
			var allSimilar = true;
			for (var i = 0; i < componentsToCheck; i++)
			{
				if (!compare.IsSimilar(new string[] { source.Components[i], test.Components[i] }))
				{
					allSimilar = false;
					break;
				}
			}

			if (allSimilar)
			{
				Console.WriteLine($"{componentsToCheck} {source.Name} *** {test.Name}");
				DuplicateNames.InsertOnSubmit(new DuplicateName { NameId = source.Id, DuplicateNameId = test.Id });
				// Submitted per match (unchanged from the original) rather than batched at the end.
				SubmitChanges();
			}
		}
	}
}

/// <summary>
/// Splits a name into its components for position-by-position comparison.
/// </summary>
/// <param name="name">
/// Either "LastName, Other Names" (comma form) or space-separated names.
/// </param>
/// <returns>
/// For the comma form: the text before the first comma (trimmed) followed by
/// the space-separated words after it. Otherwise: the space-separated words.
/// Empty components are never returned; a null or blank name yields an empty array.
/// </returns>
string[] ParseName(string name)
{
	if (string.IsNullOrWhiteSpace(name))
	{
		return new string[0];
	}

	var separators = new[] { ' ' };
	var commaPos = name.IndexOf(',');
	if (commaPos < 0)
	{
		// Drop empty entries here too, so repeated or trailing spaces do not
		// produce blank components that inflate the component count.
		return name.Split(separators, StringSplitOptions.RemoveEmptyEntries);
	}

	// Everything before the first comma is kept as a single last-name component,
	// even if it contains spaces.
	var lastName = name.Substring(0, commaPos).Trim();
	var otherNames = name.Substring(commaPos + 1).Split(separators, StringSplitOptions.RemoveEmptyEntries);

	return lastName.Length == 0
		? otherNames
		: new[] { lastName }.Concat(otherNames).ToArray();
}