Planted My First Suffix Tree

Text is a snippet from the article
Comparing PATRICIA Trie, With It’s Nuts and Bolts, With Hash Map

It took a whole day, but I got through it thanks to an excellent YouTube video by
Tarvalas and some very long conversations with Google Gemini to get me through slam-face-on-keyboard debugging sessions. Suffix trees (and tries in general) have fascinated me since the moment I learned they exist (thanks to edX). Since 2020 I’ve wanted to answer the question, “But how would I implement one of those?” Every time I tried I just couldn’t wrap my head around them. I wasn’t understanding that the secret sauce was in Ukkonen’s algorithm, not the tree itself. Proposed in 1995, it’s hard to believe I was alive when it was invented. After a long battle with understanding this algorithm, here’s the code in a little C# program I made for searching for a substring in a text box.

namespace FullTextSearch
{
	public partial class MainForm: Form
	{
		private class Node(int start,int? end)
		{
			public Dictionary<char,Node> Children=[];
			public Node? SuffixLink;
			public int Start=start;
			public int? End=end;
			public int SuffixIndex=-1;

			private static Node MakeRoot()
			{
				return new Node(-1,-1);
			}

			public int? EdgeLength()
			{
				return End is null ? End : End-Start+1;
			}

			public class Active
			{
				public string Context;
				public Node Root;
				public Node Node;
				public int Edge;
				public int Length;

				public Active(string text)
				{
					Context=text;
					Edge=-1;
					Length=0;
					Root=MakeRoot();
					Node=Root;
				}

				public char Character()
				{
					return Context[Edge];
				}

				public bool IsRoot()
				{
					return Node == Root;
				}
			}
		}

		private class SuffixTree
		{
			private readonly Node.Active activePoint;
			private readonly int globalEnd=-1;

			public SuffixTree(string input)
			{
				activePoint=new(input);
				int remainingSuffixCount=0;

				for (int index=0; index < activePoint.Context.Length; index++)
				{
					globalEnd++;
					remainingSuffixCount++;
					Node? suffixLinkCandidate=null;

					while (remainingSuffixCount > 0)
					{
						if (activePoint.Length == 0)
						{
							activePoint.Edge=index;
						}

						if (!activePoint.Node.Children.TryGetValue(activePoint.Character(),out Node? next))
						{
							Node child=new(index,null)
							{
								SuffixIndex=index-remainingSuffixCount+1
							};
							activePoint.Node.Children[activePoint.Character()]=child;
							suffixLinkCandidate?.SuffixLink=activePoint.Node;
							suffixLinkCandidate=null;
						}
						else
						{
							int edgeLength=(next.End ?? globalEnd)-next.Start+1;

							if (activePoint.Length >= edgeLength)
							{
								activePoint.Edge+=edgeLength;
								activePoint.Length-=edgeLength;
								activePoint.Node=next;
								continue;
							}

							if (activePoint.Context[next.Start+activePoint.Length] == activePoint.Context[index])
							{
								if (suffixLinkCandidate is not null && !activePoint.IsRoot())
								{
									suffixLinkCandidate.SuffixLink=activePoint.Node;
								}
								activePoint.Length++;
								break;
							}

							Node split=new(next.Start,next.Start+activePoint.Length-1)
							{
								SuffixIndex=index-remainingSuffixCount+1
							};
							activePoint.Node.Children[activePoint.Character()]=split;

							Node child=new(index,null)
							{
								SuffixIndex=index-remainingSuffixCount+1
							};
							split.Children[activePoint.Context[index]]=child;

							next.Start+=activePoint.Length;
							split.Children[activePoint.Context[next.Start]]=next;

							suffixLinkCandidate?.SuffixLink=split;
							suffixLinkCandidate=split;
						}

						remainingSuffixCount--;
						if (activePoint.IsRoot() && activePoint.Length > 0)
						{
							activePoint.Length--;
							activePoint.Edge=index-remainingSuffixCount+1;
						}
						else
						{
							if (!activePoint.IsRoot())
							{
								activePoint.Node=activePoint.Node.SuffixLink ?? activePoint.Root;
							}
						}
					}
				}
			}

			public (int startIndex,int length) Search(string pattern)
			{
				if (string.IsNullOrEmpty(pattern)) return (0,0);

				Node current=activePoint.Root;
				int patternIndex=0;

				while (patternIndex < pattern.Length)
				{
					if (!current.Children.TryGetValue(pattern[patternIndex],out Node? edgeNode))
					{
						return (0,0);
					}
					int edgeLength=(edgeNode.End ?? globalEnd)-edgeNode.Start+1;

					for (int index = 0; index < edgeLength && patternIndex < pattern.Length; index++)
					{
						if (pattern[patternIndex] != activePoint.Context[edgeNode.Start+index])
						{
							return (0,0);
						}
						patternIndex++;
					}

					current=edgeNode;
				}

				return (current.SuffixIndex,pattern.Length);
			}
		}

		public MainForm()
		{
			InitializeComponent();
		}

		SuffixTree? tree;

		private void SearchBox_TextChanged(object sender,EventArgs e)
		{
			if (tree is null) return;
			var (startIndex,length)=tree.Search(searchBox.Text);
			if (length > 0)
			{
				foundIndicator.BackColor=Color.LightSeaGreen;
				foundIndicator.Text="Found!";
				textBox.Select(startIndex,length);
			}
			else
			{
				foundIndicator.BackColor=Color.Gray;
				foundIndicator.Text="";
			}
		}

		private void TextBox_TextChanged(object sender,EventArgs e)
		{
			tree=new(textBox.Text);
		}
	}
}

Code language: C# (cs)

Leave a Reply

Your email address will not be published. Required fields are marked *