# Simple implementation of SuffixTree (suffix trie) in Ruby

## Build tree in linear time

```class SuffixTree

# node =  [ suffix_link, parent, edges  ]
# edges = { first_char => edge }
# edge =  [ range, target_node ]
# range = [ first, last ] # corresponds to interval  [ first, last )

def initialize(text)
@text = text
@joker_edge = [[0,1]] # empty interval
@joker = [nil, nil, lambda{|c| @joker_edge}]
@root = [@joker, nil, {}]
@joker_edge << @root
@infty = @text.size
build_tree(@root, 0, @infty)
end

private
def build_tree(node, n, infty, skip=0)
while n < infty
c = @text[n]
edges = node[2]
if edge = edges[c]
first,last = edge[0]
i,n0 = first,n
(can_skip = [skip, last-first].min
i += can_skip
n += can_skip
skip -= can_skip) if skip > 0
while i < last && n < infty && (@text[i] == @text[n] || edge.equal?(@joker_edge))
i += 1; n += 1
end
if i == last
# came to the next node
node = edge[1]
else
# splitting edge
middle_node = [ nil, node, {@text[i] => edge} ]
edge_to_middle = [ [first, i], middle_node ]
edges[c] = edge_to_middle
edge[0][0] = i
edge[1][1] = middle_node
middle_node[0] = build_tree(node[0], n0, n, i-first)
node = middle_node
end
else
# no way to go; creating leaf
new_leaf = [ nil,  node, {} ]
edges[c] = [ [n, @infty], new_leaf ]
node = node[0]
end
end
node
end
end```

## Search method

```class SuffixTree
public
def search(word)
node = @root
i = n = 0
infty = word.size
while n < infty
edges = node[2]
c = word[n]
if edge = edges[ c ]
i = edge[0][0]
last = edge[0][1]
(i += 1 and n += 1) while @text[i] == word[n] && n < infty && i < last
if i == last
node = edge[1]
else
break
end
else
break
end
end
n == infty ?  i - infty : nil
end
end```

## Verbose version

Simple

```require 'yaml'
...
puts tree.to_yaml```

could give you all information about tree internals. But let's write pritty_print for educational purposes and make build_tree more verbose.

```require 'yaml'

class NilClass
def name
"nil"
end
end

class Array
def name
@name ||= (
if self[1]
#puts self.inspect
range =  self[1][2].to_a.find{|k,v| v[1].object_id==self.object_id}[1][0]
self[1].name + \$text[range[0]...range[1]]
else
if self[2].is_a?(Proc)
"&"
else
"\$"
end
end
)
end
end

class SuffixTree

# node =  [ suffix_link, parent, edges  ]
# edges = { first_char => edge }
# edge =  [ range, node ]
# range = [ first, last ] # corresponds to interval  [ first, last )

def initialize(text)
\$text = @text = text
@joker_edge = [[0,1]]
@joker = [nil, nil, lambda{|c| @joker_edge}]
@root = [@joker, nil, {}]
@joker_edge << @root
@infty = @text.size
build_tree(@root, 0, @infty)
end

def build_tree(node, n, infty, skip=0)
#puts "ENTER #{[node.name, @text[n...infty]].inspect}"
while n < infty
puts "-----------------------"
pp(@root)
#puts "SEARCH #{@text[n...infty]} FROM #{node.name}"
c = @text[n]
edges = node[2]
if edge = edges[c]
first,last = edge[0]
i,n0 = first,n
can_skip = [skip, last-first].min
i += can_skip
n += can_skip
skip -= can_skip
while i < last && n < infty && (@text[i]==@text[n] || edge==@joker_edge)
i += 1; n += 1
end
if i == last
# came to the next node
puts "NEXT NODE #{@text[n0...n]}+#{@text[n...infty]}:  #{node.name} -> #{edge[1].name}"
node = edge[1]
else
# splitting edge
puts "SPLIT EDGE (#{@text[(0...n0)]},#{@text[(n0...n)]},#{@text[(n...@infty)]})  #{@text[ (first...last)]} = #{@text[ (first...i)]}+#{@text[ (i...last)]} "
middle_node = [ nil, node, {@text[i] => edge} ]
edge_to_middle = [ [first, i], middle_node ]
edges[c] = edge_to_middle
edge[0][0] = i
edge[1][1] = middle_node
node = build_tree(node[0], n0, n, i - first)
middle_node[0] = node
node = middle_node
end
else
# no way to go; creating leaf
new_leaf = [ nil,  node, {} ]
edges[c] = [ [n, @infty], new_leaf ]
puts "CREATE LEAF (#{@text[(n...@infty)]}). #{node.name} -> #{node[0].name}"
node = node[0]
end
end
#puts "OUT #{node.name}"
node
end

# pretty print
def pp(node=nil, indent = 0)
node ||= @root
space = "    " * indent
puts  space + "ID    : #{node.name}"
puts  space + "link  : #{node[0].name}"
# puts  space + "parent: #{node[1].name}"
puts  space + "edges : "
if node == @joker
puts  space + "  — JOKER"
else
node[2].each do |k,v|
puts  space + "  -#{k.to_i.chr} [#{v[0][0]},#{v[0][1]})=#{@text[ v[0][0]...v[0][1] ] }:"
pp(v[1], indent + 1)
end
end
end

def search(word)
node = @root
n = 0
infty = word.size
i = 0
while n < infty
edges = node[2]
c = word[n]
if edge = edges[ c ]
i,last = edge[0]
while @text[i] == word[n] && n < infty && i < last
i += 1; n += 1
end
if i == last
node = edge[1]
else
break
end
else
break
end
end
n == infty ?  i - infty : nil
end

end

text = "abrababb"
tree = SuffixTree.new(text)
tree.pp```

## Tree for "abrababb"

```ID    : \$
edges :
-a [0,2)=ab:
ID    : \$ab
edges :
-a [5,8)=abb:
ID    : \$ababb
edges :
-r [2,8)=rababb:
ID    : \$abrababb
edges :
-b [7,8)=b:
ID    : \$abb
edges :
-r [2,8)=rababb:
ID    : \$rababb
edges :
-b [1,2)=b:
ID    : \$b
edges :
-a [5,8)=abb:
ID    : \$babb
edges :
-r [2,8)=rababb:
ID    : \$brababb
edges :
-b [7,8)=b:
ID    : \$bb
edges : ```

## Benchmarks and Testing

```chars = "qwertyuiopasdfghjklzxcvbnm,./'][;1234567890".split(//)

require 'benchmark'

Benchmark.bmbm do |b|
[2, 3, 40].each do |alpha_size|
[5, 10, 20].each do |i|
n = 100 * i
text = ""
n.times{ text << chars[rand(alpha_size)] }
text = text + text.reverse + text
b.report("#{text.size}-#{alpha_size}") do
SuffixTree.new(text)
end
end
end
end

require 'test/unit'

class TestSuffixTree < Test::Unit::TestCase
def test_simple
tree = SuffixTree.new(text)
tree.search( "abra" )
assert( tree.search( "" ) )
assert( tree.search( text[(7..12)] ) )
assert( tree.search( text[(3..12)] ) )
assert( tree.search( text[(3..22)] ) )
assert( tree.search( text[(21..22)] ) )
assert( tree.search( "abrakadaa" ) == nil )
assert( tree.search( text ) )
end

def test_small_strings
[
"a",
"aaaaaaa",
"azazaz",
"abcabc",
"abrababb",
"abraabaabb",
"azazazaza",
"aaaaaaaaaaaaaaaaaaaa",
"aaaaaaaaaaaaaaaaaaaaz",
"zaaaaaaaaaaaaaaaaaaaa",
"aaaaaaazaaaaaaaaaaaa",
"aaaaaaazaaaaazaaaaaa",
].each do |text|
tree = SuffixTree.new(text)
(1..text.size).each do |length|
(0...text.size-length).each do |i|
word = text[ (i...i+length) ]
assert( text[tree.search(word).to_i, word.length] == word,
"Searching for #{word} in #{text}" )
end
end
end
end
end```

Output:

```              user     system      total        real
1500-2    0.030000   0.000000   0.030000 (  0.028018)
3000-2    0.050000   0.010000   0.060000 (  0.055245)
6000-2    0.100000   0.010000   0.110000 (  0.110966)
1500-3    0.030000   0.000000   0.030000 (  0.026031)
3000-3    0.050000   0.000000   0.050000 (  0.051156)
6000-3    0.100000   0.000000   0.100000 (  0.104076)
1500-40   0.020000   0.000000   0.020000 (  0.020655)
3000-40   0.040000   0.010000   0.050000 (  0.044020)
6000-40   0.080000   0.000000   0.080000 (  0.088967)