Trie Data Structure in Scala

Trie is a data structure can be used for auto complete feature. Following a a rudimentary implementation of a Trie in Scala.

Trie can be constructed similar to a tree structure. Following code snippet illustrated a node in the Trie tree *

package ashika.datastructure.trie

case class Node(content: Char,
                children: collection.mutable.Map[Char,Node],
                words:collection.mutable.Set[String],
                var metaData: Map[String,String] = null)

 

Tree implementation including addWord() and find() methods are implemented as follows :

 

package ashika.datastructure.trie

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import util.control.Breaks._

case class Entry(words: Set[String], meta: Map[String,String])
class Trie {
  val root : Node = Node(Char.MinValue,mutable.Map[Char,Node](), collection.mutable.Set());

  /**
    * find matches for a given prefix
    * @param str
    * @return
    */
  def find(str: String) : Seq[Entry] = {
    val lb = ListBuffer[Entry]()
    var i = 0
    var node = root

    breakable {
      while (i < str.length) {
        val ch = str.charAt(i)

        val nnode = node.children.get(ch.toLower)
        if (nnode.isDefined) {
          if (nnode.get.words.nonEmpty && i>=str.length) {
            lb.addOne(Entry(nnode.get.words.toSet, nnode.get.metaData))
          }
          node=nnode.get
        } else {
           return lb.toList
        }
        i+=1
      }
    }
    //break
    def dfs(node: Node, charLst : Seq[Char]) : Unit = {
      if (node.words.nonEmpty) {
        lb.addOne(Entry(node.words.toSet, node.metaData))
      }
      node.children.foreach(elm => {
        dfs(elm._2, charLst:+ elm._2.content)
      })
    }
    dfs(node,Seq[Char]())
    lb.toSeq
  }

  /**
    * add new word to the Trie with optional meta data
    * @param str
    */
  def addWord(str: String, meta: Map[String,String] = null): Unit = {
    var node = root

    var i = 0
    breakable {
      while (i < str.length) {
        val ch = str.charAt(i)
        val containerNode = node.children.get(ch.toLower)
        //node exists
        if (containerNode.isDefined) {
          node = containerNode.get
          if(i == str.length-1) {
            node.words.addOne(str)
            node.metaData = meta
          }
        } else {
          //no node for this char
          break
        }
        i += 1
      }
    }
    while(i < str.length) {
      val ch = str.charAt(i)
      val nnode : Node = if (i==str.length-1) {
        Node(ch,mutable.Map[Char,Node](), collection.mutable.Set(str), meta)
      } else {
        Node(ch,mutable.Map[Char,Node](), collection.mutable.Set(), meta)
      }
      node.children.addOne(ch.toLower,nnode)
      node = nnode
      i+=1
    }
  }
}

Sample driver program :

 

object Trie {

  def main(args:Array[String]) = {
    val trie = new Trie
    trie.addWord("Ashika")
    trie.addWord("Ashola")
    trie.addWord("Airplane")
    trie.addWord("Asしか")
    trie.addWord("Ash")
    trie.addWord("ash")
    trie.addWord("AsH",Map("id"->"productId"))
    trie.addWord("日本語")
    trie.addWord("日本人")
    trie.addWord("cat")
    trie.addWord("catterpillar", Map("id"->"a"))

    trie.addWord("日本")
    trie.addWord("日人")
    (1 to 11).foreach(i => {
      trie.addWord("dummy name here with some name"+i)
    })


    (trie.find("as").foreach(e => {
      println(e.words, e.meta)
    }))

  }

}

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s