快速匹配字符串 假设内存有一个大字符串集,里面含有约1000万个字符串,如何快速知道该字符串集是否含有某个指定的测试字符串 ? (假设内存能放下这么多字符串)
方法一: hashCode法
方法二: Trie树 http://zh.wikipedia.org/wiki/Trie
HashCode法 01 def sampleHashMap(strSet: Set[String]): Map[Int, Set[String]]={ 02 strSet.groupBy{ 03 s => s.hashCode 04 } 05 } 06 07 def contain(str: String, sampleMap: Map[Int, Set[String]]): Boolean = { 08 sampleMap.getOrElse(str.hashCode, Set[String]()).exists(_ equals str) 09 } 10 11 // 测试 12 val sampleSet = Set("a","b","c","AA","Archer","Jack"); 13 val sampleMap = sampleHashMap(sampleSet); 14 println(contain("Archer", sampleMap)); // true 15 println(contain("A", sampleMap)); // false 16 println(contain("a", sampleMap)); // true Trie树法 01 sealed abstract class Trie { 02 def contains(msg: String): Boolean = contains(msg, this) 03 04 @scala.annotation.tailrec 05 private def contains(msg: String, trie: Trie): Boolean = (msg, trie) match { 06 case (null, _ ) | (_ , null ) => false 07 case (StringCase(head, tail), TrieNode(edges)) => edges.get(head) match { 08 case None => false 09 case Some(subTrie) => contains(tail,subTrie) 10 } 11 case (StringCase(_,_), TrieLeaf()) => false 12 case _ => true 13 } 14 } 15 16 case class TrieNode(edges: Map[Char, Trie]) extends Trie{ 17 assert(edges.size > ) 18 } 19 case class TrieLeaf extends Trie 20 21 object StringCase{ 22 def unapply(s: String): Option[(Char, String)] ={ 23 if (s == null || s == "" ) None 24 else Some(s.head, s.tail) 25 } 26 } 27 28 object Trie{ 29 30 def apply(strs: String*): Trie ={ 31 apply(strs.toSet); 32 } 33 34 def apply(strSet: Set[String]) : Trie = { 35 if (strSet.size == || strSet == Set("")) TrieLeaf() 36 else { 37 val char2TrieSet = strSet.filter(_.length > ).groupBy(_.head).mapValues{ 38 strs => strs.map(_.tail) 39 }.mapValues{ 40 strs => apply(strs) 41 } 42 TrieNode(char2TrieSet); 43 } 44 } 45 46 } 47 48 val trie= Trie("abc","ac","b") 49 println(trie) 50 assert(trie.contains("") == true); // true 51 assert(trie.contains(null) == false); //false 52 assert(trie.contains("a") == true); // true 53 assert(trie.contains("b") == true); // true 54 assert(trie.contains("ab") == true); //true 55 assert(trie.contains("ac") == true); //true 56 assert(trie.contains("abc") == true); //true 57 assert(trie.contains("acb") == false); // false 58 assert(trie.contains("abcd") == false); //false 59 assert(trie.contains("bc") == false); //false