C# 使预测文本算法更快
我正在开发一个windows phone拨号器应用程序,并在我的应用程序中实现了预测文本。当用户轻触键盘时,会生成与输入匹配的联系人。预测太慢了,它还会阻塞我的主线程,这就是为什么我实现了BackGroundWorker,但仍然存在性能问题 我的代码是:C# 使预测文本算法更快,c#,regex,windows,backgroundworker,C#,Regex,Windows,Backgroundworker,我正在开发一个windows phone拨号器应用程序,并在我的应用程序中实现了预测文本。当用户轻触键盘时,会生成与输入匹配的联系人。预测太慢了,它还会阻塞我的主线程,这就是为什么我实现了BackGroundWorker,但仍然存在性能问题 我的代码是: private void dialer_TextChanged(object sender, TextChangedEventArgs e) { MainPage.DialerText = dialer.Text;
/// <summary>
/// Text-changed handler for the dialer box: publishes the current text to
/// <c>MainPage.DialerText</c> and starts the background worker when it is idle.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void dialer_TextChanged(object sender, TextChangedEventArgs e)
{
    MainPage.DialerText = dialer.Text;

    // A run that is already in progress is left alone; no second run is queued.
    if (bw1.IsBusy)
    {
        return;
    }

    bw1.RunWorkerAsync();
}
// Regex character class for each keypad digit, indexed by the digit's numeric value.
// Keys 0 and 1 carry no letters, so they contribute nothing to the name pattern.
// Built once instead of allocating a dictionary on every keystroke.
private static readonly string[] DigitLetterClasses =
{
    "",             // 0
    "",             // 1
    "[abcABC]",     // 2
    "[defDEF]",     // 3
    "[ghiGHI]",     // 4
    "[jklJKL]",     // 5
    "[mnoMNO]",     // 6
    "[pqrsPQRS]",   // 7
    "[tuvTUV]",     // 8
    "[wxyzWXYZ]",   // 9
};

/// <summary>
/// Background-worker handler: translates the dialed text into a regular expression
/// that matches the letter sequences those keys can spell, and stores it in
/// <c>MainPage.pattern</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
void bw1_DoWork(object sender, DoWorkEventArgs e)
{
    // Read the shared text once so the whole pattern is built from a single value.
    var enteredDigits = DialerText ?? string.Empty;

    var regexBuilder = new StringBuilder(enteredDigits.Length * 10);
    foreach (var ch in enteredDigits)
    {
        // Characters other than 0-9 (for example '+', '*' or '#') have no letters.
        // They are skipped; previously int.Parse threw on them and the empty catch
        // left the pattern from an earlier keystroke in place.
        if (ch >= '0' && ch <= '9')
        {
            regexBuilder.Append(DigitLetterClasses[ch - '0']);
        }
    }

    // No ".*" prefix/suffix: Regex.IsMatch already reports a match found anywhere
    // in the input, so the wrappers only added backtracking work.
    MainPage.pattern = regexBuilder.ToString();
}
/// <summary>
/// Worker-completed handler: fills the search list with every contact that has a
/// phone number containing the dialed text, or whose first name matches the
/// keypad pattern stored in <c>MainPage.pattern</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Completion data (unused).</param>
void bw1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    var typedDigits = MainPage.DialerText;

    // Build the regex once for the whole list instead of resolving the pattern
    // string again for every contact.
    var namePattern = MainPage.pattern;
    var nameRegex = namePattern == null ? null : new Regex(namePattern);

    SearchListbox.ItemsSource = listobj.FindAll(x =>
        x.PhoneNumbers.Any(a => a.Contains(typedDigits))
        // "||" rather than "|": the regex is skipped once a number already matched.
        // Contacts without a first name are never matched by name.
        || (nameRegex != null && x.FirstName != null && nameRegex.IsMatch(x.FirstName)));
}
private void dialer\u TextChanged(对象发送者,textchangedventargs e)
{
MainPage.DialerText=拨号器.Text;
如果(!bw1.IsBusy)
bw1.RunWorkerAsync();
}
无效bw1_DoWork(对象发送方,DoWorkEventArgs e)
{
尝试
{
var digitMap=新字典(){
{ 1, "" },
{2,“[abcABC]”,
{3,“[defDEF]”},
{4,“[ghiGHI]”,
{5,“[jklJKL]”},
{6,“[mnoMNO]”,
{7,“[pqrsPQRS]”,
{8,“[tuvTUV]”,
{9,“[wxyzWXYZ]”,
{ 0, "" },
};
var enteredDigits=拨号文本;
var charsAsInts=enteredDigits.ToCharArray().Select(x=>int.Parse(x.ToString());
var regexBuilder=new StringBuilder();
foreach(charsAsInts中的var val)
Append(digitMap[val]);
MainPage.pattern=regexBuilder.ToString();
MainPage.pattern=“*”+MainPage.pattern+”*;
}
捕获(例外f)
{
//MessageBox.Show(f.Message);
}
}
void bw1_RunWorkerCompleted(对象发送方,RunWorkerCompletedEventArgs e)
{
SearchListbox.ItemsSource=listobj.FindAll(x=>x.PhoneNumbers.Any(a=>a.Contains(MainPage.DialerText))| Regex.IsMatch(x.FirstName,MainPage.pattern));
}
BackGroundWorker也阻塞了我的主线程,因此当我点击键盘时,在文本框中添加输入值时会出现延迟。我想在没有任何延迟的情况下向文本框添加输入,怎么做?谢谢。为了提高速度,您可以进行一些优化:
- 没有必要在正则表达式模式中添加 .* 前缀和后缀,因为 IsMatch 会在字符串中的任何位置检测到匹配
- 用于保存模式各部分的本地 Dictionary<int,string> 可以用静态数组代替
- 将数字字符转换为 int 可以用减法代替
- foreach 循环和追加可以替换为 string.Join
// Regex character class for each keypad digit, indexed by the digit's numeric value.
// Keys 0 and 1 carry no letters. readonly: the table is a fixed lookup.
private static readonly string[] digitMap = new[] {
    ""            // 0
    , ""          // 1
    , "[abcABC]"  // 2
    , "[defDEF]"  // 3
    , "[ghiGHI]"  // 4
    , "[jklJKL]"  // 5
    , "[mnoMNO]"  // 6
    , "[pqrsPQRS]" // 7
    , "[tuvTUV]"  // 8
    , "[wxyzWXYZ]" // 9
};

/// <summary>
/// Background-worker handler: converts the dialed text into a regular expression
/// over the letters each key can spell and stores it in <c>MainPage.pattern</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
void bw1_DoWork(object sender, DoWorkEventArgs e) {
    var dialed = DialerText ?? string.Empty;

    // Only '0'-'9' index into digitMap. Other characters (for example '+', '*', '#')
    // are skipped; previously they indexed outside the array and the empty catch
    // silently kept the pattern from an earlier keystroke.
    MainPage.pattern = string.Join("", dialed
        .Where(c => c >= '0' && c <= '9')
        .Select(c => digitMap[c - '0']));
}
为了提高速度,您可以进行许多优化:
- 没有必要在正则表达式模式中添加 .* 前缀和后缀,因为 IsMatch 会在字符串中的任何位置检测到匹配
- 用于保存模式各部分的本地 Dictionary<int,string> 可以用静态数组代替
- 将数字字符转换为 int 可以用减法代替
- foreach 循环和追加可以替换为 string.Join
// Regex character class for each keypad digit, indexed by the digit's numeric value.
// Keys 0 and 1 carry no letters. readonly: the table is a fixed lookup.
private static readonly string[] digitMap = new[] {
    ""            // 0
    , ""          // 1
    , "[abcABC]"  // 2
    , "[defDEF]"  // 3
    , "[ghiGHI]"  // 4
    , "[jklJKL]"  // 5
    , "[mnoMNO]"  // 6
    , "[pqrsPQRS]" // 7
    , "[tuvTUV]"  // 8
    , "[wxyzWXYZ]" // 9
};

/// <summary>
/// Background-worker handler: converts the dialed text into a regular expression
/// over the letters each key can spell and stores it in <c>MainPage.pattern</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
void bw1_DoWork(object sender, DoWorkEventArgs e) {
    var dialed = DialerText ?? string.Empty;

    // Only '0'-'9' index into digitMap. Other characters (for example '+', '*', '#')
    // are skipped; previously they indexed outside the array and the empty catch
    // silently kept the pattern from an earlier keystroke.
    MainPage.pattern = string.Join("", dialed
        .Where(c => c >= '0' && c <= '9')
        .Select(c => digitMap[c - '0']));
}
您可以通过从对整个单词列表的彻底搜索中解脱出来,转而将您的单词放入一个更高效的数据结构中,从而真正加快搜索速度 为了快速查找任意大小的单词列表(但内存更昂贵),您应该构建一个包含整个单词列表的树结构 根节点表示零个拨号数字,并连接到(最多)十个以上的节点,其中连接节点的边表示按0到9的可能数字之一 然后,每个节点都包含可能的单词,这些单词可以从根节点通过树的路径形成,该路径代表按下的数字 这意味着搜索不再需要迭代整个单词列表,只需很少的操作即可完成 这是我在网上找到的一个实用的概念。在我的桌面上搜索大约需要0.02毫秒。又快又好。似乎需要大约50MB的内存
// Script entry point: loads the word list into a keypad trie, runs the same lookup
// several times while printing the elapsed time and the matches, then prints the
// value reported by GC.GetTotalMemory.
void Main()
{
    // "acres" in text-ese (specifically chosen for ambiguity)
    const string searchTerm = "22737";
    const int runs = 5;

    var rootNode = new Node();
    // Blocking on the task is probably a bad idea; awaiting it from an async method
    // would be better.
    LoadNode(rootNode).Wait();

    // Repeat the search a few times to get meaningful timings.
    var run = 0;
    while (run < runs)
    {
        var timer = Stopwatch.StartNew();
        var matches = rootNode.Search(searchTerm);
        Console.WriteLine("Search complete in {0} ms", timer.Elapsed.TotalMilliseconds);
        Console.WriteLine("Search for {0}:", searchTerm);
        foreach (var match in matches)
        {
            Console.WriteLine("Found {0}", match);
        }
        run++;
    }

    GC.Collect();
    Console.WriteLine("Allocated {0} bytes", GC.GetTotalMemory(true));
}
/// <summary>
/// Downloads the word list line by line and adds every line that consists solely
/// of the letters a-z (after lower-casing) to <paramref name="rootNode"/>.
/// </summary>
/// <param name="rootNode">Node that receives the words through its <c>Add</c> method.</param>
async Task LoadNode(Node rootNode)
{
    const string wordListUrl =
        "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt";
    Console.WriteLine("Loading words from {0}", wordListUrl);

    using (var httpClient = new HttpClient())
    using (var stream = await httpClient.GetStreamAsync(wordListUrl))
    using (var reader = new StreamReader(stream))
    {
        var loaded = 0;
        for (var line = await reader.ReadLineAsync();
             line != null;
             line = await reader.ReadLineAsync())
        {
            var candidate = line.ToLowerInvariant();
            // Lines containing anything besides a-z (including empty lines) are ignored.
            if (Regex.IsMatch(candidate, @"^[a-z]+$"))
            {
                rootNode.Add(candidate);
                loaded++;
            }
        }
        Console.WriteLine("Loaded {0} words", loaded);
    }
}
/// <summary>
/// One node of a trie keyed by phone-keypad digits. The path from the root to a
/// node is a digit sequence; the node stores every added word whose letters map
/// to exactly that sequence.
/// </summary>
class Node
{
    // Letters printed on each keypad key; keys 0 and 1 carry no letters.
    static readonly Dictionary<int, string> digitMap = new Dictionary<int, string>() {
        { 1, "" },
        { 2, "abcABC" },
        { 3, "defDEF" },
        { 4, "ghiGHI" },
        { 5, "jklJKL" },
        { 6, "mnoMNO" },
        { 7, "pqrsPQRS" },
        { 8, "tuvTUV" },
        { 9, "wxyzWXYZ" },
        { 0, "" }};

    // Reverse lookup derived from digitMap: letter -> keypad digit.
    static readonly Dictionary<char, int> letterMap;

    static Node()
    {
        letterMap = digitMap
            .SelectMany(m => m.Value.Select(c => new { ch = c, num = m.Key }))
            .ToDictionary(x => x.ch, x => x.num);
    }

    // Words whose full digit sequence ends at this node.
    readonly List<string> words = new List<string>();

    // The edges collection has exactly 10 slots which represent the digits [0-9];
    // a null slot means no word continues with that digit.
    readonly Node[] edges = new Node[10];

    /// <summary>Words stored directly on this node.</summary>
    public IEnumerable<string> Words { get { return words; } }

    /// <summary>
    /// Adds <paramref name="word"/> below this node, consuming its characters from
    /// index <paramref name="pos"/> onwards. An empty word is never stored.
    /// </summary>
    /// <param name="word">Word to add; each consumed character must be a keypad letter.</param>
    /// <param name="pos">Index of the first character to consume on this node.</param>
    /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="pos"/> is outside the word.</exception>
    /// <exception cref="KeyNotFoundException">A consumed character has no keypad digit.</exception>
    public void Add(string word, int pos = 0)
    {
        if (word == null)
        {
            throw new ArgumentNullException("word");
        }
        if (pos < 0 || pos > word.Length)
        {
            throw new ArgumentOutOfRangeException("pos");
        }

        // Resolve the whole digit path before touching the trie, so a word with an
        // unmapped character cannot leave half-built, empty branches behind.
        var path = new int[word.Length - pos];
        for (var i = 0; i < path.Length; i++)
        {
            var ch = word[pos + i];
            if (!letterMap.TryGetValue(ch, out path[i]))
            {
                throw new KeyNotFoundException(string.Format(
                    "Character '{0}' at index {1} has no keypad digit.", ch, pos + i));
            }
        }

        // Walk down, creating missing nodes on the way.
        var node = this;
        foreach (var edgeIndex in path)
        {
            if (node.edges[edgeIndex] == null)
            {
                node.edges[edgeIndex] = new Node();
            }
            node = node.edges[edgeIndex];
        }

        if (word.Length > 0)
        {
            node.words.Add(word);
        }
    }

    /// <summary>
    /// Walks this node and everything below it and returns the node holding the
    /// most words. On a tie the node visited first is kept.
    /// </summary>
    public Node FindMostPopulatedNode()
    {
        Stack<Node> stk = new Stack<Node>();
        stk.Push(this);
        Node biggest = null;
        while (stk.Count > 0)
        {
            var node = stk.Pop();
            if (biggest == null || node.words.Count > biggest.words.Count)
            {
                biggest = node;
            }
            foreach (var next in node.edges.Where(e => e != null))
            {
                stk.Push(next);
            }
        }
        return biggest;
    }

    /// <summary>
    /// Returns the words stored at the node reached by following the digits of
    /// <paramref name="numberSequenceString"/> from this node.
    /// </summary>
    /// <param name="numberSequenceString">Digit characters '0'-'9' to follow.</param>
    /// <returns>
    /// The words at the reached node. Empty when the path does not exist or the
    /// string contains a character that is not a digit.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="numberSequenceString"/> is null.</exception>
    public IEnumerable<string> Search(string numberSequenceString)
    {
        if (numberSequenceString == null)
        {
            throw new ArgumentNullException("numberSequenceString");
        }

        // Single pass over the string. The earlier recursive version stacked a
        // deferred Skip(1) per level and re-parsed the prefix at every step.
        var node = this;
        foreach (var c in numberSequenceString)
        {
            var digit = c - '0';
            if (digit < 0 || digit > 9)
            {
                // Not a keypad digit (for example '*' or '#'): nothing can match.
                return Enumerable.Empty<string>();
            }

            node = node.edges[digit];
            if (node == null)
            {
                return Enumerable.Empty<string>();
            }
        }
        return node.words;
    }
}
void Main()
{
var rootNode=新节点();
//这可能是个坏主意,最好用异步方法等待
LoadNode(rootNode.Wait();
//让我们搜索几次以获得有意义的计时
对于(变量i=0;i<5;++i)
{
//ese文本中的“acres”(专为歧义选择)
var searchTerm=“22737”;
var sw=Stopwatch.StartNew();
var wordList=rootNode.Search(searchTerm);
WriteLine(“搜索在{0}毫秒内完成”,
总运行时间(毫秒);
WriteLine(“搜索{0}:”,searchTerm);
foreach(单词列表中的var单词)
{
WriteLine(“找到{0}”,word);
}
}
GC.Collect();
var bytesAllocated=GC.GetTotalMemory(true);
WriteLine(“分配的{0}字节”,字节分配);
}
异步任务加载节点(节点根节点)
{
var wordListUrl=
"https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt";
WriteLine(“从{0}加载单词”,wordListUrl);
使用(var httpClient=new httpClient())
使用(var stream=await-httpClient.GetStreamAsync(wordListUrl))
使用(变量读取器=新的流读取器(流))
{
var-wordCount=0;
字符串字;
while((word=wait reader.readlinesync())!=null)
{
word=word.ToLowerInvariant();
if(!Regex.IsMatch(word,@“^[a-z]+$”)
{
继续;
}
添加(word);
字数++;
}
WriteLine(“加载的{0}个字”,字数);
}
}
类节点
{
静态字典digitMap=新字典(){
{ 1, "" },
{2,“abcABC”},
{3,“defDEF”},
{4,“ghiGHI”},
{5,“jklJKL”},
{6,“mnoMNO”},
{7,“pqrsPQRS”},
{8,“tuvTUV”},
{9,“wxyzWXYZ”},
{ 0, "" }};
静止的