mirror of
https://github.com/guohuadeng/app-odoo.git
synced 2025-02-23 04:11:36 +02:00
296 lines
9.7 KiB
Python
296 lines
9.7 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding:utf-8 -*-
|
||
# ToolGood.Words.WordsSearch.py
|
||
# 2020, Lin Zhijun, https://github.com/toolgood/ToolGood.Words
|
||
# Licensed under the Apache License 2.0
|
||
# 更新日志
|
||
# 2020.04.06 第一次提交
|
||
# 2020.05.16 修改,支持大于0xffff的字符
|
||
|
||
import os
|
||
|
||
__all__ = ['WordsSearch']
|
||
__author__ = 'Lin Zhijun'
|
||
__date__ = '2020.05.16'
|
||
|
||
|
||
class TrieNode():
    """Mutable trie node used only while the keyword automaton is being built.

    Attributes:
        Index: Position of the node in the flattened node list (0 until the
            builder assigns it).
        Layer: Depth of the node in the trie (0 until the builder assigns it).
        End: True once at least one keyword index has been recorded here.
        Char: Key under which the parent stores this node ('' by default).
        Results: Keyword indexes recorded on this node, in insertion order.
        m_values: Mapping of child key -> child ``TrieNode``.
        Failure: Failure link, filled in by the builder (None by default).
        Parent: Parent node (None by default).
    """

    # One node exists per trie character, so avoid a per-instance __dict__.
    __slots__ = ('Index', 'Layer', 'End', 'Char', 'Results', 'm_values',
                 'Failure', 'Parent')

    def __init__(self):
        self.Index = 0
        self.Layer = 0
        self.End = False
        self.Char = ''
        self.Results = []
        self.m_values = {}
        self.Failure = None
        self.Parent = None

    def Add(self, c):
        """Return the child stored under key ``c``, creating it if missing."""
        node = self.m_values.get(c)
        if node is None:
            node = TrieNode()
            node.Parent = self
            node.Char = c
            self.m_values[c] = node
        return node

    def SetResults(self, index):
        """Mark this node as a match end and record keyword ``index``.

        No de-duplication is done here; the same index may be appended twice.
        """
        self.End = True
        self.Results.append(index)
|
||
|
||
|
||
class TrieNode2():
    """Search-time automaton node.

    Attributes:
        End: True once at least one keyword index has been recorded here.
        Results: Distinct keyword indexes recorded on this node, in the order
            they were first added.
        m_values: Mapping of key -> next ``TrieNode2``.
        minflag: Lower bound used by ``TryGetValue`` to reject keys quickly.
            Starts at 0xffff and only ever decreases, so it is never greater
            than the smallest key stored.
        maxflag: Largest key stored so far (0 while the node has no keys).
    """

    # One node exists per trie character, so avoid a per-instance __dict__.
    __slots__ = ('End', 'Results', 'm_values', 'minflag', 'maxflag')

    def __init__(self):
        self.End = False
        self.Results = []
        self.m_values = {}
        self.minflag = 0xffff
        self.maxflag = 0

    def Add(self, c, node3):
        """Store ``node3`` as the transition for key ``c`` and widen the key range."""
        if self.minflag > c:
            self.minflag = c
        if self.maxflag < c:
            self.maxflag = c
        self.m_values[c] = node3

    def SetResults(self, index):
        """Mark this node as a match end and record ``index`` if not already present."""
        self.End = True
        if index not in self.Results:
            self.Results.append(index)

    def HasKey(self, c):
        """Return True if a transition for key ``c`` exists."""
        return c in self.m_values

    def TryGetValue(self, c):
        """Return the node reached via key ``c``, or None if there is none."""
        # Cheap range rejection first, then a single dictionary lookup.
        if self.minflag <= c <= self.maxflag:
            return self.m_values.get(c)
        return None
|
||
|
||
|
||
class WordsSearch():
    """Multi-keyword text matcher built on a trie with failure links.

    ``SetKeywords`` compiles the keyword list into a graph of ``TrieNode2``
    objects; the ``Find*``, ``ContainsAny`` and ``Replace`` methods then scan
    a text once, one character at a time.
    """

    def __init__(self):
        # Start with an empty automaton so that searching before
        # SetKeywords() simply finds nothing instead of failing.
        self._first = TrieNode2()
        self._keywords = []
        self._indexs = []

    def SetKeywords(self, keywords):
        """Build the search automaton.

        The effective keyword list is the content of ``sensi_words.txt``
        (located next to this module, split on newlines) followed by
        ``keywords``.

        Args:
            keywords: Iterable of additional keyword strings.

        Raises:
            OSError: If ``sensi_words.txt`` cannot be opened for reading.
        """
        keyword_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'sensi_words.txt')
        # Read-only mode is enough, and the context manager closes the handle.
        with open(keyword_path, 'r', encoding='utf-8') as handle:
            bundled = handle.read().split('\n')

        self._keywords = bundled + list(keywords)
        # One entry per effective keyword. Sizing this from ``keywords`` alone
        # made result lookups fail for most matches.
        self._indexs = list(range(len(self._keywords)))

        # Phase 1: insert every keyword into a trie, grouping nodes by depth.
        root = TrieNode()
        nodes_by_layer = {}
        for i, word in enumerate(self._keywords):
            if not word:
                # Blank lines (e.g. a trailing newline in the file) keep their
                # slot in the keyword list but are not inserted into the trie.
                continue
            nd = root
            for depth, ch in enumerate(word, start=1):
                nd = nd.Add(ord(ch))
                if nd.Layer == 0:  # first time this node is visited
                    nd.Layer = depth
                    nodes_by_layer.setdefault(depth, []).append(nd)
            nd.SetResults(i)

        # Flatten breadth-first so a node's failure target is always
        # processed before the node itself.
        all_nodes = [root]
        for layer in sorted(nodes_by_layer):
            all_nodes.extend(nodes_by_layer[layer])

        # Phase 2: compute failure links and inherit the matches they carry.
        # root.Failure is still None here, which terminates the upward walk.
        for i in range(1, len(all_nodes)):
            nd = all_nodes[i]
            nd.Index = i
            r = nd.Parent.Failure
            c = nd.Char
            while r is not None and c not in r.m_values:
                r = r.Failure
            if r is None:
                nd.Failure = root
            else:
                nd.Failure = r.m_values[c]
                for inherited in nd.Failure.Results:
                    nd.SetResults(inherited)
        root.Failure = root

        # Phase 3: convert to TrieNode2, copying the transitions and results
        # of the whole failure chain (root excluded) into each node.
        compiled = [TrieNode2() for _ in all_nodes]
        for old_node, new_node in zip(all_nodes, compiled):
            for key, child in old_node.m_values.items():
                new_node.Add(key, compiled[child.Index])
            for item in old_node.Results:
                new_node.SetResults(item)

            fallback = old_node.Failure
            while fallback is not root:
                for key, child in fallback.m_values.items():
                    if not new_node.HasKey(key):
                        new_node.Add(key, compiled[child.Index])
                for item in fallback.Results:
                    new_node.SetResults(item)
                fallback = fallback.Failure

        self._first = compiled[0]

    def _walk(self, text):
        """Yield ``(index, node)`` for every position of ``text`` where a keyword ends."""
        first = self._first
        ptr = None
        for index, ch in enumerate(text):
            code = ord(ch)
            tn = None if ptr is None else ptr.TryGetValue(code)
            if tn is None:
                # No continuation from the current state: restart at the root.
                tn = first.TryGetValue(code)
            if tn is not None and tn.End:
                yield index, tn
            ptr = tn

    def _result(self, end, item):
        """Build the result dictionary for keyword number ``item`` ending at ``end``."""
        keyword = self._keywords[item]
        return {"Keyword": keyword, "Success": True, "End": end,
                "Start": end + 1 - len(keyword), "Index": self._indexs[item]}

    def FindFirst(self, text):
        """Return the first match in ``text`` as a dict, or None if there is none.

        The dict has the keys ``Keyword``, ``Success``, ``End``, ``Start`` and
        ``Index``; ``Index`` is the position of the keyword in the effective
        keyword list (bundled words first, then the caller's keywords).
        """
        for index, tn in self._walk(text):
            return self._result(index, tn.Results[0])
        return None

    def FindAll(self, text):
        """Return a list with one result dict for every keyword occurrence in ``text``."""
        found = []
        for index, tn in self._walk(text):
            for item in tn.Results:
                found.append(self._result(index, item))
        return found

    def ContainsAny(self, text):
        """Return True if ``text`` contains at least one keyword."""
        for _ in self._walk(text):
            return True
        return False

    def Replace(self, text, replaceChar='*'):
        """Return ``text`` with every matched keyword character replaced by ``replaceChar``."""
        result = list(text)
        for i, tn in self._walk(text):
            # Results[0] comes from the node itself before any failure-chain
            # entries, so masking its length covers the match ending here.
            length = len(self._keywords[tn.Results[0]])
            for j in range(i + 1 - length, i + 1):
                result[j] = replaceChar
        return ''.join(result)
|
||
|
||
|
||
if __name__ == "__main__":
    # Ad-hoc self-test; prints an error line for every failed expectation.
    # NOTE(review): SetKeywords also loads sensi_words.txt, so the expected
    # values below presumably assume that file adds no extra matches for the
    # sample texts - confirm.
    s = "中国|国人|zg人|乾清宫"
    test = "我是中国人"

    search = WordsSearch()
    search.SetKeywords(s.split('|'))

    print("----------------------------------- WordsSearch -----------------------------------")

    print("WordsSearch FindFirst is run.")
    f = search.FindFirst(test)
    # FindFirst returns None when nothing matches; treat that as a failure
    # instead of crashing on the subscript.
    if f is None or f["Keyword"] != "中国":
        print("WordsSearch FindFirst is error.............................")

    # This check exercises FindAll, so it is labelled accordingly.
    print("WordsSearch FindAll is run.")
    matches = search.FindAll("乾清宫")
    if not matches or matches[0]["Keyword"] != "乾清宫":
        print("WordsSearch FindAll is error.............................")

    print("WordsSearch FindAll is run.")
    matches = search.FindAll(test)
    # Each check guards its own index so a short result list is reported
    # rather than raising IndexError.
    if len(matches) < 1 or matches[0]["Keyword"] != "中国":
        print("WordsSearch FindAll is error.............................")
    if len(matches) < 2 or matches[1]["Keyword"] != "国人":
        print("WordsSearch FindAll is error.............................")
    if len(matches) < 1 or matches[0]["Start"] != 2:
        print("WordsSearch FindAll is error.............................")
    if len(matches) < 1 or matches[0]["End"] != 3:
        print("WordsSearch FindAll is error.............................")
    if len(matches) != 2:
        print("WordsSearch FindAll is error.............................")

    print("WordsSearch ContainsAny is run.")
    b = search.ContainsAny(test)
    if not b:
        print("WordsSearch ContainsAny is error.............................")

    print("WordsSearch Replace is run.")
    txt = search.Replace(test)
    if txt != "我是***":
        print("WordsSearch Replace is error.............................")

    print("----------------------------------- Test End -----------------------------------")