%% Erlang: text validation (sensitive-word filter)
%%%-------------------------------------------------------------------
%%% @author suyang
%%% @copyright (C) 2020, <COMPANY>
%%% @doc 文本验证
%%%
%%% @end
%%% Created : 27. 5月 2020 12:28
%%%-------------------------------------------------------------------
-module(valid_test).
-author("suyang").
%% API
-export([valid_init/0, word_valid/1, test_word/0, word_valid_replace/1]).
-define(ETS_VALID_CONTENT, ets_valid_content).
%% =============================================================================
%% API
%% =============================================================================
%% @doc Initialises the filter-word table and loads the built-in word list.
%%
%% The named, public ETS table is created only when it does not exist yet, so
%% calling this function again does not fail with `badarg' from `ets:new/2';
%% a repeated call simply re-imports the words (duplicates are skipped by
%% `add_word_to_ets/2').
%%
%% NOTE(review): the table is created by, and therefore owned by, the calling
%% process - confirm that the caller is long-lived.
%%
%% Returns `ok'.
valid_init() ->
    case ets:info(?ETS_VALID_CONTENT) of
        undefined ->
            ets:new(?ETS_VALID_CONTENT, [named_table, public, set, {read_concurrency, true}]);
        _AlreadyCreated ->
            ok
    end,
    import_words(?ETS_VALID_CONTENT),
    ok.
%% @doc Checks whether the given text contains any filter word.
%%
%% Accepts character data either as a list or as a binary. A list is first
%% converted to a binary with `unicode:characters_to_binary/1' and then handled
%% by the binary clause, which decodes it into a list of code points and scans
%% it against the default filter table.
%%
%% Returns `true' when a filter word is found, `false' otherwise (an empty
%% list is always `false').
word_valid([]) ->
    false;
word_valid(Text) when is_list(Text) ->
    word_valid(unicode:characters_to_binary(Text));
word_valid(Text) when is_binary(Text) ->
    CodePoints = unicode:characters_to_list(Text, unicode),
    word_valid(CodePoints, ?ETS_VALID_CONTENT).
%% Scans a list of code points for any filter word stored in `EtsName'.
%%
%% At each position the candidate words are those indexed under the current
%% head character; the position matches when one of them is a prefix of the
%% remaining text. `lists:prefix/2' covers the "word longer than text",
%% "same length" and "word shorter than text" cases in one call, so the
%% remaining text's length no longer has to be recomputed at every position.
%%
%% Returns `true' on the first match, `false' when the text is exhausted.
word_valid([], _EtsName) ->
    false;
word_valid([HeadChar | TailString] = UniString, EtsName) ->
    StartsWithWord = fun(Word) -> lists:prefix(Word, UniString) end,
    case lists:any(StartsWithWord, get_key_word(HeadChar, EtsName)) of
        true -> true;
        false -> word_valid(TailString, EtsName)
    end.
%% @doc Replaces the filter words found in the given text.
%%
%% The input is decoded into a list of code points with
%% `unicode:characters_to_list/2' and handed to `replace_sensitive/3' with an
%% empty accumulator and the default filter table; the result of that call is
%% returned as is.
word_valid_replace(Utf8String) ->
    replace_sensitive(
        unicode:characters_to_list(Utf8String, unicode),
        [],
        ?ETS_VALID_CONTENT
    ).
%% Entry point of the replacement scan.
%%
%% A non-empty list is passed on to `private_replace_sensitive/3'. Anything
%% else - the empty list, or a term that is not a list at all - ends the scan
%% and returns what has been accumulated so far in `LastReplaced'.
replace_sensitive([_ | _] = InputString, LastReplaced, EtsName) ->
    private_replace_sensitive(InputString, LastReplaced, EtsName);
replace_sensitive(_EmptyOrNotAList, LastReplaced, _EtsName) ->
    LastReplaced.
%% Manual smoke test: prints the results of word_valid/1 and
%% word_valid_replace/1 for a few sample strings.
%% Both functions read the filter table, so valid_init/0 is expected to have
%% been called beforehand.
test_word() ->
    Plain = "测试",
    [PlainChars] = io_lib:format("~ts", [Plain]),
    io:format("~p ~p~n", [Plain, word_valid_replace(Plain)]),
    io:format("~p~n", [word_valid(PlainChars)]),
    %% The remaining detection samples are formatted and checked one by one,
    %% in this order.
    lists:foreach(
        fun(Sample) ->
            [SampleChars] = io_lib:format("~ts", [Sample]),
            io:format("~p~n", [word_valid(SampleChars)])
        end,
        ["毛泽东", "测试毛泽东"]
    ),
    Mixed = "测试毛泽东陈毅",
    io:format("~p ~p~n", [Mixed, word_valid_replace(Mixed)]).
%% =============================================================================
%% Internal Functions
%% =============================================================================
%% @doc Loads the built-in filter words into the ETS table `EtsName'.
%%
%% Every entry returned by `get_filter_content/0' is first converted to a
%% binary; the binaries are then stored one by one via `add_word_to_ets/2'.
%% Returns `ok'.
import_words(EtsName) ->
    Binaries = [unicode:characters_to_binary(Term) || Term <- get_filter_content()],
    Store = fun(Bin) -> add_word_to_ets(Bin, EtsName) end,
    lists:foreach(Store, Binaries),
    ok.
%% Stores one filter word in the ETS table `EtsName'.
%%
%% The word is decoded into a list of code points and filed under its first
%% character: the table maps a character to the list of words starting with
%% it. New words are prepended to that list; a word already present is left
%% alone.
%%
%% Returns `ignore' for an empty or already-known word, otherwise the result
%% of `ets:insert/2'.
add_word_to_ets(Word, EtsName) ->
    case unicode:characters_to_list(Word, unicode) of
        [] ->
            ignore;
        Chars ->
            [Initial | _Rest] = Chars,
            case ets:lookup(EtsName, Initial) of
                [{_Key, Known}] ->
                    case lists:member(Chars, Known) of
                        true -> ignore;
                        false -> ets:insert(EtsName, {Initial, [Chars | Known]})
                    end;
                [] ->
                    ets:insert(EtsName, {Initial, [Chars]})
            end
    end.
%% Returns the list of filter words stored under `KeyChar' in `EtsName',
%% or an empty list when the table has no entry for that character.
get_key_word(KeyChar, EtsName) ->
    Found = ets:lookup(EtsName, KeyChar),
    case Found of
        [{_Char, Words}] -> Words;
        [] -> []
    end.
%% Returns the built-in list of filter words as string literals.
%% The two groups below are concatenated in order.
get_filter_content() ->
    FirstGroup = [
        "毛泽东", "毛主席", "主席", "习主席", "副主席", "周恩来", "刘少奇", "朱德",
        "彭德怀", "林彪", "刘伯承", "陈毅", "贺龙", "聂荣臻", "徐向前", "罗荣桓",
        "叶剑英", "李大钊", "陈独秀", "孙中山", "孙文", "孙逸仙"
    ],
    SecondGroup = [
        "邓小平", "陈云", "江泽民", "李鹏", "朱镕基", "李瑞环", "尉健行", "李岚清",
        "胡锦涛", "罗干", "温家宝", "吴邦国", "曾庆红"
    ],
    FirstGroup ++ SecondGroup.
%% @doc Scans `InputString' (a list of code points) for filter words and
%% replaces each occurrence with "**".
%%
%% The result is `LastReplaced' followed by the processed text. The scan runs
%% in a local loop that builds its output in reverse and tracks the remaining
%% length, so the text is neither re-measured nor re-appended to at every
%% character.
private_replace_sensitive(InputString, LastReplaced, EtsName) ->
    Processed = replace_sensitive_loop(InputString, length(InputString), [], EtsName),
    LastReplaced ++ Processed.

%% Walks the remaining text. `RemainingLen' is always the length of the first
%% argument; `RevAcc' holds the output produced so far, in reverse order.
replace_sensitive_loop([], _RemainingLen, RevAcc, _EtsName) ->
    lists:reverse(RevAcc);
replace_sensitive_loop([HeadChar | TailString] = Remaining, RemainingLen, RevAcc, EtsName) ->
    WordList = get_key_word(HeadChar, EtsName),
    Match = fun(Word, Last) ->
        match_replace(Word, Last, Remaining, RemainingLen)
    end,
    case lists:foldl(Match, 0, WordList) of
        0 ->
            %% No filter word starts here: keep the character and move on.
            replace_sensitive_loop(TailString, RemainingLen - 1, [HeadChar | RevAcc], EtsName);
        SensWordLen ->
            %% Skip the matched word and emit the two-character mask instead.
            LeftString = lists:nthtail(SensWordLen, Remaining),
            replace_sensitive_loop(
                LeftString,
                RemainingLen - SensWordLen,
                [$*, $* | RevAcc],
                EtsName
            )
    end.
%% Fold step that looks for a filter word at the head of `InputString'.
%%
%% `Last' is the fold accumulator: 0 means no word has matched yet; any other
%% value is the length of a word that already matched and is passed through
%% untouched, so the first matching word in the list wins.
%%
%% When `Last' is 0, returns the length of `Word' if it is no longer than
%% `InputStrLen' and is a prefix of `InputString'; otherwise returns 0.
%% `InputStrLen' is expected to be the length of `InputString'.
match_replace(Word, 0, InputString, InputStrLen) ->
    WordLen = length(Word),
    case WordLen =< InputStrLen andalso lists:prefix(Word, InputString) of
        true -> WordLen;
        false -> 0
    end;
match_replace(_Word, Last, _InputString, _InputStrLen) ->
    Last.
%% (Web page footer, not part of the module: "Post a comment" /
%% "You must be logged in to post a comment.")