| by suyi | No comments

Erlang:文本验证

%%%-------------------------------------------------------------------
%%% @author suyang
%%% @copyright (C) 2020, <COMPANY>
%%% @doc Text validation: sensitive-word detection and masking backed by an ETS word index.
%%% 
%%% @end
%%% Created : 27. 5月 2020 12:28
%%%-------------------------------------------------------------------
-module(valid_test).
-author("suyang").

%% API
-export([valid_init/0, word_valid/1, test_word/0, word_valid_replace/1]).

-define(ETS_VALID_CONTENT, ets_valid_content).

%% =============================================================================
%% API
%% =============================================================================
%% @doc Creates the sensitive-word ETS table if it does not exist yet and
%% loads the built-in word list into it. Always returns `ok'.
%%
%% Safe to call more than once: the table is only created when
%% ets:info/1 reports it as missing, and re-importing skips words that are
%% already indexed.
%% NOTE(review): the info/new pair is not atomic; two processes calling this
%% at the same instant could still race on ets:new/2 - confirm this is only
%% invoked from a single start-up path.
valid_init() ->
  case ets:info(?ETS_VALID_CONTENT) of
    undefined ->
      ets:new(?ETS_VALID_CONTENT, [named_table, public, set, {read_concurrency, true}]);
    _AlreadyCreated ->
      ok
  end,
  import_words(?ETS_VALID_CONTENT),
  ok.

%% @doc Reports whether the given text contains any word from the filter
%% table. Accepts a binary or a list; an empty list yields `false' without
%% touching the table. A list is first converted with
%% unicode:characters_to_binary/1 and then handled like a binary, which is
%% decoded to a list of code points and scanned against ?ETS_VALID_CONTENT.
%% Any other argument type fails with `function_clause'.
word_valid([]) ->
  false;
word_valid(Utf8Binary) when is_binary(Utf8Binary) ->
  word_valid(unicode:characters_to_list(Utf8Binary, unicode), ?ETS_VALID_CONTENT);
word_valid(Utf8String) when is_list(Utf8String) ->
  word_valid(unicode:characters_to_binary(Utf8String)).

%% Scans a list of code points for a filtered word.
%%
%% At each position the candidate words are those indexed in EtsName under
%% the current head character; the text matches when one of them is a prefix
%% of the remaining input. Returns `true' on the first hit and `false' once
%% the input is exhausted.
word_valid([], _EtsName) ->
  false;
word_valid([HeadChar | TailString] = UniString, EtsName) ->
  StartsHere = fun(Word) -> lists:prefix(Word, UniString) end,
  %% lists:prefix/2 stops at the first differing character, so there is no
  %% need to measure the remaining input at every position.
  lists:any(StartsHere, get_key_word(HeadChar, EtsName))
    orelse word_valid(TailString, EtsName).

%% @doc Returns the text as a list of code points with every filtered word
%% replaced by two asterisks. The input is decoded with
%% unicode:characters_to_list/2; the scan uses ?ETS_VALID_CONTENT and starts
%% from an empty result.
word_valid_replace(Utf8String) ->
  CodePoints = unicode:characters_to_list(Utf8String, unicode),
  replace_sensitive(CodePoints, "", ?ETS_VALID_CONTENT).

%% Entry point of the replacement scan.
%%
%% A non-empty list is handed to private_replace_sensitive/3. Anything else -
%% the empty list, or a non-list term such as the error tuple
%% unicode:characters_to_list/2 returns for undecodable input - ends the scan
%% and yields what has been accumulated in LastReplaced.
replace_sensitive([_ | _] = InputString, LastReplaced, EtsName) ->
  private_replace_sensitive(InputString, LastReplaced, EtsName);
replace_sensitive(_EmptyOrNotAList, LastReplaced, _EtsName) ->
  LastReplaced.

%% @doc Manual smoke test: prints the detection and replacement results for
%% a few sample strings to standard output.
%%
%% The lookups go through ?ETS_VALID_CONTENT, so valid_init/0 must have been
%% called beforehand. String literals are already lists of code points, so
%% they are passed to the API directly instead of being round-tripped through
%% io_lib:format/2 (whose return value is only documented as chars(), not as
%% a one-element list).
test_word() ->
  Clean = "测试",
  io:format("~p ~p~n", [Clean, word_valid_replace(Clean)]),
  io:format("~p~n", [word_valid(Clean)]),
  io:format("~p~n", [word_valid("毛泽东")]),
  io:format("~p~n", [word_valid("测试毛泽东")]),
  Mixed = "测试毛泽东陈毅",
  io:format("~p ~p~n", [Mixed, word_valid_replace(Mixed)]).

%% =============================================================================
%% Internal Functions
%% =============================================================================
%% @doc Loads the built-in filter words into the given ETS table.
%% Every entry of get_filter_content/0 is first converted to a binary, then
%% each binary is indexed with add_word_to_ets/2. Returns `ok'.
import_words(EtsName) ->
  Words = [unicode:characters_to_binary(Term) || Term <- get_filter_content()],
  lists:foreach(fun(Word) -> add_word_to_ets(Word, EtsName) end, Words),
  ok.

%% Indexes one word in EtsName under its first character.
%%
%% The word is decoded to a list of code points. Each table row has the shape
%% {FirstChar, [Word, ...]}; a new word is pushed onto the front of the list
%% for its first character. Returns `ignore' for an empty word or a word
%% that is already present, otherwise the result of ets:insert/2.
add_word_to_ets(Word, EtsName) ->
  index_word(unicode:characters_to_list(Word, unicode), EtsName).

%% Stores an already decoded word; see add_word_to_ets/2.
index_word([], _EtsName) ->
  ignore;
index_word([FirstChar | _] = Chars, EtsName) ->
  Known =
    case ets:lookup(EtsName, FirstChar) of
      [] -> [];
      [{_Key, Words}] -> Words
    end,
  case lists:member(Chars, Known) of
    true -> ignore;
    false -> ets:insert(EtsName, {FirstChar, [Chars | Known]})
  end.

%% Returns the list of words stored in EtsName under KeyChar, or the empty
%% list when the table has no row for that character.
get_key_word(KeyChar, EtsName) ->
  Rows = ets:lookup(EtsName, KeyChar),
  case Rows of
    [{_Char, Words}] -> Words;
    [] -> []
  end.

%% Returns the built-in list of filtered words as string literals.
%% The order of the entries is the order in which import_words/1 indexes them.
get_filter_content() ->
  [
    "毛泽东", "毛主席", "主席", "习主席", "副主席",
    "周恩来", "刘少奇", "朱德", "彭德怀", "林彪",
    "刘伯承", "陈毅", "贺龙", "聂荣臻", "徐向前",
    "罗荣桓", "叶剑英", "李大钊", "陈独秀", "孙中山",
    "孙文", "孙逸仙",
    "邓小平", "陈云", "江泽民", "李鹏", "朱镕基",
    "李瑞环", "尉健行", "李岚清", "胡锦涛", "罗干",
    "温家宝", "吴邦国", "曾庆红"
  ].

%% @doc Masks every filtered word in InputString and returns LastReplaced
%% followed by the masked text.
%%
%% InputString is a list of code points, LastReplaced the text produced so
%% far, EtsName the word index table. Each matched word is replaced by "**"
%% regardless of its length; all other characters are copied unchanged.
private_replace_sensitive(InputString, LastReplaced, EtsName) ->
  Masked = mask_sensitive(InputString, length(InputString), EtsName, []),
  LastReplaced ++ Masked.

%% Single left-to-right pass over the text.
%%
%% RemainingLen is the length of Remaining, carried along so it is measured
%% only once. Output is collected in reverse in AccRev and flipped at the end,
%% which avoids appending to the end of a growing list for every character.
mask_sensitive([], _RemainingLen, _EtsName, AccRev) ->
  lists:reverse(AccRev);
mask_sensitive([HeadChar | TailString] = Remaining, RemainingLen, EtsName, AccRev) ->
  %% match_replace/4 keeps the first non-zero length it sees, so the first
  %% matching word in the stored list decides how many characters are masked.
  FirstMatch = fun(Word, Last) ->
    match_replace(Word, Last, Remaining, RemainingLen)
               end,
  case lists:foldl(FirstMatch, 0, get_key_word(HeadChar, EtsName)) of
    0 ->
      mask_sensitive(TailString, RemainingLen - 1, EtsName, [HeadChar | AccRev]);
    SensWordLen ->
      Rest = lists:nthtail(SensWordLen, Remaining),
      mask_sensitive(Rest, RemainingLen - SensWordLen, EtsName, [$*, $* | AccRev])
  end.

%% Fold step used to find the first filtered word that starts the input.
%%
%% Word        - candidate word (list of code points)
%% Last        - length found by an earlier fold step, 0 if none yet
%% InputString - remaining input (list of code points)
%% The fourth argument is the length of InputString; it is kept for the
%% existing callers but no longer needed, because lists:prefix/2 already
%% returns false when Word is longer than the input.
%%
%% Returns Last unchanged once it is non-zero; otherwise the length of Word
%% when Word is a prefix of InputString, else 0.
match_replace(Word, 0, InputString, _InputStrLen) ->
  case lists:prefix(Word, InputString) of
    true -> length(Word);
    false -> 0
  end;
match_replace(_Word, Last, _InputString, _InputStrLen) ->
  Last.

发表评论