Warning: WP_Syntax::substituteToken(): Argument #1 ($match) must be passed by reference, value given in /www/wwwroot/fawdlstty.com/wp-content/plugins/wp-syntax/wp-syntax.php on line 383
Warning: WP_Syntax::substituteToken(): Argument #1 ($match) must be passed by reference, value given in /www/wwwroot/fawdlstty.com/wp-content/plugins/wp-syntax/wp-syntax.php on line 383
最近想到一个关于高效字符串查找算法的设想,然后果断实现之,算法基于哈希表,用于源字符串特别长的情况,查找的子字符串越长、越没规律,那么速度越快。可能已经有人做过,不过我撸代码前还没听说过类似算法,算是一种轮子吧。
基本实现的思路是:首先建立一个hash_map,然后将子字符串所有字符及位置录入字符串中,如下图所示:
对于需要查找的字符串(比如在很长的字符串文本中查找“abcdefga”这一串字符),构建如上所示哈希表,键值名为子字符串出现的字符,值为出现的位置。
构建好之后呢,就好玩了,我只说说正向查找原理,逆向查找类似。首先,来一个假设,我就假设源字符串为“abababcdefgaaaaa”这样吧,第一次,取源字符串中,(子字符串长度-1)这个位置的字符,值为d,然后取哈希表的值,为3,那么,将源字符串中7-3的位置开始,与子字符串相比较,比较结果较满意,第一次就查找成功,那么直接返回7-3=4。
实现代码如下,longstr.hpp:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | #pragma once #ifndef _LONGSTR_HPP_ #define _LONGSTR_HPP_ #include <string> #include <cstring> #include <hash_map> template<typename charT> class longstr { protected: //获取目标字符串哈希表 static std::hash_map<charT, int> get_hm (std::basic_string<charT>& strfind) { int sflen = strfind.length (); std::hash_map<charT, int> hm; //hm.reserve (256); for (int i = 0; i < sflen; i++) hm.insert (pair<char, int> (strfind [i], i)); return hm; } //正向查找算法实现 static int _base_find (std::basic_string<charT>& src, std::basic_string<charT>& strfind, std::hash_map<charT, int>& hm, int pos) { int srclen = src.length (), sflen = strfind.length (); if (srclen < 1 || sflen < 1 || srclen < sflen) return -1; std::hash_map<charT, int>::iterator iter; for (int i = pos + sflen - 1; i < srclen; i += sflen) { iter = hm.lower_bound (src [i]); while (iter != hm.end ()) { if (!memcmp (&src [i - iter->second], &strfind [0], sflen)) return i - iter->second; iter++; } } return -1; } //反向查找算法实现 static int _base_find_reverse (std::basic_string<charT>& src, std::basic_string<charT>& strfind, std::hash_map<charT, int>& hm, int pos) { int srclen = src.length (), sflen = strfind.length (); if (srclen < 1 || sflen < 1 || srclen < sflen) return -1; std::hash_map<charT, int>::iterator iter; for (int i = pos - sflen + 1; i >= 0; i -= sflen) { iter = hm.lower_bound (src [i]); while (iter != hm.end ()) { if (i < iter->second) break; if (!memcmp (&src [i - iter->second], &strfind [0], sflen)) return i - iter->second; iter++; } } return -1; } public: //正向查找 static int find (std::basic_string<charT>& src, std::basic_string<charT>& strfind, int pos = 0) { return longstr::_base_find (src, strfind, longstr::get_hm (strfind), pos); } //正向查找,回调返值 static void find (std::basic_string<charT>& src, std::basic_string<charT>& strfind, bool (*callback)(int)) { std::hash_map<charT, int> hm = longstr::get_hm (strfind); int pos = longstr::_base_find (src, strfind, hm, 0); while (pos != -1 && callback (pos)) pos = longstr::_base_find (src, strfind, hm, pos + 1); } //反向查找 static int find_reverse (std::basic_string<charT>& src, std::basic_string<charT>& strfind, int pos = 0) { if (!pos) pos = src.length () - 1; return longstr::_base_find_reverse (src, strfind, longstr::get_hm (strfind), pos); } //反向查找,回调返值 static void find_reverse (std::basic_string<charT>& src, std::basic_string<charT>& strfind, bool (*callback)(int)) { std::hash_map<charT, int> hm = longstr::get_hm (strfind); int pos = longstr::_base_find_reverse (src, strfind, hm, 0); while (pos != -1 && callback (pos)) pos = longstr::_base_find_reverse (src, strfind, hm, pos - 1); } //获取找到的目标字符串的个数 static int find_count (std::basic_string<charT>& src, std::basic_string<charT>& strfind) { std::hash_map<charT, int> hm = longstr::get_hm (strfind); int pos = longstr::_base_find (src, strfind, hm, 0), count = 0; while (pos != -1) { count++; pos = longstr::_base_find (src, strfind, hm, pos + 1); } return count; } }; typedef longstr<char> longstr_a; typedef longstr<wchar_t> longstr_w; #endif //_LONGSTR_HPP_ |
示例调用代码:
1 2 3 4 5 6 7 8 9 10 11 | #include <iostream> #include <string> using namespace std; #include "longstr.hpp" int main(int argc, char* argv[]) { string a = "fhaso4dfhui123456sdahfuiasd123456hfiusadifbsaifd", b = "123456"; int i = longstr_a::find_reverse (a, b); cout << i << endl; return 0; } |