字符串相似度算法(SQL Server 与 C# 两种实现)

浏览 : 8546 次 Tue, 30 Sep 2014 09:47:10 GMT
-- get_semblance_By_2words: percentage similarity of two short strings.
--
-- The score is (length of the longest substring shared by both words) * 100
-- divided by the length of the longer word, using integer division.
--
-- Parameters:
--   @word1, @word2  the strings to compare (up to 50 characters each).
--                   Declared nvarchar so Unicode text is not mangled by the
--                   database code page; pass N'...' literals for such text.
-- Returns:
--   The integer score (0-100), implicitly converted to nvarchar(4000)
--   because that is the declared return type callers already rely on.
--   Returns 0 when no shared substring of at least 2 characters exists,
--   including when either argument is NULL or shorter than 2 characters.
--
-- NOTE(review): only substrings of length >= 2 are considered, so words that
-- share just a single character score 0. The C# port of this routine counts
-- 1-character matches; confirm which behaviour is intended.
create function get_semblance_By_2words
(
@word1 nvarchar(50),
@word2 nvarchar(50)
)
returns nvarchar(4000)
as
begin
declare @re int
declare @maxLenth int
declare @len1 int, @len2 int
declare @i int, @l int
declare @tb1 table(child nvarchar(50))
declare @tb2 table(child nvarchar(50))

set @len1 = len(@word1)
set @len2 = len(@word2)

-- The denominator is the length of the longer word.
set @maxLenth = @len1
if @len1 < @len2
begin
set @maxLenth = @len2
end

-- Collect every substring of @word1 with length 2..@len1.
-- For a given length @l the last valid start position is @len1 - @l + 1;
-- the previous bound (@i < len - 1) ignored @l and skipped valid substrings.
set @l = 2
while @l <= @len1
begin
set @i = 1
while @i <= @len1 - @l + 1
begin
insert @tb1 (child) values (substring(@word1, @i, @l))
set @i = @i + 1
end
set @l = @l + 1
end

-- Same enumeration for @word2.
set @l = 2
while @l <= @len2
begin
set @i = 1
while @i <= @len2 - @l + 1
begin
insert @tb2 (child) values (substring(@word2, @i, @l))
set @i = @i + 1
end
set @l = @l + 1
end

-- The longest substring present in both sets determines the score.
-- With no matching rows max() yields NULL, which isnull() maps to 0.
select @re = isnull(max(len(a.child) * 100 / @maxLenth), 0)
from @tb1 as a
inner join @tb2 as b
    on a.child = b.child

return @re
end
GO 
  
--测试 
--select dbo.get_semblance_By_2words('我是谁','我是谁啊')  
--75 
--相似度 
c#------------------------------------------------------

using System;
using System.Collections.Generic;
using System.Text;

namespace ConsoleApplication6
{
    /// <summary>
    /// String similarity based on the longest substring two words have in common.
    /// </summary>
    class semblance
    {

        /// <summary>
        /// Demo entry point: prints the similarity of two sample words and
        /// waits for a line of input before exiting.
        /// </summary>
        static void Main(string[] args)
        {
            string re= get_semblance_By_2words("我是谁", "我是谁啊");
            Console.WriteLine(re);
            Console.ReadLine();
        }

        /// <summary>
        /// Computes the similarity of two words as a percentage string.
        /// The score is (length of the longest common substring) * 100 divided
        /// by the length of the longer word, using integer division.
        /// </summary>
        /// <param name="word1">First word; null is treated as an empty string.</param>
        /// <param name="word2">Second word; null is treated as an empty string.</param>
        /// <returns>
        /// The score followed by a percent sign, e.g. "75%". Returns "0%" when
        /// the words share no characters or when both are empty.
        /// </returns>
        public static string get_semblance_By_2words(string word1, string word2)
        {
            string first = word1 ?? string.Empty;
            string second = word2 ?? string.Empty;

            int maxLength = Math.Max(first.Length, second.Length);

            // Both words empty: nothing to compare, and dividing by zero must be avoided.
            if (maxLength == 0)
                return "0%";

            int re = LongestCommonSubstringLength(first, second) * 100 / maxLength;
            return re.ToString() + "%";
        }

        /// <summary>
        /// Returns the length of the longest run of consecutive characters
        /// that appears in both <paramref name="a"/> and <paramref name="b"/>.
        /// </summary>
        private static int LongestCommonSubstringLength(string a, string b)
        {
            int best = 0;

            // previous[j] / current[j] hold the length of the common run ending
            // at a[i - 2] / a[i - 1] and b[j - 1]; index 0 stays 0 as a sentinel.
            int[] previous = new int[b.Length + 1];
            int[] current = new int[b.Length + 1];

            for (int i = 1; i <= a.Length; i++)
            {
                for (int j = 1; j <= b.Length; j++)
                {
                    if (a[i - 1] == b[j - 1])
                    {
                        current[j] = previous[j - 1] + 1;
                        if (current[j] > best)
                            best = current[j];
                    }
                    else
                    {
                        current[j] = 0;
                    }
                }

                // Reuse the two rows instead of allocating a full matrix.
                int[] swap = previous;
                previous = current;
                current = swap;
            }

            return best;
        }
    }
}