C#通過編輯距離算法實現字符串相似度比較

ybny 9年前發布 | 1K 次閱讀 C#

C#通過編輯距離算法實現字符串相似度比較
編輯距離(Levenshtein 距離):通過插入、刪除、替換一個字符的操作,使得字符串A和字符串B相同,而最少的操作次數就是編輯距離。(若另外允許「交換相鄰字符」的操作,則稱為 Damerau-Levenshtein 距離;下面的代碼只實現插入、刪除、替換三種操作,不包含交換。)
如字符串abcd和aca的距離是2

/// <summary>
/// Computes the Levenshtein (edit) distance between two strings, i.e. the
/// minimum number of single-character insertions, deletions and substitutions
/// needed to turn one string into the other, plus a similarity ratio derived
/// from that distance.
/// </summary>
public class LevenshteinDistance
{
    // Built once by the type initializer, so every read of Instance returns
    // the same object (the previous getter allocated a new one on each call).
    private static readonly LevenshteinDistance _instance = new LevenshteinDistance();

    /// <summary>
    /// Gets the shared instance of this class.
    /// </summary>
    public static LevenshteinDistance Instance
    {
        get
        {
            return _instance;
        }
    }

    /// <summary>
    /// Returns the smallest of three integers.
    /// </summary>
    /// <param name="first">First candidate.</param>
    /// <param name="second">Second candidate.</param>
    /// <param name="third">Third candidate.</param>
    /// <returns>The minimum of the three arguments.</returns>
    public int LowerOfThree(int first, int second, int third)
    {
        return System.Math.Min(first, System.Math.Min(second, third));
    }

    /// <summary>
    /// Computes the edit distance between two strings.
    /// </summary>
    /// <remarks>
    /// Characters are compared as <see cref="char"/> values (UTF-16 code units),
    /// so a character encoded as a surrogate pair counts as two positions.
    /// The method has no side effects; it does not write to the console.
    /// </remarks>
    /// <param name="str1">Source string; must not be null.</param>
    /// <param name="str2">Target string; must not be null.</param>
    /// <returns>The number of single-character edits separating the two strings.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="str1"/> or <paramref name="str2"/> is null.
    /// </exception>
    public int Levenshtein_Distance(string str1, string str2)
    {
        if (str1 == null)
        {
            throw new System.ArgumentNullException("str1");
        }
        if (str2 == null)
        {
            throw new System.ArgumentNullException("str2");
        }

        int n = str1.Length;
        int m = str2.Length;

        // Turning an empty string into the other one takes one insertion per character.
        if (n == 0)
        {
            return m;
        }
        if (m == 0)
        {
            return n;
        }

        // Each cell only depends on the row above and the cell to its left,
        // so two rows are enough instead of the full (n+1) x (m+1) matrix.
        int[] previous = new int[m + 1];
        int[] current = new int[m + 1];

        // Row 0: distance from the empty prefix of str1 to each prefix of str2.
        for (int j = 0; j <= m; j++)
        {
            previous[j] = j;
        }

        for (int i = 1; i <= n; i++)
        {
            char ch1 = str1[i - 1];

            // Column 0: distance from a prefix of str1 to the empty prefix of str2.
            current[0] = i;

            for (int j = 1; j <= m; j++)
            {
                int cost = ch1 == str2[j - 1] ? 0 : 1;
                current[j] = LowerOfThree(
                    previous[j] + 1,          // deletion
                    current[j - 1] + 1,       // insertion
                    previous[j - 1] + cost);  // substitution (free when the characters match)
            }

            // The row just filled becomes the "row above" for the next iteration.
            int[] swap = previous;
            previous = current;
            current = swap;
        }

        return previous[m];
    }

    /// <summary>
    /// Computes the similarity of two strings as a ratio between 0 and 1:
    /// <c>1 - distance / max(length1, length2)</c>.
    /// </summary>
    /// <remarks>
    /// Despite the method name the result is a ratio, not a percentage;
    /// multiply by 100 to display it as a percentage.
    /// </remarks>
    /// <param name="str1">First string; must not be null.</param>
    /// <param name="str2">Second string; must not be null.</param>
    /// <returns>
    /// 1 when the strings are identical (including two empty strings),
    /// 0 when every position differs.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="str1"/> or <paramref name="str2"/> is null.
    /// </exception>
    public decimal LevenshteinDistancePercent(string str1, string str2)
    {
        if (str1 == null)
        {
            throw new System.ArgumentNullException("str1");
        }
        if (str2 == null)
        {
            throw new System.ArgumentNullException("str2");
        }

        int maxLength = System.Math.Max(str1.Length, str2.Length);

        // Two empty strings are identical; returning early avoids dividing by zero.
        if (maxLength == 0)
        {
            return 1m;
        }

        int distance = Levenshtein_Distance(str1, str2);
        return 1 - (decimal)distance / maxLength;
    }
}

/// <summary>
/// Console demo: prints two sample strings and their similarity as a percentage.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Writes both sample strings and their similarity to the
    /// console, then waits for a line of input before returning.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        string str1 = "你好蒂蒂";
        string str2 = "你好蒂芬";

        Console.WriteLine("字符串1 {0}", str1);
        Console.WriteLine("字符串2 {0}", str2);

        // LevenshteinDistancePercent returns a ratio, so scale it by 100 for display.
        decimal similarity = LevenshteinDistance.Instance.LevenshteinDistancePercent(str1, str2);
        Console.WriteLine("相似度 {0} %", similarity * 100);

        // Wait for a line of input before the program returns.
        Console.ReadLine();
    }
}


 本文由用戶 ybny 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯繫管理員。
 轉載本站原創文章,請註明出處,並保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!