UNIX系统下有一个行编辑器ed,它每次只对一行文本做删除一个字符、插入一个字符或替换一个字符三种操作。例如某一行的内容是“ABC”,经过把第二个字符替换成“D”、删除第一个字符、末尾插入一个字符“B”,这三步操作后,内容就变成了“DCB”。即“ABC”变成“DCB”需要经过3步操作,我们称它们的编辑距离为3。
现在给你两个任意字符串(不包含空格),请帮忙计算它们的最短编辑距离。
输入包含多组数据。
每组数据包含两个字符串m和n,它们仅包含字母,并且长度不超过1024。
对应每组输入,输出最短编辑距离。
ABC CBCD ABC DCB
2 3
ef minDistance_dp(word1, word2): """ word1: 原始字符串 word2: 目标字符串 """ N1, N2 = len(word1), len(word2) if(N1 == 0) or (N2 == 0): if N1 > N2: return N1 else: return N2 if (N1 < N2): return minDistance_dp(word2, word1) k = N2 + 1 res = [i for i in range(k)] for i in range(N1): old = res[0] res[0] = i + 1 for j in range(1, k): tmp = res[j] jk = j - 1 if word1[i] == word2[jk]: res[j] = old else: if tmp < old: if tmp < res[jk]: res[j] = tmp + 1 else: res[j] = res[jk] + 1 else: if old < res[jk]: res[j] = old + 1 else: res[j] = res[jk] + 1 old = tmp return res[N2] def genetare_word(m,n): """ n 表示字符的长度 """ word1 = [chr(randint(65,90)) for _ in range(m)] word2 = [chr(randint(65, 90)) for _ in range(n)] return (word1,word2) # while True: # try: # word1, word2 = raw_input().split() # print(minDistance_dp(word1, word2)) # except: # break if __name__ == '__main__': for _ in range(500): m = randint(1, 1024) n = randint(1, 1024) word1,word2 = genetare_word(1024,1024) result = minDistance_dp(word1,word2) if result == None: print("算法不正确") # profile.run("minDistance_dp(word1, word2)")
def minDistance(word1, word2):
l1, l2 = len(word1) + 1, len(word2) + 1
dp = [[0 for i in range(l2)] for j in range(l1)]
for i in range(l1):
dp[i][0] = i
for j in range(l2):
dp[0][j] = j
for i in range(1, l1):
for j in range(1, l2):
dp[i][j] = min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + (word1[i - 1] != word2[j - 1]))
return dp[-1][-1]
while True:
try:
word1,word2=input().split()
print(minDistance(word1,word2))
except:
break
#include<iostream> #include<vector> #include<string> using namespace std; int main(){ string str1, str2; while(cin >> str1 >> str2){ int n = str1.length(); int m = str2.length(); vector<vector<int> > dp(n+1); for(int i = 0; i <= n; ++i) dp[i].resize(m+1); dp[0][0] = 0; for(int i = 1; i <= n; ++i) dp[i][0] = i; for(int j = 1; j <= m; ++j) dp[0][j] = j; for(int i = 1; i <= n; ++i){ for(int j = 1; j <= m; ++j){ dp[i][j] = min(dp[i-1][j]+1, dp[i][j-1]+1); if(str1[i-1] == str2[j-1]) dp[i][j] = min(dp[i-1][j-1], dp[i][j]); else dp[i][j] = min(dp[i][j], dp[i-1][j-1]+1); } } cout << dp[n][m] << endl; } return 0; }
#include <iostream> #include <string> #include <vector> using namespace std; int minDistance(string word1, string word2) { int m = word1.size(), n = word2.size(); if (word1 == "") return n; if (word2 == "") return m; vector<vector<int> > dp(m+1, vector<int>(n+1)); for (int i = 0; i <= n; i++) dp[0][i] = i; for (int i = 0; i <= m; i++) dp[i][0] = i; for (int i = 1; i <= m; i++) for (int j = 1; j <= n; j++) { if (word1[i-1] == word2[j-1]) dp[i][j] = dp[i-1][j-1]; else { dp[i][j] = min(dp[i-1][j], dp[i][j-1]); dp[i][j] = min(dp[i][j], dp[i-1][j-1])+1; } } return dp[m][n]; } int main() { string s1, s2; while (cin >> s1 >> s2) { cout << minDistance(s1, s2) << endl; } }
import java.util.Scanner; // write your code here public class Main{ public static void main(String[] args){ Scanner in = new Scanner(System.in); Main m = new Main(); while(in.hasNext()){ String[] str = in.nextLine().split(" "); String word1 = str[0]; String word2 = str[1]; int min = m.minDistance(word1,word2); System.out.println(min); } } public int minDistance(String word1,String word2){ int len1 = word1.length(); int len2 = word2.length(); int[][] dp = new int[len1+1][len2+1]; for(int i =0;i<=len1;i++){ dp[i][0] = i; } for(int j =0;j<= len2;j++){ dp[0][j] = j; } for(int i =0;i< len1;i++){ char ch1 = word1.charAt(i); for(int j =0;j< len2;j++){ char ch2 = word2.charAt(j); if(ch1 == ch2){ dp[i+1][j+1] = dp[i][j]; }else{ int replace = dp[i][j] +1;// ch1 代替 ch2 int insert = dp[i][j+1] + 1;// ch2 插入到 ch1 前面的位置 int delete = dp[i+1][j] + 1;// 删除ch2 int min =replace>insert?insert:replace; min = min>delete?delete:min; dp[i+1][j+1] = min; } } } return dp[len1][len2]; } }
def shortestDistance(s1, s2): l1, l2 = len(s1) + 1, len(s2) + 1 a, b = '0' + s1, '0' + s2 dp = [[0] * l2 for i in range(l1)] for i in range(1, l1): dp[i][0] = i for i in range(1, l2): dp[0][i] = i ''' a[i]添加之后相同,需要a[1~i]=b[1~j-1],dp[i][j]=dp[i][j-1]+1 a[i]删除之后相同,需要a[1~i-1]=b[1~j],dp[i][j]=dp[i-1][j]+1 a[i]修改之后相同,需要a[1~i-1]=b[1^j-1],dp[i][j]=dp[i-1][j-1]+1 也可能a[i]不需要修改,就能a[1~i]=b[1~j],dp[i][j]=dp[i-1][j-1] ''' for i in range(1, l1): for j in range(1, l2): dp[i][j] = min(dp[i][j - 1] + 1, dp[i - 1][j] + 1, dp[i - 1][j - 1] + int(a[i] != b[j])) return dp[l1 - 1][l2 - 1] import sys x = sys.stdin.read()[:-1] xx = x.split("<br/>") ans = "" for i in xx: s1, s2 = i.split() ans += f"{shortestDistance(s1, s2)}<br/>" sys.stdout.write(ans[:-5])
#Q2:levenshtein distance:
n =int(input('input n strings:'))
list = []
edit_distance = 0
for i in range(n):
#print('input string %d:'%(i+1))
list.append( input('input string %d:'%(i+1)))
target = input('the target string is:')
for string1 in list:
num = 1
print(string1)
print(target)
if string1 != '' and target != '':
m = len(string1) #the horizontal axis
n = len(target) #the vertical axis
#initialize:
d_matrix = np.mat(np.zeros((n+1,m+1)))
for i in range(m):
d_matrix[0,i+1] = i+1
for i in range(n):
d_matrix[i+1,0] = i+1
#print(d_matrix)
f = 1
j = 1
for cha_j in target:
i = 1
#print(d_matrix[1,5])
for cha_i in string1:
if cha_i == cha_j:
f = 0
else:
f = 1
d_matrix[j,i]=min(d_matrix[j,i-1]+1,d_matrix[j-1,i]+1,d_matrix[j-1,i-1]+f)
i += 1
j +=1
edit_distance = d_matrix[n,m]
else:
if string1 =='' and target =='':
edit_distance = 0
elif string1 =='':
edit_distance = len(target)
else:
edit_distance = len(string)
print("the edit distance of string %d: "%(num),edit_distance)
#include<iostream> #include<string> #include<vector> using namespace std; int findMinCost(string A, int n, string B, int m, int c0, int c1, int c2) { // write code here vector<vector<int > > dp(n+1,vector<int >(m+1)); dp[0][0]=0; for(int i=1;i<n+1;i++) dp[i][0]=dp[i-1][0]+c1; for(int j=1;j<m+1;j++) dp[0][j]=dp[0][j-1]+c0; for(int i=1;i<n+1;i++) for(int j=1;j<m+1;j++) if(A[i-1]==B[j-1]) dp[i][j]=dp[i-1][j-1]; else { int MIN=min(dp[i][j-1]+c0,dp[i-1][j-1]+c2); dp[i][j]=min(dp[i-1][j]+c1,MIN); } return dp[n][m]; } int main(){ string A; string B; while(cin>>A>>B) { cout<<findMinCost(A,A.size(),B,B.size(),1,1,1)<<endl; } return 0; }
#include <bits/stdc++.h> using namespace std; int main() { string a, b; while (cin >> a >> b) { int i, j, alen = a.size(), blen = b.size(); int dp[1024][1024] = {0}; for (i = 0; i < alen; ++i) { if (a[i] == b[0]) { dp[i][0] = dp[i - 1][0]; for (i += 1; i < alen; ++i) { dp[i][0] = dp[i - 1][0] + 1; } } else dp[i][0] = i + 1; } for (i = 0; i < blen; ++i) { if (b[i] == a[0]) { dp[0][i] = dp[0][i - 1]; for (i += 1; i < blen; ++i) { dp[0][i] = dp[0][i - 1] + 1; } } else dp[0][i] = i + 1; } for (i = 1; i < alen; ++i) { for (j = 1; j < blen; ++j) { dp[i][j] = min(dp[i - 1][j - 1] + (a[i] != b[j] ? 1 : 0), min(dp[i][j - 1], dp[i - 1][j]) + 1); } } cout << dp[alen - 1][blen - 1] << endl; } return 0; }
动态规划问题: import java.io.IOException; import java.util.Scanner; public class Levenshtein { public static void main(String[] args) throws IOException { Scanner sc = new Scanner(System.in); while (sc.hasNext()) { String line = sc.nextLine(); String s1 = line.split(" ")[0]; String s2 = line.split(" ")[1]; int[][] dis = new int[s1.length() + 1][s2.length() + 1]; for (int i = 0; i <= s1.length(); i++) { dis[i][0] = i; } for (int j = 0; j <= s2.length(); j++) { dis[0][j] = j; } for (int i = 1; i <= s1.length(); i++) { for (int j = 1; j <= s2.length(); j++) { dis[i][j] = Math.min(dis[i][j - 1] + 1, Math.min(dis[i - 1][j] + 1, dis[i - 1][j - 1] + (s1.charAt(i - 1) == s2.charAt(j - 1) ? 0 : 1))); } } System.out.println(dis[s1.length()][s2.length()]); } } }
import java.util.Arrays; import java.util.Scanner; /** * Created by Olive on 2017/9/7. * UNIX系统下有一个行编辑器ed,它每次只对一行文本做删除一个字符、插入一个字符 * 或替换一个字符三种操作。例如某一行的内容是“ABC”,经过把第二个字符替换成“D”、 * 删除第一个字符、末尾插入一个字符“B”,这三步操作后,内容就变成了“DCB”。 * 即“ABC”变成“DCB”需要经过3步操作,我们称它们的编辑距离为3 */ public class Main { public static void main(String[] args){ Scanner in = new Scanner(System.in); while(in.hasNext()){ String m = in.next(); String n = in.next(); System.out.println(distance(m, n)); } } public static int distance(String str1, String str2){ if(str1 == null || str2 == null || str1.length() == 0|| str2.length() == 0) return 0; int m = str1.length(); int n = str2.length(); // dp[i][j]代表从str1的第1-i字符转换为str2的第1-j字符的最短编辑距离,第0字符为'' int[][] dp = new int[m + 1][n + 1]; for(int i = 1; i <= m; i++){ dp[i][0] = dp[i - 1][0] + 1; } for(int j = 1; j <= n; j++){ dp[0][j] = dp[0][j - 1] + 1; } for(int i = 1; i <= m; i++){ for(int j = 1; j <= n; j++){ if(str1.charAt(i - 1) == str2.charAt(j - 1)) dp[i][j] = dp[i - 1][j - 1]; else dp[i][j] = Math.min(Math.min(dp[i][j - 1], dp[i - 1][j]), dp[i - 1][j - 1]) + 1; } } return dp[m][n]; } }
#include <iostream> #include <string> using namespace std ; string str1, str2; int main( ) { //cout << "输入两个字符串" << endl; while(cin >> str1>> str2 ){ //表示x0~xi-1与y0~yi-1的最短编辑距离 int dp[1100][1100]; for (int i = 0; i <= str2.size(); ++i) dp[0][i] = i ; for (int i = 0; i <= str1.size(); ++i) dp[i][0] = i ; for( int i=1; i<=str1.size(); ++i ) for (int j = 1; j <= str2.size(); ++j) { int cost; if (str1[i - 1] == str2[j - 1]) cost = 0; else cost = 1; int min; if (dp[i - 1][j - 1] + cost < dp[i - 1][j] + 1) min = dp[i - 1][j - 1] + cost; else min = dp[i - 1][j]+1 ;//删除xi if (dp[i][j - 1] + 1 < min)//添加yj min = dp[i][j - 1] + 1; dp[i][j] = min; } cout << dp[str1.size()][str2.size()] << endl ; } return 0 ; }
import java.util.Scanner; public class Main { public static void main(String[] args) { Scanner sc = new Scanner(System.in); while (sc.hasNext()) { String m = sc.next(); String n = sc.next(); int[][] dp = new int[m.length() + 1][n.length() + 1]; for (int i = 0; i < dp.length; i ++ ) dp[i][0] = i; for (int i = 0; i < dp[0].length; i ++ ) dp[0][i] = i; for (int i = 1; i < dp.length; i ++ ) { for (int j = 1; j < dp[0].length; j ++ ) { dp[i][j] = m.charAt(i - 1) == n.charAt(j - 1) ? dp[i - 1][j - 1] : Math.min(dp[i - 1][j - 1] + 1, Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1)); } } System.out.println(dp[m.length()][n.length()]); } } }
import java.util.Scanner;
public class Main { public static void main(String[] args) { Scanner sc=new Scanner(System.in); while(sc.hasNext()){ String line=sc.nextLine(); String[] str=line.split(" "); int m=str[0].length(); int n=str[1].length(); int[][] matrix=new int[m+1][n+1]; for(int i=1;i<=n;i++){ matrix[0][i]=i; } for(int j=1;j<=m;j++){ matrix[j][0]=j; } for(int i=1;i<=m;i++){ for(int j=1;j<=n;j++){ if(str[0].charAt(i-1)==str[1].charAt(j-1)){ matrix[i][j]=matrix[i-1][j-1]; }else{ matrix[i][j]=Math.min(Math.min(matrix[i-1][j]+1, matrix[i][j-1]+1) , matrix[i-1][j-1]+1); } } } System.out.println(matrix[m][n]); } } }