Blame view

kernel/linux-imx6_3.14.28/tools/perf/util/levenshtein.c 2.53 KB
6b13f685e   김민수   BSP 최초 추가
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
  #include "cache.h"
  #include "levenshtein.h"
  
  /*
   * This function implements the Damerau-Levenshtein algorithm to
   * calculate a distance between strings.
   *
   * Basically, it says how many letters need to be swapped, substituted,
   * deleted from, or added to string1, at least, to get string2.
   *
   * The idea is to build a distance matrix for the substrings of both
   * strings.  To avoid a large space complexity, only the last three rows
   * are kept in memory (if swaps had the same or higher cost as one deletion
   * plus one insertion, only two rows would be needed).
   *
   * At any stage, "i + 1" denotes the length of the current substring of
   * string1 that the distance is calculated for.
   *
   * row2 holds the current row, row1 the previous row (i.e. for the substring
   * of string1 of length "i"), and row0 the row before that.
   *
   * In other words, at the start of the big loop, row1[j + 1] contains the
   * Damerau-Levenshtein distance between the substring of string1 of length
   * "i" and the substring of string2 of length "j + 1".
   *
   * All the big loop does is determine the partial minimum-cost paths.
   *
   * It does so by calculating the costs of the path ending in characters
   * i (in string1) and j (in string2), respectively, given that the last
   * operation is a substitution, a swap, a deletion, or an insertion.
   *
   * This implementation allows the costs to be weighted:
   *
   * - w (as in "sWap")
   * - s (as in "Substitution")
   * - a (for insertion, AKA "Add")
   * - d (as in "Deletion")
   *
   * Note that this algorithm calculates a distance _iff_ d == a.
   *
   * Both strings must be NUL-terminated.
   *
   * Returns the weighted cost of turning string1 into string2, or -1 if
   * the working rows could not be allocated.
   */
  int levenshtein(const char *string1, const char *string2,
  		int w, int s, int a, int d)
  {
  	int len1 = (int)strlen(string1), len2 = (int)strlen(string2);
  	/* Each row has one slot per prefix of string2, including the empty one. */
  	size_t cols = (size_t)len2 + 1;
  	int *row0 = malloc(cols * sizeof(*row0));
  	int *row1 = malloc(cols * sizeof(*row1));
  	int *row2 = malloc(cols * sizeof(*row2));
  	int result = -1;
  	int i, j;
  
  	/* Bail out before touching any row; free(NULL) below is harmless. */
  	if (!row0 || !row1 || !row2)
  		goto out;
  
  	/* Empty prefix of string1: reaching j characters costs j insertions. */
  	for (j = 0; j <= len2; j++)
  		row1[j] = j * a;
  	for (i = 0; i < len1; i++) {
  		int *dummy;
  
  		/* Empty prefix of string2: delete all i + 1 characters. */
  		row2[0] = (i + 1) * d;
  		for (j = 0; j < len2; j++) {
  			/* substitution */
  			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
  			/* swap */
  			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
  					string1[i] == string2[j - 1] &&
  					row2[j + 1] > row0[j - 1] + w)
  				row2[j + 1] = row0[j - 1] + w;
  			/* deletion */
  			if (row2[j + 1] > row1[j + 1] + d)
  				row2[j + 1] = row1[j + 1] + d;
  			/* insertion */
  			if (row2[j + 1] > row2[j] + a)
  				row2[j + 1] = row2[j] + a;
  		}
  
  		/* Rotate the rows; the oldest buffer is reused as the next row. */
  		dummy = row0;
  		row0 = row1;
  		row1 = row2;
  		row2 = dummy;
  	}
  
  	/* After the final rotation the last computed row lives in row1. */
  	result = row1[len2];
  out:
  	free(row0);
  	free(row1);
  	free(row2);
  
  	return result;
  }