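| bigram $w$ | $c_w(\mathbf{x})$ | $c_w(\mathbf{s}^{(1)})$ | $c_w(\mathbf{s}^{(2)})$ | $\min(c_w(\mathbf{x}), c_w(\mathbf{s}^{(1)}))$ | $\min(c_w(\mathbf{x}), c_w(\mathbf{s}^{(2)}))$ |
| --- | --- | --- | --- | --- | --- |
| the cat | 1 | 1 | 0 | 1 | 0 |
| cat is | 0 | 1 | 0 | 0 | 0 |
| is on | 0 | 1 | 0 | 0 | 0 |
| on the | 1 | 1 | 1 | 1 | 1 |
| the mat | 1 | 1 | 0 | 1 | 0 |
| the bird | 0 | 0 | 1 | 0 | 0 |
| bird sat | 0 | 0 | 1 | 0 | 0 |
| sat on | 1 | 0 | 1 | 0 | 1 |
| the bush | 0 | 0 | 1 | 0 | 0 |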
分子
$$\sum_{k=1}^{K} \sum_{w \in \mathcal{W}} \min\left(c_w(\mathbf{x}),\, c_w(\mathbf{s}^{(k)})\right)$$
分母
$$\sum_{k=1}^{K} \sum_{w \in \mathcal{W}} c_w(\mathbf{s}^{(k)})$$
$$\text{ROUGE-N}(\mathbf{x}) = \frac{\sum_{k=1}^{K} \sum_{w \in \mathcal{W}} \min\left(c_w(\mathbf{x}),\, c_w(\mathbf{s}^{(k)})\right)}{\sum_{k=1}^{K} \sum_{w \in \mathcal{W}} c_w(\mathbf{s}^{(k)})} = \frac{3+2}{5+5} = \frac{5}{10} = 0.5$$
3. 程序
代码语言:python
from collections import Counter


def _ngram_counts(text, n):
    """Count the word-level n-grams of *text*.

    The text is tokenized on whitespace and each n-gram is keyed by its
    words joined with a single space. A text with fewer than *n* words
    yields an empty counter.
    """
    tokens = text.split()
    return Counter(
        ' '.join(tokens[i:i + n]) for i in range(len(tokens) - n + 1)
    )


def rouge_n(candidate, references, n=1):
    """Return the ROUGE-N recall of *candidate* against *references*.

    For every reference, each n-gram contributes the smaller of its count
    in the candidate and its count in that reference to the numerator, and
    its count in the reference to the denominator.

    Args:
        candidate: The generated sentence, as a whitespace-separated string.
        references: An iterable of reference sentences (strings).
        n: The n-gram order; 1 for ROUGE-1, 2 for ROUGE-2, and so on.

    Returns:
        The score as a float in [0, 1]. Returns 0.0 when the references
        contain no n-grams at all, instead of dividing by zero.

    Raises:
        ValueError: If *n* is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer')

    candidate_counts = _ngram_counts(candidate, n)
    matching_occurrences = 0
    total_occurrences = 0
    for reference in references:
        reference_counts = _ngram_counts(reference, n)
        # Counter intersection keeps, per n-gram, the minimum of both counts.
        # Counting tokens (not str.count) avoids matching "a" inside "cat".
        overlap = candidate_counts & reference_counts
        matching_occurrences += sum(overlap.values())
        total_occurrences += sum(reference_counts.values())

    if total_occurrences == 0:
        return 0.0
    return matching_occurrences / total_occurrences


main_string = 'the cat sat on the mat'
string1 = 'the cat is on the mat'
string2 = 'the bird sat on the bush'

# ROUGE-1: unigram overlap, (5 + 4) / (6 + 6) = 0.75
print(rouge_n(main_string, [string1, string2], n=1))

# ROUGE-2: bigram overlap, (3 + 2) / (5 + 5) = 0.5
print(rouge_n(main_string, [string1, string2], n=2))