24 files changed

+648
-564
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
root = true
2+
3+
[*]
4+
indent_style = space
5+
indent_size = 2
6+
charset = utf-8
7+
trim_trailing_whitespace = false
8+
insert_final_newline = false
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"parser": "@typescript-eslint/parser",
3-
"extends": ["alloy", "alloy/typescript", "plugin:prettier/recommended"],
4-
"plugins": ["@typescript-eslint", "prettier"],
3+
"extends": ["alloy", "alloy/typescript"],
4+
"plugins": ["@typescript-eslint"],
55
"env": {
66
"browser": true,
77
"node": true,
@@ -32,6 +32,7 @@
3232
"checkLoops": false
3333
}
3434
],
35+
"semi": [2, "never"],
3536
"no-control-regex": "error",
3637
"no-debugger": "error",
3738
"no-dupe-args": "error",
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,5 @@ pnpm-lock.yaml
77
lib
88

99
dist
10+
*.log
11+
.cache
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"singleQuote": true,
3+
"semi": false
4+
}
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,4 @@
33
"editor.formatOnSave": true
44
},
55
"typescript.tsdk": "node_modules/typescript/lib",
6-
"editor.defaultFormatter": "esbenp.prettier-vscode"
7-
}
6+
}
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
## Release Notes
22

3+
### 1.2.0
4+
* Feature: `longestCommonSubsequence` is also available as `lcs`
5+
* Feature: `metricLcs` is also available as `mlcs`
6+
7+
38
### 1.1.0
49
* Feature: Support import&require
510
* Optimize: JS to Ts
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ A library implementing different string similarity, distance and sortMatch measu
2020
- [Cosine similarity](#cosine-similarity)
2121
- [Sorensen-Dice coefficient](#sorensen-dice-coefficient)
2222
- [API](#api)
23+
- [Methods](#methods)
2324
- [similarity](#similarity)
2425
- [params](#params)
2526
- [return](#return)
@@ -91,15 +92,16 @@ The Levenshtein distance between two words is the minimum number of single-chara
9192
It is a metric string distance. This implementation uses dynamic programming (Wagner–Fischer algorithm), with only 2 rows of data. The space requirement is thus O(m) and the algorithm runs in O(m.n).
9293

9394
```js
94-
import { Levenshtein } from "string-comparison"
95+
import { levenshtein } from "string-comparison"
96+
import type {SortMatchResultType} from "string-comparison"
9597

9698
const Thanos = 'healed'
9799
const Rival = 'sealed'
98100
const Avengers = ['edward', 'sealed', 'theatre']
99101

100-
console.log(Levenshtein.similarity(Thanos, Rival))
101-
console.log(Levenshtein.distance(Thanos, Rival))
102-
console.log(Levenshtein.sortMatch(Thanos, Avengers))
102+
console.log(levenshtein.similarity(Thanos, Rival))
103+
console.log(levenshtein.distance(Thanos, Rival))
104+
console.log(levenshtein.sortMatch(Thanos, Avengers) as SortMatchResultType[])
103105

104106
// output
105107
0.8333333333333334
@@ -129,15 +131,18 @@ This class implements the dynamic programming approach, which has a space requir
129131
In "Length of Maximal Common Subsequences", K.S. Larsen proposed an algorithm that computes the length of LCS in time O(log(m).log(n)). But the algorithm has a memory requirement O(m.n²) and was thus not implemented here.
130132

131133
```js
132-
import { LongestCommonSubsequence } from "string-comparison"
134+
import { longestCommonSubsequence } from "string-comparison"
135+
// or
136+
import { lcs } from "string-comparison"
137+
133138

134139
const Thanos = 'healed'
135140
const Rival = 'sealed'
136141
const Avengers = ['edward', 'sealed', 'theatre']
137142

138-
console.log(LongestCommonSubsequence.similarity(Thanos, Rival))
139-
console.log(LongestCommonSubsequence.distance(Thanos, Rival))
140-
console.log(LongestCommonSubsequence.sortMatch(Thanos, Avengers))
143+
console.log(lcs.similarity(Thanos, Rival))
144+
console.log(lcs.distance(Thanos, Rival))
145+
console.log(lcs.sortMatch(Thanos, Avengers))
141146

142147
// output
143148
0.8333333333333334
@@ -157,15 +162,17 @@ http://heim.ifi.uio.no/~danielry/StringMetric.pdf
157162
The distance is computed as 1 - |LCS(s1, s2)| / max(|s1|, |s2|)
158163

159164
```js
160-
import { MetricLCS } from "string-comparison"
165+
import { metricLcs } from "string-comparison"
166+
// or
167+
import { mlcs } from "string-comparison"
161168

162169
const Thanos = 'healed'
163170
const Rival = 'sealed'
164171
const Avengers = ['edward', 'sealed', 'theatre']
165172

166-
console.log(MetricLCS.similarity(Thanos, Rival))
167-
console.log(MetricLCS.distance(Thanos, Rival))
168-
console.log(MetricLCS.sortMatch(Thanos, Avengers))
173+
console.log(metricLcs.similarity(Thanos, Rival))
174+
console.log(metricLcs.distance(Thanos, Rival))
175+
console.log(metricLcs.sortMatch(Thanos, Avengers))
169176

170177
// output
171178
0.8333333333333334
@@ -185,7 +192,7 @@ Distance is computed as 1 - similarity.
185192
Jaccard index is a metric distance.
186193

187194
```js
188-
import { Cosine } from "string-comparison"
195+
import { cosine } from "string-comparison"
189196
```
190197

191198
## Sorensen-Dice coefficient
@@ -195,10 +202,18 @@ Similar to Jaccard index, but this time the similarity is computed as 2 * |V1 in
195202
Distance is computed as 1 - similarity.
196203

197204
```js
198-
import { DiceCoefficient } from "string-comparison"
205+
import { diceCoefficient } from "string-comparison"
199206
```
200207

201208
## API
209+
* `cosine`
210+
* `diceCoefficient`
211+
* `jaccardIndex`
212+
* `levenshtein`
213+
* `lcs` = `longestCommonSubsequence`
214+
* `mlcs` = `metricLcs`
215+
216+
## Methods
202217
* `similarity`.
203218
* `distance`.
204219
* `sortMatch`
@@ -222,8 +237,8 @@ Implementing algorithms define a distance between strings (0 means strings are i
222237

223238
#### params
224239

225-
1. thanos [String]
226-
2. rival [String]
240+
1. `thanos` [String]
241+
2. `rival` [String]
227242

228243
#### return
229244

@@ -238,7 +253,7 @@ Return a number
238253

239254
#### return
240255

241-
Return an array of objects. ex:
256+
Returns an array of `SortMatchResultType` objects, e.g.:
242257
```js
243258
[
244259
{ member: 'edward', rating: 0.16666666666666663 },
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
{
22
"name": "string-comparison",
3-
"version": "1.1.0",
3+
"version": "1.2.0",
44
"description": "A library implementing different string similarity",
55
"main": "dist/index.js",
66
"module": "dist/index.mjs",
77
"types": "dist/index.d.ts",
8+
"engines": {
9+
"node": "^16.0.0 || >=18.0.0"
10+
},
811
"exports": {
912
".": {
1013
"import": "./dist/index.mjs",
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,57 @@
1-
import SimilarityResult from "./SimilarityResult";
1+
/**
 * One entry of the array returned by `Similarity#sortMatch`.
 */
export interface SortMatchResultType {
  /** The candidate string that was compared against the reference. */
  member: string
  /** Position of the candidate in the array passed to `sortMatch`. */
  index: number
  /** Value returned by `similarity` for this candidate. */
  rating: number
}

/**
 * Base class for string-similarity measures.
 *
 * Subclasses implement `similarity` and `distance`; `sortMatch` is built on
 * top of `similarity`. The static helpers normalise and validate arguments.
 */
export default abstract class Similarity {
  /**
   * Normalises both inputs by removing every whitespace run and lowercasing.
   *
   * @param thanos - first string
   * @param rival - second string
   * @returns the two normalised strings, in the same order
   */
  public static initParams(thanos: string, rival: string): string[] {
    return [
      thanos.replace(/\s+/g, '').toLowerCase(),
      rival.replace(/\s+/g, '').toLowerCase(),
    ]
  }

  /**
   * Runtime guard for the first argument (untyped JS callers can pass anything).
   *
   * @throws Error when `thanos` is not a string
   */
  protected static checkThanosType(thanos: string): void {
    if (typeof thanos !== 'string')
      throw new Error('first argument should be a string')
  }

  /**
   * Runtime guard for the second argument of pairwise comparisons.
   *
   * @throws Error when `rival` is not a string
   */
  protected static checkRivalType(rival: string): void {
    if (typeof rival !== 'string')
      throw new Error('second argument should be a string')
  }

  /**
   * Runtime guard for the candidate list given to `sortMatch`.
   *
   * @throws Error when `avengers` is not an array or holds a non-string element
   */
  protected static checkAvengersType(avengers: string[]): void {
    // `some` returns a boolean. The previous `find` returned the offending
    // element itself, so falsy non-strings (0, null, undefined, false, NaN)
    // were found but then treated as "nothing found".
    if (
      !Array.isArray(avengers) ||
      avengers.some((s) => typeof s !== 'string')
    )
      throw new Error('second argument should be an array of strings')
  }

  /**
   * Rates every candidate against `thanos` and returns the ratings sorted in
   * ascending order, so the highest-rated candidate is the last element.
   *
   * @param thanos - reference string
   * @param avengers - candidate strings
   * @returns one entry per candidate, carrying its original index and rating
   * @throws Error when `thanos` is not a string or `avengers` is not an array of strings
   */
  public sortMatch(thanos: string, avengers: string[]): SortMatchResultType[] {
    Similarity.checkThanosType(thanos)
    Similarity.checkAvengersType(avengers)

    return avengers
      .map((str, index) => {
        return {
          member: str,
          index,
          rating: this.similarity(thanos, str),
        }
      })
      .sort((a, b) => a.rating - b.rating)
  }

  /**
   * Compares two strings and returns their similarity rating.
   */
  public abstract similarity(thanos: string, rival: string): number

  /**
   * Returns the distance between two strings.
   */
  public abstract distance(thanos: string, rival: string): number
}
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,40 @@
1-
import Similarity from "../interface/Similarity";
1+
import Similarity from '../interface/Similarity'
22

33
/**
 * Cosine similarity over character-presence vectors.
 *
 * Each string is reduced to a 0/1 vector recording which of the distinct
 * characters of both strings it contains; the rating is the cosine of the
 * angle between the two vectors.
 */
export default class Cosine extends Similarity {
  /**
   * @param pThanos - first string
   * @param pRival - second string
   * @returns 1 when both normalised strings are empty or equal, 0 when exactly
   *   one is empty, otherwise the cosine of the two presence vectors
   * @throws Error when either argument is not a string
   */
  public similarity(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    // Strip whitespace and lowercase both inputs before comparing
    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    if (!thanos.length && !rival.length) return 1
    if (!thanos.length || !rival.length) return 0
    if (thanos === rival) return 1

    // Array.from walks the string by code point, so astral characters
    // (e.g. emoji) stay whole. split('') cut them into surrogate halves and
    // made unrelated characters that share a high surrogate look similar.
    const thanosChars = Array.from(thanos)
    const rivalChars = Array.from(rival)

    // Distinct characters of both strings define the vector dimensions
    const common = Array.from(new Set(thanosChars.concat(rivalChars)))

    const vectorThanos = this.stringVectorization(thanosChars, common)
    const vectorRival = this.stringVectorization(rivalChars, common)

    let dotProduct = 0
    let squaredThanos = 0
    let squaredRival = 0

    for (let i = 0; i < vectorThanos.length; ++i) {
      dotProduct += vectorThanos[i] * vectorRival[i]
      squaredThanos += vectorThanos[i] * vectorThanos[i]
      squaredRival += vectorRival[i] * vectorRival[i]
    }

    return dotProduct / (Math.sqrt(squaredThanos) * Math.sqrt(squaredRival))
  }

  /**
   * @returns `1 - similarity(thanos, rival)`
   */
  public distance(thanos: string, rival: string): number {
    return 1.0 - this.similarity(thanos, rival)
  }

  /**
   * Builds the presence vector of `chars` over the dimensions in `common`:
   * 1 where the character occurs in `chars`, 0 otherwise.
   */
  private stringVectorization(
    chars: readonly string[],
    common: readonly string[]
  ): number[] {
    // Set lookup avoids rescanning `chars` for every dimension
    const present = new Set(chars)
    return common.map((v) => (present.has(v) ? 1 : 0))
  }
}

0 commit comments

Comments
 (0)