Skip to content

Commit bdb25c2

Browse files
authored
Merge pull request #4519 from PerBothner/clusters
New unicode-graphemes addon.
2 parents 16bd8de + 32c6165 commit bdb25c2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1481
-104
lines changed

.eslintrc.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
"addons/xterm-addon-serialize/benchmark/tsconfig.json",
3131
"addons/xterm-addon-unicode11/src/tsconfig.json",
3232
"addons/xterm-addon-unicode11/test/tsconfig.json",
33+
"addons/xterm-addon-unicode-graphemes/src/tsconfig.json",
34+
"addons/xterm-addon-unicode-graphemes/test/tsconfig.json",
35+
"addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json",
3336
"addons/xterm-addon-web-links/src/tsconfig.json",
3437
"addons/xterm-addon-web-links/test/tsconfig.json",
3538
"addons/xterm-addon-webgl/src/tsconfig.json",
@@ -38,6 +41,7 @@
3841
"sourceType": "module"
3942
},
4043
"ignorePatterns": [
44+
"addons/*/src/third-party/*.ts",
4145
"**/inwasm-sdks/*",
4246
"**/typings/*.d.ts",
4347
"**/node_modules",

.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ jobs:
4242
./addons/xterm-addon-serialize/out-test/* \
4343
./addons/xterm-addon-unicode11/out/* \
4444
./addons/xterm-addon-unicode11/out-test/* \
45+
./addons/xterm-addon-unicode-graphemes/out/* \
46+
./addons/xterm-addon-unicode-graphemes/out-test/* \
4547
./addons/xterm-addon-web-links/out/* \
4648
./addons/xterm-addon-web-links/out-test/* \
4749
./addons/xterm-addon-webgl/out/* \
@@ -68,6 +70,8 @@ jobs:
6870
yarn --frozen-lockfile
6971
yarn install-addons
7072
- name: Lint code
73+
env:
74+
NODE_OPTIONS: --max_old_space_size=4096
7175
run: yarn lint
7276
- name: Lint API
7377
run: yarn lint-api
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
lib
2+
node_modules
3+
out-benchmark
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Blacklist - exclude everything except npm defaults such as LICENSE, etc
2+
*
3+
!*/
4+
5+
# Whitelist - lib/
6+
!lib/**/*.d.ts
7+
8+
!lib/**/*.js
9+
!lib/**/*.js.map
10+
11+
!lib/**/*.css
12+
13+
# Whitelist - src/
14+
!src/**/*.ts
15+
!src/**/*.d.ts
16+
17+
!src/**/*.js
18+
!src/**/*.js.map
19+
20+
!src/**/*.css
21+
22+
# Blacklist - src/ test files
23+
src/**/*.test.ts
24+
src/**/*.test.d.ts
25+
src/**/*.test.js
26+
src/**/*.test.js.map
27+
28+
# Whitelist - typings/
29+
!typings/*.d.ts
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Copyright (c) 2023, The xterm.js authors (https://github.com/xtermjs/xterm.js)
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy
4+
of this software and associated documentation files (the "Software"), to deal
5+
in the Software without restriction, including without limitation the rights
6+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
copies of the Software, and to permit persons to whom the Software is
8+
furnished to do so, subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in
11+
all copies or substantial portions of the Software.
12+
13+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
THE SOFTWARE.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
## xterm-addon-unicode-graphemes
2+
3+
⚠️ **This addon is currently experimental and may introduce unexpected and non-standard behavior**
4+
5+
An addon providing enhanced Unicode support (including grapheme clustering) for xterm.js.
6+
7+
The file `src/third-party/UnicodeProperties.ts` is generated and depends on the Unicode version. See [the unicode-properties project](https://github.com/PerBothner/unicode-properties) for credits and re-generation instructions.
8+
9+
### Install
10+
11+
```bash
12+
npm install --save xterm-addon-unicode-graphemes
13+
```
14+
15+
### Usage
16+
17+
```ts
18+
import { Terminal } from 'xterm';
19+
import { UnicodeGraphemesAddon } from 'xterm-addon-unicode-graphemes';
20+
21+
const terminal = new Terminal();
22+
const unicodeGraphemesAddon = new UnicodeGraphemesAddon();
23+
terminal.loadAddon(unicodeGraphemesAddon);
24+
```
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/**
2+
* Copyright (c) 2019 The xterm.js authors. All rights reserved.
3+
* @license MIT
4+
*/
5+
6+
import { perfContext, before, ThroughputRuntimeCase } from 'xterm-benchmark';
7+
8+
import { spawn } from 'node-pty';
9+
import { Utf8ToUtf32, stringFromCodePoint } from 'common/input/TextDecoder';
10+
import { Terminal } from 'browser/Terminal';
11+
import { UnicodeGraphemeProvider } from 'UnicodeGraphemeProvider';
12+
13+
14+
/**
 * Installs the grapheme-aware Unicode provider directly on the terminal's
 * internal unicode service and selects it as the active version.
 *
 * This resembles what UnicodeGraphemesAddon.activate does under the hood,
 * without going through the addon itself.
 *
 * @param terminal Terminal instance exposing a `unicodeService` member.
 */
function fakedAddonLoad(terminal: any): void {
  const unicodeService = terminal.unicodeService;
  unicodeService.register(new UnicodeGraphemeProvider());
  unicodeService.activeVersion = '15-graphemes';
}
19+
20+
21+
/**
 * Throughput benchmark for terminal writes with the grapheme provider active.
 *
 * The raw output of `ls --color=auto -lR /usr/lib` is captured once through a
 * pty and kept both as UTF-8 bytes (`contentUtf8`) and as a decoded string
 * (`content`). Each nested context then measures writing one of the two
 * representations into a fresh 80x25 terminal.
 */
perfContext('Terminal: ls -lR /usr/lib', () => {
  let content = '';
  let contentUtf8: Uint8Array;

  before(async () => {
    // grab output from "ls -lR /usr/lib"
    const p = spawn('ls', ['--color=auto', '-lR', '/usr/lib'], {
      name: 'xterm-256color',
      cols: 80,
      rows: 25,
      cwd: process.env.HOME,
      env: process.env,
      encoding: (null as unknown as string) // needs to be fixed in node-pty
    });
    const chunks: Buffer[] = [];
    let length = 0;
    p.on('data', data => {
      chunks.push(data as unknown as Buffer);
      length += data.length;
    });
    await new Promise<void>(resolve => p.on('exit', () => resolve()));
    contentUtf8 = Buffer.concat(chunks, length);
    // translate to content string
    const buffer = new Uint32Array(contentUtf8.length);
    const decoder = new Utf8ToUtf32();
    const codepoints = decoder.decode(contentUtf8, buffer);
    for (let i = 0; i < codepoints; ++i) {
      content += stringFromCodePoint(buffer[i]);
      // peek into content to force flat repr in v8
      if (!(i % 10000000)) {
        content[i];
      }
    }
  });

  perfContext('write/string/async', () => {
    let terminal: Terminal;
    before(() => {
      terminal = new Terminal({ cols: 80, rows: 25, scrollback: 1000 });
      fakedAddonLoad(terminal);
    });
    new ThroughputRuntimeCase('', async () => {
      await new Promise<void>(res => terminal.write(content, res));
      return { payloadSize: contentUtf8.length };
    }, { fork: false }).showAverageThroughput();
  });

  perfContext('write/Utf8/async', () => {
    let terminal: Terminal;
    before(() => {
      terminal = new Terminal({ cols: 80, rows: 25, scrollback: 1000 });
      // Activate the grapheme provider here as well; otherwise this case
      // would measure the terminal's default unicode provider.
      fakedAddonLoad(terminal);
    });
    new ThroughputRuntimeCase('', async () => {
      // Write the raw UTF-8 bytes (not the decoded string) so this case
      // really exercises the byte input path.
      await new Promise<void>(res => terminal.write(contentUtf8, res));
      return { payloadSize: contentUtf8.length };
    }, { fork: false }).showAverageThroughput();
  });
});
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"APP_PATH": ".benchmark",
3+
"evalConfig": {
4+
"tolerance": {
5+
"*": [0.75, 1.5],
6+
"*.dev": [0.01, 1.5],
7+
"*.cv": [0.01, 1.5],
8+
"EscapeSequenceParser.benchmark.js.*.averageThroughput.mean": [0.9, 5]
9+
},
10+
"skip": [
11+
"*.median",
12+
"*.runs",
13+
"*.dev",
14+
"*.cv",
15+
"EscapeSequenceParser.benchmark.js.*.averageRuntime",
16+
"Terminal.benchmark.js.*.averageRuntime"
17+
]
18+
}
19+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"compilerOptions": {
3+
"lib": ["dom", "es6"],
4+
"outDir": "../out-benchmark",
5+
"types": ["../../../node_modules/@types/node"],
6+
"moduleResolution": "node",
7+
"strict": false,
8+
"target": "es2015",
9+
"module": "commonjs",
10+
"baseUrl": ".",
11+
"paths": {
12+
"common/*": ["../../../src/common/*"],
13+
"browser/*": ["../../../src/browser/*"],
14+
"UnicodeGraphemeProvider": ["../src/UnicodeGraphemeProvider"]
15+
}
16+
},
17+
"include": ["../**/*", "../../../typings/xterm.d.ts"],
18+
"exclude": ["../../../**/*test.ts", "../../**/*api.ts"],
19+
"references": [
20+
{ "path": "../../../src/common" },
21+
{ "path": "../../../src/browser" }
22+
]
23+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
{
2+
"name": "xterm-addon-unicode-graphemes",
3+
"version": "0.1.0",
4+
"author": {
5+
"name": "The xterm.js authors",
6+
"url": "https://xtermjs.org/"
7+
},
8+
"main": "lib/xterm-addon-unicode-graphemes.js",
9+
"types": "typings/xterm-addon-unicode-graphemes.d.ts",
10+
"repository": "https://github.com/xtermjs/xterm.js/tree/master/addons/xterm-addon-unicode-graphemes",
11+
"license": "MIT",
12+
"keywords": [
13+
"terminal",
14+
"xterm",
15+
"xterm.js"
16+
],
17+
"scripts": {
18+
"build": "../../node_modules/.bin/tsc -p .",
19+
"prepackage": "npm run build",
20+
"package": "../../node_modules/.bin/webpack",
21+
"prepublishOnly": "npm run package",
22+
"benchmark": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json out-benchmark/benchmark/*benchmark.js",
23+
"benchmark-baseline": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json --baseline out-benchmark/benchmark/*benchmark.js",
24+
"benchmark-eval": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json --eval out-benchmark/benchmark/*benchmark.js"
25+
},
26+
"peerDependencies": {
27+
"xterm": "^5.0.0"
28+
}
29+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/**
2+
* Copyright (c) 2023 The xterm.js authors. All rights reserved.
3+
* @license MIT
4+
*/
5+
6+
import { IUnicodeVersionProvider } from 'xterm';
7+
import { UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services';
8+
import { UnicodeService } from 'common/services/UnicodeService';
9+
import * as UC from './third-party/UnicodeProperties';
10+
11+
/**
 * Unicode version provider backed by the generated tables in
 * `./third-party/UnicodeProperties`.
 *
 * Depending on the constructor flag it registers as version '15-graphemes'
 * (grapheme cluster joining decided by `UC.shouldJoin`) or as plain '15'.
 */
export class UnicodeGraphemeProvider implements IUnicodeVersionProvider {
  /** Version key: '15-graphemes' when `handleGraphemes` is true, otherwise '15'. */
  public readonly version: string;
  /**
   * When true, characters whose width info is 2 (presumably the East Asian
   * "ambiguous" class — confirm against UnicodeProperties) are two columns wide.
   */
  public ambiguousCharsAreWide: boolean = false;
  /** Whether joining is decided by the grapheme break rules in `UC.shouldJoin`. */
  public readonly handleGraphemes: boolean;

  /**
   * @param handleGraphemes Enables grapheme cluster handling (default true).
   */
  constructor(handleGraphemes: boolean = true) {
    this.version = handleGraphemes ? '15-graphemes' : '15';
    this.handleGraphemes = handleGraphemes;
  }

  /** Shared result for the ASCII fast path: kind Other, width 1, not joined. */
  private static readonly _plainNarrowProperties: UnicodeCharProperties
    = UnicodeService.createPropertyValue(UC.GRAPHEME_BREAK_Other, 1, false);

  /**
   * Computes the packed properties (state, width, join flag) of `codepoint`
   * given the properties of the character that precedes it.
   *
   * @param codepoint The code point to classify.
   * @param preceding Properties of the previous character, or 0 if there is none.
   * @returns The value produced by `UnicodeService.createPropertyValue`.
   */
  public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties {
    // Optimize the simple ASCII case, under the condition that
    // UnicodeService.extractCharKind(preceding) === GRAPHEME_BREAK_Other
    // (which also covers the case that preceding === 0).
    if ((codepoint >= 32 && codepoint < 127) && (preceding >> 3) === 0) {
      return UnicodeGraphemeProvider._plainNarrowProperties;
    }

    let charInfo = UC.getInfo(codepoint);
    let w = UC.infoToWidthInfo(charInfo);
    let shouldJoin = false;
    // Normalize the table's width info to a column count of 1 or 2.
    if (w >= 2) {
      // Treat emoji_presentation_selector as WIDE.
      w = w === 3 || this.ambiguousCharsAreWide || codepoint === 0xfe0f ? 2 : 1;
    } else {
      w = 1;
    }
    if (preceding !== 0) {
      const oldWidth = UnicodeService.extractWidth(preceding);
      if (this.handleGraphemes) {
        // From here on charInfo holds the join result/state, not the raw table info.
        charInfo = UC.shouldJoin(UnicodeService.extractCharKind(preceding), charInfo);
      } else {
        // NOTE(review): w has already been normalized to 1 or 2 above, so this
        // expression always yields 0 — confirm whether zero-width detection
        // was intended for the non-grapheme mode.
        charInfo = w === 0 ? 1 : 0;
      }
      shouldJoin = charInfo > 0;
      if (shouldJoin) {
        // A joined cluster is at least as wide as what came before.
        if (oldWidth > w) {
          w = oldWidth;
        } else if (charInfo === 32) { // UC.GRAPHEME_BREAK_SAW_Regional_Pair)
          w = 2;
        }
      }
    }
    return UnicodeService.createPropertyValue(charInfo, w, shouldJoin);
  }

  /**
   * Column width of a single code point, ignoring context.
   *
   * @param codepoint The code point to measure.
   * @returns 0 for the Extend and Prepend break kinds; 2 when the width info
   * is 3, or is at least 2 while `ambiguousCharsAreWide` is set; otherwise 1.
   */
  public wcwidth(codepoint: number): UnicodeCharWidth {
    const charInfo = UC.getInfo(codepoint);
    const w = UC.infoToWidthInfo(charInfo);
    const kind = (charInfo & UC.GRAPHEME_BREAK_MASK) >> UC.GRAPHEME_BREAK_SHIFT;
    if (kind === UC.GRAPHEME_BREAK_Extend || kind === UC.GRAPHEME_BREAK_Prepend) {
      return 0;
    }
    if (w >= 2 && (w === 3 || this.ambiguousCharsAreWide)) {
      return 2;
    }
    return 1;
  }
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* Copyright (c) 2023 The xterm.js authors. All rights reserved.
3+
* @license MIT
4+
*
5+
 * UnicodeVersionProvider for V15 with grapheme cluster handling.
6+
*/
7+
8+
import { Terminal, ITerminalAddon, IUnicodeHandling } from 'xterm';
9+
import { UnicodeGraphemeProvider } from './UnicodeGraphemeProvider';
10+
11+
12+
/**
 * Addon that registers two UnicodeGraphemeProvider instances (one created
 * with `false`, one with `true`) on a terminal and makes '15-graphemes' the
 * active unicode version. Disposing restores the version that was active
 * when the addon was activated.
 */
export class UnicodeGraphemesAddon implements ITerminalAddon {
  private _provider15Graphemes?: UnicodeGraphemeProvider;
  private _provider15?: UnicodeGraphemeProvider;
  private _unicode?: IUnicodeHandling;
  private _oldVersion: string = '';

  /**
   * Registers both providers with the terminal and switches the active
   * version to '15-graphemes'.
   *
   * @param terminal The terminal the addon is being loaded into.
   */
  public activate(terminal: Terminal): void {
    // Providers are created on first use and reused on later activations.
    const plainProvider = this._provider15 || new UnicodeGraphemeProvider(false);
    this._provider15 = plainProvider;
    const graphemeProvider = this._provider15Graphemes || new UnicodeGraphemeProvider(true);
    this._provider15Graphemes = graphemeProvider;

    const unicodeHandling = terminal.unicode;
    this._unicode = unicodeHandling;
    unicodeHandling.register(plainProvider);
    unicodeHandling.register(graphemeProvider);

    // Remember the previous version so dispose() can put it back.
    this._oldVersion = unicodeHandling.activeVersion;
    unicodeHandling.activeVersion = '15-graphemes';
  }

  /**
   * Restores the unicode version that was active before activate() ran.
   * Does nothing if the addon was never activated.
   */
  public dispose(): void {
    const unicodeHandling = this._unicode;
    if (!unicodeHandling) {
      return;
    }
    unicodeHandling.activeVersion = this._oldVersion;
  }
}

0 commit comments

Comments
 (0)