// textUtils.cpp
#include <vector>
#include <windows.h>
#include <usp10.h>
bool calculateWordOffsets(wchar_t* text, int textLength, int offset, int* startOffset, int* endOffset) {
if(textLength<=0) return false;
if(offset<0) return false;
if(offset>=textLength) {
*startOffset=offset;
*endOffset=offset+1;
return true;
}
SCRIPT_ITEM* pItems=new SCRIPT_ITEM[textLength+1];
int numItems=0;
if(ScriptItemize(text,textLength,textLength,NULL,NULL,pItems,&numItems)!=S_OK||numItems==0) {
delete[] pItems;
return false;
}
SCRIPT_LOGATTR* logAttrArray=new SCRIPT_LOGATTR[textLength];
int nextICharPos=textLength;
for(int itemIndex=numItems-1;itemIndex>=0;--itemIndex) {
int iCharPos=pItems[itemIndex].iCharPos;
int iCharLength=nextICharPos-iCharPos;
if(ScriptBreak(text+iCharPos,iCharLength,&(pItems[itemIndex].a),logAttrArray+iCharPos)!=S_OK) {
delete[] pItems;
delete[] logAttrArray;
return false;
}
}
delete[] pItems;
for(int i=offset;i>=0;--i) {
if(logAttrArray[i].fWordStop) {
*startOffset=i;
break;
}
}
// #1656: fWordStop doesn't seem to stop on whitespace where punctuation follows the whitespace.
bool skipWhitespace=true;
for(int i=offset;i>=*startOffset;--i) {
if(iswspace(text[i])) {
if(skipWhitespace) {
// If we start in a block of whitespace, the word must start before this,
// as whitespace is included at the end of a word.
// Therefore, skip the whitespace and keep searching.
continue;
}
// This is whitespace. The word starts after it.
*startOffset=i+1;
break;
} else
skipWhitespace=false;
}
*endOffset=textLength;
for(int i=offset+1;i<textLength;++i) {
if(logAttrArray[i].fWordStop) {
*endOffset=i;
break;
}
}
// #1656: fWordStop doesn't seem to stop on whitespace where punctuation follows the whitespace.
for(int i=offset;i<*endOffset;++i) {
if(iswspace(text[i])) {
// This begins a block of whitespace. The word ends after it.
// Find the end of the whitespace.
for(;i<*endOffset;++i) {
if(!iswspace(text[i]))
break;
}
// We're now positioned on the first non-whitespace character,
// so the word ends here.
*endOffset=i;
break;
}
}
delete[] logAttrArray;
return true;
}