Compare commits
206 Commits
v0.3.12
...
v0.3.13-be
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a0de409878 | ||
|
|
3f0944906d | ||
|
|
79ef5445a1 | ||
|
|
dea2795499 | ||
|
|
650e4fb3a9 | ||
|
|
29a630dcd1 | ||
|
|
7733ea0c02 | ||
|
|
3d13d65c52 | ||
|
|
575058550a | ||
|
|
ad3e3cb7ec | ||
|
|
e24ca7ca4a | ||
|
|
1b6d8c8f6d | ||
|
|
27e172cbe3 | ||
|
|
e40c720f99 | ||
|
|
c8d7071741 | ||
|
|
5c2154253d | ||
|
|
3c79cca77c | ||
|
|
65c0ab724f | ||
|
|
d5d259e13e | ||
|
|
691d3929eb | ||
|
|
57b3b7b5d7 | ||
|
|
1582c1a3cf | ||
|
|
e22fe940c1 | ||
|
|
7f19892444 | ||
|
|
123a016ec0 | ||
|
|
5b6dcb3bc4 | ||
|
|
8d71200b66 | ||
|
|
6d333d2b40 | ||
|
|
baacfd4469 | ||
|
|
e8925ce697 | ||
|
|
e40c2a6736 | ||
|
|
b9518dc92b | ||
|
|
47f26f2336 | ||
|
|
fbc8d98209 | ||
|
|
27aeda8921 | ||
|
|
4c2e642a85 | ||
|
|
f8995827f6 | ||
|
|
d7593d12f2 | ||
|
|
cd471a8323 | ||
|
|
9ad962c7d0 | ||
|
|
b4e16ca445 | ||
|
|
c2269fe23d | ||
|
|
d720435945 | ||
|
|
e33b0d39f9 | ||
|
|
bbf3fb96be | ||
|
|
09567234cd | ||
|
|
1c2179fc50 | ||
|
|
c7fff5d9e4 | ||
|
|
25badd6c2e | ||
|
|
97fb7b9427 | ||
|
|
f9b1aba27d | ||
|
|
aa0b9acabc | ||
|
|
67b3ae5170 | ||
|
|
7d796ebdb3 | ||
|
|
5737e68b8f | ||
|
|
211019b78b | ||
|
|
1db6676c45 | ||
|
|
da7ae028bf | ||
|
|
f3aa739e72 | ||
|
|
7f09d1a1d1 | ||
|
|
5a8483e78d | ||
|
|
841d15056d | ||
|
|
09cdd0fff0 | ||
|
|
ebb677d203 | ||
|
|
cf3236f57f | ||
|
|
3bd8169600 | ||
|
|
f9aaec6020 | ||
|
|
bb2cc995d6 | ||
|
|
a65aaa5f95 | ||
|
|
92b9a978dc | ||
|
|
5f2729e065 | ||
|
|
37bb4cea43 | ||
|
|
79d608feea | ||
|
|
54573de3e3 | ||
|
|
a2243b8825 | ||
|
|
2fba2d3b4a | ||
|
|
fd0cbbdcb1 | ||
|
|
b6e3deedf4 | ||
|
|
4c74bf1b4a | ||
|
|
2a4e3c8c58 | ||
|
|
e34e5b4260 | ||
|
|
ae2df7dfe4 | ||
|
|
1b3d0a5cf2 | ||
|
|
4c94329071 | ||
|
|
6ffcf2f865 | ||
|
|
e2c9a66880 | ||
|
|
e9bc25ebc7 | ||
|
|
6379e63669 | ||
|
|
70a0763e7f | ||
|
|
863080e6ce | ||
|
|
3ef454b8bd | ||
|
|
2bbdfc71d0 | ||
|
|
d1c783dde1 | ||
|
|
644da67601 | ||
|
|
b8d99efd29 | ||
|
|
4067d92a44 | ||
|
|
13a17f3a6b | ||
|
|
57c679e500 | ||
|
|
f70f45dab6 | ||
|
|
8d8f723d66 | ||
|
|
7c3c6a7ad7 | ||
|
|
d7a1c9377a | ||
|
|
2a317372b2 | ||
|
|
402f7bd267 | ||
|
|
e8eb6e3068 | ||
|
|
3dd9c45777 | ||
|
|
7255229361 | ||
|
|
4d2fa29886 | ||
|
|
ef90faf98b | ||
|
|
82caa8365e | ||
|
|
391257e9e9 | ||
|
|
b082253167 | ||
|
|
8df701e3fe | ||
|
|
9f232f5dbf | ||
|
|
7017726dcb | ||
|
|
b48ca8fd1e | ||
|
|
88d5e15a5e | ||
|
|
e9537cbd1d | ||
|
|
8e216bf3ac | ||
|
|
63352cc615 | ||
|
|
e9e2563739 | ||
|
|
87bb098445 | ||
|
|
da1944bedf | ||
|
|
d4a92e0d46 | ||
|
|
0fa6c1f235 | ||
|
|
260b1ba5ca | ||
|
|
f0799a6a0e | ||
|
|
155238946a | ||
|
|
45f91cf40c | ||
|
|
94f5b56b6a | ||
|
|
46db467073 | ||
|
|
17dde536d9 | ||
|
|
be67bf4b84 | ||
|
|
8f142548fe | ||
|
|
a68f439f39 | ||
|
|
7a0892bb36 | ||
|
|
8457390156 | ||
|
|
72be3898c1 | ||
|
|
d35bf5af63 | ||
|
|
04d3af6484 | ||
|
|
26920e4a98 | ||
|
|
7419966b51 | ||
|
|
58b832c6c3 | ||
|
|
99f2ec1879 | ||
|
|
4249f9ef86 | ||
|
|
60107ae299 | ||
|
|
6a95a865fa | ||
|
|
9e32589af5 | ||
|
|
6133e225e1 | ||
|
|
348c143d92 | ||
|
|
ce00785ffe | ||
|
|
78cdce750d | ||
|
|
f3f95ae282 | ||
|
|
018885eb30 | ||
|
|
c6c8a76dd6 | ||
|
|
3cae8b7230 | ||
|
|
814c8de0c2 | ||
|
|
32fe175b48 | ||
|
|
b901f6de8d | ||
|
|
fe9ba3246c | ||
|
|
71a39f0fc1 | ||
|
|
f7556898e1 | ||
|
|
578539f5d0 | ||
|
|
7c28c7fbea | ||
|
|
88bcadff81 | ||
|
|
25e25dfbf0 | ||
|
|
ba3dc0178d | ||
|
|
91e7f424bb | ||
|
|
b89f791eb0 | ||
|
|
ad3a0425ab | ||
|
|
7cf52ecf3e | ||
|
|
b1ef18f4fd | ||
|
|
b74af5bbe9 | ||
|
|
b8aa4bbfc4 | ||
|
|
e024ac9272 | ||
|
|
c5fa027a8e | ||
|
|
b6ec2b25be | ||
|
|
a756b59c60 | ||
|
|
8687ce55ed | ||
|
|
1ac6985dd0 | ||
|
|
986b4a878f | ||
|
|
1ef38fe7f3 | ||
|
|
bcad0af35e | ||
|
|
b5b89fde4f | ||
|
|
be1fc710ed | ||
|
|
aa55fd3070 | ||
|
|
a132462466 | ||
|
|
df393ff607 | ||
|
|
88a6f436ef | ||
|
|
ee8f44d816 | ||
|
|
0308ec355f | ||
|
|
3ac14f8a2a | ||
|
|
2b087b76dc | ||
|
|
1715e5ddfa | ||
|
|
6cc17161a5 | ||
|
|
5d1c20617b | ||
|
|
d9efa48c9c | ||
|
|
dedd4cb7f0 | ||
|
|
42b147b656 | ||
|
|
47ce490d6c | ||
|
|
5563a1cadd | ||
|
|
7beb2e5ef6 | ||
|
|
f00da13cba | ||
|
|
bfed1747f7 | ||
|
|
abb4b104fa | ||
|
|
b69b1caa72 |
@@ -9,7 +9,7 @@ insert_final_newline = true
|
||||
max_line_length = 120
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[{*.har,*.json}]
|
||||
[{*.har,*.json,*yml}]
|
||||
indent_size = 2
|
||||
|
||||
[*.kt]
|
||||
|
||||
4
.github/workflows/android.yml
vendored
4
.github/workflows/android.yml
vendored
@@ -16,6 +16,8 @@ jobs:
|
||||
uses: actions/setup-java@v1
|
||||
with:
|
||||
java-version: 1.8
|
||||
- name: Setup CMake and Ninja
|
||||
uses: lukka/get-cmake@v3.20.1
|
||||
- uses: actions/cache@v2
|
||||
with:
|
||||
path: |
|
||||
@@ -25,7 +27,7 @@ jobs:
|
||||
restore-keys: |
|
||||
${{ runner.os }}-gradle-
|
||||
- name: Build with Gradle
|
||||
run: ./gradlew clean assemble
|
||||
run: ./gradlew clean assembleDebug
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: app-debug.apk
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -41,5 +41,8 @@ captures/
|
||||
*.jks
|
||||
crowdin.properties
|
||||
|
||||
# C++
|
||||
.cxx/
|
||||
|
||||
# AndroidX Room schema JSONs
|
||||
/app/schemas/
|
||||
|
||||
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "app/src/main/cpp/icu4c/android"]
|
||||
path = app/src/main/cpp/icu4c/android
|
||||
url = https://github.com/patrickgold/icu4c-android
|
||||
@@ -41,7 +41,7 @@ syntax (it is very easy though by just looking at some other layout files).
|
||||
There are two main steps in adding new layouts, though the config step can
|
||||
be skipped if you only add a layout without a new default language support.
|
||||
|
||||
### The config file (`app/src/main/assets/ime/config.json`)
|
||||
### The config file ([`app/src/main/assets/ime/config.json`](app/src/main/assets/ime/config.json))
|
||||
|
||||
This file is very important, as it defines all default currency sets as
|
||||
well as all default subtypes available in the Settings Subtype UI. Note
|
||||
@@ -66,7 +66,7 @@ pre-configured language.
|
||||
|
||||
Since v0.3.10-beta05 it is possible to add custom layouts for all types.
|
||||
|
||||
To add a new layout, head to `app/src/main/assets/ime/text` and then select
|
||||
To add a new layout, head to [`app/src/main/assets/ime/text`](app/src/main/assets/ime/text) and then select
|
||||
the correct sub-directory for the type of layout you want to add. In most cases
|
||||
this will be `characters` to add a layout like QWERTY etc.
|
||||
|
||||
@@ -74,14 +74,14 @@ For the `code` field of each key, make sure to use the UTF-8 code. An
|
||||
useful tool for finding the correct code is [unicode-table.com](https://unicode-table.com/en/).
|
||||
From there, you search for your letter and then use the HTML code, but without the `&#;`
|
||||
For internal codes of functional or UI keys, see
|
||||
`app/src/main/java/dev/patrickgold/florisboard/ime/text/key/KeyCode.kt`.
|
||||
[`app/src/main/java/dev/patrickgold/florisboard/ime/text/key/KeyCode.kt`](app/src/main/java/dev/patrickgold/florisboard/ime/text/key/KeyCode.kt).
|
||||
|
||||
The label is equally important and should always match up with the defined
|
||||
code. If `code` and `label` don't match up, FlorisBoard won't crash but
|
||||
it will most likely lead to confusion in the key processing logic.
|
||||
|
||||
Any accents or diacritics that should be exposed via long press can be
|
||||
added at `assets/ime/text/characters/extended_popups/<languageTag_name_here>.json`.
|
||||
added at [`app/src/main/assets/ime/text/characters/extended_popups/<languageTag_name_here>.json`](app/src/main/assets/ime/text/characters/extended_popups).
|
||||
For each key, you can add 1 main and several relevant accents. The main
|
||||
accent should be used for accents which are important for the language
|
||||
you add. The main field is used for determining if a hint or an accent
|
||||
|
||||
28
README.md
28
README.md
@@ -45,7 +45,11 @@ _A. IzzySoft's repo for F-Droid_:
|
||||
|
||||
[<img src="https://gitlab.com/IzzyOnDroid/repo/-/raw/master/assets/IzzyOnDroid.png" height="64" alt="IzzySoft repo badge">](https://apt.izzysoft.de/fdroid/index/apk/dev.patrickgold.florisboard.beta)
|
||||
|
||||
_B. Use the APK provided in the release section of this repo_
|
||||
_B. Google Play_:
|
||||
|
||||
Follow the same steps as for the stable track, the app can then be accessed [here](https://play.google.com/store/apps/details?id=dev.patrickgold.florisboard.beta).
|
||||
|
||||
_C. Use the APK provided in the release section of this repo_
|
||||
|
||||
### Giving feedback
|
||||
If you want to give feedback to FlorisBoard, there are several ways to
|
||||
@@ -96,6 +100,7 @@ milestones, please refer to the [Feature roadmap](#feature-roadmap).
|
||||
* [x] User dictionary manager (system and internal)
|
||||
|
||||
### Other useful features
|
||||
* [x] Support for Android 11+ inline autofill API
|
||||
* [x] One-handed mode
|
||||
* [x] Clipboard/cursor tools
|
||||
* [x] Clipboard manager/history
|
||||
@@ -198,21 +203,12 @@ to get more information on this topic.
|
||||
[JakeWharton](https://github.com/JakeWharton)
|
||||
* [expandable-fab](https://github.com/nambicompany/expandable-fab) by
|
||||
[Nambi](https://github.com/nambicompany)
|
||||
|
||||
## Usage notes for included binary dictionary files
|
||||
All binary dictionaries included within this project in
|
||||
(this)[app/src/main/assets/ime/dict] asset folder are built from various
|
||||
sources, as stated below.
|
||||
|
||||
### Source 1: [wordfreq library by LuminosoInsight](https://github.com/LuminosoInsight/wordfreq):
|
||||
`wordfreq` is a repository which provides both a Python library and raw
|
||||
data (the wordlists). Only the data has been extracted in order to build
|
||||
binary dictionary files from it. `wordfreq`'s data is licensed under the
|
||||
Creative Commons Attribution-ShareAlike 4.0 license
|
||||
(https://creativecommons.org/licenses/by-sa/4.0/).
|
||||
|
||||
For further information on what wordfreq's data depends on, see
|
||||
(https://github.com/LuminosoInsight/wordfreq#license).
|
||||
* [ICU4C](https://github.com/unicode-org/icu) by
|
||||
[The Unicode Consortium](https://github.com/unicode-org)
|
||||
* [Nuspell](https://github.com/nuspell/nuspell) by
|
||||
[Nuspell](https://github.com/nuspell)
|
||||
* [TokyoCabinet (only used glob.h and glob.c)](https://github.com/white-gecko/TokyoCabinet) by
|
||||
[Natanael Arndt](https://github.com/white-gecko)
|
||||
|
||||
## License
|
||||
```
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
|
||||
plugins {
|
||||
id("com.android.application") version "4.2.0"
|
||||
kotlin("android") version "1.5.0"
|
||||
kotlin("kapt") version "1.5.0"
|
||||
kotlin("plugin.serialization") version "1.5.0"
|
||||
id("com.android.application") version "4.2.1"
|
||||
kotlin("android") version "1.5.20"
|
||||
kotlin("kapt") version "1.5.20"
|
||||
kotlin("plugin.serialization") version "1.5.20"
|
||||
}
|
||||
|
||||
android {
|
||||
compileSdkVersion(30)
|
||||
buildToolsVersion("30.0.3")
|
||||
ndkVersion = "22.1.7171670"
|
||||
|
||||
compileOptions {
|
||||
sourceCompatibility = JavaVersion.VERSION_1_8
|
||||
@@ -17,15 +18,15 @@ android {
|
||||
|
||||
kotlinOptions {
|
||||
jvmTarget = JavaVersion.VERSION_1_8.toString()
|
||||
freeCompilerArgs = listOf("-Xallow-result-return-type", "-Xopt-in=kotlin.RequiresOptIn")
|
||||
freeCompilerArgs = listOf("-Xallow-result-return-type", "-Xopt-in=kotlin.RequiresOptIn", "-Xopt-in=kotlin.contracts.ExperimentalContracts")
|
||||
}
|
||||
|
||||
defaultConfig {
|
||||
applicationId = "dev.patrickgold.florisboard"
|
||||
minSdkVersion(23)
|
||||
targetSdkVersion(30)
|
||||
versionCode(43)
|
||||
versionName("0.3.12")
|
||||
versionCode(49)
|
||||
versionName("0.3.13")
|
||||
|
||||
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
|
||||
|
||||
@@ -38,17 +39,47 @@ android {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
cFlags("-fvisibility=hidden", "-DU_STATIC_IMPLEMENTATION=1")
|
||||
cppFlags("-fvisibility=hidden", "-std=c++17", "-fexceptions", "-ffunction-sections", "-fdata-sections", "-DU_DISABLE_RENAMING=1", "-DU_STATIC_IMPLEMENTATION=1")
|
||||
arguments("-DANDROID_STL=c++_static")
|
||||
}
|
||||
}
|
||||
|
||||
ndk {
|
||||
//abiFilters += listOf("x86", "x86_64", "armeabi-v7a", "arm64-v8a")
|
||||
abiFilters += listOf("armeabi-v7a", "arm64-v8a")
|
||||
}
|
||||
|
||||
sourceSets {
|
||||
maybeCreate("main").apply {
|
||||
jni {
|
||||
srcDirs("src/main/jniLibs")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buildFeatures {
|
||||
viewBinding = true
|
||||
}
|
||||
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
path("src/main/cpp/CMakeLists.txt")
|
||||
}
|
||||
}
|
||||
|
||||
buildTypes {
|
||||
named("debug").configure {
|
||||
applicationIdSuffix = ".debug"
|
||||
versionNameSuffix = "-debug"
|
||||
|
||||
isDebuggable = true
|
||||
isJniDebuggable = true
|
||||
|
||||
resValue("mipmap", "floris_app_icon", "@mipmap/ic_app_icon_debug")
|
||||
resValue("mipmap", "floris_app_icon_round", "@mipmap/ic_app_icon_debug_round")
|
||||
resValue("string", "floris_app_name", "FlorisBoard Debug")
|
||||
@@ -89,6 +120,7 @@ android {
|
||||
dependencies {
|
||||
implementation("androidx.activity", "activity-ktx", "1.2.1")
|
||||
implementation("androidx.appcompat", "appcompat", "1.2.0")
|
||||
implementation("androidx.autofill", "autofill", "1.1.0")
|
||||
implementation("androidx.core", "core-ktx", "1.3.2")
|
||||
implementation("androidx.fragment", "fragment-ktx", "1.3.0")
|
||||
implementation("androidx.preference", "preference-ktx", "1.1.1")
|
||||
@@ -104,9 +136,11 @@ dependencies {
|
||||
implementation("androidx.room", "room-runtime", "2.2.6")
|
||||
kapt("androidx.room", "room-compiler","2.2.6")
|
||||
|
||||
testImplementation("junit", "junit", "4.13.1")
|
||||
testImplementation(kotlin("test"))
|
||||
testImplementation("androidx.test", "core", "1.3.0")
|
||||
testImplementation("org.mockito", "mockito-inline", "3.7.7")
|
||||
testImplementation("org.robolectric", "robolectric", "4.5.1")
|
||||
|
||||
androidTestImplementation("androidx.test.ext", "junit", "1.1.2")
|
||||
androidTestImplementation("androidx.test.espresso", "espresso-core", "3.3.0")
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
<uses-permission android:name="android.permission.VIBRATE"/>
|
||||
|
||||
<application
|
||||
android:name=".ime.core.FlorisApplication"
|
||||
android:name="dev.patrickgold.florisboard.FlorisApplication"
|
||||
android:allowBackup="false"
|
||||
android:icon="@mipmap/floris_app_icon"
|
||||
android:label="@string/floris_app_name"
|
||||
@@ -31,16 +31,25 @@
|
||||
|
||||
<!-- IME service -->
|
||||
<service
|
||||
android:name="dev.patrickgold.florisboard.ime.core.FlorisBoard"
|
||||
android:name="dev.patrickgold.florisboard.FlorisImeService"
|
||||
android:label="@string/floris_app_name"
|
||||
android:permission="android.permission.BIND_INPUT_METHOD">
|
||||
<meta-data
|
||||
android:name="android.view.im"
|
||||
android:resource="@xml/method"/>
|
||||
|
||||
android:permission="android.permission.BIND_INPUT_METHOD"
|
||||
android:directBootAware="true">
|
||||
<intent-filter>
|
||||
<action android:name="android.view.InputMethod"/>
|
||||
</intent-filter>
|
||||
<meta-data android:name="android.view.im" android:resource="@xml/method"/>
|
||||
</service>
|
||||
|
||||
<!-- Spellchecker service -->
|
||||
<service
|
||||
android:name="dev.patrickgold.florisboard.FlorisSpellCheckerService"
|
||||
android:label="@string/floris_app_name"
|
||||
android:permission="android.permission.BIND_TEXT_SERVICE">
|
||||
<intent-filter>
|
||||
<action android:name="android.service.textservice.SpellCheckerService"/>
|
||||
</intent-filter>
|
||||
<meta-data android:name="android.view.textservice.scs" android:resource="@xml/spellchecker"/>
|
||||
</service>
|
||||
|
||||
<!-- Settings Activity -->
|
||||
@@ -87,6 +96,14 @@
|
||||
android:label="@string/settings__theme_editor__title"
|
||||
android:theme="@style/SettingsTheme"/>
|
||||
|
||||
<!-- Spelling Activity -->
|
||||
<activity
|
||||
android:name="dev.patrickgold.florisboard.settings.spelling.SpellingActivity"
|
||||
android:icon="@mipmap/floris_app_icon"
|
||||
android:label="@string/settings__spelling__title_overview"
|
||||
android:roundIcon="@mipmap/floris_app_icon_round"
|
||||
android:theme="@style/SettingsTheme"/>
|
||||
|
||||
<!-- About Activity -->
|
||||
<activity
|
||||
android:name="dev.patrickgold.florisboard.settings.AboutActivity"
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
{
|
||||
"package": "dev.patrickgold.florisboard",
|
||||
"composers": [
|
||||
{ "$": "appender" },
|
||||
{ "$": "hangul-unicode" }
|
||||
],
|
||||
"currencySets": [
|
||||
{
|
||||
"name": "azerbaijani_manat",
|
||||
@@ -246,6 +250,7 @@
|
||||
{
|
||||
"id": 101,
|
||||
"languageTag": "en-US",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -254,6 +259,7 @@
|
||||
{
|
||||
"id": 102,
|
||||
"languageTag": "en-UK",
|
||||
"composer": "appender",
|
||||
"currencySet": "pound",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -262,6 +268,7 @@
|
||||
{
|
||||
"id": 103,
|
||||
"languageTag": "en-CA",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -270,6 +277,7 @@
|
||||
{
|
||||
"id": 104,
|
||||
"languageTag": "en-AU",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -278,6 +286,7 @@
|
||||
{
|
||||
"id": 201,
|
||||
"languageTag": "de-DE",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwertz"
|
||||
@@ -286,6 +295,7 @@
|
||||
{
|
||||
"id": 202,
|
||||
"languageTag": "de-AT",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwertz"
|
||||
@@ -294,14 +304,27 @@
|
||||
{
|
||||
"id": 203,
|
||||
"languageTag": "de-CH",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "swiss_german"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 204,
|
||||
"languageTag": "de-DE-neobone",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "neo2",
|
||||
"symbols": "neo2",
|
||||
"numericRow": "neo2"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 301,
|
||||
"languageTag": "fr-FR",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "azerty"
|
||||
@@ -310,6 +333,7 @@
|
||||
{
|
||||
"id": 302,
|
||||
"languageTag": "fr-CA",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "canadian_french"
|
||||
@@ -318,6 +342,7 @@
|
||||
{
|
||||
"id": 303,
|
||||
"languageTag": "fr-CH",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "swiss_french"
|
||||
@@ -326,6 +351,7 @@
|
||||
{
|
||||
"id": 401,
|
||||
"languageTag": "it-IT",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -334,6 +360,7 @@
|
||||
{
|
||||
"id": 402,
|
||||
"languageTag": "it-CH",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "swiss_italian"
|
||||
@@ -342,6 +369,7 @@
|
||||
{
|
||||
"id": 501,
|
||||
"languageTag": "es-ES",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "spanish"
|
||||
@@ -350,6 +378,7 @@
|
||||
{
|
||||
"id": 502,
|
||||
"languageTag": "es-US",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "spanish"
|
||||
@@ -358,6 +387,7 @@
|
||||
{
|
||||
"id": 503,
|
||||
"languageTag": "es-419",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "spanish"
|
||||
@@ -366,6 +396,7 @@
|
||||
{
|
||||
"id": 601,
|
||||
"languageTag": "pt-PT",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -374,6 +405,7 @@
|
||||
{
|
||||
"id": 602,
|
||||
"languageTag": "pt-BR",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -382,6 +414,7 @@
|
||||
{
|
||||
"id": 701,
|
||||
"languageTag": "nb-NO",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "norwegian"
|
||||
@@ -390,6 +423,7 @@
|
||||
{
|
||||
"id": 702,
|
||||
"languageTag": "nn-NO",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "norwegian"
|
||||
@@ -398,6 +432,7 @@
|
||||
{
|
||||
"id": 711,
|
||||
"languageTag": "sv-SE",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "swedish_finnish"
|
||||
@@ -406,6 +441,7 @@
|
||||
{
|
||||
"id": 721,
|
||||
"languageTag": "fi-FI",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "swedish_finnish"
|
||||
@@ -414,6 +450,7 @@
|
||||
{
|
||||
"id": 731,
|
||||
"languageTag": "da-DK",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "danish"
|
||||
@@ -422,6 +459,7 @@
|
||||
{
|
||||
"id": 741,
|
||||
"languageTag": "is-IS",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "icelandic"
|
||||
@@ -430,6 +468,7 @@
|
||||
{
|
||||
"id": 751,
|
||||
"languageTag": "fo",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "faroese"
|
||||
@@ -438,6 +477,7 @@
|
||||
{
|
||||
"id": 801,
|
||||
"languageTag": "fa-FA",
|
||||
"composer": "appender",
|
||||
"currencySet": "iranian_rial",
|
||||
"preferred": {
|
||||
"characters": "persian",
|
||||
@@ -449,6 +489,7 @@
|
||||
{
|
||||
"id": 901,
|
||||
"languageTag": "ar",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "arabic",
|
||||
@@ -460,6 +501,7 @@
|
||||
{
|
||||
"id": 1001,
|
||||
"languageTag": "hu",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "hungarian"
|
||||
@@ -468,6 +510,7 @@
|
||||
{
|
||||
"id": 1101,
|
||||
"languageTag": "eo",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "esperanto"
|
||||
@@ -476,6 +519,7 @@
|
||||
{
|
||||
"id": 1201,
|
||||
"languageTag": "hr",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwertz"
|
||||
@@ -484,6 +528,7 @@
|
||||
{
|
||||
"id": 1301,
|
||||
"languageTag": "ru",
|
||||
"composer": "appender",
|
||||
"currencySet": "russian_ruble",
|
||||
"preferred": {
|
||||
"characters": "jcuken_russian"
|
||||
@@ -492,6 +537,7 @@
|
||||
{
|
||||
"id": 1351,
|
||||
"languageTag": "uk",
|
||||
"composer": "appender",
|
||||
"currencySet": "ukrainian_hryvnia",
|
||||
"preferred": {
|
||||
"characters": "jcuken_ukrainian"
|
||||
@@ -500,6 +546,7 @@
|
||||
{
|
||||
"id": 1401,
|
||||
"languageTag": "el",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "greek"
|
||||
@@ -508,6 +555,7 @@
|
||||
{
|
||||
"id": 1501,
|
||||
"languageTag": "ro",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -516,6 +564,7 @@
|
||||
{
|
||||
"id": 1601,
|
||||
"languageTag": "pl",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -524,6 +573,7 @@
|
||||
{
|
||||
"id": 1701,
|
||||
"languageTag": "bg-bg",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "bulgarian_phonetic"
|
||||
@@ -532,6 +582,7 @@
|
||||
{
|
||||
"id": 1801,
|
||||
"languageTag": "tr",
|
||||
"composer": "appender",
|
||||
"currencySet": "turkish_lira",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -540,6 +591,7 @@
|
||||
{
|
||||
"id": 1901,
|
||||
"languageTag": "iw-IL",
|
||||
"composer": "appender",
|
||||
"currencySet": "israeli_new_shekel",
|
||||
"preferred": {
|
||||
"characters": "hebrew"
|
||||
@@ -548,6 +600,7 @@
|
||||
{
|
||||
"id": 2001,
|
||||
"languageTag": "ckb",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "kurdish",
|
||||
@@ -559,6 +612,7 @@
|
||||
{
|
||||
"id": 2101,
|
||||
"languageTag": "sr-RS",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "serbian_cyrillic"
|
||||
@@ -567,6 +621,7 @@
|
||||
{
|
||||
"id": 2201,
|
||||
"languageTag": "lv-LV",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
@@ -575,6 +630,7 @@
|
||||
{
|
||||
"id": 2301,
|
||||
"languageTag": "ku",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "kurdish_kurmanci"
|
||||
@@ -583,6 +639,7 @@
|
||||
{
|
||||
"id": 2501,
|
||||
"languageTag": "ca",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "catalan"
|
||||
@@ -591,6 +648,7 @@
|
||||
{
|
||||
"id": 2601,
|
||||
"languageTag": "IPA-IPA",
|
||||
"composer": "appender",
|
||||
"currencySet": "dollar",
|
||||
"preferred": {
|
||||
"characters": "ipa",
|
||||
@@ -601,6 +659,7 @@
|
||||
{
|
||||
"id": 2701,
|
||||
"languageTag": "sk",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwertz"
|
||||
@@ -609,10 +668,20 @@
|
||||
{
|
||||
"id": 2801,
|
||||
"languageTag": "cs",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwertz"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2900,
|
||||
"languageTag": "ko",
|
||||
"composer": "hangul-unicode",
|
||||
"currencySet": "south_korean_won",
|
||||
"preferred": {
|
||||
"characters": "korean"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Binary file not shown.
69
app/src/main/assets/ime/spelling/config.json
Normal file
69
app/src/main/assets/ime/spelling/config.json
Normal file
@@ -0,0 +1,69 @@
|
||||
{
|
||||
"basePath": "ime/spelling",
|
||||
"importSources": [
|
||||
{
|
||||
"id": "mozilla_firefox",
|
||||
"label": "Mozilla Firefox Add-ons",
|
||||
"url": "https://addons.mozilla.org/firefox/language-tools/",
|
||||
"format": {
|
||||
"$": "archive",
|
||||
"file": {
|
||||
"name": "^.+\\.xpi$",
|
||||
"isRequired": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "libre_office",
|
||||
"label": "LibreOffice [CURRENTLY UNSUPPORTED]",
|
||||
"url": "https://extensions.libreoffice.org/?Tags%5B%5D=50",
|
||||
"format": {
|
||||
"$": "archive",
|
||||
"file": {
|
||||
"name": "^.+\\.oxt$",
|
||||
"isRequired": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "open_office",
|
||||
"label": "Apache OpenOffice [CURRENTLY UNSUPPORTED]",
|
||||
"url": "https://extensions.openoffice.org/en/search?f%5B0%5D=field_project_tags%3A157",
|
||||
"format": {
|
||||
"$": "archive",
|
||||
"file": {
|
||||
"name": "^.+\\.oxt$",
|
||||
"isRequired": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "free_office",
|
||||
"label": "SoftMaker FreeOffice",
|
||||
"url": "https://www.freeoffice.com/en/download/dictionaries",
|
||||
"format": {
|
||||
"$": "archive",
|
||||
"file": {
|
||||
"name": "^.+\\.sox$",
|
||||
"isRequired": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "gh_wooorm",
|
||||
"label": "GitHub collection by Titus Wormer",
|
||||
"url": "https://github.com/wooorm/dictionaries",
|
||||
"format": {
|
||||
"$": "raw",
|
||||
"affFile": {
|
||||
"name": "^.+\\.aff$",
|
||||
"isRequired": true
|
||||
},
|
||||
"dicFile": {
|
||||
"name": "^.+\\.dic$",
|
||||
"isRequired": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
61
app/src/main/assets/ime/text/characters/bone.json
Normal file
61
app/src/main/assets/ime/text/characters/bone.json
Normal file
@@ -0,0 +1,61 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "bone",
|
||||
"label": "Bone",
|
||||
"authors": [ "ostrya" ],
|
||||
"direction": "ltr",
|
||||
"modifier": "neo2",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 106, "label": "j" },
|
||||
{ "$": "auto_text_key", "code": 100, "label": "d" },
|
||||
{ "$": "auto_text_key", "code": 117, "label": "u" },
|
||||
{ "$": "auto_text_key", "code": 97, "label": "a" },
|
||||
{ "$": "auto_text_key", "code": 120, "label": "x" },
|
||||
{ "$": "auto_text_key", "code": 112, "label": "p" },
|
||||
{ "$": "auto_text_key", "code": 104, "label": "h" },
|
||||
{ "$": "auto_text_key", "code": 108, "label": "l" },
|
||||
{ "$": "auto_text_key", "code": 109, "label": "m" },
|
||||
{ "$": "auto_text_key", "code": 119, "label": "w" },
|
||||
{ "$": "case_selector",
|
||||
"lower": {
|
||||
"code": 223, "label": "ß", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 180, "label": "´" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"upper": {
|
||||
"code": 7838, "label": "ẞ", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 180, "label": "´" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 99, "label": "c" },
|
||||
{ "$": "auto_text_key", "code": 116, "label": "t" },
|
||||
{ "$": "auto_text_key", "code": 105, "label": "i" },
|
||||
{ "$": "auto_text_key", "code": 101, "label": "e" },
|
||||
{ "$": "auto_text_key", "code": 111, "label": "o" },
|
||||
{ "$": "auto_text_key", "code": 98, "label": "b" },
|
||||
{ "$": "auto_text_key", "code": 110, "label": "n" },
|
||||
{ "$": "auto_text_key", "code": 114, "label": "r" },
|
||||
{ "$": "auto_text_key", "code": 115, "label": "s" },
|
||||
{ "$": "auto_text_key", "code": 103, "label": "g" },
|
||||
{ "$": "auto_text_key", "code": 113, "label": "q" }
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 102, "label": "f" },
|
||||
{ "$": "auto_text_key", "code": 118, "label": "v" },
|
||||
{ "$": "auto_text_key", "code": 252, "label": "ü" },
|
||||
{ "$": "auto_text_key", "code": 228, "label": "ä" },
|
||||
{ "$": "auto_text_key", "code": 246, "label": "ö" },
|
||||
{ "$": "auto_text_key", "code": 121, "label": "y" },
|
||||
{ "$": "auto_text_key", "code": 122, "label": "z" },
|
||||
{ "$": "auto_text_key", "code": 107, "label": "k" }
|
||||
]
|
||||
]
|
||||
}
|
||||
@@ -4,28 +4,22 @@
|
||||
"authors": [ "GoRaN" ],
|
||||
"mapping": {
|
||||
"all": {
|
||||
|
||||
"ﻪ": {
|
||||
"relevant": [
|
||||
{ "code": 1577, "label": "ة" },
|
||||
{ "code": 1729, "label": "ـہ" }
|
||||
]
|
||||
},
|
||||
"ر": {
|
||||
"relevant": [
|
||||
{ "code": 1685, "label": "ڕ" },
|
||||
{ "code": 1682, "label": "ڒ" }
|
||||
]
|
||||
},
|
||||
|
||||
|
||||
"ی": {
|
||||
"relevant": [
|
||||
{ "code": 1746, "label": "ے" },
|
||||
{ "code": 1610, "label": "ي" },
|
||||
{ "code": 1744, "label": "ې" },
|
||||
{ "code": 1741, "label": "ۍ" },
|
||||
{ "code": 1742, "label": "ێ" },
|
||||
{ "code": 1744, "label": "ې" },
|
||||
{ "code": 1610, "label": "ي" },
|
||||
{ "code": 1597, "label": "ؽ" }
|
||||
]
|
||||
},
|
||||
@@ -34,10 +28,15 @@
|
||||
"ﺋ": {
|
||||
"relevant": [
|
||||
{ "code": 65163, "label": "ﺋ" },
|
||||
{ "code": 1569, "label": "ء" },
|
||||
{ "code": 65139, "label": "ﹳ" }
|
||||
]
|
||||
},
|
||||
"ح": {
|
||||
"relevant": [
|
||||
{ "code": 65010, "label": "ﷲ" },
|
||||
{ "code": 65019, "label": "ﷻ" }
|
||||
]
|
||||
},
|
||||
|
||||
"ع": {
|
||||
"relevant": [
|
||||
@@ -56,12 +55,9 @@
|
||||
]
|
||||
},
|
||||
|
||||
|
||||
|
||||
"ف": {
|
||||
"relevant": [
|
||||
{ "code": 1701, "label": "ڥ" },
|
||||
{ "code": 1700, "label": "ڤ" },
|
||||
{ "code": 1698, "label": "ڢ" },
|
||||
{ "code": 1697, "label": "ڡ" }
|
||||
]
|
||||
@@ -70,7 +66,6 @@
|
||||
"د": {
|
||||
"relevant": [
|
||||
{ "code": 1676, "label": "ڌ" },
|
||||
{ "code": 1584, "label": "ذ" },
|
||||
{ "code": 64390, "label": "ﮆ" },
|
||||
{ "code": 1774, "label": "ۮ" }
|
||||
]
|
||||
@@ -93,9 +88,7 @@
|
||||
},
|
||||
"ب": {
|
||||
"relevant": [
|
||||
{ "code": 65010, "label": "ﷲ" },
|
||||
{ "code": 65021, "label": "﷽" },
|
||||
{ "code": 65019, "label": "ﷻ" }
|
||||
{ "code": 65021, "label": "﷽" }
|
||||
]
|
||||
},
|
||||
"م": {
|
||||
@@ -108,7 +101,6 @@
|
||||
"relevant": [
|
||||
{ "code": 1718, "label": "ڶ" },
|
||||
{ "code": 1719, "label": "ڷ" },
|
||||
{ "code": 1717, "label": "ڵ" },
|
||||
{ "code": 1720, "label": "ڸ" }
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"type": "characters/extended_popups",
|
||||
"name": "de-DE-neobone",
|
||||
"authors": [ "ostrya" ],
|
||||
"mapping": {
|
||||
"uri": {
|
||||
"~right": {
|
||||
"main": { "code": -255, "label": ".com" },
|
||||
"relevant": [
|
||||
{ "code": -255, "label": ".ch" },
|
||||
{ "code": -255, "label": ".de" },
|
||||
{ "code": -255, "label": ".at" },
|
||||
{ "code": -255, "label": ".net" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,10 +20,10 @@
|
||||
]
|
||||
},
|
||||
"ι": {
|
||||
"main": { "$": "auto_text_key", "code": 943, "label": "ί" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 912, "label": "ΐ" },
|
||||
{ "$": "auto_text_key", "code": 970, "label": "ϊ" },
|
||||
{ "$": "auto_text_key", "code": 943, "label": "ί" }
|
||||
{ "$": "auto_text_key", "code": 970, "label": "ϊ" }
|
||||
]
|
||||
},
|
||||
"ο": {
|
||||
@@ -32,10 +32,10 @@
|
||||
]
|
||||
},
|
||||
"υ": {
|
||||
"main": { "$": "auto_text_key", "code": 973, "label": "ύ" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 944, "label": "ΰ" },
|
||||
{ "$": "auto_text_key", "code": 971, "label": "ϋ" },
|
||||
{ "$": "auto_text_key", "code": 973, "label": "ύ" }
|
||||
{ "$": "auto_text_key", "code": 971, "label": "ϋ" }
|
||||
]
|
||||
},
|
||||
"ω": {
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
{
|
||||
"type": "characters/extended_popups",
|
||||
"name": "ko",
|
||||
"authors": [ "patrickgold", "Hayleia" ],
|
||||
"mapping": {
|
||||
"all": {
|
||||
"ㅂ": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 12611, "label": "ㅃ" }
|
||||
]
|
||||
},
|
||||
"ㅈ": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 12617, "label": "ㅉ" }
|
||||
]
|
||||
},
|
||||
"ㄷ": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 12600, "label": "ㄸ" }
|
||||
]
|
||||
},
|
||||
"ㄱ": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 12594, "label": "ㄲ" }
|
||||
]
|
||||
},
|
||||
"ㅅ": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 12614, "label": "ㅆ" }
|
||||
]
|
||||
},
|
||||
"ㅐ": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 12626, "label": "ㅒ" }
|
||||
]
|
||||
},
|
||||
"ㅔ": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 12630, "label": "ㅖ" }
|
||||
]
|
||||
},
|
||||
"~right": {
|
||||
"main": { "code": 44, "label": "," },
|
||||
"relevant": [
|
||||
{ "code": 38, "label": "&" },
|
||||
{ "code": 37, "label": "%" },
|
||||
{ "code": 43, "label": "+" },
|
||||
{ "code": 34, "label": "\"" },
|
||||
{ "code": 45, "label": "-" },
|
||||
{ "code": 58, "label": ":" },
|
||||
{ "code": 39, "label": "'" },
|
||||
{ "code": 64, "label": "@" },
|
||||
{ "code": 59, "label": ";" },
|
||||
{ "code": 47, "label": "/" },
|
||||
{ "code": 40, "label": "(" },
|
||||
{ "code": 41, "label": ")" },
|
||||
{ "code": 35, "label": "#" },
|
||||
{ "code": 33, "label": "!" },
|
||||
{ "code": 63, "label": "?" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"uri": {
|
||||
"~right": {
|
||||
"main": { "code": -255, "label": ".com" },
|
||||
"relevant": [
|
||||
{ "code": -255, "label": ".gov" },
|
||||
{ "code": -255, "label": ".edu" },
|
||||
{ "code": -255, "label": ".org" },
|
||||
{ "code": -255, "label": ".net" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
77
app/src/main/assets/ime/text/characters/halmak.json
Normal file
77
app/src/main/assets/ime/text/characters/halmak.json
Normal file
@@ -0,0 +1,77 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "halmak",
|
||||
"label": "Halmak",
|
||||
"authors": [ "dessalines" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 119, "label": "w" },
|
||||
{ "$": "auto_text_key", "code": 108, "label": "l" },
|
||||
{ "$": "auto_text_key", "code": 114, "label": "r" },
|
||||
{ "$": "auto_text_key", "code": 98, "label": "b" },
|
||||
{ "$": "auto_text_key", "code": 122, "label": "z" },
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 59, "label": ";", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 58, "label": ":" }
|
||||
]
|
||||
} },
|
||||
"upper": { "code": 58, "label": ":", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 59, "label": ";" }
|
||||
]
|
||||
} }
|
||||
},
|
||||
{ "$": "auto_text_key", "code": 113, "label": "q" },
|
||||
{ "$": "auto_text_key", "code": 117, "label": "u" },
|
||||
{ "$": "auto_text_key", "code": 100, "label": "d" },
|
||||
{ "$": "auto_text_key", "code": 106, "label": "j" }
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 115, "label": "s" },
|
||||
{ "$": "auto_text_key", "code": 104, "label": "h" },
|
||||
{ "$": "auto_text_key", "code": 110, "label": "n" },
|
||||
{ "$": "auto_text_key", "code": 116, "label": "t" },
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 44, "label": ",", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 40, "label": "(" }
|
||||
]
|
||||
} },
|
||||
"upper": { "code": 40, "label": "(", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 44, "label": "," }
|
||||
]
|
||||
} }
|
||||
},
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 46, "label": ".", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 41, "label": ")" }
|
||||
]
|
||||
} },
|
||||
"upper": { "code": 41, "label": ")", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 46, "label": "." }
|
||||
]
|
||||
} }
|
||||
},
|
||||
{ "$": "auto_text_key", "code": 97, "label": "a" },
|
||||
{ "$": "auto_text_key", "code": 101, "label": "e" },
|
||||
{ "$": "auto_text_key", "code": 111, "label": "o" },
|
||||
{ "$": "auto_text_key", "code": 105, "label": "i" }
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 102, "label": "f" },
|
||||
{ "$": "auto_text_key", "code": 109, "label": "m" },
|
||||
{ "$": "auto_text_key", "code": 118, "label": "v" },
|
||||
{ "$": "auto_text_key", "code": 99, "label": "c" },
|
||||
{ "$": "auto_text_key", "code": 103, "label": "g" },
|
||||
{ "$": "auto_text_key", "code": 112, "label": "p" },
|
||||
{ "$": "auto_text_key", "code": 120, "label": "x" },
|
||||
{ "$": "auto_text_key", "code": 107, "label": "k" },
|
||||
{ "$": "auto_text_key", "code": 121, "label": "y" }
|
||||
]
|
||||
]
|
||||
}
|
||||
@@ -28,27 +28,27 @@
|
||||
{ "code": 1508, "label": "פ" }
|
||||
],
|
||||
[
|
||||
{ "code": 1513, "label": "ף" },
|
||||
{ "code": 1491, "label": "ך" },
|
||||
{ "code": 1490, "label": "ל" },
|
||||
{ "code": 1499, "label": "ח" },
|
||||
{ "code": 1506, "label": "י" },
|
||||
{ "code": 1497, "label": "ע" },
|
||||
{ "code": 1495, "label": "כ" },
|
||||
{ "code": 1500, "label": "ג" },
|
||||
{ "code": 1498, "label": "ד" },
|
||||
{ "code": 1507, "label": "ש" }
|
||||
{ "code": 1513, "label": "ש" },
|
||||
{ "code": 1491, "label": "ד" },
|
||||
{ "code": 1490, "label": "ג" },
|
||||
{ "code": 1499, "label": "כ" },
|
||||
{ "code": 1506, "label": "ע" },
|
||||
{ "code": 1497, "label": "י" },
|
||||
{ "code": 1495, "label": "ח" },
|
||||
{ "code": 1500, "label": "ל" },
|
||||
{ "code": 1498, "label": "ך" },
|
||||
{ "code": 1507, "label": "ף" }
|
||||
],
|
||||
[
|
||||
{ "code": 1494, "label": "ץ" },
|
||||
{ "code": 1505, "label": "ת" },
|
||||
{ "code": 1489, "label": "צ" },
|
||||
{ "code": 1492, "label": "מ" },
|
||||
{ "code": 1494, "label": "ז" },
|
||||
{ "code": 1505, "label": "ס" },
|
||||
{ "code": 1489, "label": "ב" },
|
||||
{ "code": 1492, "label": "ה" },
|
||||
{ "code": 1504, "label": "נ" },
|
||||
{ "code": 1502, "label": "ה" },
|
||||
{ "code": 1510, "label": "ב" },
|
||||
{ "code": 1514, "label": "ס" },
|
||||
{ "code": 1509, "label": "ז" }
|
||||
{ "code": 1502, "label": "מ" },
|
||||
{ "code": 1510, "label": "צ" },
|
||||
{ "code": 1514, "label": "ת" },
|
||||
{ "code": 1509, "label": "ץ" }
|
||||
]
|
||||
]
|
||||
}
|
||||
|
||||
62
app/src/main/assets/ime/text/characters/korean.json
Normal file
62
app/src/main/assets/ime/text/characters/korean.json
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "korean",
|
||||
"label": "South Korean standard",
|
||||
"authors": [ "patrickgold", "Hayleia" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 12610, "label": "ㅂ" },
|
||||
"upper": { "code": 12611, "label": "ㅃ" }
|
||||
},
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 12616, "label": "ㅈ" },
|
||||
"upper": { "code": 12617, "label": "ㅉ" }
|
||||
},
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 12599, "label": "ㄷ" },
|
||||
"upper": { "code": 12600, "label": "ㄸ" }
|
||||
},
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 12593, "label": "ㄱ" },
|
||||
"upper": { "code": 12594, "label": "ㄲ" }
|
||||
},
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 12613, "label": "ㅅ" },
|
||||
"upper": { "code": 12614, "label": "ㅆ" }
|
||||
},
|
||||
{ "$": "auto_text_key", "code": 12635, "label": "ㅛ"},
|
||||
{ "$": "auto_text_key", "code": 12629, "label": "ㅕ"},
|
||||
{ "$": "auto_text_key", "code": 12625, "label": "ㅑ"},
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 12624, "label": "ㅐ" },
|
||||
"upper": { "code": 12626, "label": "ㅒ" }
|
||||
},
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 12628, "label": "ㅔ" },
|
||||
"upper": { "code": 12630, "label": "ㅖ" }
|
||||
}
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 12609, "label": "ㅁ"},
|
||||
{ "$": "auto_text_key", "code": 12596, "label": "ㄴ"},
|
||||
{ "$": "auto_text_key", "code": 12615, "label": "ㅇ"},
|
||||
{ "$": "auto_text_key", "code": 12601, "label": "ㄹ"},
|
||||
{ "$": "auto_text_key", "code": 12622, "label": "ㅎ"},
|
||||
{ "$": "auto_text_key", "code": 12631, "label": "ㅗ"},
|
||||
{ "$": "auto_text_key", "code": 12627, "label": "ㅓ"},
|
||||
{ "$": "auto_text_key", "code": 12623, "label": "ㅏ"},
|
||||
{ "$": "auto_text_key", "code": 12643, "label": "ㅣ"}
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 12619, "label": "ㅋ"},
|
||||
{ "$": "auto_text_key", "code": 12620, "label": "ㅌ"},
|
||||
{ "$": "auto_text_key", "code": 12618, "label": "ㅊ"},
|
||||
{ "$": "auto_text_key", "code": 12621, "label": "ㅍ"},
|
||||
{ "$": "auto_text_key", "code": 12640, "label": "ㅠ"},
|
||||
{ "$": "auto_text_key", "code": 12636, "label": "ㅜ"},
|
||||
{ "$": "auto_text_key", "code": 12641, "label": "ㅡ"}
|
||||
]
|
||||
]
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "kurdish",
|
||||
"label": "کوردی",
|
||||
"label": "کوردی (قوەرتی نوێ)",
|
||||
"authors": [ "GoRaN" ],
|
||||
"direction": "rtl",
|
||||
"modifier": "kurdish",
|
||||
@@ -13,34 +13,46 @@
|
||||
{ "code": 1608, "label": "و", "popup": {
|
||||
"main": { "code": -255, "label": "وو" }
|
||||
} },
|
||||
{ "code": 1749, "label": "ﻪ" },
|
||||
{ "code": 1585, "label": "ر" },
|
||||
|
||||
{ "code": 1749, "label": "ﻪ", "popup": {
|
||||
"main": { "code": 1577, "label": "ة" }
|
||||
} },
|
||||
{ "code": 1585, "label": "ر", "popup": {
|
||||
"main": { "code": 1685, "label": "ڕ" }
|
||||
} },
|
||||
{ "code": 1578, "label": "ت", "popup": {
|
||||
"main": { "code": 1591, "label": "ط" }
|
||||
} },
|
||||
{ "code": 1740, "label": "ی" },
|
||||
|
||||
{ "code": 1574, "label": "ﺋ"},
|
||||
|
||||
{ "code": 1740, "label": "ی", "popup": {
|
||||
"main": { "code": 1742, "label": "ێ" }
|
||||
} },
|
||||
{ "code": 1574, "label": "ﺋ", "popup": {
|
||||
"main": { "code": 1569, "label": "ء" }
|
||||
} },
|
||||
{ "code": 1593, "label": "ع", "popup": {
|
||||
"main": { "code": 1594, "label": "غ" }
|
||||
} },
|
||||
{ "code": 1734, "label": "ۆ" },
|
||||
|
||||
{ "code": 1662, "label": "پ", "popup": {
|
||||
"main": { "code": 1579, "label": "ث" }
|
||||
} }
|
||||
],
|
||||
[
|
||||
{ "code": 1575, "label": "ا" },
|
||||
{"code": 1575, "label": "ا"},
|
||||
{ "code": 1587, "label": "س" },
|
||||
{ "code": 1588, "label": "ش" },
|
||||
{ "code": 1583, "label": "د" },
|
||||
{ "code": 1601, "label": "ف" },
|
||||
{ "code": 1583, "label": "د", "popup": {
|
||||
"main": {"code": 1584, "label": "ذ" }
|
||||
} },
|
||||
{ "code": 1601, "label": "ف" , "popup": {
|
||||
"main": {"code": 1700, "label": "ڤ" }
|
||||
} },
|
||||
{ "code": 1607, "label": "ھ" },
|
||||
{ "code": 1688, "label": "ژ" },
|
||||
{ "code": 1604, "label": "ل" },
|
||||
{ "code": 1688, "label": "ژ", "popup": {
|
||||
"main": { "code": 1600, "label": "▬" }
|
||||
} },
|
||||
{ "code": 1604, "label": "ل", "popup": {
|
||||
"main": { "code": 1717, "label": "ڵ" }
|
||||
} },
|
||||
{ "code": 1705, "label": "ک" },
|
||||
{ "code": 1711, "label": "گ" }
|
||||
],
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
{ "$": "auto_text_key", "code": 116, "label": "t" },
|
||||
{ "$": "auto_text_key", "code": 121, "label": "y" },
|
||||
{ "$": "auto_text_key", "code": 117, "label": "u" },
|
||||
{ "$": "auto_text_key", "code": 305, "label": "ı" },
|
||||
{ "$": "auto_text_key", "code": 105, "label": "i" },
|
||||
{ "$": "auto_text_key", "code": 111, "label": "o" },
|
||||
{ "$": "auto_text_key", "code": 112, "label": "p" },
|
||||
{ "$": "auto_text_key", "code": 251, "label": "û" }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "kurdish_standard",
|
||||
"label": "کوردی - ستاندارد",
|
||||
"label": "کوردی (قڤفغ)",
|
||||
"authors": [ "GoRaN" ],
|
||||
"direction": "rtl",
|
||||
"modifier": "kurdish",
|
||||
@@ -10,16 +10,14 @@
|
||||
{ "code": 1602, "label": "ق", "popup": {
|
||||
"main": { "code": 1647, "label": "ٯ" }
|
||||
} },
|
||||
{ "code": 1700, "label": "ڤ", "popup": {
|
||||
"main": { "code": 1701, "label": "ڥ" }
|
||||
} },
|
||||
{ "code": 1601, "label": "ف", "popup": {
|
||||
"main": { "code": 1698, "label": "ڢ" }
|
||||
} },
|
||||
{ "code": 1700, "label": "ڤ" },
|
||||
{ "code": 1601, "label": "ف" },
|
||||
{ "code": 1594, "label": "غ" },
|
||||
{ "code": 1593, "label": "ع"},
|
||||
{ "code": 1607, "label": "ھ" },
|
||||
{ "code": 1749, "label": "ﻪ" },
|
||||
{ "code": 1749, "label": "ﻪ", "popup": {
|
||||
"main": { "code": 1577, "label": "ة" }
|
||||
} },
|
||||
|
||||
{ "code": 1578, "label": "ت", "popup": {
|
||||
"main": { "code": 1591, "label": "ط" }
|
||||
@@ -46,7 +44,9 @@
|
||||
} },
|
||||
{ "code": 1585, "label": "ر" },
|
||||
{ "code": 1685, "label": "ڕ" },
|
||||
{ "code": 1583, "label": "د" },
|
||||
{ "code": 1583, "label": "د", "popup": {
|
||||
"main": {"code": 1584, "label": "ذ" }
|
||||
} },
|
||||
{ "code": -255, "label": "وو" },
|
||||
{ "code": 1608, "label": "و" },
|
||||
{ "code": 1734, "label": "ۆ" },
|
||||
@@ -55,8 +55,10 @@
|
||||
|
||||
],
|
||||
[
|
||||
{ "code": 1600, "label": "kashida", "variation": "normal" },
|
||||
{ "code": 1574, "label": "ﺋ"},
|
||||
|
||||
{ "code": 1574, "label": "ﺋ", "popup": {
|
||||
"main": { "code": 1569, "label": "ء" }
|
||||
} },
|
||||
|
||||
{ "code": 1662, "label": "پ", "popup": {
|
||||
"main": { "code": 1579, "label": "ث" }
|
||||
|
||||
@@ -6,14 +6,17 @@
|
||||
"direction": "rtl",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 1600, "label": "kashida", "popup":
|
||||
{ "main": { "code": 8204, "label": "half_space" }
|
||||
} },
|
||||
{ "code": 0, "type": "placeholder" },
|
||||
{ "code": -5, "label": "delete", "type": "enter_editing" }
|
||||
],
|
||||
[
|
||||
{ "code": -202, "label": "view_symbols", "type": "system_gui" },
|
||||
{ "$": "variation_selector",
|
||||
"default": { "code": 1567, "label": "؟", "groupId": 1 },
|
||||
"password": { "code": 1548, "label": "،", "groupId": 1 },
|
||||
"default": { "code": 1548, "label": "،", "groupId": 1 },
|
||||
"password": { "code": 35, "label": "#", "groupId": 1 },
|
||||
"email": { "code": 64, "label": "@", "groupId": 1 },
|
||||
"uri": { "code": 47, "label": "/", "groupId": 1 }
|
||||
},
|
||||
|
||||
53
app/src/main/assets/ime/text/characters/mod/neo2.json
Normal file
53
app/src/main/assets/ime/text/characters/mod/neo2.json
Normal file
@@ -0,0 +1,53 @@
|
||||
{
|
||||
"type": "characters/mod",
|
||||
"name": "neo2",
|
||||
"label": "Neo2",
|
||||
"authors": [ "ostrya" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": -1, "label": "shift", "type": "modifier" },
|
||||
{ "code": 0, "type": "placeholder" },
|
||||
{ "code": -5, "label": "delete", "type": "enter_editing" }
|
||||
],
|
||||
[
|
||||
{ "code": -202, "label": "view_symbols", "type": "system_gui" },
|
||||
{ "code": -210, "label": "language_switch", "type": "system_gui" },
|
||||
{ "code": -213, "label": "switch_to_media_context", "type": "system_gui" },
|
||||
{ "code": 32, "label": "space" },
|
||||
{ "$": "variation_selector",
|
||||
"default": { "code": 44, "label": ",", "groupId": 1,
|
||||
"popup": {
|
||||
"main": { "code": 34, "label": "\"" },
|
||||
"relevant": [
|
||||
{ "code": 8211, "label": "–" }
|
||||
]
|
||||
} },
|
||||
"email": { "code": 64, "label": "@", "groupId": 1,
|
||||
"popup": {
|
||||
"relevant": [
|
||||
{ "code": 44, "label": "," }
|
||||
]
|
||||
} },
|
||||
"uri": { "code": 47, "label": "/", "groupId": 1,
|
||||
"popup": {
|
||||
"relevant": [
|
||||
{ "code": 44, "label": "," }
|
||||
]
|
||||
} }
|
||||
},
|
||||
{ "$": "variation_selector",
|
||||
"default": { "code": 46, "label": ".", "groupId": 2,
|
||||
"popup": {
|
||||
"relevant": [
|
||||
{ "code": 183, "label": "·" },
|
||||
{ "code": 39, "label": "'" }
|
||||
]
|
||||
} },
|
||||
"email": { "code": 46, "label": ".", "groupId": 2 },
|
||||
"uri": { "code": 46, "label": ".", "groupId": 2 }
|
||||
},
|
||||
{ "code": 10, "label": "enter", "groupId": 3, "type": "enter_editing" }
|
||||
]
|
||||
]
|
||||
}
|
||||
41
app/src/main/assets/ime/text/characters/nalmy.json
Normal file
41
app/src/main/assets/ime/text/characters/nalmy.json
Normal file
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "nalmy",
|
||||
"label": "NALMY",
|
||||
"authors": [ "jeremiah-mille", "jasmcole" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 122, "label": "z" },
|
||||
{ "$": "auto_text_key", "code": 118, "label": "v" },
|
||||
{ "$": "auto_text_key", "code": 100, "label": "d" },
|
||||
{ "$": "auto_text_key", "code": 110, "label": "n" },
|
||||
{ "$": "auto_text_key", "code": 97, "label": "a" },
|
||||
{ "$": "auto_text_key", "code": 108, "label": "l" },
|
||||
{ "$": "auto_text_key", "code": 109, "label": "m" },
|
||||
{ "$": "auto_text_key", "code": 121, "label": "y" },
|
||||
{ "$": "auto_text_key", "code": 120, "label": "x" },
|
||||
{ "$": "auto_text_key", "code": 106, "label": "j" }
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 107, "label": "k" },
|
||||
{ "$": "auto_text_key", "code": 103, "label": "g" },
|
||||
{ "$": "auto_text_key", "code": 105, "label": "i" },
|
||||
{ "$": "auto_text_key", "code": 101, "label": "e" },
|
||||
{ "$": "auto_text_key", "code": 114, "label": "r" },
|
||||
{ "$": "auto_text_key", "code": 111, "label": "o" },
|
||||
{ "$": "auto_text_key", "code": 112, "label": "p" },
|
||||
{ "$": "auto_text_key", "code": 98, "label": "b" },
|
||||
{ "$": "auto_text_key", "code": 113, "label": "q" }
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 119, "label": "w" },
|
||||
{ "$": "auto_text_key", "code": 104, "label": "h" },
|
||||
{ "$": "auto_text_key", "code": 116, "label": "t" },
|
||||
{ "$": "auto_text_key", "code": 115, "label": "s" },
|
||||
{ "$": "auto_text_key", "code": 99, "label": "c" },
|
||||
{ "$": "auto_text_key", "code": 117, "label": "u" },
|
||||
{ "$": "auto_text_key", "code": 102, "label": "f" }
|
||||
]
|
||||
]
|
||||
}
|
||||
61
app/src/main/assets/ime/text/characters/neo2.json
Normal file
61
app/src/main/assets/ime/text/characters/neo2.json
Normal file
@@ -0,0 +1,61 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "neo2",
|
||||
"label": "Neo2",
|
||||
"authors": [ "ostrya" ],
|
||||
"direction": "ltr",
|
||||
"modifier": "neo2",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 120, "label": "x" },
|
||||
{ "$": "auto_text_key", "code": 118, "label": "v" },
|
||||
{ "$": "auto_text_key", "code": 108, "label": "l" },
|
||||
{ "$": "auto_text_key", "code": 99, "label": "c" },
|
||||
{ "$": "auto_text_key", "code": 119, "label": "w" },
|
||||
{ "$": "auto_text_key", "code": 107, "label": "k" },
|
||||
{ "$": "auto_text_key", "code": 104, "label": "h" },
|
||||
{ "$": "auto_text_key", "code": 103, "label": "g" },
|
||||
{ "$": "auto_text_key", "code": 102, "label": "f" },
|
||||
{ "$": "auto_text_key", "code": 113, "label": "q" },
|
||||
{ "$": "case_selector",
|
||||
"lower": {
|
||||
"code": 223, "label": "ß", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 180, "label": "´" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"upper": {
|
||||
"code": 7838, "label": "ẞ", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 180, "label": "´" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 117, "label": "u" },
|
||||
{ "$": "auto_text_key", "code": 105, "label": "i" },
|
||||
{ "$": "auto_text_key", "code": 97, "label": "a" },
|
||||
{ "$": "auto_text_key", "code": 101, "label": "e" },
|
||||
{ "$": "auto_text_key", "code": 111, "label": "o" },
|
||||
{ "$": "auto_text_key", "code": 115, "label": "s" },
|
||||
{ "$": "auto_text_key", "code": 110, "label": "n" },
|
||||
{ "$": "auto_text_key", "code": 114, "label": "r" },
|
||||
{ "$": "auto_text_key", "code": 116, "label": "t" },
|
||||
{ "$": "auto_text_key", "code": 100, "label": "d" },
|
||||
{ "$": "auto_text_key", "code": 121, "label": "y" }
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 252, "label": "ü" },
|
||||
{ "$": "auto_text_key", "code": 246, "label": "ö" },
|
||||
{ "$": "auto_text_key", "code": 228, "label": "ä" },
|
||||
{ "$": "auto_text_key", "code": 112, "label": "p" },
|
||||
{ "$": "auto_text_key", "code": 122, "label": "z" },
|
||||
{ "$": "auto_text_key", "code": 98, "label": "b" },
|
||||
{ "$": "auto_text_key", "code": 109, "label": "m" },
|
||||
{ "$": "auto_text_key", "code": 106, "label": "j" }
|
||||
]
|
||||
]
|
||||
}
|
||||
41
app/src/main/assets/ime/text/characters/sangaline.json
Normal file
41
app/src/main/assets/ime/text/characters/sangaline.json
Normal file
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "sangaline",
|
||||
"label": "Sangaline",
|
||||
"authors": [ "jeremiah-miller", "sangaline" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 100, "label": "d" },
|
||||
{ "$": "auto_text_key", "code": 103, "label": "g" },
|
||||
{ "$": "auto_text_key", "code": 104, "label": "h" },
|
||||
{ "$": "auto_text_key", "code": 112, "label": "p" },
|
||||
{ "$": "auto_text_key", "code": 97, "label": "a" },
|
||||
{ "$": "auto_text_key", "code": 115, "label": "s" },
|
||||
{ "$": "auto_text_key", "code": 106, "label": "j" },
|
||||
{ "$": "auto_text_key", "code": 114, "label": "r" },
|
||||
{ "$": "auto_text_key", "code": 107, "label": "k" },
|
||||
{ "$": "auto_text_key", "code": 110, "label": "n" }
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 105, "label": "i" },
|
||||
{ "$": "auto_text_key", "code": 113, "label": "q" },
|
||||
{ "$": "auto_text_key", "code": 118, "label": "v" },
|
||||
{ "$": "auto_text_key", "code": 117, "label": "u" },
|
||||
{ "$": "auto_text_key", "code": 119, "label": "w" },
|
||||
{ "$": "auto_text_key", "code": 99, "label": "c" },
|
||||
{ "$": "auto_text_key", "code": 108, "label": "l" },
|
||||
{ "$": "auto_text_key", "code": 120, "label": "x" },
|
||||
{ "$": "auto_text_key", "code": 109, "label": "m" }
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 116, "label": "t" },
|
||||
{ "$": "auto_text_key", "code": 121, "label": "y" },
|
||||
{ "$": "auto_text_key", "code": 98, "label": "b" },
|
||||
{ "$": "auto_text_key", "code": 101, "label": "e" },
|
||||
{ "$": "auto_text_key", "code": 122, "label": "z" },
|
||||
{ "$": "auto_text_key", "code": 102, "label": "f" },
|
||||
{ "$": "auto_text_key", "code": 111, "label": "o" }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/bengali.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/bengali.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "bengali",
|
||||
"label": "Bengali",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 2535, "label": "১", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2536, "label": "২", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2537, "label": "৩", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2538, "label": "৪", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2539, "label": "৫", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2540, "label": "৬", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2541, "label": "৭", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2542, "label": "৮", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2543, "label": "৯", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2534, "label": "০", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/devanagari.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/devanagari.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "devanagari",
|
||||
"label": "Devanagari",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 2407, "label": "१", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2408, "label": "२", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2409, "label": "३", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2410, "label": "४", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2411, "label": "५", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2412, "label": "६", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2413, "label": "७", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2414, "label": "८", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2415, "label": "९", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2406, "label": "०", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/gujarati.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/gujarati.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "gujarati",
|
||||
"label": "Gujarati",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 2791, "label": "૧", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2792, "label": "૨", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2793, "label": "૩", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2794, "label": "૪", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2795, "label": "૫", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2796, "label": "૬", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2797, "label": "૭", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2798, "label": "૮", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2799, "label": "૯", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2790, "label": "૦", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/gurmukhi.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/gurmukhi.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "gurmukhi",
|
||||
"label": "Gurmukhi",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 2663, "label": "੧", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2664, "label": "੨", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2665, "label": "੩", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2666, "label": "੪", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2667, "label": "੫", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2668, "label": "੬", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2669, "label": "੭", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2670, "label": "੮", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2671, "label": "੯", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2662, "label": "੦", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/kannada.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/kannada.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "kannada",
|
||||
"label": "Kannada",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 3303, "label": "೧", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3304, "label": "೨", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3305, "label": "೩", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3306, "label": "೪", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3307, "label": "೫", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3308, "label": "೬", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3309, "label": "೭", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3310, "label": "೮", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3311, "label": "೯", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3302, "label": "೦", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/malayalam.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/malayalam.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "malayalam",
|
||||
"label": "Malayalam",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 3431, "label": "൧", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3432, "label": "൨", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3433, "label": "൩", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3434, "label": "൪", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3435, "label": "൫", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3436, "label": "൬", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3437, "label": "൭", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3438, "label": "൮", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3439, "label": "൯", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3430, "label": "൦", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
80
app/src/main/assets/ime/text/numeric/row/neo2.json
Normal file
80
app/src/main/assets/ime/text/numeric/row/neo2.json
Normal file
@@ -0,0 +1,80 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "neo2",
|
||||
"label": "Neo2",
|
||||
"authors": [ "ostrya" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 49, "label": "1", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 176, "label": "°" },
|
||||
{ "code": 185, "label": "¹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 50, "label": "2", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 167, "label": "§" },
|
||||
{ "code": 178, "label": "²" }
|
||||
]
|
||||
} },
|
||||
{ "code": 51, "label": "3", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 8467, "label": "ℓ" },
|
||||
{ "code": 179, "label": "³" }
|
||||
]
|
||||
} },
|
||||
{ "code": 52, "label": "4", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 187, "label": "»" },
|
||||
{ "code": 8250, "label": "›" }
|
||||
]
|
||||
} },
|
||||
{ "code": 53, "label": "5", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 171, "label": "«" },
|
||||
{ "code": 8249, "label": "‹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 54, "label": "6", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 36, "label": "$" },
|
||||
{ "code": 162, "label": "¢" }
|
||||
]
|
||||
} },
|
||||
{ "code": 55, "label": "7", "type": "numeric", "popup": {
|
||||
"main": { "code": -801, "label": "currency_slot_1" },
|
||||
"relevant": [
|
||||
{ "code": -802, "label": "currency_slot_2" },
|
||||
{ "code": -803, "label": "currency_slot_3" },
|
||||
{ "code": -804, "label": "currency_slot_4" },
|
||||
{ "code": -805, "label": "currency_slot_5" },
|
||||
{ "code": -806, "label": "currency_slot_6" }
|
||||
]
|
||||
} },
|
||||
{ "code": 56, "label": "8", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 8222, "label": "„" },
|
||||
{ "code": 8218, "label": "‚" }
|
||||
]
|
||||
} },
|
||||
{ "code": 57, "label": "9", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 8220, "label": "“" },
|
||||
{ "code": 8216, "label": "‘" }
|
||||
]
|
||||
} },
|
||||
{ "code": 48, "label": "0", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 8221, "label": "”" },
|
||||
{ "code": 8217, "label": "’" }
|
||||
]
|
||||
} },
|
||||
{ "code": 45, "label": "-", "type": "numeric", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 8212, "label": "—" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/oriya.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/oriya.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "oriya",
|
||||
"label": "Odia",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 2919, "label": "୧", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2920, "label": "୨", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2921, "label": "୩", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2922, "label": "୪", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2923, "label": "୫", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2924, "label": "୬", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2925, "label": "୭", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2926, "label": "୮", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2927, "label": "୯", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 2918, "label": "୦", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/tamil.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/tamil.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "tamil",
|
||||
"label": "Tamil",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 3047, "label": "௧", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3048, "label": "௨", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3049, "label": "௩", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3050, "label": "௪", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3051, "label": "௫", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3052, "label": "௬", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3053, "label": "௭", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3054, "label": "௮", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3055, "label": "௯", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3046, "label": "௦", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
91
app/src/main/assets/ime/text/numeric/row/telugu.json
Normal file
91
app/src/main/assets/ime/text/numeric/row/telugu.json
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "telugu",
|
||||
"label": "Telugu",
|
||||
"authors": [ "yashpalgoyal1304" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 3175, "label": "౧", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 8537, "label": "⅙" },
|
||||
{ "code": 8528, "label": "⅐" },
|
||||
{ "code": 8539, "label": "⅛" },
|
||||
{ "code": 8529, "label": "⅑" },
|
||||
{ "code": 8530, "label": "⅒" },
|
||||
{ "code": 185, "label": "¹" },
|
||||
{ "code": 189, "label": "½" },
|
||||
{ "code": 8531, "label": "⅓" },
|
||||
{ "code": 188, "label": "¼" },
|
||||
{ "code": 8533, "label": "⅕" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3176, "label": "౨", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 8532, "label": "⅔" },
|
||||
{ "code": 178, "label": "²" },
|
||||
{ "code": 8534, "label": "⅖" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3177, "label": "౩", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 8535, "label": "⅗" },
|
||||
{ "code": 190, "label": "¾" },
|
||||
{ "code": 179, "label": "³" },
|
||||
{ "code": 8540, "label": "⅜" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3178, "label": "౪", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 8536, "label": "⅘" },
|
||||
{ "code": 8308, "label": "⁴" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3179, "label": "౫", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 8538, "label": "⅚" },
|
||||
{ "code": 8309, "label": "⁵" },
|
||||
{ "code": 8541, "label": "⅝" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3180, "label": "౬", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 8310, "label": "⁶" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3181, "label": "౭", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 8542, "label": "⅞" },
|
||||
{ "code": 8311, "label": "⁷" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3182, "label": "౮", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 8312, "label": "⁸" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3183, "label": "౯", "type": "numeric", "popup": {
|
||||
"main": { "code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 8313, "label": "⁹" }
|
||||
]
|
||||
} },
|
||||
{ "code": 3174, "label": "౦", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 8319, "label": "ⁿ" },
|
||||
{ "code": 8709, "label": "∅" },
|
||||
{ "code": 8304, "label": "⁰" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
22
app/src/main/assets/ime/text/symbols/mod/neo2.json
Normal file
22
app/src/main/assets/ime/text/symbols/mod/neo2.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"type": "symbols/mod",
|
||||
"name": "neo2",
|
||||
"label": "Neo2",
|
||||
"authors": [ "ostrya" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": -203, "label": "view_symbols2", "type": "system_gui" },
|
||||
{ "code": 0, "type": "placeholder" },
|
||||
{ "code": -5, "label": "delete", "type": "enter_editing" }
|
||||
],
|
||||
[
|
||||
{ "code": -201, "label": "view_characters", "type": "system_gui" },
|
||||
{ "code": -205, "label": "view_numeric_advanced", "type": "system_gui" },
|
||||
{ "code": 32, "label": "space" },
|
||||
{ "code": 34, "label": "\"" },
|
||||
{ "code": 39, "label": "'" },
|
||||
{ "code": 10, "label": "enter", "groupId": 3, "type": "enter_editing" }
|
||||
]
|
||||
]
|
||||
}
|
||||
46
app/src/main/assets/ime/text/symbols/neo2.json
Normal file
46
app/src/main/assets/ime/text/symbols/neo2.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"type": "symbols",
|
||||
"name": "neo2",
|
||||
"label": "Neo2",
|
||||
"authors": [ "ostrya" ],
|
||||
"direction": "ltr",
|
||||
"modifier": "neo2",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 8230, "label": "…" },
|
||||
{ "code": 95, "label": "_" },
|
||||
{ "code": 91, "label": "[" },
|
||||
{ "code": 93, "label": "]" },
|
||||
{ "code": 94, "label": "^" },
|
||||
{ "code": 33, "label": "!" },
|
||||
{ "code": 60, "label": "<" },
|
||||
{ "code": 62, "label": ">" },
|
||||
{ "code": 61, "label": "=" },
|
||||
{ "code": 38, "label": "&" },
|
||||
{ "code": 383, "label": "ſ" }
|
||||
],
|
||||
[
|
||||
{ "code": 92, "label": "\\" },
|
||||
{ "code": 47, "label": "/" },
|
||||
{ "code": 123, "label": "{" },
|
||||
{ "code": 125, "label": "}" },
|
||||
{ "code": 42, "label": "*" },
|
||||
{ "code": 63, "label": "?" },
|
||||
{ "code": 40, "label": "(" },
|
||||
{ "code": 41, "label": ")" },
|
||||
{ "code": 45, "label": "-" },
|
||||
{ "code": 58, "label": ":" },
|
||||
{ "code": 64, "label": "@" }
|
||||
],
|
||||
[
|
||||
{ "code": 35, "label": "#" },
|
||||
{ "code": 36, "label": "$" },
|
||||
{ "code": 124, "label": "|" },
|
||||
{ "code": 126, "label": "~" },
|
||||
{ "code": 96, "label": "`" },
|
||||
{ "code": 43, "label": "+" },
|
||||
{ "code": 37, "label": "%" },
|
||||
{ "code": 59, "label": ";" }
|
||||
]
|
||||
]
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
54
app/src/main/cpp/CMakeLists.txt
Normal file
54
app/src/main/cpp/CMakeLists.txt
Normal file
@@ -0,0 +1,54 @@
|
||||
# Native build script for the `florisboard-native` JNI shared library.
#
# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html

cmake_minimum_required(VERSION 3.10.2)

project("florisboard")

# Compile as C++17. CMAKE_CXX_STANDARD_REQUIRED makes this a hard requirement:
# without it CMake may silently fall back to an older standard when the
# toolchain does not support the requested one.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include_directories(.)

### ICU4C ###
include_directories(icu4c/include)
#link_directories(${CMAKE_SOURCE_DIR}/../${ANDROID_ABI})
# Prebuilt static ICU archives are resolved per ABI from ../jniLibs/<ANDROID_ABI>.
set(JNI_LIBS ${CMAKE_SOURCE_DIR}/../jniLibs/${ANDROID_ABI})
add_library(ICU::data STATIC IMPORTED)
set_property(TARGET ICU::data PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicudata.a")
#add_library(ICU::i18n STATIC IMPORTED)
#set_property(TARGET ICU::i18n PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicui18n.a")
#add_library(ICU::tu STATIC IMPORTED)
#set_property(TARGET ICU::tu PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicutu.a")
add_library(ICU::uc STATIC IMPORTED)
set_property(TARGET ICU::uc PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicuuc.a")

### FlorisBoard ###
add_subdirectory(nuspell)
add_subdirectory(glob_ndk)
add_subdirectory(utils)
add_subdirectory(ime/nlp)
add_subdirectory(ime/spelling)

# The JNI entry points; one translation unit per Kotlin class with native methods.
add_library(
        florisboard-native
        SHARED
        dev_patrickgold_florisboard_ime_nlp_SuggestionList.cpp
        dev_patrickgold_florisboard_ime_spelling_SpellingDict.cpp
)

target_compile_options(florisboard-native PRIVATE -ffunction-sections -fdata-sections -fexceptions)
target_link_libraries(
        # Destination
        florisboard-native

        # Sources
        android
        log
        glob_ndk
        ICU::uc
        ICU::data
        Nuspell::nuspell
        utils
        ime-nlp
        ime-spelling
)
|
||||
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <jni.h>
|
||||
#include "ime/nlp/suggestion_list.h"
|
||||
|
||||
#pragma ide diagnostic ignored "UnusedLocalVariable"
|
||||
|
||||
using namespace ime::nlp;
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeInitialize(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jint max_size) {
|
||||
auto *suggestionList = new SuggestionList(max_size);
|
||||
return reinterpret_cast<jlong>(suggestionList);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeDispose(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
suggestionList->clear();
|
||||
delete suggestionList;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeAdd(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr,
|
||||
jstring word,
|
||||
jint freq) {
|
||||
const char *cWord = env->GetStringUTFChars(word, nullptr);
|
||||
word_t stdWord = word_t(cWord);
|
||||
env->ReleaseStringUTFChars(word, cWord);
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
return suggestionList->add(std::move(stdWord), freq);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeClear(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
suggestionList->clear();
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeContains(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr,
|
||||
jstring element) {
|
||||
const char *cWord = env->GetStringUTFChars(element, nullptr);
|
||||
const word_t stdWord = word_t(cWord);
|
||||
env->ReleaseStringUTFChars(element, cWord);
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
return suggestionList->containsWord(stdWord);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeGetOrNull(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr,
|
||||
jint index) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
auto weightedToken = suggestionList->get(index);
|
||||
if (weightedToken == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return env->NewStringUTF(weightedToken->data.c_str());
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jint JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeSize(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
return suggestionList->size();
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeGetIsPrimaryTokenAutoInsert(
|
||||
JNIEnv *env, jobject thiz, jlong native_ptr) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
return suggestionList->isPrimaryTokenAutoInsert;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeSetIsPrimaryTokenAutoInsert(
|
||||
JNIEnv *env, jobject thiz, jlong native_ptr, jboolean v) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
suggestionList->isPrimaryTokenAutoInsert = v;
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <jni.h>
|
||||
#include <algorithm>
|
||||
#include "ime/spelling/spellingdict.h"
|
||||
#include "utils/jni_utils.h"
|
||||
|
||||
#pragma ide diagnostic ignored "UnusedLocalVariable"
|
||||
|
||||
using namespace ime::spellcheck;
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_spelling_SpellingDict_00024Companion_nativeInitialize(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jobject base_path) {
|
||||
auto strBasePath = utils::j2std_string(env, base_path);
|
||||
|
||||
auto *spellingDict = SpellingDict::load(strBasePath);
|
||||
|
||||
if (spellingDict == nullptr) {
|
||||
return 0L;
|
||||
} else {
|
||||
return reinterpret_cast<jlong>(spellingDict);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_spelling_SpellingDict_00024Companion_nativeDispose(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr) {
|
||||
auto spellingDict = reinterpret_cast<SpellingDict *>(native_ptr);
|
||||
|
||||
delete spellingDict;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_spelling_SpellingDict_00024Companion_nativeSpell(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr,
|
||||
jobject word) {
|
||||
auto strWord = utils::j2std_string(env, word);
|
||||
|
||||
auto spellingDict = reinterpret_cast<SpellingDict *>(native_ptr);
|
||||
auto result = spellingDict->spell(strWord);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jobjectArray JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_spelling_SpellingDict_00024Companion_nativeSuggest(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr,
|
||||
jobject word,
|
||||
jint limit) {
|
||||
auto strWord = utils::j2std_string(env, word);
|
||||
|
||||
auto spellingDict = reinterpret_cast<SpellingDict *>(native_ptr);
|
||||
auto result = spellingDict->suggest(strWord);
|
||||
auto retSize = std::min(result.size(), (size_t)std::max(0, limit));
|
||||
|
||||
jclass jByteArrayClass = env->FindClass("java/nio/ByteBuffer");
|
||||
jobjectArray jSuggestions = env->NewObjectArray(retSize, jByteArrayClass, nullptr);
|
||||
for (int n = 0; n < retSize; n++) {
|
||||
env->SetObjectArrayElement(jSuggestions, n, utils::std2j_string(env, result[n]));
|
||||
}
|
||||
|
||||
return jSuggestions;
|
||||
}
|
||||
2
app/src/main/cpp/glob_ndk/CMakeLists.txt
Normal file
2
app/src/main/cpp/glob_ndk/CMakeLists.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
# Library target for the bundled glob(3) implementation.
add_library(glob_ndk glob_ndk.c glob_ndk.h)
|
||||
906
app/src/main/cpp/glob_ndk/glob_ndk.c
Normal file
906
app/src/main/cpp/glob_ndk/glob_ndk.c
Normal file
@@ -0,0 +1,906 @@
|
||||
/*
|
||||
* Natanael Arndt, 2011: removed collate.h dependencies
|
||||
* (my changes are trivial)
|
||||
*
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Guido van Rossum.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
/*
|
||||
* glob(3) -- a superset of the one defined in POSIX 1003.2.
|
||||
*
|
||||
* The [!...] convention to negate a range is supported (SysV, Posix, ksh).
|
||||
*
|
||||
* Optional extra services, controlled by flags not defined by POSIX:
|
||||
*
|
||||
* GLOB_QUOTE:
|
||||
* Escaping convention: \ inhibits any special meaning the following
|
||||
* character might have (except \ at end of string is retained).
|
||||
* GLOB_MAGCHAR:
|
||||
* Set in gl_flags if pattern contained a globbing character.
|
||||
* GLOB_NOMAGIC:
|
||||
* Same as GLOB_NOCHECK, but it will only append pattern if it did
|
||||
* not contain any magic characters. [Used in csh style globbing]
|
||||
* GLOB_ALTDIRFUNC:
|
||||
* Use alternately specified directory access functions.
|
||||
* GLOB_TILDE:
|
||||
* expand ~user/foo to the /home/dir/of/user/foo
|
||||
* GLOB_BRACE:
|
||||
* expand {1,2}{a,b} to 1a 1b 2a 2b
|
||||
* gl_matchc:
|
||||
* Number of matches in the current invocation of glob.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Some notes on multibyte character support:
|
||||
* 1. Patterns with illegal byte sequences match nothing - even if
|
||||
* GLOB_NOCHECK is specified.
|
||||
* 2. Illegal byte sequences in filenames are handled by treating them as
|
||||
* single-byte characters with a value of the first byte of the sequence
|
||||
* cast to wchar_t.
|
||||
* 3. State-dependent encodings are not currently supported.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include "glob_ndk.h"
|
||||
#include <limits.h>
|
||||
#include <pwd.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/* Literal characters that are significant in patterns and path names. */
#define DOLLAR      '$'
#define DOT         '.'
#define EOS         '\0'
#define LBRACKET    '['
#define NOT         '!'
#define QUESTION    '?'
#define QUOTE       '\\'
#define RANGE       '-'
#define RBRACKET    ']'
#define SEP         '/'
#define STAR        '*'
#define TILDE       '~'
#define UNDERSCORE  '_'
#define LBRACE      '{'
#define RBRACE      '}'
#define SLASH       '/'
#define COMMA       ','

#ifndef DEBUG

/*
 * Internal pattern cell layout (normal build): the low 32 bits (M_CHAR) carry
 * the character, bit 38 (M_PROTECT) flags a backslash-quoted character and
 * bit 39 (M_QUOTE) flags a compiled meta character. M_MASK covers all 40 bits.
 */
#define M_QUOTE     0x8000000000ULL
#define M_PROTECT   0x4000000000ULL
#define M_MASK      0xffffffffffULL
#define M_CHAR      0x00ffffffffULL

typedef uint_fast64_t Char;

#else

/* DEBUG build: the same scheme squeezed into a single char. */
#define M_QUOTE     0x80
#define M_PROTECT   0x40
#define M_MASK      0xff
#define M_CHAR      0x7f

typedef char Char;

#endif


/* CHAR() strips the flag bits; META() turns a character into a meta token. */
#define CHAR(c)     ((Char)((c)&M_CHAR))
#define META(c)     ((Char)((c)|M_QUOTE))

/* Compiled meta tokens produced by glob0() and consumed by match(). */
#define M_ALL       META('*')
#define M_END       META(']')
#define M_NOT       META('!')
#define M_ONE       META('?')
#define M_RNG       META('-')
#define M_SET       META('[')

/* True when the cell carries the meta flag. */
#define ismeta(c)   (((c)&M_QUOTE) != 0)
|
||||
|
||||
|
||||
static int compare(const void *, const void *);
|
||||
static int g_Ctoc(const Char *, char *, size_t);
|
||||
static int g_lstat(Char *, struct stat *, glob_t *);
|
||||
static DIR *g_opendir(Char *, glob_t *);
|
||||
static const Char *g_strchr(const Char *, wchar_t);
|
||||
#ifdef notdef
|
||||
static Char *g_strcat(Char *, const Char *);
|
||||
#endif
|
||||
static int g_stat(Char *, struct stat *, glob_t *);
|
||||
static int glob0(const Char *, glob_t *, size_t *);
|
||||
static int glob1(Char *, glob_t *, size_t *);
|
||||
static int glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *);
|
||||
static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *);
|
||||
static int globextend(const Char *, glob_t *, size_t *);
|
||||
static const Char *
|
||||
globtilde(const Char *, Char *, size_t, glob_t *);
|
||||
static int globexp1(const Char *, glob_t *, size_t *);
|
||||
static int globexp2(const Char *, const Char *, glob_t *, int *, size_t *);
|
||||
static int match(Char *, Char *, Char *);
|
||||
#ifdef DEBUG
|
||||
static void qprintf(const char *, Char *);
|
||||
#endif
|
||||
|
||||
int
|
||||
glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob)
|
||||
{
|
||||
const char *patnext;
|
||||
size_t limit;
|
||||
Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
|
||||
mbstate_t mbs;
|
||||
wchar_t wc;
|
||||
size_t clen;
|
||||
|
||||
patnext = pattern;
|
||||
if (!(flags & GLOB_APPEND)) {
|
||||
pglob->gl_pathc = 0;
|
||||
pglob->gl_pathv = NULL;
|
||||
if (!(flags & GLOB_DOOFFS))
|
||||
pglob->gl_offs = 0;
|
||||
}
|
||||
if (flags & GLOB_LIMIT) {
|
||||
limit = pglob->gl_matchc;
|
||||
if (limit == 0)
|
||||
limit = ARG_MAX;
|
||||
} else
|
||||
limit = 0;
|
||||
pglob->gl_flags = flags & ~GLOB_MAGCHAR;
|
||||
pglob->gl_errfunc = errfunc;
|
||||
pglob->gl_matchc = 0;
|
||||
|
||||
bufnext = patbuf;
|
||||
bufend = bufnext + MAXPATHLEN - 1;
|
||||
if (flags & GLOB_NOESCAPE) {
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
while (bufend - bufnext >= MB_CUR_MAX) {
|
||||
clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
|
||||
if (clen == (size_t)-1 || clen == (size_t)-2)
|
||||
return (GLOB_NOMATCH);
|
||||
else if (clen == 0)
|
||||
break;
|
||||
*bufnext++ = wc;
|
||||
patnext += clen;
|
||||
}
|
||||
} else {
|
||||
/* Protect the quoted characters. */
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
while (bufend - bufnext >= MB_CUR_MAX) {
|
||||
if (*patnext == QUOTE) {
|
||||
if (*++patnext == EOS) {
|
||||
*bufnext++ = QUOTE | M_PROTECT;
|
||||
continue;
|
||||
}
|
||||
prot = M_PROTECT;
|
||||
} else
|
||||
prot = 0;
|
||||
clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
|
||||
if (clen == (size_t)-1 || clen == (size_t)-2)
|
||||
return (GLOB_NOMATCH);
|
||||
else if (clen == 0)
|
||||
break;
|
||||
*bufnext++ = wc | prot;
|
||||
patnext += clen;
|
||||
}
|
||||
}
|
||||
*bufnext = EOS;
|
||||
|
||||
if (flags & GLOB_BRACE)
|
||||
return globexp1(patbuf, pglob, &limit);
|
||||
else
|
||||
return glob0(patbuf, pglob, &limit);
|
||||
}
|
||||
|
||||
/*
|
||||
* Expand recursively a glob {} pattern. When there is no more expansion
|
||||
* invoke the standard globbing routine to glob the rest of the magic
|
||||
* characters
|
||||
*/
|
||||
static int
|
||||
globexp1(const Char *pattern, glob_t *pglob, size_t *limit)
|
||||
{
|
||||
const Char* ptr = pattern;
|
||||
int rv;
|
||||
|
||||
/* Protect a single {}, for find(1), like csh */
|
||||
if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
|
||||
return glob0(pattern, pglob, limit);
|
||||
|
||||
while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
|
||||
if (!globexp2(ptr, pattern, pglob, &rv, limit))
|
||||
return rv;
|
||||
|
||||
return glob0(pattern, pglob, limit);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Recursive brace globbing helper. Tries to expand a single brace.
|
||||
* If it succeeds then it invokes globexp1 with the new pattern.
|
||||
* If it fails then it tries to glob the rest of the pattern and returns.
|
||||
*/
|
||||
static int
|
||||
globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, size_t *limit)
|
||||
{
|
||||
int i;
|
||||
Char *lm, *ls;
|
||||
const Char *pe, *pm, *pm1, *pl;
|
||||
Char patbuf[MAXPATHLEN];
|
||||
|
||||
/* copy part up to the brace */
|
||||
for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
|
||||
continue;
|
||||
*lm = EOS;
|
||||
ls = lm;
|
||||
|
||||
/* Find the balanced brace */
|
||||
for (i = 0, pe = ++ptr; *pe; pe++)
|
||||
if (*pe == LBRACKET) {
|
||||
/* Ignore everything between [] */
|
||||
for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
|
||||
continue;
|
||||
if (*pe == EOS) {
|
||||
/*
|
||||
* We could not find a matching RBRACKET.
|
||||
* Ignore and just look for RBRACE
|
||||
*/
|
||||
pe = pm;
|
||||
}
|
||||
}
|
||||
else if (*pe == LBRACE)
|
||||
i++;
|
||||
else if (*pe == RBRACE) {
|
||||
if (i == 0)
|
||||
break;
|
||||
i--;
|
||||
}
|
||||
|
||||
/* Non matching braces; just glob the pattern */
|
||||
if (i != 0 || *pe == EOS) {
|
||||
*rv = glob0(patbuf, pglob, limit);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0, pl = pm = ptr; pm <= pe; pm++)
|
||||
switch (*pm) {
|
||||
case LBRACKET:
|
||||
/* Ignore everything between [] */
|
||||
for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
|
||||
continue;
|
||||
if (*pm == EOS) {
|
||||
/*
|
||||
* We could not find a matching RBRACKET.
|
||||
* Ignore and just look for RBRACE
|
||||
*/
|
||||
pm = pm1;
|
||||
}
|
||||
break;
|
||||
|
||||
case LBRACE:
|
||||
i++;
|
||||
break;
|
||||
|
||||
case RBRACE:
|
||||
if (i) {
|
||||
i--;
|
||||
break;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
case COMMA:
|
||||
if (i && *pm == COMMA)
|
||||
break;
|
||||
else {
|
||||
/* Append the current string */
|
||||
for (lm = ls; (pl < pm); *lm++ = *pl++)
|
||||
continue;
|
||||
/*
|
||||
* Append the rest of the pattern after the
|
||||
* closing brace
|
||||
*/
|
||||
for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
|
||||
continue;
|
||||
|
||||
/* Expand the current pattern */
|
||||
#ifdef DEBUG
|
||||
qprintf("globexp2:", patbuf);
|
||||
#endif
|
||||
*rv = globexp1(patbuf, pglob, limit);
|
||||
|
||||
/* move after the comma, to the next string */
|
||||
pl = pm + 1;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
*rv = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* expand tilde from the passwd file.
|
||||
*/
|
||||
static const Char *
|
||||
globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
|
||||
{
|
||||
struct passwd *pwd;
|
||||
char *h;
|
||||
const Char *p;
|
||||
Char *b, *eb;
|
||||
|
||||
if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
|
||||
return pattern;
|
||||
|
||||
/*
|
||||
* Copy up to the end of the string or /
|
||||
*/
|
||||
eb = &patbuf[patbuf_len - 1];
|
||||
for (p = pattern + 1, h = (char *) patbuf;
|
||||
h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
|
||||
continue;
|
||||
|
||||
*h = EOS;
|
||||
|
||||
if (((char *) patbuf)[0] == EOS) {
|
||||
/*
|
||||
* handle a plain ~ or ~/ by expanding $HOME first (iff
|
||||
* we're not running setuid or setgid) and then trying
|
||||
* the password file
|
||||
*/
|
||||
if (issetugid() != 0 ||
|
||||
(h = getenv("HOME")) == NULL) {
|
||||
if (((h = getlogin()) != NULL &&
|
||||
(pwd = getpwnam(h)) != NULL) ||
|
||||
(pwd = getpwuid(getuid())) != NULL)
|
||||
h = pwd->pw_dir;
|
||||
else
|
||||
return pattern;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* Expand a ~user
|
||||
*/
|
||||
if ((pwd = getpwnam((char*) patbuf)) == NULL)
|
||||
return pattern;
|
||||
else
|
||||
h = pwd->pw_dir;
|
||||
}
|
||||
|
||||
/* Copy the home directory */
|
||||
for (b = patbuf; b < eb && *h; *b++ = *h++)
|
||||
continue;
|
||||
|
||||
/* Append the rest of the pattern */
|
||||
while (b < eb && (*b++ = *p++) != EOS)
|
||||
continue;
|
||||
*b = EOS;
|
||||
|
||||
return patbuf;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* The main glob() routine: compiles the pattern (optionally processing
|
||||
* quotes), calls glob1() to do the real pattern matching, and finally
|
||||
* sorts the list (unless unsorted operation is requested). Returns 0
|
||||
* if things went well, nonzero if errors occurred.
|
||||
*/
|
||||
static int
|
||||
glob0(const Char *pattern, glob_t *pglob, size_t *limit)
|
||||
{
|
||||
const Char *qpatnext;
|
||||
int err;
|
||||
size_t oldpathc;
|
||||
Char *bufnext, c, patbuf[MAXPATHLEN];
|
||||
|
||||
qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
|
||||
oldpathc = pglob->gl_pathc;
|
||||
bufnext = patbuf;
|
||||
|
||||
/* We don't need to check for buffer overflow any more. */
|
||||
while ((c = *qpatnext++) != EOS) {
|
||||
switch (c) {
|
||||
case LBRACKET:
|
||||
c = *qpatnext;
|
||||
if (c == NOT)
|
||||
++qpatnext;
|
||||
if (*qpatnext == EOS ||
|
||||
g_strchr(qpatnext+1, RBRACKET) == NULL) {
|
||||
*bufnext++ = LBRACKET;
|
||||
if (c == NOT)
|
||||
--qpatnext;
|
||||
break;
|
||||
}
|
||||
*bufnext++ = M_SET;
|
||||
if (c == NOT)
|
||||
*bufnext++ = M_NOT;
|
||||
c = *qpatnext++;
|
||||
do {
|
||||
*bufnext++ = CHAR(c);
|
||||
if (*qpatnext == RANGE &&
|
||||
(c = qpatnext[1]) != RBRACKET) {
|
||||
*bufnext++ = M_RNG;
|
||||
*bufnext++ = CHAR(c);
|
||||
qpatnext += 2;
|
||||
}
|
||||
} while ((c = *qpatnext++) != RBRACKET);
|
||||
pglob->gl_flags |= GLOB_MAGCHAR;
|
||||
*bufnext++ = M_END;
|
||||
break;
|
||||
case QUESTION:
|
||||
pglob->gl_flags |= GLOB_MAGCHAR;
|
||||
*bufnext++ = M_ONE;
|
||||
break;
|
||||
case STAR:
|
||||
pglob->gl_flags |= GLOB_MAGCHAR;
|
||||
/* collapse adjacent stars to one,
|
||||
* to avoid exponential behavior
|
||||
*/
|
||||
if (bufnext == patbuf || bufnext[-1] != M_ALL)
|
||||
*bufnext++ = M_ALL;
|
||||
break;
|
||||
default:
|
||||
*bufnext++ = CHAR(c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
*bufnext = EOS;
|
||||
#ifdef DEBUG
|
||||
qprintf("glob0:", patbuf);
|
||||
#endif
|
||||
|
||||
if ((err = glob1(patbuf, pglob, limit)) != 0)
|
||||
return(err);
|
||||
|
||||
/*
|
||||
* If there was no match we are going to append the pattern
|
||||
* if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
|
||||
* and the pattern did not contain any magic characters
|
||||
* GLOB_NOMAGIC is there just for compatibility with csh.
|
||||
*/
|
||||
if (pglob->gl_pathc == oldpathc) {
|
||||
if (((pglob->gl_flags & GLOB_NOCHECK) ||
|
||||
((pglob->gl_flags & GLOB_NOMAGIC) &&
|
||||
!(pglob->gl_flags & GLOB_MAGCHAR))))
|
||||
return(globextend(pattern, pglob, limit));
|
||||
else
|
||||
return(GLOB_NOMATCH);
|
||||
}
|
||||
if (!(pglob->gl_flags & GLOB_NOSORT))
|
||||
qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
|
||||
pglob->gl_pathc - oldpathc, sizeof(char *), compare);
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* qsort() comparator: orders two `char *` elements by strcmp(). */
static int
compare(const void *p, const void *q)
{
    char *lhs = *(char **)p;
    char *rhs = *(char **)q;

    return (strcmp(lhs, rhs));
}
|
||||
|
||||
static int
|
||||
glob1(Char *pattern, glob_t *pglob, size_t *limit)
|
||||
{
|
||||
Char pathbuf[MAXPATHLEN];
|
||||
|
||||
/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
|
||||
if (*pattern == EOS)
|
||||
return(0);
|
||||
return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
|
||||
pattern, pglob, limit));
|
||||
}
|
||||
|
||||
/*
|
||||
* The functions glob2 and glob3 are mutually recursive; there is one level
|
||||
* of recursion for each segment in the pattern that contains one or more
|
||||
* meta characters.
|
||||
*/
|
||||
static int
|
||||
glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
|
||||
glob_t *pglob, size_t *limit)
|
||||
{
|
||||
struct stat sb;
|
||||
Char *p, *q;
|
||||
int anymeta;
|
||||
|
||||
/*
|
||||
* Loop over pattern segments until end of pattern or until
|
||||
* segment with meta character found.
|
||||
*/
|
||||
for (anymeta = 0;;) {
|
||||
if (*pattern == EOS) { /* End of pattern? */
|
||||
*pathend = EOS;
|
||||
if (g_lstat(pathbuf, &sb, pglob))
|
||||
return(0);
|
||||
|
||||
if (((pglob->gl_flags & GLOB_MARK) &&
|
||||
pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
|
||||
|| (S_ISLNK(sb.st_mode) &&
|
||||
(g_stat(pathbuf, &sb, pglob) == 0) &&
|
||||
S_ISDIR(sb.st_mode)))) {
|
||||
if (pathend + 1 > pathend_last)
|
||||
return (GLOB_ABORTED);
|
||||
*pathend++ = SEP;
|
||||
*pathend = EOS;
|
||||
}
|
||||
++pglob->gl_matchc;
|
||||
return(globextend(pathbuf, pglob, limit));
|
||||
}
|
||||
|
||||
/* Find end of next segment, copy tentatively to pathend. */
|
||||
q = pathend;
|
||||
p = pattern;
|
||||
while (*p != EOS && *p != SEP) {
|
||||
if (ismeta(*p))
|
||||
anymeta = 1;
|
||||
if (q + 1 > pathend_last)
|
||||
return (GLOB_ABORTED);
|
||||
*q++ = *p++;
|
||||
}
|
||||
|
||||
if (!anymeta) { /* No expansion, do next segment. */
|
||||
pathend = q;
|
||||
pattern = p;
|
||||
while (*pattern == SEP) {
|
||||
if (pathend + 1 > pathend_last)
|
||||
return (GLOB_ABORTED);
|
||||
*pathend++ = *pattern++;
|
||||
}
|
||||
} else /* Need expansion, recurse. */
|
||||
return(glob3(pathbuf, pathend, pathend_last, pattern, p,
|
||||
pglob, limit));
|
||||
}
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
static int
|
||||
glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
|
||||
Char *pattern, Char *restpattern,
|
||||
glob_t *pglob, size_t *limit)
|
||||
{
|
||||
struct dirent *dp;
|
||||
DIR *dirp;
|
||||
int err;
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
/*
|
||||
* The readdirfunc declaration can't be prototyped, because it is
|
||||
* assigned, below, to two functions which are prototyped in glob.h
|
||||
* and dirent.h as taking pointers to differently typed opaque
|
||||
* structures.
|
||||
*/
|
||||
struct dirent *(*readdirfunc)();
|
||||
|
||||
if (pathend > pathend_last)
|
||||
return (GLOB_ABORTED);
|
||||
*pathend = EOS;
|
||||
errno = 0;
|
||||
|
||||
if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
|
||||
/* TODO: don't call for ENOENT or ENOTDIR? */
|
||||
if (pglob->gl_errfunc) {
|
||||
if (g_Ctoc(pathbuf, buf, sizeof(buf)))
|
||||
return (GLOB_ABORTED);
|
||||
if (pglob->gl_errfunc(buf, errno) ||
|
||||
pglob->gl_flags & GLOB_ERR)
|
||||
return (GLOB_ABORTED);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
err = 0;
|
||||
|
||||
/* Search directory for matching names. */
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
readdirfunc = pglob->gl_readdir;
|
||||
else
|
||||
readdirfunc = readdir;
|
||||
while ((dp = (*readdirfunc)(dirp))) {
|
||||
char *sc;
|
||||
Char *dc;
|
||||
wchar_t wc;
|
||||
size_t clen;
|
||||
mbstate_t mbs;
|
||||
|
||||
/* Initial DOT must be matched literally. */
|
||||
if (dp->d_name[0] == DOT && *pattern != DOT)
|
||||
continue;
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
dc = pathend;
|
||||
sc = dp->d_name;
|
||||
while (dc < pathend_last) {
|
||||
clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
|
||||
if (clen == (size_t)-1 || clen == (size_t)-2) {
|
||||
wc = *sc;
|
||||
clen = 1;
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
}
|
||||
if ((*dc++ = wc) == EOS)
|
||||
break;
|
||||
sc += clen;
|
||||
}
|
||||
if (!match(pathend, pattern, restpattern)) {
|
||||
*pathend = EOS;
|
||||
continue;
|
||||
}
|
||||
err = glob2(pathbuf, --dc, pathend_last, restpattern,
|
||||
pglob, limit);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
(*pglob->gl_closedir)(dirp);
|
||||
else
|
||||
closedir(dirp);
|
||||
return(err);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Extend the gl_pathv member of a glob_t structure to accomodate a new item,
|
||||
* add the new item, and update gl_pathc.
|
||||
*
|
||||
* This assumes the BSD realloc, which only copies the block when its size
|
||||
* crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
|
||||
* behavior.
|
||||
*
|
||||
* Return 0 if new item added, error code if memory couldn't be allocated.
|
||||
*
|
||||
* Invariant of the glob_t structure:
|
||||
* Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
|
||||
* gl_pathv points to (gl_offs + gl_pathc + 1) items.
|
||||
*/
|
||||
static int
|
||||
globextend(const Char *path, glob_t *pglob, size_t *limit)
|
||||
{
|
||||
char **pathv;
|
||||
size_t i, newsize, len;
|
||||
char *copy;
|
||||
const Char *p;
|
||||
|
||||
if (*limit && pglob->gl_pathc > *limit) {
|
||||
errno = 0;
|
||||
return (GLOB_NOSPACE);
|
||||
}
|
||||
|
||||
newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
|
||||
pathv = pglob->gl_pathv ?
|
||||
realloc((char *)pglob->gl_pathv, newsize) :
|
||||
malloc(newsize);
|
||||
if (pathv == NULL) {
|
||||
if (pglob->gl_pathv) {
|
||||
free(pglob->gl_pathv);
|
||||
pglob->gl_pathv = NULL;
|
||||
}
|
||||
return(GLOB_NOSPACE);
|
||||
}
|
||||
|
||||
if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
|
||||
/* first time around -- clear initial gl_offs items */
|
||||
pathv += pglob->gl_offs;
|
||||
for (i = pglob->gl_offs + 1; --i > 0; )
|
||||
*--pathv = NULL;
|
||||
}
|
||||
pglob->gl_pathv = pathv;
|
||||
|
||||
for (p = path; *p++;)
|
||||
continue;
|
||||
len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
|
||||
if ((copy = malloc(len)) != NULL) {
|
||||
if (g_Ctoc(path, copy, len)) {
|
||||
free(copy);
|
||||
return (GLOB_NOSPACE);
|
||||
}
|
||||
pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
|
||||
}
|
||||
pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
|
||||
return(copy == NULL ? GLOB_NOSPACE : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* pattern matching function for filenames. Each occurrence of the *
|
||||
* pattern causes a recursion level.
|
||||
*/
|
||||
static int
|
||||
match(Char *name, Char *pat, Char *patend)
|
||||
{
|
||||
int ok, negate_range;
|
||||
Char c, k;
|
||||
|
||||
while (pat < patend) {
|
||||
c = *pat++;
|
||||
switch (c & M_MASK) {
|
||||
case M_ALL:
|
||||
if (pat == patend)
|
||||
return(1);
|
||||
do
|
||||
if (match(name, pat, patend))
|
||||
return(1);
|
||||
while (*name++ != EOS);
|
||||
return(0);
|
||||
case M_ONE:
|
||||
if (*name++ == EOS)
|
||||
return(0);
|
||||
break;
|
||||
case M_SET:
|
||||
ok = 0;
|
||||
if ((k = *name++) == EOS)
|
||||
return(0);
|
||||
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
|
||||
++pat;
|
||||
while (((c = *pat++) & M_MASK) != M_END)
|
||||
if ((*pat & M_MASK) == M_RNG) {
|
||||
if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1])) ok = 1;
|
||||
pat += 2;
|
||||
} else if (c == k)
|
||||
ok = 1;
|
||||
if (ok == negate_range)
|
||||
return(0);
|
||||
break;
|
||||
default:
|
||||
if (*name++ != c)
|
||||
return(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return(*name == EOS);
|
||||
}
|
||||
|
||||
/* Free allocated data belonging to a glob_t structure. */
|
||||
void
|
||||
globfree(glob_t *pglob)
|
||||
{
|
||||
size_t i;
|
||||
char **pp;
|
||||
|
||||
if (pglob->gl_pathv != NULL) {
|
||||
pp = pglob->gl_pathv + pglob->gl_offs;
|
||||
for (i = pglob->gl_pathc; i--; ++pp)
|
||||
if (*pp)
|
||||
free(*pp);
|
||||
free(pglob->gl_pathv);
|
||||
pglob->gl_pathv = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static DIR *
|
||||
g_opendir(Char *str, glob_t *pglob)
|
||||
{
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
if (!*str)
|
||||
strcpy(buf, ".");
|
||||
else {
|
||||
if (g_Ctoc(str, buf, sizeof(buf)))
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
return((*pglob->gl_opendir)(buf));
|
||||
|
||||
return(opendir(buf));
|
||||
}
|
||||
|
||||
static int
|
||||
g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
|
||||
{
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
if (g_Ctoc(fn, buf, sizeof(buf))) {
|
||||
errno = ENAMETOOLONG;
|
||||
return (-1);
|
||||
}
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
return((*pglob->gl_lstat)(buf, sb));
|
||||
return(lstat(buf, sb));
|
||||
}
|
||||
|
||||
static int
|
||||
g_stat(Char *fn, struct stat *sb, glob_t *pglob)
|
||||
{
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
if (g_Ctoc(fn, buf, sizeof(buf))) {
|
||||
errno = ENAMETOOLONG;
|
||||
return (-1);
|
||||
}
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
return((*pglob->gl_stat)(buf, sb));
|
||||
return(stat(buf, sb));
|
||||
}
|
||||
|
||||
/*
 * strchr() for Char strings: returns a pointer to the first cell equal to
 * `ch` (the terminator itself is found when ch is 0), or NULL if the string
 * ends first.
 */
static const Char *
g_strchr(const Char *str, wchar_t ch)
{
    for (;; str++) {
        if (*str == ch)
            return (str);
        if (!*str)
            return (NULL);
    }
}
|
||||
|
||||
static int
|
||||
g_Ctoc(const Char *str, char *buf, size_t len)
|
||||
{
|
||||
mbstate_t mbs;
|
||||
size_t clen;
|
||||
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
while (len >= MB_CUR_MAX) {
|
||||
clen = wcrtomb(buf, *str, &mbs);
|
||||
if (clen == (size_t)-1)
|
||||
return (1);
|
||||
if (*str == L'\0')
|
||||
return (0);
|
||||
str++;
|
||||
buf += clen;
|
||||
len -= clen;
|
||||
}
|
||||
return (1);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
/*
 * Debug aid: print the label `str`, then the pattern `s` on one line,
 * followed by a line marking protected cells with '"' and a line marking
 * meta cells with '_'.
 */
static void
qprintf(const char *str, Char *s)
{
    Char *cell;

    (void)printf("%s:\n", str);

    for (cell = s; *cell; cell++)
        (void)printf("%c", CHAR(*cell));
    (void)printf("\n");

    for (cell = s; *cell; cell++)
        (void)printf("%c", *cell & M_PROTECT ? '"' : ' ');
    (void)printf("\n");

    for (cell = s; *cell; cell++)
        (void)printf("%c", ismeta(*cell) ? '_' : ' ');
    (void)printf("\n");
}
#endif
|
||||
98
app/src/main/cpp/glob_ndk/glob_ndk.h
Normal file
98
app/src/main/cpp/glob_ndk/glob_ndk.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Guido van Rossum.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)glob.h 8.1 (Berkeley) 6/2/93
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _GLOB_H_
|
||||
#define _GLOB_H_
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <glob.h>
|
||||
|
||||
/*#ifndef _SIZE_T_DECLARED
|
||||
typedef __size_t size_t;
|
||||
#define _SIZE_T_DECLARED
|
||||
#endif*/
|
||||
|
||||
struct stat;
|
||||
/*
 * Result/state record shared between glob() and globfree().
 * Member order and types are part of the binary layout; do not reorder.
 */
typedef struct {
    /* Count of total paths so far. */
    size_t gl_pathc;
    /* Count of paths matching pattern. */
    size_t gl_matchc;
    /* Number of slots reserved at the beginning of gl_pathv. */
    size_t gl_offs;
    /* Copy of the flags parameter given to glob. */
    int gl_flags;
    /* List of paths matching pattern. */
    char **gl_pathv;
    /* Copy of the errfunc parameter given to glob. */
    int (*gl_errfunc)(const char *path, int err);

    /*
     * Alternate filesystem access methods for glob: replacement
     * versions of closedir(3), readdir(3), opendir(3), lstat(2)
     * and stat(2), in that order.
     */
    void (*gl_closedir)(void *dirp);
    struct dirent *(*gl_readdir)(void *dirp);
    void *(*gl_opendir)(const char *path);
    int (*gl_lstat)(const char *path, struct stat *sb);
    int (*gl_stat)(const char *path, struct stat *sb);
} glob_t;
|
||||
|
||||
/*
 * Flags for glob(3). This first group is believed to have been
 * introduced in 1003.2-1992.
 */
#define GLOB_APPEND     0x0001  /* Add the results to the output of an earlier call. */
#define GLOB_DOOFFS     0x0002  /* Put `gl_offs` null pointers first (room for exec, say). */
#define GLOB_ERR        0x0004  /* Return as soon as an error occurs. */
#define GLOB_MARK       0x0008  /* Returned directory names get a trailing "/". */
#define GLOB_NOCHECK    0x0010  /* With no match, return the pattern itself. */
#define GLOB_NOSORT     0x0020  /* Leave the results unsorted. */
#define GLOB_NOESCAPE   0x2000  /* Turn backslash escaping off. */

/* Further flags. */
#define GLOB_ALTDIRFUNC 0x0040  /* Use the alternately specified directory funcs. */
#define GLOB_BRACE      0x0080  /* Expand braces the way csh does. */
#define GLOB_MAGCHAR    0x0100  /* Set in `gl_flags` when the pattern had globbing characters. */
#define GLOB_NOMAGIC    0x0200  /* GLOB_NOCHECK without magic chars (csh). */
#define GLOB_QUOTE      0x0400  /* Quote special chars with \. */
#define GLOB_TILDE      0x0800  /* Expand tilde names from the passwd file. */
#define GLOB_LIMIT      0x1000  /* Limit the number of returned paths. */

/* Error values returned by glob(3). */
#define GLOB_NOSPACE    (-1)    /* Malloc call failed. */
#define GLOB_ABORTED    (-2)    /* Unignored error. */
#define GLOB_NOMATCH    (-3)    /* No match and GLOB_NOCHECK was not set. */
|
||||
|
||||
__BEGIN_DECLS
|
||||
int glob(const char *, int, int (*)(const char *, int), glob_t *);
|
||||
void globfree(glob_t *);
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_GLOB_H_ */
|
||||
1
app/src/main/cpp/icu4c/android
Submodule
1
app/src/main/cpp/icu4c/android
Submodule
Submodule app/src/main/cpp/icu4c/android added at 4574d1dddf
55
app/src/main/cpp/icu4c/data-feature-filter.json
Normal file
55
app/src/main/cpp/icu4c/data-feature-filter.json
Normal file
@@ -0,0 +1,55 @@
|
||||
{
|
||||
"strategy": "subtractive",
|
||||
"featureFilters": {
|
||||
"coll_ucadata": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"coll_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"confusables": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"curr_supplemental": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"curr_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"locales_tree": {
|
||||
"excludelist": [
|
||||
"en_US_POSIX"
|
||||
]
|
||||
},
|
||||
"misc": {
|
||||
"excludelist": [
|
||||
"currencyNumericCodes",
|
||||
"genderList"
|
||||
]
|
||||
},
|
||||
"region_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"rbnf_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"stringprep": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"translit": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"unames": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"unit_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"zone_supplemental": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"zone_tree": {
|
||||
"filterType": "exclude"
|
||||
}
|
||||
}
|
||||
}
|
||||
106
app/src/main/cpp/icu4c/floris-cc-icu4c.sh
Normal file
106
app/src/main/cpp/icu4c/floris-cc-icu4c.sh
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2021 Patrick Goldinger
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Build script for ICU4C, tailored exactly for FlorisBoard's needs.
|
||||
|
||||
# Before executing this script to manually rebuild the ICU libraries, make sure to execute
|
||||
# git submodule update --init --recursive
|
||||
# else the script won't find the ICU source files!
|
||||
|
||||
###### Build ICU4C ######
|
||||
|
||||
# Cross-compile ICU4C as static libraries for arm and arm64 (API level 23),
# with the data filter applied and collation, formatting, regex and
# transliteration disabled. All paths are relative to the current directory.
#
# Abort when the build fails: the clean-up stage further down deletes header
# files and must never run against a missing or half-installed include tree.
if ! ./android/cc-icu4c.sh build \
    --arch=arm,arm64 \
    --api=23 \
    --library-type=static \
    --build-dir=./build \
    --icu-src-dir=./android/icu/icu4c \
    --install-include-dir=./include \
    --install-libs-dir=./../../jniLibs \
    --data-filter-file=./data-feature-filter.json \
    --enable-collation=no \
    --enable-formatting=no \
    --enable-legacy-converters=yes \
    --enable-regex=no \
    --enable-transliteration=no
then
    echo "ICU4C build failed; skipping header clean-up." >&2
    exit 1
fi
|
||||
|
||||
###### Clean up unused header files in include/unicode header dir ######
|
||||
|
||||
# Separator used to join header names into a single string list.
readonly SEP=":"

# Resolve the directories first and mark them read-only afterwards:
# `readonly VAR=$(cmd)` reports the status of `readonly`, hiding a realpath
# failure. An empty path must never reach the clean-up loops below, because
# "$UNICODE_DIR"/* would then expand to /* right before files are removed.
NUSPELL_DIR=$(realpath ../nuspell) || {
    echo "Cannot resolve nuspell source directory '../nuspell'." >&2
    exit 1
}
UNICODE_DIR=$(realpath include/unicode) || {
    echo "Cannot resolve ICU header directory 'include/unicode'." >&2
    exit 1
}
if [ ! -d "$NUSPELL_DIR" ] || [ ! -d "$UNICODE_DIR" ]; then
    echo "Expected directories '$NUSPELL_DIR' and '$UNICODE_DIR' to exist." >&2
    exit 1
fi
readonly NUSPELL_DIR UNICODE_DIR
|
||||
|
||||
# scan_file FILE LIST_VAR_NAME
#
# Reads FILE line by line and collects the names of all headers included from
# the unicode/ directory, in either form:
#     #include <unicode/NAME>
#     #include "unicode/NAME"
# Every NAME that is not yet listed is appended as "$SEP$NAME" to the variable
# whose name is passed as LIST_VAR_NAME (accessed through a bash nameref).
# A FILE that is not a regular file is silently skipped.
scan_file() {
    local src_file=$1
    local -n header_list=$2
    local line header

    # Directories and missing files contain nothing to scan.
    [ -f "$src_file" ] || return 0

    # `|| [ -n "$line" ]` also handles a last line without trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        case $line in
            *"#include <unicode/"*)
                # Keep what lies between "<unicode/" and the next ">".
                header=${line#*'<unicode/'}
                header=${header%%'>'*}
                ;;
            *"#include \"unicode/"*)
                # Keep what lies between "\"unicode/" and the next quote.
                header=${line#*\"unicode/}
                header=${header%%\"*}
                ;;
            *)
                continue
                ;;
        esac
        [ -n "$header" ] || continue

        # Compare whole, separator-delimited entries. A plain substring test
        # would treat e.g. "util.h" as already listed once "putil.h" is.
        if [[ "$SEP$header_list$SEP" != *"$SEP$header$SEP"* ]]; then
            header_list+="$SEP$header"
        fi
    done < "$src_file"
}
|
||||
|
||||
# Seed the list with every unicode/ header that an entry of the nuspell
# directory includes directly.
req_headers=""
for nsrcfile in "$NUSPELL_DIR"/*; do
    scan_file "$nsrcfile" req_headers
done

# scan_file prefixes every entry with $SEP, so a non-empty list starts with
# one separator too many; strip it.
req_headers=${req_headers#"$SEP"}
|
||||
|
||||
# Transitive closure: scan every header already on the list for further
# unicode/ includes, and repeat until a full pass adds nothing new.
while true; do
    old_req_headers=$req_headers
    IFS="$SEP" read -ra req_header_splitted <<< "$req_headers"
    for req_header in "${req_header_splitted[@]}"; do
        scan_file "$UNICODE_DIR/$req_header" req_headers
    done
    # Both operands are quoted so that an empty list, or a name containing
    # whitespace or glob characters, cannot break the comparison.
    [ "$req_headers" != "$old_req_headers" ] || break
done
|
||||
|
||||
#echo "$req_headers"
|
||||
|
||||
# Remove every file in the ICU header directory whose name does not occur in
# the list of required headers.

# Safety nets: never glob from an empty or missing directory, and never wipe
# the whole directory because header detection came up empty.
if [ -z "$UNICODE_DIR" ] || [ ! -d "$UNICODE_DIR" ]; then
    echo "ICU header directory '$UNICODE_DIR' not found; nothing cleaned up." >&2
    exit 1
fi
if [ -z "$req_headers" ]; then
    echo "No required ICU headers detected; refusing to delete all of '$UNICODE_DIR'." >&2
    exit 1
fi

for headerfile in "$UNICODE_DIR"/*; do
    # Skips sub-directories and the literal pattern of an unmatched glob.
    [ -f "$headerfile" ] || continue
    header=${headerfile##*/}
    # The substring match is conservative: it may keep a few extra headers,
    # but it never deletes a file whose name is on the list.
    if [[ "$req_headers" == *"$header"* ]]; then
        echo "KEEP '$headerfile'"
    else
        echo "DELETE '$headerfile'"
        rm -- "$headerfile"
    fi
done
|
||||
|
||||
670
app/src/main/cpp/icu4c/include/unicode/brkiter.h
Normal file
670
app/src/main/cpp/icu4c/include/unicode/brkiter.h
Normal file
@@ -0,0 +1,670 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*
|
||||
* File brkiter.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
|
||||
* 05/07/97 aliu Fixed DLL declaration.
|
||||
* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
|
||||
* 08/11/98 helena Sync-up JDK1.2.
|
||||
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef BRKITER_H
|
||||
#define BRKITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Break Iterator.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Allow the declaration of APIs with pointers to BreakIterator
|
||||
* even when break iteration is removed from the build.
|
||||
*/
|
||||
class BreakIterator;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#else
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "unicode/umisc.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* The BreakIterator class implements methods for finding the location
|
||||
* of boundaries in text. BreakIterator is an abstract base class.
|
||||
* Instances of BreakIterator maintain a current position and scan over
|
||||
* text returning the index of characters where boundaries occur.
|
||||
* <p>
|
||||
* Line boundary analysis determines where a text string can be broken
|
||||
* when line-wrapping. The mechanism correctly handles punctuation and
|
||||
* hyphenated words.
|
||||
* <p>
|
||||
* Sentence boundary analysis allows selection with correct
|
||||
* interpretation of periods within numbers and abbreviations, and
|
||||
* trailing punctuation marks such as quotation marks and parentheses.
|
||||
* <p>
|
||||
* Word boundary analysis is used by search and replace functions, as
|
||||
* well as within text editing applications that allow the user to
|
||||
* select words with a double click. Word selection provides correct
|
||||
* interpretation of punctuation marks within and following
|
||||
* words. Characters that are not part of a word, such as symbols or
|
||||
* punctuation marks, have word-breaks on both sides.
|
||||
* <p>
|
||||
* Character boundary analysis allows users to interact with
|
||||
* characters as they expect to, for example, when moving the cursor
|
||||
* through a text string. Character boundary analysis provides correct
|
||||
* navigation through character strings, regardless of how the
|
||||
* character is stored. For example, an accented character might be
|
||||
* stored as a base character and a diacritical mark. What users
|
||||
* consider to be a character can differ between languages.
|
||||
* <p>
|
||||
* The text boundary positions are found according to the rules
|
||||
* described in Unicode Standard Annex #29, Text Boundaries, and
|
||||
* Unicode Standard Annex #14, Line Breaking Properties. These
|
||||
* are available at http://www.unicode.org/reports/tr14/ and
|
||||
* http://www.unicode.org/reports/tr29/.
|
||||
* <p>
|
||||
* In addition to the C++ API defined in this header file, a
|
||||
* plain C API with equivalent functionality is defined in the
|
||||
* file ubrk.h
|
||||
* <p>
|
||||
* Code snippets illustrating the use of the Break Iterator APIs
|
||||
* are available in the ICU User Guide,
|
||||
* http://icu-project.org/userguide/boundaryAnalysis.html
|
||||
* and in the sample program icu/source/samples/break/break.cpp
|
||||
*
|
||||
*/
|
||||
class U_COMMON_API BreakIterator : public UObject {
|
||||
public:
|
||||
/**
|
||||
* destructor
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~BreakIterator();
|
||||
|
||||
/**
|
||||
* Return true if another object is semantically equal to this
|
||||
* one. The other object should be an instance of the same subclass of
|
||||
* BreakIterator. Objects of different subclasses are considered
|
||||
* unequal.
|
||||
* <P>
|
||||
* Return true if this BreakIterator is at the same position in the
|
||||
* same text, and is the same class and type (word, line, etc.) of
|
||||
* BreakIterator, as the argument. Text is considered the same if
|
||||
* it contains the same characters, it need not be the same
|
||||
* object, and styles are not considered.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool operator==(const BreakIterator&) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the complement of the result of operator==
|
||||
* @param rhs The BreakIterator to be compared for inequality
|
||||
* @return the complement of the result of operator==
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
|
||||
|
||||
/**
|
||||
* Return a polymorphic copy of this object. This is an abstract
|
||||
* method which subclasses implement.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual BreakIterator* clone() const = 0;
|
||||
|
||||
/**
|
||||
* Return a polymorphic class ID for this object. Different subclasses
|
||||
* will return distinct unequal values.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UClassID getDynamicClassID(void) const = 0;
|
||||
|
||||
/**
|
||||
* Return a CharacterIterator over the text being analyzed.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual CharacterIterator& getText(void) const = 0;
|
||||
|
||||
|
||||
/**
|
||||
* Get a UText for the text being analyzed.
|
||||
* The returned UText is a shallow clone of the UText used internally
|
||||
* by the break iterator implementation. It can safely be used to
|
||||
* access the text without impacting any break iterator operations,
|
||||
* but the underlying text itself must not be altered.
|
||||
*
|
||||
* @param fillIn A UText to be filled in. If NULL, a new UText will be
|
||||
* allocated to hold the result.
|
||||
* @param status receives any error codes.
|
||||
* @return The current UText for this break iterator. If an input
|
||||
* UText was provided, it will always be returned.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
|
||||
|
||||
/**
|
||||
* Change the text over which this operates. The text boundary is
|
||||
* reset to the start.
|
||||
*
|
||||
* The BreakIterator will retain a reference to the supplied string.
|
||||
* The caller must not modify or delete the text while the BreakIterator
|
||||
* retains the reference.
|
||||
*
|
||||
* @param text The UnicodeString used to change the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void setText(const UnicodeString &text) = 0;
|
||||
|
||||
/**
|
||||
* Reset the break iterator to operate over the text represented by
|
||||
* the UText. The iterator position is reset to the start.
|
||||
*
|
||||
* This function makes a shallow clone of the supplied UText. This means
|
||||
* that the caller is free to immediately close or otherwise reuse the
|
||||
* Utext that was passed as a parameter, but that the underlying text itself
|
||||
* must not be altered while being referenced by the break iterator.
|
||||
*
|
||||
* All index positions returned by break iterator functions are
|
||||
* native indices from the UText. For example, when breaking UTF-8
|
||||
* encoded text, the break positions returned by next(), previous(), etc.
|
||||
* will be UTF-8 string indices, not UTF-16 positions.
|
||||
*
|
||||
* @param text The UText used to change the text.
|
||||
* @param status receives any error codes.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
virtual void setText(UText *text, UErrorCode &status) = 0;
|
||||
|
||||
/**
|
||||
* Change the text over which this operates. The text boundary is
|
||||
* reset to the start.
|
||||
* Note that setText(UText *) provides similar functionality to this function,
|
||||
* and is more efficient.
|
||||
* @param it The CharacterIterator used to change the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void adoptText(CharacterIterator* it) = 0;
|
||||
|
||||
enum {
|
||||
/**
|
||||
* DONE is returned by previous() and next() after all valid
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
DONE = (int32_t)-1
|
||||
};
|
||||
|
||||
/**
|
||||
* Sets the current iteration position to the beginning of the text, position zero.
|
||||
* @return The offset of the beginning of the text, zero.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t first(void) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
|
||||
* @return The index immediately BEYOND the last character in the text being scanned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t last(void) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the boundary preceding the current boundary.
|
||||
* @return The character index of the previous text boundary or DONE if all
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t previous(void) = 0;
|
||||
|
||||
/**
|
||||
* Advance the iterator to the boundary following the current boundary.
|
||||
* @return The character index of the next text boundary or DONE if all
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t next(void) = 0;
|
||||
|
||||
/**
|
||||
* Return character index of the current iterator position within the text.
|
||||
* @return The boundary most recently returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t current(void) const = 0;
|
||||
|
||||
/**
|
||||
* Advance the iterator to the first boundary following the specified offset.
|
||||
* The value returned is always greater than the offset or
|
||||
* the value BreakIterator.DONE
|
||||
* @param offset the offset to begin scanning.
|
||||
* @return The first boundary after the specified offset.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t following(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the first boundary preceding the specified offset.
|
||||
* The value returned is always smaller than the offset or
|
||||
* the value BreakIterator.DONE
|
||||
* @param offset the offset to begin scanning.
|
||||
* @return The first boundary before the specified offset.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t preceding(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Return true if the specified position is a boundary position.
|
||||
* As a side effect, the current position of the iterator is set
|
||||
* to the first boundary position at or following the specified offset.
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool isBoundary(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the nth boundary from the current boundary
|
||||
* @param n the number of boundaries to move by. A value of 0
|
||||
* does nothing. Negative values move to previous boundaries
|
||||
* and positive values move to later boundaries.
|
||||
* @return The new iterator position, or
|
||||
* DONE if there are fewer than |n| boundaries in the specified direction.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t next(int32_t n) = 0;
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, return the status tag from the break rule
|
||||
* that determined the boundary at the current iteration position.
|
||||
* <p>
|
||||
* For break iterator types that do not support a rule status,
|
||||
* a default value of 0 is returned.
|
||||
* <p>
|
||||
* @return the status from the break rule that determined the boundary at
|
||||
* the current iteration position.
|
||||
* @see RuleBasedBreakIterator::getRuleStatus()
|
||||
* @see UWordBreak
|
||||
* @stable ICU 52
|
||||
*/
|
||||
virtual int32_t getRuleStatus() const;
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
|
||||
* that determined the boundary at the current iteration position.
|
||||
* <p>
|
||||
* For break iterator types that do not support rule status,
|
||||
* no values are returned.
|
||||
* <p>
|
||||
* The returned status value(s) are stored into an array provided by the caller.
|
||||
* The values are stored in sorted (ascending) order.
|
||||
* If the capacity of the output array is insufficient to hold the data,
|
||||
* the output will be truncated to the available length, and a
|
||||
* U_BUFFER_OVERFLOW_ERROR will be signaled.
|
||||
* <p>
|
||||
* @see RuleBasedBreakIterator::getRuleStatusVec
|
||||
*
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the boundary at the current iteration position.
|
||||
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
|
||||
* is the total number of status values that were available,
|
||||
* not the reduced number that were actually returned.
|
||||
* @see getRuleStatus
|
||||
* @stable ICU 52
|
||||
*/
|
||||
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for word-breaks using the given locale.
|
||||
* Returns an instance of a BreakIterator implementing word breaks.
|
||||
* WordBreak is useful for word selection (ex. double click)
|
||||
* @param where the locale.
|
||||
* @param status the error code
|
||||
* @return A BreakIterator for word-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createWordInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for line-breaks using specified locale.
|
||||
* Returns an instance of a BreakIterator implementing line breaks. Line
|
||||
* breaks are logically possible line breaks, actual line breaks are
|
||||
* usually determined based on display width.
|
||||
* LineBreak is useful for word wrapping text.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for line-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createLineInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for character-breaks using specified locale
|
||||
* Returns an instance of a BreakIterator implementing character breaks.
|
||||
* Character breaks are boundaries of combining character sequences.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for character-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createCharacterInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for sentence-breaks using specified locale
|
||||
* Returns an instance of a BreakIterator implementing sentence breaks.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for sentence-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createSentenceInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Create BreakIterator for title-casing breaks using the specified locale
|
||||
* Returns an instance of a BreakIterator implementing title breaks.
|
||||
* The iterator returned locates title boundaries as described for
|
||||
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
|
||||
* please use a word boundary iterator. See {@link #createWordInstance }.
|
||||
*
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for title-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @deprecated ICU 64 Use createWordInstance instead.
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createTitleInstance(const Locale& where, UErrorCode& status);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Get the set of Locales for which TextBoundaries are installed.
|
||||
* <p><b>Note:</b> this will not return locales added through the register
|
||||
* call. To see the registered locales too, use the getAvailableLocales
|
||||
* function that returns a StringEnumeration object </p>
|
||||
* @param count the output parameter of number of elements in the locale list
|
||||
* @return available locales
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
|
||||
|
||||
/**
|
||||
* Get name of the object for the desired Locale, in the desired language.
|
||||
* @param objectLocale must be from getAvailableLocales.
|
||||
* @param displayLocale specifies the desired locale for output.
|
||||
* @param name the fill-in parameter of the return value
|
||||
* Uses best match.
|
||||
* @return user-displayable name
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
|
||||
const Locale& displayLocale,
|
||||
UnicodeString& name);
|
||||
|
||||
/**
|
||||
* Get name of the object for the desired Locale, in the language of the
|
||||
* default locale.
|
||||
* @param objectLocale must be from getAvailableLocales
|
||||
* @param name the fill-in parameter of the return value
|
||||
* @return user-displayable name
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
|
||||
UnicodeString& name);
|
||||
|
||||
#ifndef U_FORCE_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Deprecated functionality. Use clone() instead.
|
||||
*
|
||||
* Thread safe client-buffer-based cloning operation
|
||||
* Do NOT call delete on a safeclone, since 'new' is not used to create it.
|
||||
* @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
|
||||
* If buffer is not large enough, new memory will be allocated.
|
||||
* @param BufferSize reference to size of allocated space.
|
||||
* If BufferSize == 0, a sufficient size for use in cloning will
|
||||
* be returned ('pre-flighting')
|
||||
* If BufferSize is not enough for a stack-based safe clone,
|
||||
* new memory will be allocated.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
|
||||
* necessary.
|
||||
* @return pointer to the new clone
|
||||
*
|
||||
* @deprecated ICU 52. Use clone() instead.
|
||||
*/
|
||||
virtual BreakIterator * createBufferClone(void *stackBuffer,
|
||||
int32_t &BufferSize,
|
||||
UErrorCode &status) = 0;
|
||||
#endif // U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Determine whether the BreakIterator was created in user memory by
|
||||
* createBufferClone(), and thus should not be deleted. Such objects
|
||||
* must be closed by an explicit call to the destructor (not delete).
|
||||
* @deprecated ICU 52. Always delete the BreakIterator.
|
||||
*/
|
||||
inline UBool isBufferClone(void);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
/**
|
||||
* Register a new break iterator of the indicated kind, to use in the given locale.
|
||||
* The break iterator will be adopted. Clones of the iterator will be returned
|
||||
* if a request for a break iterator of the given kind matches or falls back to
|
||||
* this locale.
|
||||
* Because ICU may choose to cache BreakIterators internally, this must
|
||||
* be called at application startup, prior to any calls to
|
||||
* BreakIterator::createXXXInstance to avoid undefined behavior.
|
||||
* @param toAdopt the BreakIterator instance to be adopted
|
||||
* @param locale the Locale for which this instance is to be registered
|
||||
* @param kind the type of iterator for which this instance is to be registered
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @return a registry key that can be used to unregister this instance
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
|
||||
const Locale& locale,
|
||||
UBreakIteratorType kind,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Unregister a previously-registered BreakIterator using the key returned from the
|
||||
* register call. Key becomes invalid after a successful call and should not be used again.
|
||||
* The BreakIterator corresponding to the key will be deleted.
|
||||
* Because ICU may choose to cache BreakIterators internally, this should
|
||||
* be called during application shutdown, after all calls to
|
||||
* BreakIterator::createXXXInstance to avoid undefined behavior.
|
||||
* @param key the registry key returned by a previous call to registerInstance
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @return true if the iterator for the key was successfully unregistered
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Return a StringEnumeration over the locales available at the time of the call,
|
||||
* including registered locales.
|
||||
* @return a StringEnumeration over the locales available at the time of the call
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the locale for this break iterator. Two flavors are available: valid and
|
||||
* actual locale.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Get the locale for this break iterator object. You can choose between valid and actual locale.
|
||||
* @param type type of the locale we're looking for (valid or actual)
|
||||
* @param status error code for the operation
|
||||
* @return the locale
|
||||
* @internal
|
||||
*/
|
||||
const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the matching state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator implementation never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized,
|
||||
* system-level code. One example use case is with garbage collection that moves
|
||||
* the text in memory.
|
||||
*
|
||||
* @param input The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return *this
|
||||
*
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
|
||||
|
||||
private:
|
||||
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
|
||||
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
|
||||
friend class ICUBreakIteratorFactory;
|
||||
friend class ICUBreakIteratorService;
|
||||
|
||||
protected:
|
||||
// Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
|
||||
// or else the compiler will create public ones.
|
||||
/** @internal */
|
||||
BreakIterator();
|
||||
/** @internal */
|
||||
BreakIterator (const BreakIterator &other);
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
BreakIterator (const Locale& valid, const Locale &actual);
|
||||
/** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
|
||||
BreakIterator &operator = (const BreakIterator &other);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
private:
|
||||
|
||||
/** @internal (private) */
|
||||
char actualLocale[ULOC_FULLNAME_CAPACITY];
|
||||
char validLocale[ULOC_FULLNAME_CAPACITY];
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
// Deprecated query (see the declaration in BreakIterator): unconditionally
// reports that this iterator is not a buffer clone.
inline UBool BreakIterator::isBufferClone() { return false; }
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // BRKITER_H
|
||||
//eof
|
||||
307
app/src/main/cpp/icu4c/include/unicode/bytestream.h
Normal file
307
app/src/main/cpp/icu4c/include/unicode/bytestream.h
Normal file
@@ -0,0 +1,307 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
// Copyright (C) 2009-2012, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2007 Google Inc. All Rights Reserved.
|
||||
// Author: sanjay@google.com (Sanjay Ghemawat)
|
||||
//
|
||||
// Abstract interface that consumes a sequence of bytes (ByteSink).
|
||||
//
|
||||
// Used so that we can write a single piece of code that can operate
|
||||
// on a variety of output string types.
|
||||
//
|
||||
// Various implementations of this interface are provided:
|
||||
// ByteSink:
|
||||
// CheckedArrayByteSink Write to a flat array, with bounds checking
|
||||
// StringByteSink Write to an STL string
|
||||
|
||||
// This code is a contribution of Google code, and the style used here is
|
||||
// a compromise between the original Google code and the ICU coding guidelines.
|
||||
// For example, data types are ICU-ified (size_t,int->int32_t),
|
||||
// and API comments doxygen-ified, but function names and behavior are
|
||||
// as in the original, if possible.
|
||||
// Assertion-style error handling, not available in ICU, was changed to
|
||||
// parameter "pinning" similar to UnicodeString.
|
||||
//
|
||||
// In addition, this is only a partial port of the original Google code,
|
||||
// limited to what was needed so far. The (nearly) complete original code
|
||||
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
|
||||
// (see ICU ticket 6765, r25517).
|
||||
|
||||
#ifndef __BYTESTREAM_H__
|
||||
#define __BYTESTREAM_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Interface for writing bytes, and implementation classes.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/std_string.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A ByteSink can be filled with bytes.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API ByteSink : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Default constructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
ByteSink() { }
|
||||
/**
|
||||
* Virtual destructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual ~ByteSink();
|
||||
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* bytes, int32_t n) = 0;
|
||||
|
||||
/**
|
||||
* Appends n bytes to this. Same as Append().
|
||||
* Call AppendU8() with u8"string literals" which are const char * in C++11
|
||||
* but const char8_t * in C++20.
|
||||
* If the compiler does support char8_t as a distinct type,
|
||||
* then an AppendU8() overload for that is defined and will be chosen.
|
||||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char* bytes, int32_t n) {
|
||||
Append(bytes, n);
|
||||
}
|
||||
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
|
||||
* Call AppendU8() with u8"string literals" which are const char * in C++11
|
||||
* but const char8_t * in C++20.
|
||||
* If the compiler does support char8_t as a distinct type,
|
||||
* then this AppendU8() overload for that is defined and will be chosen.
|
||||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char8_t* bytes, int32_t n) {
|
||||
Append(reinterpret_cast<const char*>(bytes), n);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* *result_capacity. Guarantees *result_capacity>=min_capacity.
|
||||
* May return a pointer to the caller-owned scratch buffer which must have
|
||||
* scratch_capacity>=min_capacity.
|
||||
* The returned buffer is only valid until the next operation
|
||||
* on this ByteSink.
|
||||
*
|
||||
* After writing at most *result_capacity bytes, call Append() with the
|
||||
* pointer returned from this function and the number of bytes written.
|
||||
* Many Append() implementations will avoid copying bytes if this function
|
||||
* returned an internal buffer.
|
||||
*
|
||||
* Partial usage example:
|
||||
* int32_t capacity;
|
||||
* char* buffer = sink->GetAppendBuffer(..., &capacity);
|
||||
* ... Write n bytes into buffer, with n <= capacity.
|
||||
* sink->Append(buffer, n);
|
||||
* In many implementations, that call to Append will avoid copying bytes.
|
||||
*
|
||||
* If the ByteSink allocates or reallocates an internal buffer, it should use
|
||||
* the desired_capacity_hint if appropriate.
|
||||
* If a caller cannot provide a reasonable guess at the desired capacity,
|
||||
* it should pass desired_capacity_hint=0.
|
||||
*
|
||||
* If a non-scratch buffer is returned, the caller may only pass
|
||||
* a prefix to it to Append().
|
||||
* That is, it is not correct to pass an interior pointer to Append().
|
||||
*
|
||||
* The default implementation always returns the scratch buffer.
|
||||
*
|
||||
* @param min_capacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desired_capacity_hint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param scratch default caller-owned buffer
|
||||
* @param scratch_capacity capacity of the scratch buffer
|
||||
* @param result_capacity pointer to an integer which will be set to the
|
||||
* capacity of the returned buffer
|
||||
* @return a buffer with *result_capacity>=min_capacity
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual char* GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char* scratch, int32_t scratch_capacity,
|
||||
int32_t* result_capacity);
|
||||
|
||||
/**
|
||||
* Flush internal buffers.
|
||||
* Some byte sinks use internal buffers or provide buffering
|
||||
* and require calling Flush() at the end of the stream.
|
||||
* The ByteSink should be ready for further Append() calls after Flush().
|
||||
* The default implementation of Flush() does nothing.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Flush();
|
||||
|
||||
private:
|
||||
ByteSink(const ByteSink &) = delete;
|
||||
ByteSink &operator=(const ByteSink &) = delete;
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------
|
||||
// Some standard implementations
|
||||
|
||||
/**
|
||||
* Implementation of ByteSink that writes to a flat byte array,
|
||||
* with bounds-checking:
|
||||
* This sink will not write more than capacity bytes to outbuf.
|
||||
* If more than capacity bytes are Append()ed, then excess bytes are ignored,
|
||||
* and Overflowed() will return true.
|
||||
* Overflow does not cause a runtime error.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API CheckedArrayByteSink : public ByteSink {
|
||||
public:
|
||||
/**
|
||||
* Constructs a ByteSink that will write to outbuf[0..capacity-1].
|
||||
* @param outbuf buffer to write to
|
||||
* @param capacity size of the buffer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
CheckedArrayByteSink(char* outbuf, int32_t capacity);
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual ~CheckedArrayByteSink();
|
||||
/**
|
||||
* Returns the sink to its original state, without modifying the buffer.
|
||||
* Useful for reusing both the buffer and the sink for multiple streams.
|
||||
* Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
|
||||
* and Overflowed()=false.
|
||||
* @return *this
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual CheckedArrayByteSink& Reset();
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* bytes, int32_t n);
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* *result_capacity. For details see the base class documentation.
|
||||
* @param min_capacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desired_capacity_hint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param scratch default caller-owned buffer
|
||||
* @param scratch_capacity capacity of the scratch buffer
|
||||
* @param result_capacity pointer to an integer which will be set to the
|
||||
* capacity of the returned buffer
|
||||
* @return a buffer with *result_capacity>=min_capacity
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual char* GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char* scratch, int32_t scratch_capacity,
|
||||
int32_t* result_capacity);
|
||||
/**
|
||||
* Returns the number of bytes actually written to the sink.
|
||||
* @return number of bytes written to the buffer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t NumberOfBytesWritten() const { return size_; }
|
||||
/**
|
||||
* Returns true if any bytes were discarded, i.e., if there was an
|
||||
* attempt to write more than 'capacity' bytes.
|
||||
* @return true if more than 'capacity' bytes were Append()ed
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
UBool Overflowed() const { return overflowed_; }
|
||||
/**
|
||||
* Returns the number of bytes appended to the sink.
|
||||
* If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
|
||||
* else they return the same number.
|
||||
* @return number of bytes written to the buffer
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
int32_t NumberOfBytesAppended() const { return appended_; }
|
||||
private:
|
||||
char* outbuf_;
|
||||
const int32_t capacity_;
|
||||
int32_t size_;
|
||||
int32_t appended_;
|
||||
UBool overflowed_;
|
||||
|
||||
CheckedArrayByteSink() = delete;
|
||||
CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
|
||||
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
|
||||
};
|
||||
|
||||
/**
|
||||
* Implementation of ByteSink that writes to a "string".
|
||||
* The StringClass is usually instantiated with a std::string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
template<typename StringClass>
|
||||
class StringByteSink : public ByteSink {
|
||||
public:
|
||||
/**
|
||||
* Constructs a ByteSink that will append bytes to the dest string.
|
||||
* @param dest pointer to string object to append to
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringByteSink(StringClass* dest) : dest_(dest) { }
|
||||
/**
|
||||
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
|
||||
*
|
||||
* @param dest pointer to string object to append to
|
||||
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
|
||||
* @stable ICU 60
|
||||
*/
|
||||
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
|
||||
if (initialAppendCapacity > 0 &&
|
||||
(uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
|
||||
dest->reserve(dest->length() + initialAppendCapacity);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param data the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
|
||||
private:
|
||||
StringClass* dest_;
|
||||
|
||||
StringByteSink() = delete;
|
||||
StringByteSink(const StringByteSink &) = delete;
|
||||
StringByteSink &operator=(const StringByteSink &) = delete;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __BYTESTREAM_H__
|
||||
313
app/src/main/cpp/icu4c/include/unicode/char16ptr.h
Normal file
313
app/src/main/cpp/icu4c/include/unicode/char16ptr.h
Normal file
@@ -0,0 +1,313 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// char16ptr.h
|
||||
// created: 2017feb28 Markus W. Scherer
|
||||
|
||||
#ifndef __CHAR16PTR_H__
|
||||
#define __CHAR16PTR_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: char16_t pointer wrappers with
|
||||
* implicit conversion from bit-compatible raw pointer types.
|
||||
* Also conversion functions from char16_t * to UChar * and OldUChar *.
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \def U_ALIASING_BARRIER
|
||||
* Barrier for pointer anti-aliasing optimizations even across function boundaries.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
// Use the predefined value.
|
||||
#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
|
||||
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
|
||||
#elif defined(U_IN_DOXYGEN)
|
||||
# define U_ALIASING_BARRIER(ptr)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API Char16Ptr U_FINAL {
|
||||
public:
|
||||
/**
|
||||
* Copies the pointer.
|
||||
* @param p pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(char16_t *p);
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(uint16_t *p);
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(wchar_t *p);
|
||||
#endif
|
||||
/**
|
||||
* nullptr constructor.
|
||||
* @param p nullptr
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(std::nullptr_t p);
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ~Char16Ptr();
|
||||
|
||||
/**
|
||||
* Pointer access.
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline char16_t *get() const;
|
||||
/**
|
||||
* char16_t pointer access via type conversion (e.g., static_cast).
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline operator char16_t *() const { return get(); }
|
||||
|
||||
private:
|
||||
Char16Ptr() = delete;
|
||||
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
template<typename T> static char16_t *cast(T *t) {
|
||||
U_ALIASING_BARRIER(t);
|
||||
return reinterpret_cast<char16_t *>(t);
|
||||
}
|
||||
|
||||
char16_t *p_;
|
||||
#else
|
||||
union {
|
||||
char16_t *cp;
|
||||
uint16_t *up;
|
||||
wchar_t *wp;
|
||||
} u_;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// \cond
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
||||
Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
|
||||
Char16Ptr::~Char16Ptr() {
|
||||
U_ALIASING_BARRIER(p_);
|
||||
}
|
||||
|
||||
char16_t *Char16Ptr::get() const { return p_; }
|
||||
|
||||
#else
|
||||
|
||||
Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
|
||||
#endif
|
||||
Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
|
||||
Char16Ptr::~Char16Ptr() {}
|
||||
|
||||
char16_t *Char16Ptr::get() const { return u_.cp; }
|
||||
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API ConstChar16Ptr U_FINAL {
|
||||
public:
|
||||
/**
|
||||
* Copies the pointer.
|
||||
* @param p pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const char16_t *p);
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const uint16_t *p);
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const wchar_t *p);
|
||||
#endif
|
||||
/**
|
||||
* nullptr constructor.
|
||||
* @param p nullptr
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const std::nullptr_t p);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ~ConstChar16Ptr();
|
||||
|
||||
/**
|
||||
* Pointer access.
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const char16_t *get() const;
|
||||
/**
|
||||
* char16_t pointer access via type conversion (e.g., static_cast).
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline operator const char16_t *() const { return get(); }
|
||||
|
||||
private:
|
||||
ConstChar16Ptr() = delete;
|
||||
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
template<typename T> static const char16_t *cast(const T *t) {
|
||||
U_ALIASING_BARRIER(t);
|
||||
return reinterpret_cast<const char16_t *>(t);
|
||||
}
|
||||
|
||||
const char16_t *p_;
|
||||
#else
|
||||
union {
|
||||
const char16_t *cp;
|
||||
const uint16_t *up;
|
||||
const wchar_t *wp;
|
||||
} u_;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// \cond
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
||||
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
|
||||
ConstChar16Ptr::~ConstChar16Ptr() {
|
||||
U_ALIASING_BARRIER(p_);
|
||||
}
|
||||
|
||||
const char16_t *ConstChar16Ptr::get() const { return p_; }
|
||||
|
||||
#else
|
||||
|
||||
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
|
||||
#endif
|
||||
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
|
||||
ConstChar16Ptr::~ConstChar16Ptr() {}
|
||||
|
||||
const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
|
||||
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* Converts from const char16_t * to const UChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as const UChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const UChar *toUCharPtr(const char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<const UChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from char16_t * to UChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as UChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline UChar *toUCharPtr(char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<UChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from const char16_t * to const OldUChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as const OldUChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const OldUChar *toOldUCharPtr(const char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<const OldUChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from char16_t * to OldUChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as OldUChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline OldUChar *toOldUCharPtr(char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<OldUChar *>(p);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __CHAR16PTR_H__
|
||||
734
app/src/main/cpp/icu4c/include/unicode/chariter.h
Normal file
734
app/src/main/cpp/icu4c/include/unicode/chariter.h
Normal file
@@ -0,0 +1,734 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CHARITER_H
|
||||
#define CHARITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Character Iterator
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* Abstract class that defines an API for forward-only iteration
|
||||
* on text objects.
|
||||
* This is a minimal interface for iteration without random access
|
||||
* or backwards iteration. It is especially useful for wrapping
|
||||
* streams with converters into an object for collation or
|
||||
* normalization.
|
||||
*
|
||||
* <p>Characters can be accessed in two ways: as code units or as
|
||||
* code points.
|
||||
* Unicode code points are 21-bit integers and are the scalar values
|
||||
* of Unicode characters. ICU uses the type UChar32 for them.
|
||||
* Unicode code units are the storage units of a given
|
||||
* Unicode/UCS Transformation Format (a character encoding scheme).
|
||||
* With UTF-16, all code points can be represented with either one
|
||||
* or two code units ("surrogates").
|
||||
* String storage is typically based on code units, while properties
|
||||
* of characters are typically determined using code point values.
|
||||
* Some processes may be designed to work with sequences of code units,
|
||||
* or it may be known that all characters that are important to an
|
||||
* algorithm can be represented with single code units.
|
||||
* Other processes will need to use the code point access functions.</p>
|
||||
*
|
||||
* <p>ForwardCharacterIterator provides nextPostInc() to access
|
||||
* a code unit and advance an internal position into the text object,
|
||||
* similar to a <code>return text[position++]</code>.<br>
|
||||
* It provides next32PostInc() to access a code point and advance an internal
|
||||
* position.</p>
|
||||
*
|
||||
* <p>next32PostInc() assumes that the current position is that of
|
||||
* the beginning of a code point, i.e., of its first code unit.
|
||||
* After next32PostInc(), this will be true again.
|
||||
* In general, access to code units and code points in the same
|
||||
* iteration loop should not be mixed. In UTF-16, if the current position
|
||||
* is on a second code unit (Low Surrogate), then only that code unit
|
||||
* is returned even by next32PostInc().</p>
|
||||
*
|
||||
* <p>For iteration with either function, there are two ways to
|
||||
* check for the end of the iteration. When there are no more
|
||||
* characters in the text object:
|
||||
* <ul>
|
||||
* <li>The hasNext() function returns false.</li>
|
||||
* <li>nextPostInc() and next32PostInc() return DONE
|
||||
* when one attempts to read beyond the end of the text object.</li>
|
||||
* </ul>
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* void function1(ForwardCharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* while(it.hasNext()) {
|
||||
* c=it.next32PostInc();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* void function2(ForwardCharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </p>
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
    /**
     * Sentinel handed back by most ForwardCharacterIterator functions
     * once the iterator has reached the limit of its iteration.
     * @stable ICU 2.0
     */
    enum { DONE = 0xffff };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~ForwardCharacterIterator();

    /**
     * Equality test.
     * @param that The ForwardCharacterIterator to compare against
     * @return true when both iterators refer to the same character
     *         in the same character-storage object
     * @stable ICU 2.0
     */
    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;

    /**
     * Inequality test.
     * @param that The ForwardCharacterIterator to compare against
     * @return true when the iterators refer to different text-storage
     *         objects, or to different characters in the same
     *         text-storage object
     * @stable ICU 2.0
     */
    inline UBool operator!=(const ForwardCharacterIterator& that) const;

    /**
     * Produces a hash code for this iterator.
     * @return the hash code.
     * @stable ICU 2.0
     */
    virtual int32_t hashCode() const = 0;

    /**
     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
     * RTTI").<P> Although this function is public,
     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
     * @return a UClassID for this ForwardCharacterIterator
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID() const = 0;

    /**
     * Reads the current code unit and then advances to the next code unit
     * in the iteration range (toward endIndex()).
     * Returns DONE when no code units remain.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t nextPostInc() = 0;

    /**
     * Reads the current code point and then advances to the next code point
     * in the iteration range (toward endIndex()).
     * Returns DONE when no code points remain.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32PostInc() = 0;

    /**
     * Tells whether anything is left to read at or after the current
     * position in the iteration range. Intended for forward iteration
     * together with nextPostInc() or next32PostInc().
     * @return false if there are no more code units or code points
     *         at or after the current position in the iteration range.
     * @stable ICU 2.0
     */
    virtual UBool hasNext() = 0;

protected:
    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator();

    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator(const ForwardCharacterIterator &other);

    /**
     * Assignment operator to be overridden in the implementing class.
     * This base version copies nothing and simply returns *this.
     * @stable ICU 2.0
     */
    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) {
        return *this;
    }
};
|
||||
|
||||
/**
|
||||
* Abstract class that defines an API for iteration
|
||||
* on text objects.
|
||||
* This is an interface for forward and backward iteration
|
||||
* and random access into a text object.
|
||||
*
|
||||
* <p>The API provides backward compatibility to the Java and older ICU
|
||||
* CharacterIterator classes but extends them significantly:
|
||||
* <ol>
|
||||
* <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
|
||||
* <li>While the old API functions provided forward iteration with
|
||||
* "pre-increment" semantics, the new one also provides functions
|
||||
* with "post-increment" semantics. They are more efficient and should
|
||||
* be the preferred iterator functions for new implementations.
|
||||
* The backward iteration always had "pre-decrement" semantics, which
|
||||
* are efficient.</li>
|
||||
* <li>Just like ForwardCharacterIterator, it provides access to
|
||||
* both code units and code points. Code point access versions are available
|
||||
* for the old and the new iteration semantics.</li>
|
||||
* <li>There are new functions for setting and moving the current position
|
||||
* without returning a character, for efficiency.</li>
|
||||
* </ol>
|
||||
*
|
||||
* See ForwardCharacterIterator for examples for using the new forward iteration
|
||||
* functions. For backward iteration, there is also a hasPrevious() function
|
||||
* that can be used analogously to hasNext().
|
||||
* The old functions work as before and are shown below.</p>
|
||||
*
|
||||
* <p>Examples for some of the new functions:</p>
|
||||
*
|
||||
* Forward iteration with hasNext():
|
||||
* \code
|
||||
* void forward1(CharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* for(it.setToStart(); it.hasNext();) {
|
||||
* c=it.next32PostInc();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Forward iteration more similar to loops with the old forward iteration,
|
||||
* showing a way to convert simple for() loops:
|
||||
* \code
|
||||
* void forward2(CharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Backward iteration with setToEnd() and hasPrevious():
|
||||
* \code
|
||||
* void backward1(CharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* for(it.setToEnd(); it.hasPrevious();) {
|
||||
* c=it.previous32();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Backward iteration with a more traditional for() loop:
|
||||
* \code
|
||||
* void backward2(CharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* Example for random access:
|
||||
* \code
|
||||
* void random(CharacterIterator &it) {
|
||||
* // set to the third code point from the beginning
|
||||
* it.move32(3, CharacterIterator::kStart);
|
||||
* // get a code point from here without moving the position
|
||||
* UChar32 c=it.current32();
|
||||
* // get the position
|
||||
* int32_t pos=it.getIndex();
|
||||
* // get the previous code unit
|
||||
* char16_t u=it.previous();
|
||||
* // move back one more code unit
|
||||
* it.move(-1, CharacterIterator::kCurrent);
|
||||
* // set the position back to where it was
|
||||
* // and read the same code point c and move beyond it
|
||||
* it.setIndex(pos);
|
||||
* if(c!=it.next32PostInc()) {
|
||||
* exit(1); // CharacterIterator inconsistent
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* <p>Examples, especially for the old API:</p>
|
||||
*
|
||||
* Function processing characters, in this example simple output
|
||||
* <pre>
|
||||
* \code
|
||||
* void processChar( char16_t c )
|
||||
* {
|
||||
* cout << " " << c;
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse the text from start to finish
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseForward(CharacterIterator& iter)
|
||||
* {
|
||||
* for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse the text backwards, from end to start
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseBackward(CharacterIterator& iter)
|
||||
* {
|
||||
* for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse both forward and backward from a given position in the text.
|
||||
* The isLetter()/isDigit() tests in this example represent some additional stopping criteria.
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseOut(CharacterIterator& iter, int32_t pos)
|
||||
* {
|
||||
* char16_t c;
|
||||
* for (c = iter.setIndex(pos);
|
||||
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
* c = iter.next()) {}
|
||||
* int32_t end = iter.getIndex();
|
||||
* for (c = iter.setIndex(pos);
|
||||
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
* c = iter.previous()) {}
|
||||
* int32_t start = iter.getIndex() + 1;
|
||||
*
|
||||
* cout << "start: " << start << " end: " << end << endl;
|
||||
* for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Creating a StringCharacterIterator and calling the test functions
|
||||
* <pre>
|
||||
* \code
|
||||
* void CharacterIterator_Example( void )
|
||||
* {
|
||||
* cout << endl << "===== CharacterIterator_Example: =====" << endl;
|
||||
* UnicodeString text("Ein kleiner Satz.");
|
||||
* StringCharacterIterator iterator(text);
|
||||
* cout << "----- traverseForward: -----------" << endl;
|
||||
* traverseForward( iterator );
|
||||
* cout << endl << endl << "----- traverseBackward: ----------" << endl;
|
||||
* traverseBackward( iterator );
|
||||
* cout << endl << endl << "----- traverseOut: ---------------" << endl;
|
||||
* traverseOut( iterator, 7 );
|
||||
* cout << endl << endl << "-----" << endl;
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
    /**
     * Reference points accepted by the move() and move32() functions.
     * @stable ICU 2.0
     */
    enum EOrigin { kStart, kCurrent, kEnd };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~CharacterIterator();

    /**
     * Creates a new CharacterIterator of the same concrete class as this
     * one, referring to the same character in the same text-storage
     * object. The caller is responsible for deleting the clone.
     * @return a pointer to a new CharacterIterator
     * @stable ICU 2.0
     */
    virtual CharacterIterator* clone() const = 0;

    /**
     * Positions the iterator on the first code unit of its iteration
     * range and returns that code unit.
     * Useful as the start of an iteration with next().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t first() = 0;

    /**
     * Positions the iterator on the first code unit of its iteration
     * range, returns that code unit, and then moves the position to the
     * second code unit. An alternative to setToStart() for forward
     * iteration with nextPostInc().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t firstPostInc();

    /**
     * Positions the iterator on the first code point of its iteration
     * range and returns that code point.
     * Useful as the start of an iteration with next32().
     * Note that iterating with next32PostInc(), starting with e.g.
     * setToStart() or firstPostInc(), is more efficient.
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32() = 0;

    /**
     * Positions the iterator on the first code point of its iteration
     * range, returns that code point, and then moves the position to the
     * second code point. An alternative to setToStart() for forward
     * iteration with next32PostInc().
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32PostInc();

    /**
     * Positions the iterator on the first code unit or code point of its
     * iteration range. Useful as the start of a forward iteration with
     * nextPostInc() or next32PostInc().
     * @return the start position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToStart();

    /**
     * Positions the iterator on the last code unit of its iteration
     * range and returns that code unit.
     * Useful as the start of an iteration with previous().
     * @return the last code unit.
     * @stable ICU 2.0
     */
    virtual char16_t last() = 0;

    /**
     * Positions the iterator on the last code point of its iteration
     * range and returns that code point.
     * Useful as the start of an iteration with previous32().
     * @return the last code point.
     * @stable ICU 2.0
     */
    virtual UChar32 last32() = 0;

    /**
     * Positions the iterator at the end of its iteration range, just
     * behind the last code unit or code point. Useful as the start of a
     * backward iteration with previous() or previous32().
     * @return the end position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToEnd();

    /**
     * Positions the iterator on the "position"-th code unit of the
     * text-storage object it refers to, and returns that code unit.
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual char16_t setIndex(int32_t position) = 0;

    /**
     * Positions the iterator at the beginning of the code point that
     * contains the "position"-th code unit of the text-storage object it
     * refers to, and returns that code point.
     * The current position is adjusted to the beginning of the code point
     * (its first code unit).
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code point.
     * @stable ICU 2.0
     */
    virtual UChar32 setIndex32(int32_t position) = 0;

    /**
     * Reads the code unit at the current position without moving.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t current() const = 0;

    /**
     * Reads the code point at the current position without moving.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 current32() const = 0;

    /**
     * Steps to the next code unit in the iteration range
     * (toward endIndex()) and returns it.
     * Returns DONE when no code units remain.
     * @return the next code unit.
     * @stable ICU 2.0
     */
    virtual char16_t next() = 0;

    /**
     * Steps to the next code point in the iteration range
     * (toward endIndex()) and returns it.
     * Returns DONE when no code points remain.
     * Note that iteration with "pre-increment" semantics is less
     * efficient than the "post-increment" iteration offered by
     * next32PostInc().
     * @return the next code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32() = 0;

    /**
     * Steps to the previous code unit in the iteration range
     * (toward startIndex()) and returns it.
     * Returns DONE when no code units remain.
     * @return the previous code unit.
     * @stable ICU 2.0
     */
    virtual char16_t previous() = 0;

    /**
     * Steps to the previous code point in the iteration range
     * (toward startIndex()) and returns it.
     * Returns DONE when no code points remain.
     * @return the previous code point.
     * @stable ICU 2.0
     */
    virtual UChar32 previous32() = 0;

    /**
     * Tells whether anything is left to read before the current position
     * in the iteration range. Intended for backward iteration together
     * with previous() or previous32().
     * @return false if there are no more code units or code points
     *         before the current position in the iteration range,
     *         true otherwise.
     * @stable ICU 2.0
     */
    virtual UBool hasPrevious() = 0;

    /**
     * Reports the numeric index, in the underlying text-storage object,
     * of the character returned by first(). Because an iterator may cover
     * only part of a text-storage object, this is not necessarily 0.
     * @return the numeric index in the underlying text-storage
     *         object of the character returned by first().
     * @stable ICU 2.0
     */
    inline int32_t startIndex() const;

    /**
     * Reports the numeric index, in the underlying text-storage object,
     * of the position immediately BEYOND the character returned by last().
     * @return the numeric index in the underlying text-storage
     *         object of the position immediately BEYOND the character
     *         returned by last().
     * @stable ICU 2.0
     */
    inline int32_t endIndex() const;

    /**
     * Reports the numeric index, in the underlying text-storage object,
     * of the character the iterator currently refers to
     * (i.e., the character returned by current()).
     * @return the numeric index in the text-storage object of
     *         the character the iterator currently refers to
     * @stable ICU 2.0
     */
    inline int32_t getIndex() const;

    /**
     * Reports the length of the entire text in the underlying
     * text-storage object.
     * @return the length of the entire text in the text-storage object
     * @stable ICU 2.0
     */
    inline int32_t getLength() const;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The distance is counted in code units; a positive delta moves
     * forward and a negative delta moves backward.
     * @param delta the position relative to origin. A positive delta means forward;
     *              a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move(int32_t delta, EOrigin origin) = 0;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The distance is counted in code points; a positive delta moves
     * forward and a negative delta moves backward.
     * @param delta the position relative to origin. A positive delta means forward;
     *              a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
#ifdef move32
    // One of the system headers right now is sometimes defining a conflicting macro we don't use
#undef move32
#endif
    virtual int32_t move32(int32_t delta, EOrigin origin) = 0;

    /**
     * Copies the text under iteration into the UnicodeString
     * referred to by "result".
     * @param result Receives a copy of the text under iteration.
     * @stable ICU 2.0
     */
    virtual void getText(UnicodeString& result) = 0;

protected:
    /**
     * Empty constructor.
     * @stable ICU 2.0
     */
    CharacterIterator();

    /**
     * Constructor that only sets the length field of this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length);

    /**
     * Constructor that only sets the length and position fields of this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t position);

    /**
     * Constructor that only sets the length, start, end, and position fields of this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);

    /**
     * Copy constructor.
     *
     * @param that The CharacterIterator to be copied
     * @stable ICU 2.0
     */
    CharacterIterator(const CharacterIterator &that);

    /**
     * Assignment operator. Makes this CharacterIterator behave the same
     * as the one passed in.
     * @param that The CharacterIterator passed in.
     * @return the newly set CharacterIterator.
     * @stable ICU 2.0
     */
    CharacterIterator &operator=(const CharacterIterator &that);

    /**
     * Base class text length field.
     * Needed for correct getText() and hashCode().
     * @stable ICU 2.0
     */
    int32_t textLength;

    /**
     * Base class field for the current position.
     * @stable ICU 2.0
     */
    int32_t pos;

    /**
     * Base class field for the start of the iteration range.
     * @stable ICU 2.0
     */
    int32_t begin;

    /**
     * Base class field for the end of the iteration range.
     * @stable ICU 2.0
     */
    int32_t end;
};
|
||||
|
||||
// Inequality is defined as the negation of the (virtual) operator==.
inline UBool ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
    return !this->operator==(that);
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::setToStart() {
|
||||
return move(0, kStart);
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::setToEnd() {
|
||||
return move(0, kEnd);
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::startIndex(void) const {
|
||||
return begin;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::endIndex(void) const {
|
||||
return end;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::getIndex(void) const {
|
||||
return pos;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::getLength(void) const {
|
||||
return textLength;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
595
app/src/main/cpp/icu4c/include/unicode/localpointer.h
Normal file
595
app/src/main/cpp/icu4c/include/unicode/localpointer.h
Normal file
@@ -0,0 +1,595 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: localpointer.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009nov13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __LOCALPOINTER_H__
|
||||
#define __LOCALPOINTER_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
|
||||
*
|
||||
* These classes are inspired by
|
||||
* - std::auto_ptr
|
||||
* - boost::scoped_ptr & boost::scoped_array
|
||||
* - Taligent Safe Pointers (TOnlyPointerTo)
|
||||
*
|
||||
* but none of those provide for all of the goals for ICU smart pointers:
|
||||
* - Smart pointer owns the object and releases it when it goes out of scope.
|
||||
* - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
|
||||
* - ICU-compatible: No exceptions.
|
||||
* - Need to be able to orphan/release the pointer and its ownership.
|
||||
* - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
|
||||
*
|
||||
* For details see http://site.icu-project.org/design/cpp/scoped_ptr
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <memory>
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* "Smart pointer" base class; do not use directly: use LocalPointer etc.
|
||||
*
|
||||
* Base class for smart pointer classes that do not throw exceptions.
|
||||
*
|
||||
* Do not use this base class directly, since it does not delete its pointer.
|
||||
* A subclass must implement methods that delete the pointer:
|
||||
* Destructor and adoptInstead().
|
||||
*
|
||||
* There is no operator T *() provided because the programmer must decide
|
||||
* whether to use getAlias() (without transfer of ownership) or orphan()
|
||||
* (with transfer of ownership and NULLing of the pointer).
|
||||
*
|
||||
* @see LocalPointer
|
||||
* @see LocalArray
|
||||
* @see U_DEFINE_LOCAL_OPEN_POINTER
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalPointerBase {
|
||||
public:
|
||||
// No heap allocation. Use only on the stack.
|
||||
static void* U_EXPORT2 operator new(size_t) = delete;
|
||||
static void* U_EXPORT2 operator new[](size_t) = delete;
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
static void* U_EXPORT2 operator new(size_t, void*) = delete;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalPointerBase(T *p=NULL) : ptr(p) {}
|
||||
/**
|
||||
* Destructor deletes the object it owns.
|
||||
* Subclass must override: Base class does nothing.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalPointerBase() { /* delete ptr; */ }
|
||||
/**
|
||||
* NULL check.
|
||||
* @return true if ==NULL
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UBool isNull() const { return ptr==NULL; }
|
||||
/**
|
||||
* NULL check.
|
||||
* @return true if !=NULL
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UBool isValid() const { return ptr!=NULL; }
|
||||
/**
|
||||
* Comparison with a simple pointer, so that existing code
|
||||
* with ==NULL need not be changed.
|
||||
* @param other simple pointer for comparison
|
||||
* @return true if this pointer value equals other
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
bool operator==(const T *other) const { return ptr==other; }
|
||||
/**
|
||||
* Comparison with a simple pointer, so that existing code
|
||||
* with !=NULL need not be changed.
|
||||
* @param other simple pointer for comparison
|
||||
* @return true if this pointer value differs from other
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
bool operator!=(const T *other) const { return ptr!=other; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *getAlias() const { return ptr; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value as a reference
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T &operator*() const { return *ptr; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *operator->() const { return ptr; }
|
||||
/**
|
||||
* Gives up ownership; the internal pointer becomes NULL.
|
||||
* @return the pointer value;
|
||||
* caller becomes responsible for deleting the object
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *orphan() {
|
||||
T *p=ptr;
|
||||
ptr=NULL;
|
||||
return p;
|
||||
}
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* Subclass must override: Base class does not delete the object.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
|
||||
// delete ptr;
|
||||
ptr=p;
|
||||
}
|
||||
protected:
|
||||
/**
|
||||
* Actual pointer.
|
||||
* @internal
|
||||
*/
|
||||
T *ptr;
|
||||
private:
|
||||
// No comparison operators with other LocalPointerBases.
|
||||
bool operator==(const LocalPointerBase<T> &other);
|
||||
bool operator!=(const LocalPointerBase<T> &other);
|
||||
// No ownership sharing: No copy constructor, no assignment operator.
|
||||
LocalPointerBase(const LocalPointerBase<T> &other);
|
||||
void operator=(const LocalPointerBase<T> &other);
|
||||
};
|
||||
|
||||
/**
|
||||
* "Smart pointer" class, deletes objects via the standard C++ delete operator.
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
|
||||
* int32_t length=s->length(); // 2
|
||||
* char16_t lead=s->charAt(0); // 0xd900
|
||||
* if(some condition) { return; } // no need to explicitly delete the pointer
|
||||
* s.adoptInstead(new UnicodeString((char16_t)0xfffc));
|
||||
* length=s->length(); // 1
|
||||
* // no need to explicitly delete the pointer
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalPointer : public LocalPointerBase<T> {
|
||||
public:
|
||||
using LocalPointerBase<T>::operator*;
|
||||
using LocalPointerBase<T>::operator->;
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {}
|
||||
/**
|
||||
* Constructor takes ownership and reports an error if NULL.
|
||||
*
|
||||
* This constructor is intended to be used with other-class constructors
|
||||
* that may report a failure UErrorCode,
|
||||
* so that callers need to check only for U_FAILURE(errorCode)
|
||||
* and not also separately for isNull().
|
||||
*
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==NULL and no other failure code had been set
|
||||
* @stable ICU 55
|
||||
*/
|
||||
LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
|
||||
if(p==NULL && U_SUCCESS(errorCode)) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Move constructor, leaves src with isNull().
|
||||
* @param src source smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
|
||||
src.ptr=NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a LocalPointer from a C++11 std::unique_ptr.
|
||||
* The LocalPointer steals the object owned by the std::unique_ptr.
|
||||
*
|
||||
* This constructor works via move semantics. If your std::unique_ptr is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the pointer will be stolen.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
explicit LocalPointer(std::unique_ptr<T> &&p)
|
||||
: LocalPointerBase<T>(p.release()) {}
|
||||
|
||||
/**
|
||||
* Destructor deletes the object it owns.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalPointer() {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
}
|
||||
/**
|
||||
* Move assignment operator, leaves src with isNull().
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source smart pointer
|
||||
* @return *this
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=src.ptr;
|
||||
src.ptr=NULL;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move-assign from an std::unique_ptr to this LocalPointer.
|
||||
* Steals the pointer from the std::unique_ptr.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the pointer will be stolen.
|
||||
* @return *this
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocalPointer<T> &operator=(std::unique_ptr<T> &&p) U_NOEXCEPT {
|
||||
adoptInstead(p.release());
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap pointers.
|
||||
* @param other other smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void swap(LocalPointer<T> &other) U_NOEXCEPT {
    // Exchange the two raw pointers; neither object is deleted.
    T *const mine = this->ptr;
    this->ptr = other.ptr;
    other.ptr = mine;
}
|
||||
/**
|
||||
* Non-member LocalPointer swap function.
|
||||
* @param p1 will get p2's pointer
|
||||
* @param p2 will get p1's pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
// Free-function form; simply forwards to the member swap().
friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) U_NOEXCEPT { p1.swap(p2); }
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
    T *&owned = LocalPointerBase<T>::ptr;
    delete owned;  // free the object held so far (no-op when null)
    owned = p;     // take ownership of the new one
}
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
*
|
||||
* If U_FAILURE(errorCode), then the current object is retained and the new one deleted.
|
||||
*
|
||||
* If U_SUCCESS(errorCode) but the input pointer is NULL,
|
||||
* then U_MEMORY_ALLOCATION_ERROR is set,
|
||||
* the current object is deleted, and NULL is set.
|
||||
*
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==NULL and no other failure code had been set
|
||||
* @stable ICU 55
|
||||
*/
|
||||
void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
    if(!U_SUCCESS(errorCode)) {
        // A failure is already recorded: keep the current object
        // and discard the incoming one.
        delete p;
        return;
    }
    // Success so far: replace the owned object with p.
    delete this->ptr;
    this->ptr = p;
    if(p == nullptr) {
        // A null replacement is reported as an allocation failure.
        errorCode = U_MEMORY_ALLOCATION_ERROR;
    }
}
|
||||
|
||||
/**
|
||||
* Conversion operator to a C++11 std::unique_ptr.
|
||||
* Disowns the object and gives it to the returned std::unique_ptr.
|
||||
*
|
||||
* This operator works via move semantics. If your LocalPointer is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @return An std::unique_ptr owning the pointer previously owned by this
|
||||
* icu::LocalPointer.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
operator std::unique_ptr<T> () && {
    // orphan() is inherited from LocalPointerBase; its result becomes
    // the pointer owned by the returned std::unique_ptr.
    T *released = LocalPointerBase<T>::orphan();
    return std::unique_ptr<T>(released);
}
|
||||
};
|
||||
|
||||
/**
|
||||
* "Smart pointer" class, deletes objects via the C++ array delete[] operator.
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
* Adds operator[] for array item access.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalArray<UnicodeString> a(new UnicodeString[2]);
|
||||
* a[0].append((char16_t)0x61);
|
||||
* if(some condition) { return; } // no need to explicitly delete the array
|
||||
* a.adoptInstead(new UnicodeString[4]);
|
||||
* a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
|
||||
* // no need to explicitly delete the array
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalArray : public LocalPointerBase<T> {
|
||||
public:
|
||||
using LocalPointerBase<T>::operator*;
|
||||
using LocalPointerBase<T>::operator->;
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {}
|
||||
/**
|
||||
* Constructor takes ownership and reports an error if NULL.
|
||||
*
|
||||
* This constructor is intended to be used with other-class constructors
|
||||
* that may report a failure UErrorCode,
|
||||
* so that callers need to check only for U_FAILURE(errorCode)
|
||||
* and not also separately for isNull().
|
||||
*
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==NULL and no other failure code had been set
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
|
||||
if(p==NULL && U_SUCCESS(errorCode)) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Move constructor, leaves src with isNull().
|
||||
* @param src source smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
|
||||
src.ptr=NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a LocalArray from a C++11 std::unique_ptr of an array type.
|
||||
* The LocalPointer steals the array owned by the std::unique_ptr.
|
||||
*
|
||||
* This constructor works via move semantics. If your std::unique_ptr is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the array will be stolen.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
explicit LocalArray(std::unique_ptr<T[]> &&p)
|
||||
: LocalPointerBase<T>(p.release()) {}
|
||||
|
||||
/**
|
||||
* Destructor deletes the array it owns.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalArray() {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
}
|
||||
/**
|
||||
* Move assignment operator, leaves src with isNull().
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source smart pointer
|
||||
* @return *this
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=src.ptr;
|
||||
src.ptr=NULL;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move-assign from an std::unique_ptr to this LocalPointer.
|
||||
* Steals the array from the std::unique_ptr.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the array will be stolen.
|
||||
* @return *this
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocalArray<T> &operator=(std::unique_ptr<T[]> &&p) U_NOEXCEPT {
|
||||
adoptInstead(p.release());
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap pointers.
|
||||
* @param other other smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void swap(LocalArray<T> &other) U_NOEXCEPT {
|
||||
T *temp=LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=other.ptr;
|
||||
other.ptr=temp;
|
||||
}
|
||||
/**
|
||||
* Non-member LocalArray swap function.
|
||||
* @param p1 will get p2's pointer
|
||||
* @param p2 will get p1's pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) U_NOEXCEPT {
|
||||
p1.swap(p2);
|
||||
}
|
||||
/**
|
||||
* Deletes the array it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
}
|
||||
/**
|
||||
* Deletes the array it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
*
|
||||
* If U_FAILURE(errorCode), then the current array is retained and the new one deleted.
|
||||
*
|
||||
* If U_SUCCESS(errorCode) but the input pointer is NULL,
|
||||
* then U_MEMORY_ALLOCATION_ERROR is set,
|
||||
* the current array is deleted, and NULL is set.
|
||||
*
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==NULL and no other failure code had been set
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
if(p==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
} else {
|
||||
delete[] p;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Array item access (writable).
|
||||
* No index bounds check.
|
||||
* @param i array index
|
||||
* @return reference to the array item
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
|
||||
|
||||
/**
|
||||
* Conversion operator to a C++11 std::unique_ptr.
|
||||
* Disowns the object and gives it to the returned std::unique_ptr.
|
||||
*
|
||||
* This operator works via move semantics. If your LocalPointer is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @return An std::unique_ptr owning the pointer previously owned by this
|
||||
* icu::LocalPointer.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
operator std::unique_ptr<T[]> () && {
|
||||
return std::unique_ptr<T[]>(LocalPointerBase<T>::orphan());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \def U_DEFINE_LOCAL_OPEN_POINTER
|
||||
* "Smart pointer" definition macro, deletes objects via the closeFunction.
|
||||
* Defines a subclass of LocalPointerBase which works just
|
||||
* like LocalPointer<Type> except that this subclass will use the closeFunction
|
||||
* rather than the C++ delete operator.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
|
||||
* utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
|
||||
* utf8Out, (int32_t)sizeof(utf8Out),
|
||||
* utf8In, utf8InLength, &errorCode);
|
||||
* if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
    class LocalPointerClassName : public LocalPointerBase<Type> { \
    public: \
        using LocalPointerBase<Type>::operator*; \
        using LocalPointerBase<Type>::operator->; \
        /* Takes ownership of p (which may be NULL). */ \
        explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
        /* Move constructor: takes over src's pointer and leaves src NULL. */ \
        LocalPointerClassName(LocalPointerClassName &&src) U_NOEXCEPT \
                : LocalPointerBase<Type>(src.ptr) { \
            src.ptr=NULL; \
        } \
        /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
        explicit LocalPointerClassName(std::unique_ptr<Type, decltype(&closeFunction)> &&p) \
                : LocalPointerBase<Type>(p.release()) {} \
        /* Destructor: passes a non-NULL pointer to closeFunction. */ \
        ~LocalPointerClassName() { if (ptr != NULL) { closeFunction(ptr); } } \
        /* Move assignment: closes the current object, takes over src's pointer, leaves src NULL. */ \
        /* Self-assignment is a no-op; otherwise the object would be closed and its pointer kept. */ \
        LocalPointerClassName &operator=(LocalPointerClassName &&src) U_NOEXCEPT { \
            if (this != &src) { \
                if (ptr != NULL) { closeFunction(ptr); } \
                LocalPointerBase<Type>::ptr=src.ptr; \
                src.ptr=NULL; \
            } \
            return *this; \
        } \
        /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
        LocalPointerClassName &operator=(std::unique_ptr<Type, decltype(&closeFunction)> &&p) { \
            adoptInstead(p.release()); \
            return *this; \
        } \
        /* Exchanges the raw pointers; nothing is closed. */ \
        void swap(LocalPointerClassName &other) U_NOEXCEPT { \
            Type *temp=LocalPointerBase<Type>::ptr; \
            LocalPointerBase<Type>::ptr=other.ptr; \
            other.ptr=temp; \
        } \
        /* Non-member swap; forwards to the member swap(). */ \
        friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
            p1.swap(p2); \
        } \
        /* Closes the current object (if non-NULL) and takes ownership of p. */ \
        void adoptInstead(Type *p) { \
            if (ptr != NULL) { closeFunction(ptr); } \
            ptr=p; \
        } \
        /* Rvalue-only conversion: hands the orphan() result to a std::unique_ptr that uses closeFunction as its deleter. */ \
        operator std::unique_ptr<Type, decltype(&closeFunction)> () && { \
            return std::unique_ptr<Type, decltype(&closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction); \
        } \
    }
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
#endif /* __LOCALPOINTER_H__ */
|
||||
1272
app/src/main/cpp/icu4c/include/unicode/locid.h
Normal file
1272
app/src/main/cpp/icu4c/include/unicode/locid.h
Normal file
File diff suppressed because it is too large
Load Diff
94
app/src/main/cpp/icu4c/include/unicode/parseerr.h
Normal file
94
app/src/main/cpp/icu4c/include/unicode/parseerr.h
Normal file
@@ -0,0 +1,94 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 03/14/00 aliu Creation.
|
||||
* 06/27/00 aliu Change from C++ class to C struct
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef PARSEERR_H
|
||||
#define PARSEERR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Parse Error Information
|
||||
*/
|
||||
/**
|
||||
* The capacity of the context strings in UParseError.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
enum { U_PARSE_CONTEXT_LEN = 16 };
|
||||
|
||||
/**
|
||||
* A UParseError struct is used to return detailed information about
|
||||
* parsing errors. It is used by ICU parsing engines that parse long
|
||||
* rules, patterns, or programs, where the text being parsed is long
|
||||
* enough that more information than a UErrorCode is needed to
|
||||
* localize the error.
|
||||
*
|
||||
* <p>The line, offset, and context fields are optional; parsing
|
||||
* engines may choose not to use them.
|
||||
*
|
||||
* <p>The preContext and postContext strings include some part of the
|
||||
* context surrounding the error. If the source text is "let for=7"
|
||||
* and "for" is the error (e.g., because it is a reserved word), then
|
||||
* some examples of what a parser might produce are the following:
|
||||
*
|
||||
* <pre>
|
||||
* preContext postContext
|
||||
* "" "" The parser does not support context
|
||||
* "let " "=7" Pre- and post-context only
|
||||
* "let " "for=7" Pre- and post-context and error text
|
||||
* "" "for" Error text only
|
||||
* </pre>
|
||||
*
|
||||
* <p>Examples of engines which use UParseError (or may use it in the
|
||||
* future) are Transliterator, RuleBasedBreakIterator, and
|
||||
* RegexPattern.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UParseError {
|
||||
|
||||
/**
|
||||
* The line on which the error occurred. If the parser uses this
|
||||
* field, it sets it to the line number of the source text line on
|
||||
* which the error appears, which will be a value >= 1. If the
|
||||
* parse does not support line numbers, the value will be <= 0.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t line;
|
||||
|
||||
/**
|
||||
* The character offset to the error. If the line field is >= 1,
|
||||
* then this is the offset from the start of the line. Otherwise,
|
||||
* this is the offset from the start of the text. If the parser
|
||||
* does not support this field, it will have a value < 0.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t offset;
|
||||
|
||||
/**
|
||||
* Textual context before the error. Null-terminated. The empty
|
||||
* string if not supported by parser.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UChar preContext[U_PARSE_CONTEXT_LEN];
|
||||
|
||||
/**
|
||||
* The error itself and/or textual context after the error.
|
||||
* Null-terminated. The empty string if not supported by parser.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UChar postContext[U_PARSE_CONTEXT_LEN];
|
||||
|
||||
} UParseError;
|
||||
|
||||
#endif
|
||||
885
app/src/main/cpp/icu4c/include/unicode/platform.h
Normal file
885
app/src/main/cpp/icu4c/include/unicode/platform.h
Normal file
@@ -0,0 +1,885 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : platform.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _PLATFORM_H
|
||||
#define _PLATFORM_H
|
||||
|
||||
#include "unicode/uconfig.h"
|
||||
#include "unicode/uvernum.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Basic types for the platform.
|
||||
*
|
||||
* This file used to be generated by autoconf/configure.
|
||||
* Starting with ICU 49, platform.h is a normal source file,
|
||||
* to simplify cross-compiling and working with non-autoconf/make build systems.
|
||||
*
|
||||
* When a value in this file does not work on a platform, then please
|
||||
* try to derive it from the U_PLATFORM value
|
||||
* (for which we might need a new value constant in rare cases)
|
||||
* and/or from other macros that are predefined by the compiler
|
||||
* or defined in standard (POSIX or platform or compiler) headers.
|
||||
*
|
||||
* As a temporary workaround, you can add an explicit \#define for some macros
|
||||
* before it is first tested, or add an equivalent -D macro definition
|
||||
* to the compiler's command line.
|
||||
*
|
||||
* Note: Some compilers provide ways to show the predefined macros.
|
||||
* For example, with gcc you can compile an empty .c file and have the compiler
|
||||
* print the predefined macros with
|
||||
* \code
|
||||
* gcc -E -dM -x c /dev/null | sort
|
||||
* \endcode
|
||||
* (You can provide an actual empty .c file rather than /dev/null.
|
||||
* <code>-x c++</code> is for C++.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Define some things so that they can be documented.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_IN_DOXYGEN
|
||||
/*
|
||||
* Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
|
||||
* Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
|
||||
*/
|
||||
|
||||
/* None for now. */
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM
|
||||
* The U_PLATFORM macro defines the platform we're on.
|
||||
*
|
||||
* We used to define one different, value-less macro per platform.
|
||||
* That made it hard to know the set of relevant platforms and macros,
|
||||
* and hard to deal with variants of platforms.
|
||||
*
|
||||
* Starting with ICU 49, we define platforms as numeric macros,
|
||||
* with ranges of values for related platforms and their variants.
|
||||
* The U_PLATFORM macro is set to one of these values.
|
||||
*
|
||||
* Historical note from the Solaris Wikipedia article:
|
||||
* AT&T and Sun collaborated on a project to merge the most popular Unix variants
|
||||
* on the market at that time: BSD, System V, and Xenix.
|
||||
* This became Unix System V Release 4 (SVR4).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
/** Unknown platform. @internal */
|
||||
#define U_PF_UNKNOWN 0
|
||||
/** Windows @internal */
|
||||
#define U_PF_WINDOWS 1000
|
||||
/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
|
||||
#define U_PF_MINGW 1800
|
||||
/**
|
||||
* Cygwin. Windows, calls to cygwin1.dll for Posix functions,
|
||||
* using MSVC or GNU gcc and binutils.
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_CYGWIN 1900
|
||||
/* Reserve 2000 for U_PF_UNIX? */
|
||||
/** HP-UX is based on UNIX System V. @internal */
|
||||
#define U_PF_HPUX 2100
|
||||
/** Solaris is a Unix operating system based on SVR4. @internal */
|
||||
#define U_PF_SOLARIS 2600
|
||||
/** BSD is a UNIX operating system derivative. @internal */
|
||||
#define U_PF_BSD 3000
|
||||
/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
|
||||
#define U_PF_AIX 3100
|
||||
/** IRIX is based on UNIX System V with BSD extensions. @internal */
|
||||
#define U_PF_IRIX 3200
|
||||
/**
|
||||
* Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
|
||||
* as well as code derived from NeXTSTEP, BSD, and other projects,
|
||||
* built around the Mach kernel.
|
||||
* Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
|
||||
* (Original description modified from Wikipedia.)
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_DARWIN 3500
|
||||
/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
|
||||
#define U_PF_IPHONE 3550
|
||||
/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
|
||||
#define U_PF_QNX 3700
|
||||
/** Linux is a Unix-like operating system. @internal */
|
||||
#define U_PF_LINUX 4000
|
||||
/**
|
||||
* Native Client is pretty close to Linux.
|
||||
* See https://developer.chrome.com/native-client and
|
||||
* http://www.chromium.org/nativeclient
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_BROWSER_NATIVE_CLIENT 4020
|
||||
/** Android is based on Linux. @internal */
|
||||
#define U_PF_ANDROID 4050
|
||||
/** Fuchsia is a POSIX-ish platform. @internal */
|
||||
#define U_PF_FUCHSIA 4100
|
||||
/* Maximum value for Linux-based platform is 4499 */
|
||||
/**
|
||||
* Emscripten is a C++ transpiler for the Web that can target asm.js or
|
||||
* WebAssembly. It provides some POSIX-compatible wrappers and stubs and
|
||||
* some Linux-like functionality, but is not fully compatible with
|
||||
* either.
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_EMSCRIPTEN 5010
|
||||
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
|
||||
#define U_PF_OS390 9000
|
||||
/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
|
||||
#define U_PF_OS400 9400
|
||||
|
||||
#ifdef U_PLATFORM
|
||||
/* Use the predefined value. */
|
||||
#elif defined(__MINGW32__)
|
||||
# define U_PLATFORM U_PF_MINGW
|
||||
#elif defined(__CYGWIN__)
|
||||
# define U_PLATFORM U_PF_CYGWIN
|
||||
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
# define U_PLATFORM U_PF_WINDOWS
|
||||
#elif defined(__ANDROID__)
|
||||
# define U_PLATFORM U_PF_ANDROID
|
||||
/* Android wchar_t support depends on the API level. */
|
||||
# include <android/api-level.h>
|
||||
#elif defined(__pnacl__) || defined(__native_client__)
|
||||
# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
|
||||
#elif defined(__Fuchsia__)
|
||||
# define U_PLATFORM U_PF_FUCHSIA
|
||||
#elif defined(linux) || defined(__linux__) || defined(__linux)
|
||||
# define U_PLATFORM U_PF_LINUX
|
||||
#elif defined(__APPLE__) && defined(__MACH__)
|
||||
# include <TargetConditionals.h>
|
||||
# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */
|
||||
# define U_PLATFORM U_PF_IPHONE
|
||||
# else
|
||||
# define U_PLATFORM U_PF_DARWIN
|
||||
# endif
|
||||
#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
|
||||
# if defined(__FreeBSD__)
|
||||
# include <sys/endian.h>
|
||||
# endif
|
||||
# define U_PLATFORM U_PF_BSD
|
||||
#elif defined(sun) || defined(__sun)
|
||||
/* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
|
||||
# define U_PLATFORM U_PF_SOLARIS
|
||||
# if defined(__GNUC__)
|
||||
/* Solaris/GCC needs this header file to get the proper endianness. Normally, this
|
||||
* header file is included with stddef.h but on Solaris/GCC, the GCC version of stddef.h
|
||||
* is included which does not include this header file.
|
||||
*/
|
||||
# include <sys/isa_defs.h>
|
||||
# endif
|
||||
#elif defined(_AIX) || defined(__TOS_AIX__)
|
||||
# define U_PLATFORM U_PF_AIX
|
||||
#elif defined(_hpux) || defined(hpux) || defined(__hpux)
|
||||
# define U_PLATFORM U_PF_HPUX
|
||||
#elif defined(sgi) || defined(__sgi)
|
||||
# define U_PLATFORM U_PF_IRIX
|
||||
#elif defined(__QNX__) || defined(__QNXNTO__)
|
||||
# define U_PLATFORM U_PF_QNX
|
||||
#elif defined(__TOS_MVS__)
|
||||
# define U_PLATFORM U_PF_OS390
|
||||
#elif defined(__OS400__) || defined(__TOS_OS400__)
|
||||
# define U_PLATFORM U_PF_OS400
|
||||
#elif defined(__EMSCRIPTEN__)
|
||||
# define U_PLATFORM U_PF_EMSCRIPTEN
|
||||
#else
|
||||
# define U_PLATFORM U_PF_UNKNOWN
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def CYGWINMSVC
|
||||
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
|
||||
* Otherwise undefined.
|
||||
* @internal
|
||||
*/
|
||||
/* Commented out because this is already set in mh-cygwin-msvc
|
||||
#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
|
||||
# define CYGWINMSVC
|
||||
#endif
|
||||
*/
|
||||
#ifdef U_IN_DOXYGEN
|
||||
# define CYGWINMSVC
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_USES_ONLY_WIN32_API
|
||||
* Defines whether the platform uses only the Win32 API.
|
||||
* Set to 1 for Windows/MSVC and MinGW but not Cygwin.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_USES_ONLY_WIN32_API
|
||||
/* Use the predefined value. */
|
||||
#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
|
||||
# define U_PLATFORM_USES_ONLY_WIN32_API 1
|
||||
#else
|
||||
/* Cygwin implements POSIX. */
|
||||
# define U_PLATFORM_USES_ONLY_WIN32_API 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_HAS_WIN32_API
|
||||
* Defines whether the Win32 API is available on the platform.
|
||||
* Set to 1 for Windows/MSVC, MinGW and Cygwin.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_HAS_WIN32_API
|
||||
/* Use the predefined value. */
|
||||
#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
|
||||
# define U_PLATFORM_HAS_WIN32_API 1
|
||||
#else
|
||||
# define U_PLATFORM_HAS_WIN32_API 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_HAS_WINUWP_API
|
||||
* Defines whether target is intended for Universal Windows Platform API
|
||||
* Set to 1 for Windows10 Release Solution Configuration
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_HAS_WINUWP_API
|
||||
/* Use the predefined value. */
|
||||
#else
|
||||
# define U_PLATFORM_HAS_WINUWP_API 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IMPLEMENTS_POSIX
|
||||
* Defines whether the platform implements (most of) the POSIX API.
|
||||
* Set to 1 for Cygwin and most other platforms.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_IMPLEMENTS_POSIX
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_USES_ONLY_WIN32_API
|
||||
# define U_PLATFORM_IMPLEMENTS_POSIX 0
|
||||
#else
|
||||
# define U_PLATFORM_IMPLEMENTS_POSIX 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IS_LINUX_BASED
|
||||
* Defines whether the platform is Linux or one of its derivatives.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_IS_LINUX_BASED
|
||||
/* Use the predefined value. */
|
||||
#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499
|
||||
# define U_PLATFORM_IS_LINUX_BASED 1
|
||||
#else
|
||||
# define U_PLATFORM_IS_LINUX_BASED 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IS_DARWIN_BASED
|
||||
* Defines whether the platform is Darwin or one of its derivatives.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_IS_DARWIN_BASED
|
||||
/* Use the predefined value. */
|
||||
#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
|
||||
# define U_PLATFORM_IS_DARWIN_BASED 1
|
||||
#else
|
||||
# define U_PLATFORM_IS_DARWIN_BASED 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_STDINT_H
|
||||
* Defines whether stdint.h is available. It is a C99 standard header.
|
||||
* We used to include inttypes.h which includes stdint.h but we usually do not need
|
||||
* the additional definitions from inttypes.h.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_STDINT_H
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_USES_ONLY_WIN32_API
|
||||
# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
|
||||
/* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
|
||||
# define U_HAVE_STDINT_H 1
|
||||
# else
|
||||
# define U_HAVE_STDINT_H 0
|
||||
# endif
|
||||
#elif U_PLATFORM == U_PF_SOLARIS
|
||||
/* Solaris has inttypes.h but not stdint.h. */
|
||||
# define U_HAVE_STDINT_H 0
|
||||
#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
|
||||
/* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
|
||||
# define U_HAVE_STDINT_H 0
|
||||
#else
|
||||
# define U_HAVE_STDINT_H 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_INTTYPES_H
|
||||
* Defines whether inttypes.h is available. It is a C99 standard header.
|
||||
* We include inttypes.h where it is available but stdint.h is not.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_INTTYPES_H
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_SOLARIS
|
||||
/* Solaris has inttypes.h but not stdint.h. */
|
||||
# define U_HAVE_INTTYPES_H 1
|
||||
#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
|
||||
/* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
|
||||
# define U_HAVE_INTTYPES_H 1
|
||||
#else
|
||||
/* Most platforms have both inttypes.h and stdint.h, or neither. */
|
||||
# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Compiler and environment features */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_GCC_MAJOR_MINOR
|
||||
* Indicates whether the compiler is gcc (test for != 0),
|
||||
* and if so, contains its major (times 100) and minor version numbers.
|
||||
* If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
|
||||
*
|
||||
* For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
|
||||
* use "#if U_GCC_MAJOR_MINOR >= 406".
|
||||
* @internal
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
#else
|
||||
# define U_GCC_MAJOR_MINOR 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_IS_BIG_ENDIAN
|
||||
* Determines the endianness of the platform.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_IS_BIG_ENDIAN
|
||||
/* Use the predefined value. */
|
||||
#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
|
||||
# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
|
||||
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
|
||||
/* gcc */
|
||||
# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
||||
#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
|
||||
# define U_IS_BIG_ENDIAN 1
|
||||
#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
|
||||
# define U_IS_BIG_ENDIAN 0
|
||||
#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__)
|
||||
/* These platforms do not appear to predefine any endianness macros. */
|
||||
# define U_IS_BIG_ENDIAN 1
|
||||
#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0)
|
||||
/* HPPA does not appear to predefine any endianness macros. */
|
||||
# define U_IS_BIG_ENDIAN 1
|
||||
#elif defined(sparc) || defined(__sparc) || defined(__sparc__)
|
||||
/* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */
|
||||
# define U_IS_BIG_ENDIAN 1
|
||||
#else
|
||||
# define U_IS_BIG_ENDIAN 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_PLACEMENT_NEW
|
||||
* Determines whether to override placement new and delete for STL.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#ifdef U_HAVE_PLACEMENT_NEW
|
||||
/* Use the predefined value. */
|
||||
#elif defined(__BORLANDC__)
|
||||
# define U_HAVE_PLACEMENT_NEW 0
|
||||
#else
|
||||
# define U_HAVE_PLACEMENT_NEW 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_DEBUG_LOCATION_NEW
|
||||
* Define this to define the MFC debug version of the operator new.
|
||||
*
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
#ifdef U_HAVE_DEBUG_LOCATION_NEW
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER)
|
||||
# define U_HAVE_DEBUG_LOCATION_NEW 1
|
||||
#else
|
||||
# define U_HAVE_DEBUG_LOCATION_NEW 0
|
||||
#endif
|
||||
|
||||
/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */
|
||||
#ifdef __has_attribute
|
||||
# define UPRV_HAS_ATTRIBUTE(x) __has_attribute(x)
|
||||
#else
|
||||
# define UPRV_HAS_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
#ifdef __has_cpp_attribute
|
||||
# define UPRV_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
|
||||
#else
|
||||
# define UPRV_HAS_CPP_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
#ifdef __has_declspec_attribute
|
||||
# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) __has_declspec_attribute(x)
|
||||
#else
|
||||
# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
#ifdef __has_builtin
|
||||
# define UPRV_HAS_BUILTIN(x) __has_builtin(x)
|
||||
#else
|
||||
# define UPRV_HAS_BUILTIN(x) 0
|
||||
#endif
|
||||
#ifdef __has_feature
|
||||
# define UPRV_HAS_FEATURE(x) __has_feature(x)
|
||||
#else
|
||||
# define UPRV_HAS_FEATURE(x) 0
|
||||
#endif
|
||||
#ifdef __has_extension
|
||||
# define UPRV_HAS_EXTENSION(x) __has_extension(x)
|
||||
#else
|
||||
# define UPRV_HAS_EXTENSION(x) 0
|
||||
#endif
|
||||
#ifdef __has_warning
|
||||
# define UPRV_HAS_WARNING(x) __has_warning(x)
|
||||
#else
|
||||
# define UPRV_HAS_WARNING(x) 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_MALLOC_ATTR
|
||||
* Attribute to mark functions as malloc-like
|
||||
* @internal
|
||||
*/
|
||||
#if defined(__GNUC__) && __GNUC__>=3
|
||||
# define U_MALLOC_ATTR __attribute__ ((__malloc__))
|
||||
#else
|
||||
# define U_MALLOC_ATTR
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_ALLOC_SIZE_ATTR
|
||||
* Attribute to specify the size of the allocated buffer for malloc-like functions
|
||||
* @internal
|
||||
*/
|
||||
#if (defined(__GNUC__) && \
|
||||
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || \
|
||||
UPRV_HAS_ATTRIBUTE(alloc_size)
|
||||
# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
|
||||
# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
|
||||
#else
|
||||
# define U_ALLOC_SIZE_ATTR(X)
|
||||
# define U_ALLOC_SIZE_ATTR2(X,Y)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CPLUSPLUS_VERSION
|
||||
* 0 if no C++; 1, 11, 14, ... if C++.
|
||||
* Support for specific features cannot always be determined by the C++ version alone.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_CPLUSPLUS_VERSION
|
||||
# if U_CPLUSPLUS_VERSION != 0 && !defined(__cplusplus)
|
||||
# undef U_CPLUSPLUS_VERSION
|
||||
# define U_CPLUSPLUS_VERSION 0
|
||||
# endif
|
||||
/* Otherwise use the predefined value. */
|
||||
#elif !defined(__cplusplus)
|
||||
# define U_CPLUSPLUS_VERSION 0
|
||||
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
|
||||
# define U_CPLUSPLUS_VERSION 14
|
||||
#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
|
||||
# define U_CPLUSPLUS_VERSION 11
|
||||
#else
|
||||
// C++98 or C++03
|
||||
# define U_CPLUSPLUS_VERSION 1
|
||||
#endif
|
||||
|
||||
#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
|
||||
// add in std::nullptr_t
|
||||
namespace std {
|
||||
typedef decltype(nullptr) nullptr_t;
|
||||
};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_NOEXCEPT
|
||||
* "noexcept" if supported, otherwise empty.
|
||||
* Some code, especially STL containers, uses move semantics of objects only
|
||||
* if the move constructor and the move operator are declared as not throwing exceptions.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_NOEXCEPT
|
||||
/* Use the predefined value. */
|
||||
#else
|
||||
# define U_NOEXCEPT noexcept
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_FALLTHROUGH
|
||||
* Annotate intentional fall-through between switch labels.
|
||||
* http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
|
||||
* @internal
|
||||
*/
|
||||
#ifndef __cplusplus
|
||||
// Not for C.
|
||||
#elif defined(U_FALLTHROUGH)
|
||||
// Use the predefined value.
|
||||
#elif defined(__clang__)
|
||||
// Test for compiler vs. feature separately.
|
||||
// Other compilers might choke on the feature test.
|
||||
# if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \
|
||||
(UPRV_HAS_FEATURE(cxx_attributes) && \
|
||||
UPRV_HAS_WARNING("-Wimplicit-fallthrough"))
|
||||
# define U_FALLTHROUGH [[clang::fallthrough]]
|
||||
# endif
|
||||
#elif defined(__GNUC__) && (__GNUC__ >= 7)
|
||||
# define U_FALLTHROUGH __attribute__((fallthrough))
|
||||
#endif
|
||||
|
||||
#ifndef U_FALLTHROUGH
|
||||
# define U_FALLTHROUGH
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_ASCII_FAMILY 0
|
||||
|
||||
/**
|
||||
* U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_EBCDIC_FAMILY 1
|
||||
|
||||
/**
|
||||
* \def U_CHARSET_FAMILY
|
||||
*
|
||||
* <p>These definitions allow specifying the encoding of text
|
||||
* in the char data type as defined by the platform and the compiler.
|
||||
* It is enough to determine the code point values of "invariant characters",
|
||||
* which are the ones shared by all encodings that are in use
|
||||
* on a given platform.</p>
|
||||
*
|
||||
* <p>Those "invariant characters" should be all the uppercase and lowercase
|
||||
* latin letters, the digits, the space, and "basic punctuation".
|
||||
* Also, '\\n', '\\r', '\\t' should be available.</p>
|
||||
*
|
||||
* <p>The list of "invariant characters" is:<br>
|
||||
* \code
|
||||
* A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _
|
||||
* \endcode
|
||||
* <br>
|
||||
* (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
|
||||
*
|
||||
* <p>This matches the IBM Syntactic Character Set (CS 640).</p>
|
||||
*
|
||||
* <p>In other words, all the graphic characters in 7-bit ASCII should
|
||||
* be safely accessible except the following:</p>
|
||||
*
|
||||
* \code
|
||||
* '\' <backslash>
|
||||
* '[' <left bracket>
|
||||
* ']' <right bracket>
|
||||
* '{' <left brace>
|
||||
* '}' <right brace>
|
||||
* '^' <circumflex>
|
||||
* '~' <tilde>
|
||||
* '!' <exclamation mark>
|
||||
* '#' <number sign>
|
||||
* '|' <vertical line>
|
||||
* '$' <dollar sign>
|
||||
* '@' <commercial at>
|
||||
* '`' <grave accent>
|
||||
* \endcode
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_CHARSET_FAMILY
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
|
||||
# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
|
||||
#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
|
||||
# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
|
||||
#else
|
||||
# define U_CHARSET_FAMILY U_ASCII_FAMILY
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CHARSET_IS_UTF8
|
||||
*
|
||||
* Hardcode the default charset to UTF-8.
|
||||
*
|
||||
* If this is set to 1, then
|
||||
* - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
|
||||
* contain UTF-8 text, regardless of what the system API uses
|
||||
* - some ICU code will use fast functions like u_strFromUTF8()
|
||||
* rather than the more general and more heavy-weight conversion API (ucnv.h)
|
||||
* - ucnv_getDefaultName() always returns "UTF-8"
|
||||
* - ucnv_setDefaultName() is disabled and will not change the default charset
|
||||
* - static builds of ICU are smaller
|
||||
* - more functionality is available with the UCONFIG_NO_CONVERSION build-time
|
||||
* configuration option (see unicode/uconfig.h)
|
||||
* - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
* @see UCONFIG_NO_CONVERSION
|
||||
*/
|
||||
#ifdef U_CHARSET_IS_UTF8
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \
|
||||
U_PLATFORM == U_PF_EMSCRIPTEN
|
||||
# define U_CHARSET_IS_UTF8 1
|
||||
#else
|
||||
# define U_CHARSET_IS_UTF8 0
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Information about wchar support */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_HAVE_WCHAR_H
|
||||
* Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_HAVE_WCHAR_H
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
|
||||
/*
|
||||
* Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
|
||||
* The type and header existed, but the library functions did not work as expected.
|
||||
* The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
|
||||
*/
|
||||
# define U_HAVE_WCHAR_H 0
|
||||
#else
|
||||
# define U_HAVE_WCHAR_H 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_SIZEOF_WCHAR_T
|
||||
* U_SIZEOF_WCHAR_T==sizeof(wchar_t)
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_SIZEOF_WCHAR_T
|
||||
/* Use the predefined value. */
|
||||
#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
|
||||
/*
|
||||
* Android before Gingerbread (API level 9): wchar_t had size 1, see U_HAVE_WCHAR_H.
|
||||
* (Mac OS X 10.3 and later has size 4 and takes the default branch below.)
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 1
|
||||
#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
#elif U_PLATFORM == U_PF_AIX
|
||||
/*
|
||||
* AIX 6.1 information, section "Wide character data representation":
|
||||
* "... the wchar_t datatype is 32-bit in the 64-bit environment and
|
||||
* 16-bit in the 32-bit environment."
|
||||
* and
|
||||
* "All locales use Unicode for their wide character code values (process code),
|
||||
* except the IBM-eucTW codeset."
|
||||
*/
|
||||
# ifdef __64BIT__
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# else
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#elif U_PLATFORM == U_PF_OS390
|
||||
/*
|
||||
* z/OS V1R11 information center, section "LP64 | ILP32":
|
||||
* "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
|
||||
* Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
|
||||
*/
|
||||
# ifdef _LP64
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# else
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#elif U_PLATFORM == U_PF_OS400
|
||||
# if defined(__UTF32__)
|
||||
/*
|
||||
* LOCALETYPE(*LOCALEUTF) is specified.
|
||||
* Wide-character strings are in UTF-32,
|
||||
* narrow-character strings are in UTF-8.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# elif defined(__UCS2__)
|
||||
/*
|
||||
* LOCALETYPE(*LOCALEUCS2) is specified.
|
||||
* Wide-character strings are in UCS-2,
|
||||
* narrow-character strings are in EBCDIC.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# else
|
||||
/*
|
||||
* LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
|
||||
* Wide-character strings are in 16-bit EBCDIC,
|
||||
* narrow-character strings are in EBCDIC.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#else
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
#endif
|
||||
|
||||
#ifndef U_HAVE_WCSCPY
|
||||
#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* \def U_HAVE_CHAR16_T
|
||||
* Defines whether the char16_t type is available for UTF-16
|
||||
* and u"abc" UTF-16 string literals are supported.
|
||||
* This is a new standard type and standard string literal syntax in C++0x
|
||||
* but has been available in some compilers before.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_CHAR16_T
|
||||
/* Use the predefined value. */
|
||||
#else
|
||||
/*
|
||||
* Notes:
|
||||
* Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
|
||||
* and does not support u"abc" string literals.
|
||||
* Visual Studio 2015 (_MSC_VER>=1900) and above adds support for
|
||||
* both char16_t and u"abc" string literals.
|
||||
* gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
|
||||
* does not support u"abc" string literals.
|
||||
* C++11 and C11 require support for UTF-16 literals
|
||||
* TODO: Fix for plain C. Doesn't work on Mac.
|
||||
*/
|
||||
# if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
|
||||
# define U_HAVE_CHAR16_T 1
|
||||
# else
|
||||
# define U_HAVE_CHAR16_T 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @{
|
||||
* \def U_DECLARE_UTF16
|
||||
* Do not use this macro because it is not defined on all platforms.
|
||||
* Use the UNICODE_STRING or U_STRING_DECL macros instead.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_DECLARE_UTF16
|
||||
/* Use the predefined value. */
|
||||
#elif U_HAVE_CHAR16_T \
|
||||
|| (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
|
||||
|| (defined(__HP_aCC) && __HP_aCC >= 035000) \
|
||||
|| (defined(__HP_cc) && __HP_cc >= 111106) \
|
||||
|| (defined(U_IN_DOXYGEN))
|
||||
# define U_DECLARE_UTF16(string) u ## string
|
||||
#elif U_SIZEOF_WCHAR_T == 2 \
|
||||
&& (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
|
||||
# define U_DECLARE_UTF16(string) L ## string
|
||||
#else
|
||||
/* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
#ifdef U_EXPORT
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_STATIC_IMPLEMENTATION)
|
||||
# define U_EXPORT
|
||||
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
|
||||
UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
|
||||
# define U_EXPORT __declspec(dllexport)
|
||||
#elif defined(__GNUC__)
|
||||
# define U_EXPORT __attribute__((visibility("default")))
|
||||
#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
|
||||
|| (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
|
||||
# define U_EXPORT __global
|
||||
/*#elif defined(__HP_aCC) || defined(__HP_cc)
|
||||
# define U_EXPORT __declspec(dllexport)*/
|
||||
#else
|
||||
# define U_EXPORT
|
||||
#endif
|
||||
|
||||
/* U_CALLCONV is related to U_EXPORT2 */
|
||||
#ifdef U_EXPORT2
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER)
|
||||
# define U_EXPORT2 __cdecl
|
||||
#else
|
||||
# define U_EXPORT2
|
||||
#endif
|
||||
|
||||
#ifdef U_IMPORT
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
|
||||
UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
|
||||
/* Windows needs to export/import data. */
|
||||
# define U_IMPORT __declspec(dllimport)
|
||||
#else
|
||||
# define U_IMPORT
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CALLCONV
|
||||
* Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
|
||||
* in callback function typedefs to make sure that the calling convention
|
||||
* is compatible.
|
||||
*
|
||||
* This is only used for non-ICU-API functions.
|
||||
* When a function is a public ICU API,
|
||||
* you must use the U_CAPI and U_EXPORT2 qualifiers.
|
||||
*
|
||||
* Please note, you need to use U_CALLCONV after the *.
|
||||
*
|
||||
* NO : "static const char U_CALLCONV *func( . . . )"
|
||||
* YES: "static const char* U_CALLCONV func( . . . )"
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
|
||||
# define U_CALLCONV __cdecl
|
||||
#else
|
||||
# define U_CALLCONV U_EXPORT2
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CALLCONV_FPTR
|
||||
* Similar to U_CALLCONV, but only used on function pointers.
|
||||
* @internal
|
||||
*/
|
||||
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
|
||||
# define U_CALLCONV_FPTR U_CALLCONV
|
||||
#else
|
||||
# define U_CALLCONV_FPTR
|
||||
#endif
|
||||
/** @} */
|
||||
|
||||
#endif // _PLATFORM_H
|
||||
130
app/src/main/cpp/icu4c/include/unicode/ptypes.h
Normal file
130
app/src/main/cpp/icu4c/include/unicode/ptypes.h
Normal file
@@ -0,0 +1,130 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : ptypes.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Definitions of integer types of various widths
|
||||
*/
|
||||
|
||||
#ifndef _PTYPES_H
|
||||
#define _PTYPES_H
|
||||
|
||||
/**
|
||||
* \def __STDC_LIMIT_MACROS
|
||||
* According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
|
||||
* macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
|
||||
* We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
|
||||
* that uses such limit macros.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef __STDC_LIMIT_MACROS
|
||||
#define __STDC_LIMIT_MACROS
|
||||
#endif
|
||||
|
||||
/* NULL, size_t, wchar_t */
|
||||
#include <stddef.h>
|
||||
|
||||
/*
|
||||
* If all compilers provided all of the C99 headers and types,
|
||||
* we would just unconditionally #include <stdint.h> here
|
||||
* and not need any of the stuff after including platform.h.
|
||||
*/
|
||||
|
||||
/* Find out if we have stdint.h etc. */
|
||||
#include "unicode/platform.h"
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* If your platform does not have the <stdint.h> header, you may
|
||||
need to edit the typedefs in the #else section below.
|
||||
Use #if...#else...#endif with predefined compiler macros if possible. */
|
||||
#if U_HAVE_STDINT_H
|
||||
|
||||
/*
|
||||
* We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
|
||||
* <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
|
||||
* which we almost never use, plus stuff like imaxabs() which we never use.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
|
||||
#if U_PLATFORM == U_PF_OS390
|
||||
/* The features header is needed to get (u)int64_t sometimes. */
|
||||
#include <features.h>
|
||||
/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
|
||||
#if !defined(__uint8_t)
|
||||
#define __uint8_t 1
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
#endif /* U_PLATFORM == U_PF_OS390 */
|
||||
|
||||
#elif U_HAVE_INTTYPES_H
|
||||
|
||||
# include <inttypes.h>
|
||||
|
||||
#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
|
||||
|
||||
/// \cond
|
||||
#if ! U_HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_UINT8_T
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_INT16_T
|
||||
typedef signed short int16_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_UINT16_T
|
||||
typedef unsigned short uint16_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_INT32_T
|
||||
typedef signed int int32_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_UINT32_T
|
||||
typedef unsigned int uint32_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_INT64_T
|
||||
#ifdef _MSC_VER
|
||||
typedef signed __int64 int64_t;
|
||||
#else
|
||||
typedef signed long long int64_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_UINT64_T
|
||||
#ifdef _MSC_VER
|
||||
typedef unsigned __int64 uint64_t;
|
||||
#else
|
||||
typedef unsigned long long uint64_t;
|
||||
#endif
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
|
||||
|
||||
#endif /* _PTYPES_H */
|
||||
183
app/src/main/cpp/icu4c/include/unicode/putil.h
Normal file
183
app/src/main/cpp/icu4c/include/unicode/putil.h
Normal file
@@ -0,0 +1,183 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : putil.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/14/98 nos Creation (content moved here from utypes.h).
|
||||
* 06/17/99 erm Added IEEE_754
|
||||
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
|
||||
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
|
||||
* 08/24/98 stephen Added longBitsFromDouble
|
||||
* 03/02/99 stephen Removed openFile(). Added AS400 support.
|
||||
* 04/15/99 stephen Converted to C
|
||||
* 11/15/99 helena Integrated S/390 changes for IEEE support.
|
||||
* 01/11/00 helena Added u_getVersion.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef PUTIL_H
|
||||
#define PUTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Platform Utilities
|
||||
*/
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Platform utilities */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* The platform utilities isolate the platform dependencies of the
|
||||
* library. For each platform which this code is ported to, these
|
||||
* functions may have to be re-implemented.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Return the ICU data directory.
|
||||
* The data directory is where common format ICU data files (.dat files)
|
||||
* are loaded from. Note that normal use of the built-in ICU
|
||||
* facilities does not require loading of an external data file;
|
||||
* unless you are adding custom data to ICU, the data directory
|
||||
* does not need to be set.
|
||||
*
|
||||
* The data directory is determined as follows:
|
||||
* If u_setDataDirectory() has been called, that is it, otherwise
|
||||
* if the ICU_DATA environment variable is set, use that, otherwise
|
||||
* If a data directory was specified at ICU build time
|
||||
* <code>
|
||||
* \code
|
||||
* #define ICU_DATA_DIR "path"
|
||||
* \endcode
|
||||
* </code> use that,
|
||||
* otherwise no data directory is available.
|
||||
*
|
||||
* @return the data directory, or an empty string ("") if no data directory has
|
||||
* been specified.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
|
||||
|
||||
|
||||
/**
|
||||
* Set the ICU data directory.
|
||||
* The data directory is where common format ICU data files (.dat files)
|
||||
* are loaded from. Note that normal use of the built-in ICU
|
||||
* facilities does not require loading of an external data file;
|
||||
* unless you are adding custom data to ICU, the data directory
|
||||
* does not need to be set.
|
||||
*
|
||||
* This function should be called at most once in a process, before the
|
||||
* first ICU operation (e.g., u_init()) that will require the loading of an
|
||||
* ICU data file.
|
||||
* This function is not thread-safe. Use it before calling ICU APIs from
|
||||
* multiple threads.
|
||||
*
|
||||
* @param directory The directory to be set.
|
||||
*
|
||||
* @see u_init
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Return the time zone files override directory, or an empty string if
|
||||
* no directory was specified. Certain time zone resources will be preferentially
|
||||
* loaded from individual files in this directory.
|
||||
*
|
||||
* @return the time zone data override directory.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Set the time zone files override directory.
|
||||
* This function is not thread safe; it must not be called concurrently with
|
||||
* u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
|
||||
* This function should only be called before using any ICU service that
|
||||
* will access the time zone data.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
|
||||
/**
|
||||
* @{
|
||||
* Filesystem file and path separator characters.
|
||||
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_PLATFORM_USES_ONLY_WIN32_API
|
||||
# define U_FILE_SEP_CHAR '\\'
|
||||
# define U_FILE_ALT_SEP_CHAR '/'
|
||||
# define U_PATH_SEP_CHAR ';'
|
||||
# define U_FILE_SEP_STRING "\\"
|
||||
# define U_FILE_ALT_SEP_STRING "/"
|
||||
# define U_PATH_SEP_STRING ";"
|
||||
#else
|
||||
# define U_FILE_SEP_CHAR '/'
|
||||
# define U_FILE_ALT_SEP_CHAR '/'
|
||||
# define U_PATH_SEP_CHAR ':'
|
||||
# define U_FILE_SEP_STRING "/"
|
||||
# define U_FILE_ALT_SEP_STRING "/"
|
||||
# define U_PATH_SEP_STRING ":"
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Convert char characters to UChar characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that are encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see U_CHARSET_FAMILY.
|
||||
*
|
||||
* @param cs Input string, points to <code>length</code>
|
||||
* character bytes from a subset of the platform encoding.
|
||||
* @param us Output string, points to memory for <code>length</code>
|
||||
* Unicode characters.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*
|
||||
* @see U_CHARSET_FAMILY
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_charsToUChars(const char *cs, UChar *us, int32_t length);
|
||||
|
||||
/**
|
||||
* Convert UChar characters to char characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that can be encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see U_CHARSET_FAMILY.
|
||||
*
|
||||
* @param us Input string, points to <code>length</code>
|
||||
* Unicode characters that can be encoded with the
|
||||
* codepage-invariant subset of the platform encoding.
|
||||
* @param cs Output string, points to memory for <code>length</code>
|
||||
* character bytes.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*
|
||||
* @see U_CHARSET_FAMILY
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_UCharsToChars(const UChar *us, char *cs, int32_t length);
|
||||
|
||||
#endif
|
||||
266
app/src/main/cpp/icu4c/include/unicode/rep.h
Normal file
266
app/src/main/cpp/icu4c/include/unicode/rep.h
Normal file
@@ -0,0 +1,266 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**************************************************************************
|
||||
* Copyright (C) 1999-2012, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
**************************************************************************
|
||||
* Date Name Description
|
||||
* 11/17/99 aliu Creation. Ported from java. Modified to
|
||||
* match current UnicodeString API. Forced
|
||||
* to use name "handleReplaceBetween" because
|
||||
* of existing methods in UnicodeString.
|
||||
**************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef REP_H
|
||||
#define REP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Replaceable String
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* <code>Replaceable</code> is an abstract base class representing a
|
||||
* string of characters that supports the replacement of a range of
|
||||
* itself with a new string of characters. It is used by APIs that
|
||||
* change a piece of text while retaining metadata. Metadata is data
|
||||
* other than the Unicode characters returned by char32At(). One
|
||||
* example of metadata is style attributes; another is an edit
|
||||
* history, marking each character with an author and revision number.
|
||||
*
|
||||
* <p>An implicit aspect of the <code>Replaceable</code> API is that
|
||||
* during a replace operation, new characters take on the metadata of
|
||||
* the old characters. For example, if the string "the <b>bold</b>
|
||||
* font" has range (4, 8) replaced with "strong", then it becomes "the
|
||||
* <b>strong</b> font".
|
||||
*
|
||||
* <p><code>Replaceable</code> specifies ranges using a start
|
||||
* offset and a limit offset. The range of characters thus specified
|
||||
* includes the characters at offset start..limit-1. That is, the
|
||||
* start offset is inclusive, and the limit offset is exclusive.
|
||||
*
|
||||
* <p><code>Replaceable</code> also includes API to access characters
|
||||
* in the string: <code>length()</code>, <code>charAt()</code>,
|
||||
* <code>char32At()</code>, and <code>extractBetween()</code>.
|
||||
*
|
||||
* <p>For a subclass to support metadata, typical behavior of
|
||||
* <code>replace()</code> is the following:
|
||||
* <ul>
|
||||
* <li>Set the metadata of the new text to the metadata of the first
|
||||
* character replaced</li>
|
||||
* <li>If no characters are replaced, use the metadata of the
|
||||
* previous character</li>
|
||||
* <li>If there is no previous character (i.e. start == 0), use the
|
||||
* following character</li>
|
||||
* <li>If there is no following character (i.e. the replaceable was
|
||||
* empty), use default metadata.</li>
|
||||
* <li>If the code point U+FFFF is seen, it should be interpreted as
|
||||
* a special marker having no metadata
|
||||
* </li>
|
||||
* </ul>
|
||||
* If this is not the behavior, the subclass should document any differences.
|
||||
* @author Alan Liu
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API Replaceable : public UObject {
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~Replaceable();
|
||||
|
||||
/**
|
||||
* Returns the number of 16-bit code units in the text.
|
||||
* @return number of 16-bit code units in text
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline int32_t length() const;
|
||||
|
||||
/**
|
||||
* Returns the 16-bit code unit at the given offset into the text.
|
||||
* @param offset an integer between 0 and <code>length()</code>-1
|
||||
* inclusive
|
||||
* @return 16-bit code unit of text at given offset
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline char16_t charAt(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Returns the 32-bit code point at the given 16-bit offset into
|
||||
* the text. This assumes the text is stored as 16-bit code units
|
||||
* with surrogate pairs intermixed. If the offset of a leading or
|
||||
* trailing code unit of a surrogate pair is given, return the
|
||||
* code point of the surrogate pair.
|
||||
*
|
||||
* @param offset an integer between 0 and <code>length()</code>-1
|
||||
* inclusive
|
||||
* @return 32-bit code point of text at given offset
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline UChar32 char32At(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
|
||||
* into the UnicodeString <tt>target</tt>.
|
||||
* @param start offset of first character which will be copied
|
||||
* @param limit offset immediately following the last character to
|
||||
* be copied
|
||||
* @param target UnicodeString into which to copy characters.
|
||||
* @return A reference to <TT>target</TT>
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
virtual void extractBetween(int32_t start,
|
||||
int32_t limit,
|
||||
UnicodeString& target) const = 0;
|
||||
|
||||
/**
|
||||
* Replaces a substring of this object with the given text. If the
|
||||
* characters being replaced have metadata, the new characters
|
||||
* that replace them should be given the same metadata.
|
||||
*
|
||||
* <p>Subclasses must ensure that if the text between start and
|
||||
* limit is equal to the replacement text, that replace has no
|
||||
* effect. That is, any metadata
|
||||
* should be unaffected. In addition, subclasses are encouraged to
|
||||
* check for initial and trailing identical characters, and make a
|
||||
* smaller replacement if possible. This will preserve as much
|
||||
* metadata as possible.
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= length()</code>.
|
||||
* @param text the text to replace characters <code>start</code>
|
||||
* to <code>limit - 1</code>
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void handleReplaceBetween(int32_t start,
|
||||
int32_t limit,
|
||||
const UnicodeString& text) = 0;
|
||||
// Note: All other methods in this class take the names of
|
||||
// existing UnicodeString methods. This method is the exception.
|
||||
// It is named differently because all replace methods of
|
||||
// UnicodeString return a UnicodeString&. The 'between' is
|
||||
// required in order to conform to the UnicodeString naming
|
||||
// convention; API taking start/length are named <operation>, and
|
||||
// those taking start/limit are named <operationBetween>. The
|
||||
// 'handle' is added because 'replaceBetween' and
|
||||
// 'doReplaceBetween' are already taken.
|
||||
|
||||
/**
|
||||
* Copies a substring of this object, retaining metadata.
|
||||
* This method is used to duplicate or reorder substrings.
|
||||
* The destination index must not overlap the source range.
|
||||
*
|
||||
* @param start the beginning index, inclusive; <code>0 <= start <=
|
||||
* limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit <=
|
||||
* length()</code>.
|
||||
* @param dest the destination index. The characters from
|
||||
* <code>start..limit-1</code> will be copied to <code>dest</code>.
|
||||
* Implementations of this method may assume that <code>dest <= start ||
|
||||
* dest >= limit</code>.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
|
||||
|
||||
/**
|
||||
* Returns true if this object contains metadata. If a
|
||||
* Replaceable object has metadata, calls to the Replaceable API
|
||||
* must be made so as to preserve metadata. If it does not, calls
|
||||
* to the Replaceable API may be optimized to improve performance.
|
||||
* The default implementation returns true.
|
||||
* @return true if this object contains metadata
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual UBool hasMetaData() const;
|
||||
|
||||
/**
|
||||
* Clone this object, an instance of a subclass of Replaceable.
|
||||
* Clones can be used concurrently in multiple threads.
|
||||
* If a subclass does not implement clone(), or if an error occurs,
|
||||
* then NULL is returned.
|
||||
* The caller must delete the clone.
|
||||
*
|
||||
* @return a clone of this object
|
||||
*
|
||||
* @see getDynamicClassID
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
virtual Replaceable *clone() const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
inline Replaceable();
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
Replaceable &Replaceable::operator=(const Replaceable &);
|
||||
*/
|
||||
|
||||
/**
|
||||
* Virtual version of length().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t getLength() const = 0;
|
||||
|
||||
/**
|
||||
* Virtual version of charAt().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual char16_t getCharAt(int32_t offset) const = 0;
|
||||
|
||||
/**
|
||||
* Virtual version of char32At().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UChar32 getChar32At(int32_t offset) const = 0;
|
||||
};
|
||||
|
||||
inline Replaceable::Replaceable() {}
|
||||
|
||||
inline int32_t
|
||||
Replaceable::length() const {
|
||||
return getLength();
|
||||
}
|
||||
|
||||
inline char16_t
|
||||
Replaceable::charAt(int32_t offset) const {
|
||||
return getCharAt(offset);
|
||||
}
|
||||
|
||||
inline UChar32
|
||||
Replaceable::char32At(int32_t offset) const {
|
||||
return getChar32At(offset);
|
||||
}
|
||||
|
||||
// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
41
app/src/main/cpp/icu4c/include/unicode/std_string.h
Normal file
41
app/src/main/cpp/icu4c/include/unicode/std_string.h
Normal file
@@ -0,0 +1,41 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: std_string.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009feb19
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __STD_STRING_H__
|
||||
#define __STD_STRING_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Central ICU header for including the C++ standard <string>
|
||||
* header and for related definitions.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
// Workaround for a libstdc++ bug before libstdc++4.6 (2011).
|
||||
// https://bugs.llvm.org/show_bug.cgi?id=13364
|
||||
#if defined(__GLIBCXX__)
|
||||
namespace std { class type_info; }
|
||||
#endif
|
||||
#include <string>
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __STD_STRING_H__
|
||||
281
app/src/main/cpp/icu4c/include/unicode/strenum.h
Normal file
281
app/src/main/cpp/icu4c/include/unicode/strenum.h
Normal file
@@ -0,0 +1,281 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef STRENUM_H
|
||||
#define STRENUM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: String Enumeration
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Base class for 'pure' C++ implementations of uenum api. Adds a
|
||||
* method that returns the next UnicodeString since in C++ this can
|
||||
* be a common storage format for strings.
|
||||
*
|
||||
* <p>The model is that the enumeration is over strings maintained by
|
||||
* a 'service.' At any point, the service might change, invalidating
|
||||
* the enumerator (though this is expected to be rare). The iterator
|
||||
* returns an error if this has occurred. Lack of the error is no
|
||||
* guarantee that the service didn't change immediately after the
|
||||
* call, so the returned string still might not be 'valid' on
|
||||
* subsequent use.</p>
|
||||
*
|
||||
* <p>Strings may take the form of const char*, const char16_t*, or const
|
||||
* UnicodeString*. The type you get is determined by the variant of
|
||||
* 'next' that you call. In general the StringEnumeration is
|
||||
* optimized for one of these types, but all StringEnumerations can
|
||||
* return all types. Returned strings are each terminated with a NUL.
|
||||
* Depending on the service data, they might also include embedded NUL
|
||||
* characters, so API is provided to optionally return the true
|
||||
* length, counting the embedded NULs but not counting the terminating
|
||||
* NUL.</p>
|
||||
*
|
||||
* <p>The pointers returned by next, unext, and snext become invalid
|
||||
* upon any subsequent call to the enumeration's destructor, next,
|
||||
* unext, snext, or reset.</p>
|
||||
*
|
||||
* ICU 2.8 adds some default implementations and helper functions
|
||||
* for subclasses.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API StringEnumeration : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual ~StringEnumeration();
|
||||
|
||||
/**
|
||||
* Clone this object, an instance of a subclass of StringEnumeration.
|
||||
* Clones can be used concurrently in multiple threads.
|
||||
* If a subclass does not implement clone(), or if an error occurs,
|
||||
* then NULL is returned.
|
||||
* The caller must delete the clone.
|
||||
*
|
||||
* @return a clone of this object
|
||||
*
|
||||
* @see getDynamicClassID
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual StringEnumeration *clone() const;
|
||||
|
||||
/**
|
||||
* <p>Return the number of elements that the iterator traverses. If
|
||||
* the iterator is out of sync with its service, status is set to
|
||||
* U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
|
||||
*
|
||||
* <p>The return value will not change except possibly as a result of
|
||||
* a subsequent call to reset, or if the iterator becomes out of sync.</p>
|
||||
*
|
||||
* <p>This is a convenience function. It can end up being very
|
||||
* expensive as all the items might have to be pre-fetched
|
||||
* (depending on the storage format of the data being
|
||||
* traversed).</p>
|
||||
*
|
||||
* @param status the error code.
|
||||
* @return number of elements in the iterator.
|
||||
*
|
||||
* @stable ICU 2.4 */
|
||||
virtual int32_t count(UErrorCode& status) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a NUL-terminated char*. If there
|
||||
* are no more elements, returns NULL. If the resultLength pointer
|
||||
* is not NULL, the length of the string (not counting the
|
||||
* terminating NUL) is returned at that address. If an error
|
||||
* status is returned, the value at resultLength is undefined.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
|
||||
*
|
||||
* <p>If the native service string is a char16_t* string, it is
|
||||
* converted to char* with the invariant converter. If the
|
||||
* conversion fails (because a character cannot be converted) then
|
||||
* status is set to U_INVARIANT_CONVERSION_ERROR and the return
|
||||
* value is undefined (though not NULL).</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls snext()
|
||||
* and handles the conversion.
|
||||
* Either next() or snext() must be implemented differently by a subclass.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @param resultLength a pointer to receive the length, can be NULL.
|
||||
* @return a pointer to the string, or NULL.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const char* next(int32_t *resultLength, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a NUL-terminated char16_t*. If there
|
||||
* are no more elements, returns NULL. If the resultLength pointer
|
||||
* is not NULL, the length of the string (not counting the
|
||||
* terminating NUL) is returned at that address. If an error
|
||||
* status is returned, the value at resultLength is undefined.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls snext()
|
||||
* and handles the conversion.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @param resultLength a pointer to receive the length, can be NULL.
|
||||
* @return a pointer to the string, or NULL.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a UnicodeString*. If there are no
|
||||
* more elements, returns NULL.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls next()
|
||||
* and handles the conversion.
|
||||
* Either next() or snext() must be implemented differently by a subclass.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @return a pointer to the string, or NULL.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const UnicodeString* snext(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Resets the iterator. This re-establishes sync with the
|
||||
* service and rewinds the iterator to start at the first
|
||||
* element.</p>
|
||||
*
|
||||
* <p>Previous pointers returned by next, unext, or snext become
|
||||
* invalid, and the value returned by count might change.</p>
|
||||
*
|
||||
* @param status the error code.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual void reset(UErrorCode& status) = 0;
|
||||
|
||||
/**
|
||||
* Compares this enumeration to other to check if both are equal
|
||||
*
|
||||
* @param that The other string enumeration to compare this object to
|
||||
* @return true if the enumerations are equal. false if not.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
virtual UBool operator==(const StringEnumeration& that)const;
|
||||
/**
|
||||
* Compares this enumeration to other to check if both are not equal
|
||||
*
|
||||
* @param that The other string enumeration to compare this object to
|
||||
* @return true if the enumerations are not equal. false if they are equal.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
virtual UBool operator!=(const StringEnumeration& that)const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* UnicodeString field for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
UnicodeString unistr;
|
||||
/**
|
||||
* char * default buffer for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
char charsBuffer[32];
|
||||
/**
|
||||
* char * buffer for use with default implementations and subclasses.
|
||||
* Allocated in constructor and in ensureCharsCapacity().
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
char *chars;
|
||||
/**
|
||||
* Capacity of chars, for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
int32_t charsCapacity;
|
||||
|
||||
/**
|
||||
* Default constructor for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
StringEnumeration();
|
||||
|
||||
/**
|
||||
* Ensures that chars is at least as large as the requested capacity.
|
||||
* For use with default implementations and subclasses.
|
||||
*
|
||||
* @param capacity Requested capacity.
|
||||
* @param status ICU in/out error code.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Converts s to Unicode and sets unistr to the result.
|
||||
* For use with default implementations and subclasses,
|
||||
* especially for implementations of snext() in terms of next().
|
||||
* This is provided with a helper function instead of a default implementation
|
||||
* of snext() to avoid potential infinite loops between next() and snext().
|
||||
*
|
||||
* For example:
|
||||
* \code
|
||||
* const UnicodeString* snext(UErrorCode& status) {
|
||||
* int32_t resultLength=0;
|
||||
* const char *s=next(&resultLength, status);
|
||||
* return setChars(s, resultLength, status);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param s String to be converted to Unicode.
|
||||
* @param length Length of the string.
|
||||
* @param status ICU in/out error code.
|
||||
* @return A pointer to unistr.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
/* STRENUM_H */
|
||||
#endif
|
||||
190
app/src/main/cpp/icu4c/include/unicode/stringoptions.h
Normal file
190
app/src/main/cpp/icu4c/include/unicode/stringoptions.h
Normal file
@@ -0,0 +1,190 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// stringoptions.h
|
||||
// created: 2017jun08 Markus W. Scherer
|
||||
|
||||
#ifndef __STRINGOPTIONS_H__
|
||||
#define __STRINGOPTIONS_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Bit set option bit constants for various string and character processing functions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_DEFAULT 0
|
||||
|
||||
/**
|
||||
* Option value for case folding:
|
||||
*
|
||||
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
|
||||
* and dotless i appropriately for Turkic languages (tr, az).
|
||||
*
|
||||
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
|
||||
* are to be included for default mappings and
|
||||
* excluded for the Turkic-specific mappings.
|
||||
*
|
||||
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
|
||||
* are to be excluded for default mappings and
|
||||
* included for the Turkic-specific mappings.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
|
||||
|
||||
/**
|
||||
* Titlecase the string as a whole rather than each word.
|
||||
* (Titlecase only the character at index 0, possibly adjusted.)
|
||||
* Option bits value for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing iterator options together,
|
||||
* including both an options bit and an explicit BreakIterator.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_WHOLE_STRING 0x20
|
||||
|
||||
/**
|
||||
* Titlecase sentences rather than words.
|
||||
* (Titlecase only the first character of each sentence, possibly adjusted.)
|
||||
* Option bits value for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing iterator options together,
|
||||
* including both an options bit and an explicit BreakIterator.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_SENTENCES 0x40
|
||||
|
||||
/**
|
||||
* Do not lowercase non-initial parts of words when titlecasing.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will titlecase the character at each
|
||||
* (possibly adjusted) BreakIterator index and
|
||||
* lowercase all other characters up to the next iterator index.
|
||||
* With this option, the other characters will not be modified.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @see UnicodeString::toTitle
|
||||
* @see CaseMap::toTitle
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_LOWERCASE 0x100
|
||||
|
||||
/**
|
||||
* Do not adjust the titlecasing BreakIterator indexes;
|
||||
* titlecase exactly the characters at breaks from the iterator.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will take each break iterator index,
|
||||
* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
|
||||
* and titlecase that one.
|
||||
*
|
||||
* Other characters are lowercased.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing adjustment options together.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @see UnicodeString::toTitle
|
||||
* @see CaseMap::toTitle
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
/**
|
||||
* Adjust each titlecasing BreakIterator index to the next cased character.
|
||||
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* This used to be the default index adjustment in ICU.
|
||||
* Since ICU 60, the default index adjustment is to the next character that is
|
||||
* a letter, number, symbol, or private use code point.
|
||||
* (Uncased modifier letters are skipped.)
|
||||
* The difference in behavior is small for word titlecasing,
|
||||
* but the new adjustment is much better for whole-string and sentence titlecasing:
|
||||
* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
|
||||
*
|
||||
* It is an error to specify multiple titlecasing adjustment options together.
|
||||
*
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_ADJUST_TO_CASED 0x400
|
||||
|
||||
/**
|
||||
* Option for string transformation functions to not first reset the Edits object.
|
||||
* Used for example in some case-mapping and normalization functions.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_EDITS_NO_RESET 0x2000
|
||||
|
||||
/**
|
||||
* Omit unchanged text when recording how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
* Used for example in some case-mapping and normalization functions.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
/**
|
||||
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
|
||||
* Compare strings in code point order instead of code unit order.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_CODE_POINT_ORDER 0x8000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Perform case-insensitive comparison.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_IGNORE_CASE 0x10000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Both input strings are assumed to fulfill FCD conditions.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define UNORM_INPUT_IS_FCD 0x20000
|
||||
|
||||
// Related definitions elsewhere.
|
||||
// Options that are not meaningful in the same functions
|
||||
// can share the same bits.
|
||||
//
|
||||
// Public:
|
||||
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
|
||||
//
|
||||
// Internal: (may change or be removed)
|
||||
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
|
||||
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
|
||||
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
|
||||
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
|
||||
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
|
||||
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
|
||||
|
||||
#endif // __STRINGOPTIONS_H__
|
||||
343
app/src/main/cpp/icu4c/include/unicode/stringpiece.h
Normal file
343
app/src/main/cpp/icu4c/include/unicode/stringpiece.h
Normal file
@@ -0,0 +1,343 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
// Copyright (C) 2009-2013, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2001 and onwards Google Inc.
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
// This code is a contribution of Google code, and the style used here is
|
||||
// a compromise between the original Google code and the ICU coding guidelines.
|
||||
// For example, data types are ICU-ified (size_t,int->int32_t),
|
||||
// and API comments doxygen-ified, but function names and behavior are
|
||||
// as in the original, if possible.
|
||||
// Assertion-style error handling, not available in ICU, was changed to
|
||||
// parameter "pinning" similar to UnicodeString.
|
||||
//
|
||||
// In addition, this is only a partial port of the original Google code,
|
||||
// limited to what was needed so far. The (nearly) complete original code
|
||||
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
|
||||
// (see ICU ticket 6765, r25517).
|
||||
|
||||
#ifndef __STRINGPIECE_H__
|
||||
#define __STRINGPIECE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: StringPiece: Read-only byte string wrapper class.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/std_string.h"
|
||||
|
||||
// Arghh! I wish C++ literals were "string".
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A string-like object that points to a sized piece of memory.
|
||||
*
|
||||
* We provide non-explicit singleton constructors so users can pass
|
||||
* in a "const char*" or a "string" wherever a "StringPiece" is
|
||||
* expected.
|
||||
*
|
||||
* Functions or methods may use StringPiece parameters to accept either a
|
||||
* "const char*" or a "string" value that will be implicitly converted to a
|
||||
* StringPiece.
|
||||
*
|
||||
* Systematic usage of StringPiece is encouraged as it will reduce unnecessary
|
||||
* conversions from "const char*" to "string" and back again.
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API StringPiece : public UMemory {
|
||||
private:
|
||||
const char* ptr_;
|
||||
int32_t length_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Default constructor, creates an empty StringPiece.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece() : ptr_(nullptr), length_(0) { }
|
||||
|
||||
/**
|
||||
* Constructs from a NUL-terminated const char * pointer.
|
||||
* @param str a NUL-terminated const char * pointer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* str);
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a NUL-terminated const char8_t * pointer.
|
||||
* @param str a NUL-terminated const char8_t * pointer
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
|
||||
#endif
|
||||
/**
|
||||
* Constructs an empty StringPiece.
|
||||
* Needed for type disambiguation from multiple other overloads.
|
||||
* @param p nullptr
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
|
||||
|
||||
/**
|
||||
* Constructs from a std::string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const std::string& str)
|
||||
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
|
||||
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a std::u8string.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const std::u8string& str)
|
||||
: ptr_(reinterpret_cast<const char*>(str.data())),
|
||||
length_(static_cast<int32_t>(str.size())) { }
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Constructs from some other implementation of a string piece class, from any
|
||||
* C++ record type that has these two methods:
|
||||
*
|
||||
* \code{.cpp}
|
||||
*
|
||||
* struct OtherStringPieceClass {
|
||||
* const char* data(); // or const char8_t*
|
||||
* size_t size();
|
||||
* };
|
||||
*
|
||||
* \endcode
|
||||
*
|
||||
* The other string piece class will typically be std::string_view from C++17
|
||||
* or absl::string_view from Abseil.
|
||||
*
|
||||
* Starting with C++20, data() may also return a const char8_t* pointer,
|
||||
* as from std::u8string_view.
|
||||
*
|
||||
* @param str the other string piece
|
||||
* @stable ICU 65
|
||||
*/
|
||||
// This constructor takes part in overload resolution only when the
// expression T().data() has exactly the type const char* (or const char8_t*
// where __cpp_char8_t is defined) and T().size() has exactly the type size_t.
// Because the checks are written as T().data() and T().size(), T must also be
// default-constructible for the constraint to hold.
// The data pointer is reinterpreted as const char* and the size is cast down
// to int32_t.
template <typename T,
|
||||
typename = typename std::enable_if<
|
||||
(std::is_same<decltype(T().data()), const char*>::value
|
||||
#if defined(__cpp_char8_t)
|
||||
|| std::is_same<decltype(T().data()), const char8_t*>::value
|
||||
#endif
|
||||
) &&
|
||||
std::is_same<decltype(T().size()), size_t>::value>::type>
|
||||
StringPiece(T str)
|
||||
: ptr_(reinterpret_cast<const char*>(str.data())),
|
||||
length_(static_cast<int32_t>(str.size())) {}
|
||||
|
||||
/**
|
||||
* Constructs from a const char * pointer and a specified length.
|
||||
* @param offset a const char * pointer (need not be terminated)
|
||||
* @param len the length of the string; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
// Stores the pointer and length exactly as given; neither is validated here.
StringPiece(const char* offset, int32_t len)
    : ptr_{offset},
      length_{len} {
}
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a const char8_t * pointer and a specified length.
|
||||
* @param str a const char8_t * pointer (need not be terminated)
|
||||
* @param len the length of the string; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
// Forwards to the (pointer, length) constructor after reinterpreting the
// char8_t pointer as const char*.
StringPiece(const char8_t* str, int32_t len)
    : StringPiece(reinterpret_cast<const char*>(str), len) {
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Substring of another StringPiece.
|
||||
* @param x the other StringPiece
|
||||
* @param pos start position in x; must be non-negative and <= x.length().
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const StringPiece& x, int32_t pos);
|
||||
/**
|
||||
* Substring of another StringPiece.
|
||||
* @param x the other StringPiece
|
||||
* @param pos start position in x; must be non-negative and <= x.length().
|
||||
* @param len length of the substring;
|
||||
* must be non-negative and will be pinned to at most x.length() - pos.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const StringPiece& x, int32_t pos, int32_t len);
|
||||
|
||||
/**
|
||||
* Returns the string pointer. May be nullptr if it is empty.
|
||||
*
|
||||
* data() may return a pointer to a buffer with embedded NULs, and the
|
||||
* returned buffer may or may not be null terminated. Therefore it is
|
||||
* typically a mistake to pass data() to a routine that expects a NUL
|
||||
* terminated string.
|
||||
* @return the string pointer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
const char* data() const {
    // Hands back the stored pointer unchanged.
    return ptr_;
}
|
||||
/**
|
||||
* Returns the string length. Same as length().
|
||||
* @return the string length
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t size() const {
    // Reports the stored length.
    return length_;
}
|
||||
/**
|
||||
* Returns the string length. Same as size().
|
||||
* @return the string length
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t length() const {
    // Reports the stored length.
    return length_;
}
|
||||
/**
|
||||
* Returns whether the string is empty.
|
||||
* @return true if the string is empty
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
UBool empty() const {
    // Only the length is checked; the pointer is not consulted.
    return 0 == length_;
}
|
||||
|
||||
/**
|
||||
* Sets to an empty string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void clear() {
    // Drop the length and the pointer so no data is referenced afterwards.
    length_ = 0;
    ptr_ = nullptr;
}
|
||||
|
||||
/**
|
||||
* Reset the stringpiece to refer to new data.
|
||||
* @param xdata pointer to the new string data. Need not be NUL-terminated.
|
||||
* @param len the length of the new data
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
void set(const char* xdata, int32_t len) {
    // Both fields are overwritten as given; neither is validated here.
    length_ = len;
    ptr_ = xdata;
}
|
||||
|
||||
/**
|
||||
* Reset the stringpiece to refer to new data.
|
||||
* @param str a pointer to a NUL-terminated string.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
void set(const char* str);
|
||||
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param xdata pointer to the new string data. Need not be NUL-terminated.
|
||||
* @param len the length of the new data
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* xdata, int32_t len) {
|
||||
set(reinterpret_cast<const char*>(xdata), len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param str a pointer to a NUL-terminated string.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* str) {
|
||||
set(reinterpret_cast<const char*>(str));
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Removes the first n string units.
|
||||
* @param n prefix length, must be non-negative and <=length()
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void remove_prefix(int32_t n) {
|
||||
if (n >= 0) {
|
||||
if (n > length_) {
|
||||
n = length_;
|
||||
}
|
||||
ptr_ += n;
|
||||
length_ -= n;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the last n string units.
|
||||
* @param n suffix length, must be non-negative and <=length()
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void remove_suffix(int32_t n) {
|
||||
if (n >= 0) {
|
||||
if (n <= length_) {
|
||||
length_ -= n;
|
||||
} else {
|
||||
length_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches the StringPiece for the given search string (needle).
|
||||
* @param needle The string for which to search.
|
||||
* @param offset Where to start searching within this string (haystack).
|
||||
* @return The offset of needle in haystack, or -1 if not found.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t find(StringPiece needle, int32_t offset);
|
||||
|
||||
/**
|
||||
* Compares this StringPiece with the other StringPiece, with semantics
|
||||
* similar to std::string::compare().
|
||||
* @param other The string to compare to.
|
||||
* @return below zero if this < other; above zero if this > other; 0 if this == other.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t compare(StringPiece other);
|
||||
|
||||
/**
|
||||
* Maximum integer, used as a default value for substring methods.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
static const int32_t npos; // = 0x7fffffff;
|
||||
|
||||
/**
|
||||
* Returns a substring of this StringPiece.
|
||||
* @param pos start position; must be non-negative and <= length().
|
||||
* @param len length of the substring;
|
||||
* must be non-negative and will be pinned to at most length() - pos.
|
||||
* @return the substring StringPiece
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece substr(int32_t pos, int32_t len = npos) const {
    // Built by the (StringPiece, pos, len) substring constructor.
    StringPiece piece(*this, pos, len);
    return piece;
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Global operator == for StringPiece
|
||||
* @param x The first StringPiece to compare.
|
||||
* @param y The second StringPiece to compare.
|
||||
* @return true if the string data is equal
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
U_EXPORT UBool U_EXPORT2
|
||||
operator==(const StringPiece& x, const StringPiece& y);
|
||||
|
||||
/**
|
||||
* Global operator != for StringPiece
|
||||
* @param x The first StringPiece to compare.
|
||||
* @param y The second StringPiece to compare.
|
||||
* @return true if the string data is not equal
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline UBool operator!=(const StringPiece& x, const StringPiece& y) {
    // Defined as the negation of operator==.
    const UBool same = (x == y);
    return !same;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __STRINGPIECE_H__
|
||||
650
app/src/main/cpp/icu4c/include/unicode/ubrk.h
Normal file
650
app/src/main/cpp/icu4c/include/unicode/ubrk.h
Normal file
@@ -0,0 +1,650 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2015, International Business Machines Corporation and others.
|
||||
* All Rights Reserved.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UBRK_H
|
||||
#define UBRK_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* A text-break iterator.
|
||||
* For usage in C programs.
|
||||
*/
|
||||
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
# define UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
/**
|
||||
* Opaque type representing an ICU Break iterator object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UBreakIterator UBreakIterator;
|
||||
#endif
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: BreakIterator
|
||||
*
|
||||
* <h2> BreakIterator C API </h2>
|
||||
*
|
||||
* The BreakIterator C API defines methods for finding the location
|
||||
* of boundaries in text. UBreakIterator objects maintain a
|
||||
* current position and scan over text returning the index of characters
|
||||
* where boundaries occur.
|
||||
* <p>
|
||||
* Line boundary analysis determines where a text string can be broken
|
||||
* when line-wrapping. The mechanism correctly handles punctuation and
|
||||
* hyphenated words.
|
||||
* <p>
|
||||
* Note: The locale keyword "lb" can be used to modify line break
|
||||
* behavior according to the CSS level 3 line-break options, see
|
||||
* <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
|
||||
* "ja@lb=strict", "zh@lb=loose".
|
||||
* <p>
|
||||
* Sentence boundary analysis allows selection with correct
|
||||
* interpretation of periods within numbers and abbreviations, and
|
||||
* trailing punctuation marks such as quotation marks and parentheses.
|
||||
* <p>
|
||||
* Note: The locale keyword "ss" can be used to enable use of
|
||||
* segmentation suppression data (preventing breaks in English after
|
||||
* abbreviations such as "Mr." or "Est.", for example), as follows:
|
||||
* "en@ss=standard".
|
||||
* <p>
|
||||
* Word boundary analysis is used by search and replace functions, as
|
||||
* well as within text editing applications that allow the user to
|
||||
* select words with a double click. Word selection provides correct
|
||||
* interpretation of punctuation marks within and following
|
||||
* words. Characters that are not part of a word, such as symbols or
|
||||
* punctuation marks, have word-breaks on both sides.
|
||||
* <p>
|
||||
* Character boundary analysis identifies the boundaries of
|
||||
* "Extended Grapheme Clusters", which are groupings of codepoints
|
||||
* that should be treated as character-like units for many text operations.
|
||||
* Please see Unicode Standard Annex #29, Unicode Text Segmentation,
|
||||
* http://www.unicode.org/reports/tr29/ for additional information
|
||||
* on grapheme clusters and guidelines on their use.
|
||||
* <p>
|
||||
* Title boundary analysis locates all positions,
|
||||
* typically starts of words, that should be set to Title Case
|
||||
* when title casing the text.
|
||||
* <p>
|
||||
* The text boundary positions are found according to the rules
|
||||
* described in Unicode Standard Annex #29, Text Boundaries, and
|
||||
* Unicode Standard Annex #14, Line Breaking Properties. These
|
||||
* are available at http://www.unicode.org/reports/tr14/ and
|
||||
* http://www.unicode.org/reports/tr29/.
|
||||
* <p>
|
||||
* In addition to the plain C API defined in this header file, an
|
||||
* object oriented C++ API with equivalent functionality is defined in the
|
||||
* file brkiter.h.
|
||||
* <p>
|
||||
* Code snippets illustrating the use of the Break Iterator APIs
|
||||
* are available in the ICU User Guide,
|
||||
* http://icu-project.org/userguide/boundaryAnalysis.html
|
||||
* and in the sample program icu/source/samples/break/break.cpp
|
||||
*/
|
||||
|
||||
/** The possible types of text boundaries. @stable ICU 2.0 */
|
||||
typedef enum UBreakIteratorType {
|
||||
/** Character breaks @stable ICU 2.0 */
|
||||
UBRK_CHARACTER = 0,
|
||||
/** Word breaks @stable ICU 2.0 */
|
||||
UBRK_WORD = 1,
|
||||
/** Line breaks @stable ICU 2.0 */
|
||||
UBRK_LINE = 2,
|
||||
/** Sentence breaks @stable ICU 2.0 */
|
||||
UBRK_SENTENCE = 3,
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Title Case breaks
|
||||
* The iterator created using this type locates title boundaries as described for
|
||||
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
|
||||
* please use Word Boundary iterator.
|
||||
*
|
||||
* @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
|
||||
*/
|
||||
UBRK_TITLE = 4,
|
||||
/**
|
||||
* One more than the highest normal UBreakIteratorType value.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UBRK_COUNT = 5
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UBreakIteratorType;
|
||||
|
||||
/** Value indicating all text boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UBRK_DONE ((int32_t) -1)
|
||||
|
||||
|
||||
/**
|
||||
* Enum constants for the word break tags returned by
|
||||
* getRuleStatus(). A range of values is defined for each category of
|
||||
* word, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
typedef enum UWordBreak {
|
||||
/** Tag value for "words" that do not fit into any of other categories.
|
||||
* Includes spaces and most punctuation. */
|
||||
UBRK_WORD_NONE = 0,
|
||||
/** Upper bound for tags for uncategorized words. */
|
||||
UBRK_WORD_NONE_LIMIT = 100,
|
||||
/** Tag value for words that appear to be numbers, lower limit. */
|
||||
UBRK_WORD_NUMBER = 100,
|
||||
/** Tag value for words that appear to be numbers, upper limit. */
|
||||
UBRK_WORD_NUMBER_LIMIT = 200,
|
||||
/** Tag value for words that contain letters, excluding
|
||||
* hiragana, katakana or ideographic characters, lower limit. */
|
||||
UBRK_WORD_LETTER = 200,
|
||||
/** Tag value for words containing letters, upper limit */
|
||||
UBRK_WORD_LETTER_LIMIT = 300,
|
||||
/** Tag value for words containing kana characters, lower limit */
|
||||
UBRK_WORD_KANA = 300,
|
||||
/** Tag value for words containing kana characters, upper limit */
|
||||
UBRK_WORD_KANA_LIMIT = 400,
|
||||
/** Tag value for words containing ideographic characters, lower limit */
|
||||
UBRK_WORD_IDEO = 400,
|
||||
/** Tag value for words containing ideographic characters, upper limit */
|
||||
UBRK_WORD_IDEO_LIMIT = 500
|
||||
} UWordBreak;
|
||||
|
||||
/**
|
||||
* Enum constants for the line break tags returned by getRuleStatus().
|
||||
* A range of values is defined for each category of
|
||||
* word, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef enum ULineBreakTag {
|
||||
/** Tag value for soft line breaks, positions at which a line break
|
||||
* is acceptable but not required */
|
||||
UBRK_LINE_SOFT = 0,
|
||||
/** Upper bound for soft line breaks. */
|
||||
UBRK_LINE_SOFT_LIMIT = 100,
|
||||
/** Tag value for a hard, or mandatory line break */
|
||||
UBRK_LINE_HARD = 100,
|
||||
/** Upper bound for hard line breaks. */
|
||||
UBRK_LINE_HARD_LIMIT = 200
|
||||
} ULineBreakTag;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Enum constants for the sentence break tags returned by getRuleStatus().
|
||||
* A range of values is defined for each category of
|
||||
* sentence, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef enum USentenceBreakTag {
|
||||
/** Tag value for sentences ending with a sentence terminator
|
||||
* ('.', '?', '!', etc.) character, possibly followed by a
|
||||
* hard separator (CR, LF, PS, etc.)
|
||||
*/
|
||||
UBRK_SENTENCE_TERM = 0,
|
||||
/** Upper bound for tags for sentences ended by sentence terminators. */
|
||||
UBRK_SENTENCE_TERM_LIMIT = 100,
|
||||
/** Tag value for sentences that do not contain an ending
|
||||
* sentence terminator ('.', '?', '!', etc.) character, but
|
||||
* are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
|
||||
*/
|
||||
UBRK_SENTENCE_SEP = 100,
|
||||
/** Upper bound for tags for sentences ended by a separator. */
|
||||
UBRK_SENTENCE_SEP_LIMIT = 200
|
||||
/** Tag value for a hard, or mandatory line break */
|
||||
} USentenceBreakTag;
|
||||
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries for a specified locale.
|
||||
* A UBreakIterator may be used for detecting character, line, word,
|
||||
* and sentence breaks in text.
|
||||
* @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
|
||||
* UBRK_LINE, UBRK_SENTENCE
|
||||
* @param locale The locale specifying the text-breaking conventions. Note that
|
||||
* locale keys such as "lb" and "ss" may be used to modify text break behavior,
|
||||
* see general discussion of BreakIterator C API.
|
||||
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
|
||||
* used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param status A UErrorCode to receive any errors.
|
||||
* @return A UBreakIterator for the specified locale.
|
||||
* @see ubrk_openRules
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_open(UBreakIteratorType type,
|
||||
const char *locale,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries using specified breaking rules.
|
||||
* The rule syntax is ... (TBD)
|
||||
* @param rules A set of rules specifying the text breaking conventions.
|
||||
* @param rulesLength The number of characters in rules, or -1 if null-terminated.
|
||||
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
|
||||
* used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param parseErr Receives position and context information for any syntax errors
|
||||
* detected while parsing the rules.
|
||||
* @param status A UErrorCode to receive any errors.
|
||||
* @return A UBreakIterator for the specified rules.
|
||||
* @see ubrk_open
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_openRules(const UChar *rules,
|
||||
int32_t rulesLength,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UParseError *parseErr,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
|
||||
* Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
|
||||
* Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
|
||||
* compatible across different major versions of ICU, nor across platforms of different
|
||||
* endianness or different base character set family (ASCII vs EBCDIC).
|
||||
* @param binaryRules A set of compiled binary rules specifying the text breaking
|
||||
* conventions. Ownership of the storage containing the compiled
|
||||
* rules remains with the caller of this function. The compiled
|
||||
* rules must not be modified or deleted during the life of the
|
||||
* break iterator.
|
||||
* @param rulesLength The length of binaryRules in bytes; must be >= 0.
|
||||
* @param text The text to be iterated over. May be null, in which case
|
||||
* ubrk_setText() is used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param status Pointer to UErrorCode to receive any errors.
|
||||
* @return UBreakIterator for the specified rules.
|
||||
* @see ubrk_getBinaryRules
|
||||
* @stable ICU 59
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
||||
const UChar * text, int32_t textLength,
|
||||
UErrorCode * status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation
|
||||
* @param bi iterator to be cloned
|
||||
* @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
|
||||
* user allocated space for the new clone. If NULL new memory will be allocated.
|
||||
* If buffer is not large enough, new memory will be allocated.
|
||||
* Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
|
||||
* @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
|
||||
* pointer to size of allocated space.
|
||||
* If *pBufferSize == 0, a sufficient size for use in cloning will
|
||||
* be returned ('pre-flighting')
|
||||
* If *pBufferSize is not enough for a stack-based safe clone,
|
||||
* new memory will be allocated.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
|
||||
* @return pointer to the new clone
|
||||
* @deprecated ICU 69 Use ubrk_clone() instead.
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_safeClone(
|
||||
const UBreakIterator *bi,
|
||||
void *stackBuffer,
|
||||
int32_t *pBufferSize,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation.
|
||||
* @param bi iterator to be cloned
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* @return pointer to the new clone
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_clone(const UBreakIterator *bi,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
|
||||
* @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
|
||||
*/
|
||||
#define U_BRK_SAFECLONE_BUFFERSIZE 1
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Close a UBreakIterator.
|
||||
* Once closed, a UBreakIterator may no longer be used.
|
||||
* @param bi The break iterator to close.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_close(UBreakIterator *bi);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUBreakIteratorPointer
|
||||
* "Smart pointer" class, closes a UBreakIterator via ubrk_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Sets an existing iterator to point to a new piece of text.
|
||||
* The break iterator retains a pointer to the supplied text.
|
||||
* The caller must not modify or delete the text while the BreakIterator
|
||||
* retains the reference.
|
||||
*
|
||||
* @param bi The iterator to use
|
||||
* @param text The text to be set
|
||||
* @param textLength The length of the text
|
||||
* @param status The error code
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_setText(UBreakIterator* bi,
|
||||
const UChar* text,
|
||||
int32_t textLength,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
/**
|
||||
* Sets an existing iterator to point to a new piece of text.
|
||||
*
|
||||
* All index positions returned by break iterator functions are
|
||||
* native indices from the UText. For example, when breaking UTF-8
|
||||
* encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
|
||||
* will be UTF-8 string indices, not UTF-16 positions.
|
||||
*
|
||||
* @param bi The iterator to use
|
||||
* @param text The text to be set.
|
||||
* This function makes a shallow clone of the supplied UText. This means
|
||||
* that the caller is free to immediately close or otherwise reuse the
|
||||
* UText that was passed as a parameter, but that the underlying text itself
|
||||
* must not be altered while being referenced by the break iterator.
|
||||
* @param status The error code
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_setUText(UBreakIterator* bi,
|
||||
UText* text,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Determine the most recently-returned text boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
|
||||
* \ref ubrk_first, or \ref ubrk_last.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_current(const UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Advance the iterator to the boundary following the current boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index of the next text boundary, or UBRK_DONE
|
||||
* if all text boundaries have been returned.
|
||||
* @see ubrk_previous
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_next(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the boundary preceding the current boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index of the preceding text boundary, or UBRK_DONE
|
||||
* if all text boundaries have been returned.
|
||||
* @see ubrk_next
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_previous(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to zero, the start of the text being scanned.
|
||||
* @param bi The break iterator to use.
|
||||
* @return The new iterator position (zero).
|
||||
* @see ubrk_last
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_first(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
|
||||
* This is not the same as the last character.
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character offset immediately <EM>beyond</EM> the last character in the
|
||||
* text being scanned.
|
||||
* @see ubrk_first
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_last(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the first boundary preceding the specified offset.
|
||||
* The new position is always smaller than offset, or UBRK_DONE.
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset The offset to begin scanning.
|
||||
* @return The text boundary preceding offset, or UBRK_DONE.
|
||||
* @see ubrk_following
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_preceding(UBreakIterator *bi,
|
||||
int32_t offset);
|
||||
|
||||
/**
|
||||
* Advance the iterator to the first boundary following the specified offset.
|
||||
* The value returned is always greater than offset, or UBRK_DONE.
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset The offset to begin scanning.
|
||||
* @return The text boundary following offset, or UBRK_DONE.
|
||||
* @see ubrk_preceding
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_following(UBreakIterator *bi,
|
||||
int32_t offset);
|
||||
|
||||
/**
|
||||
* Get a locale for which text breaking information is available.
|
||||
* A UBreakIterator in a locale returned by this function will perform the correct
|
||||
* text breaking for the locale.
|
||||
* @param index The index of the desired locale.
|
||||
* @return A locale for which text breaking information is available, or 0 if none.
|
||||
* @see ubrk_countAvailable
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ubrk_getAvailable(int32_t index);
|
||||
|
||||
/**
|
||||
* Determine how many locales have text breaking information available.
|
||||
* This function is most useful for determining the loop ending condition for
|
||||
* calls to \ref ubrk_getAvailable.
|
||||
* @return The number of locales for which text breaking information is available.
|
||||
* @see ubrk_getAvailable
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_countAvailable(void);
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if the specified position is a boundary position. As a side
|
||||
* effect, leaves the iterator pointing to the first boundary position at
|
||||
* or after "offset".
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
|
||||
|
||||
/**
|
||||
* Return the status from the break rule that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
* within brackets, {123}, for example. For rules that do not specify a
|
||||
* status, a default value of 0 is returned.
|
||||
* <p>
|
||||
* For word break iterators, the possible values are defined in enum UWordBreak.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatus(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Get the statuses from the break rules that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
* within brackets, {123}, for example. The default status value for rules
|
||||
* that do not explicitly provide one is zero.
|
||||
* <p>
|
||||
* For word break iterators, the possible values are defined in enum UWordBreak.
|
||||
* @param bi The break iterator to use
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Return the locale of the break iterator. You can choose between the valid and
|
||||
* the actual locale.
|
||||
* @param bi break iterator
|
||||
* @param type locale type (valid or actual)
|
||||
* @param status error code
|
||||
* @return locale string
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized
|
||||
* system-level code. One example use case is with garbage collection
|
||||
* that moves the text in memory.
|
||||
*
|
||||
* @param bi The break iterator.
|
||||
* @param text The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
*
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_refreshUText(UBreakIterator *bi,
|
||||
UText *text,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
|
||||
* The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
|
||||
* more quickly than using ubrk_openRules. The compiled rules are not compatible across
|
||||
* different major versions of ICU, nor across platforms of different endianness or
|
||||
* different base character set family (ASCII vs EBCDIC). Supports preflighting (with
|
||||
* binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
|
||||
* the binaryRules buffer. However, whether preflighting or not, if the actual length
|
||||
* is greater than INT32_MAX, then the function returns 0 and sets *status to
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR.
|
||||
|
||||
* @param bi The break iterator to use.
|
||||
* @param binaryRules Buffer to receive the compiled binary rules; set to NULL for
|
||||
* preflighting.
|
||||
* @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
|
||||
* preflighting. Must be >= 0.
|
||||
* @param status Pointer to UErrorCode to receive any errors, such as
|
||||
* U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
* @return The actual byte length of the binary rules, if <= INT32_MAX;
|
||||
* otherwise 0. If not preflighting and this is larger than
|
||||
* rulesCapacity, *status will be set to an error.
|
||||
* @see ubrk_openBinaryRules
|
||||
* @stable ICU 59
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getBinaryRules(UBreakIterator *bi,
|
||||
uint8_t * binaryRules, int32_t rulesCapacity,
|
||||
UErrorCode * status);
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif
|
||||
4056
app/src/main/cpp/icu4c/include/unicode/uchar.h
Normal file
4056
app/src/main/cpp/icu4c/include/unicode/uchar.h
Normal file
File diff suppressed because it is too large
Load Diff
2045
app/src/main/cpp/icu4c/include/unicode/ucnv.h
Normal file
2045
app/src/main/cpp/icu4c/include/unicode/ucnv.h
Normal file
File diff suppressed because it is too large
Load Diff
465
app/src/main/cpp/icu4c/include/unicode/ucnv_err.h
Normal file
465
app/src/main/cpp/icu4c/include/unicode/ucnv_err.h
Normal file
@@ -0,0 +1,465 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
*
|
||||
* ucnv_err.h:
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C UConverter predefined error callbacks
|
||||
*
|
||||
* <h2>Error Behaviour Functions</h2>
|
||||
* Defines some error behaviour functions called by ucnv_{from,to}Unicode
|
||||
* These are provided as part of ICU and many are stable, but they
|
||||
* can also be considered only as an example of what can be done with
|
||||
* callbacks. You may of course write your own.
|
||||
*
|
||||
* If you want to write your own, you may also find the functions from
|
||||
* ucnv_cb.h useful when writing your own callbacks.
|
||||
*
|
||||
* These functions, although public, should NEVER be called directly.
|
||||
* They should be used as parameters to the ucnv_setFromUCallBack
|
||||
* and ucnv_setToUCallBack functions, to set the behaviour of a converter
|
||||
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
|
||||
*
|
||||
* usage example: 'STOP' doesn't need any context, but newContext
|
||||
* could be set to something other than 'NULL' if needed. The available
|
||||
* contexts in this header can modify the default behavior of the callback.
|
||||
*
|
||||
* \code
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UConverter *myConverter = ucnv_open("ibm-949", &err);
|
||||
* const void *oldContext;
|
||||
* UConverterFromUCallback oldAction;
|
||||
*
|
||||
*
|
||||
* if (U_SUCCESS(err))
|
||||
* {
|
||||
* ucnv_setFromUCallBack(myConverter,
|
||||
* UCNV_FROM_U_CALLBACK_STOP,
|
||||
* NULL,
|
||||
* &oldAction,
|
||||
* &oldContext,
|
||||
* &err);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* The code above tells "myConverter" to stop when it encounters an
|
||||
* ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
|
||||
* Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
|
||||
* and ucnv_setToUCallBack would need to be called in order to change
|
||||
* that behavior too.
|
||||
*
|
||||
* Here is an example with a context:
|
||||
*
|
||||
* \code
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UConverter *myConverter = ucnv_open("ibm-949", &err);
|
||||
* const void *oldContext;
|
||||
* UConverterToUCallback oldAction;
|
||||
*
|
||||
*
|
||||
* if (U_SUCCESS(err))
|
||||
* {
|
||||
* ucnv_setToUCallBack(myConverter,
|
||||
* UCNV_TO_U_CALLBACK_SUBSTITUTE,
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL,
|
||||
* &oldAction,
|
||||
* &oldContext,
|
||||
* &err);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* The code above tells "myConverter" to stop when it encounters an
|
||||
* ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
|
||||
* Codepage -> Unicode. Any unmapped and legal characters will be
|
||||
* substituted with the default substitution character.
|
||||
*/
|
||||
|
||||
#ifndef UCNV_ERR_H
|
||||
#define UCNV_ERR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
|
||||
/** Forward declaring the UConverter structure. @stable ICU 2.0 */
|
||||
struct UConverter;
|
||||
|
||||
/** @stable ICU 2.0 */
|
||||
typedef struct UConverter UConverter;
|
||||
|
||||
/**
|
||||
* FROM_U, TO_U context options for sub callback
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_SUB_STOP_ON_ILLEGAL "i"
|
||||
|
||||
/**
|
||||
* FROM_U, TO_U context options for skip callback
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
|
||||
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_ICU NULL
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_JAVA "J"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
|
||||
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_C "C"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_XML_DEC "D"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_XML_HEX "X"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_UNICODE "U"
|
||||
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
|
||||
* a backslash, 1..6 hex digits, and a space)
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_CSS2 "S"
|
||||
|
||||
/**
|
||||
* The process condition code to be used with the callbacks.
|
||||
* Codes which are greater than UCNV_IRREGULAR should be
|
||||
* passed on to any chained callbacks.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum {
|
||||
UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
|
||||
The error code U_INVALID_CHAR_FOUND will be set. */
|
||||
UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
|
||||
\\x81\\x2E is illegal in SJIS because \\x2E
|
||||
is not a valid trail byte for the \\x81
|
||||
lead byte.
|
||||
Also, starting with Unicode 3.0.1, non-shortest byte sequences
|
||||
in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
|
||||
are also illegal, not just irregular.
|
||||
The error code U_ILLEGAL_CHAR_FOUND will be set. */
|
||||
UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
|
||||
the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
|
||||
are irregular UTF-8 byte sequences for single surrogate
|
||||
code points.
|
||||
The error code U_INVALID_CHAR_FOUND will be set. */
|
||||
UCNV_RESET = 3, /**< The callback is called with this reason when a
|
||||
'reset' has occurred. Callback should reset all
|
||||
state. */
|
||||
UCNV_CLOSE = 4, /**< Called when the converter is closed. The
|
||||
callback should release any allocated memory. */
|
||||
UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
|
||||
converter. The pointer available as the
|
||||
'context' is an alias to the original converter's
|
||||
context pointer. If the context must be owned
|
||||
by the new converter, the callback must clone
|
||||
the data and call ucnv_setFromUCallBack
|
||||
(or ucnv_setToUCallBack) with the correct pointer.
|
||||
@stable ICU 2.2
|
||||
*/
|
||||
} UConverterCallbackReason;
|
||||
|
||||
|
||||
/**
|
||||
* The structure for the fromUnicode callback function parameter.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct {
|
||||
uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
|
||||
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
|
||||
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
|
||||
const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
|
||||
const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
|
||||
char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
|
||||
const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that receives the offsets (written as: *offsets = value; offsets++;). @stable ICU 2.0 */
|
||||
} UConverterFromUnicodeArgs;
|
||||
|
||||
|
||||
/**
|
||||
* The structure for the toUnicode callback function parameter.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct {
|
||||
uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
|
||||
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
|
||||
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
|
||||
const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
|
||||
const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
|
||||
UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
|
||||
const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that receives the offsets (written as: *offsets = value; offsets++;). @stable ICU 2.0 */
|
||||
} UConverterToUnicodeArgs;
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
*
|
||||
* @param context Pointer to the callback's private data
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err This should always be set to a failure status prior to calling.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
*
|
||||
* @param context Pointer to the callback's private data
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err This should always be set to a failure status prior to calling.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback skips any ILLEGAL_SEQUENCE, or
|
||||
* skips only UNASSIGNED_SEQUENCE depending on the context parameter,
|
||||
* simply ignoring those characters.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Skips any ILLEGAL_SEQUENCE
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
|
||||
* UNASSIGNED_SEQUENCE depending on context parameter, with the
|
||||
* current substitution string for the converter. This is the default
|
||||
* callback.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Substitutes any ILLEGAL_SEQUENCE
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @see ucnv_setSubstChars
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
|
||||
* hexadecimal representation of the illegal codepoints
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* <ul>
|
||||
* <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE").
|
||||
* In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* %UD84D%UDC56</li>
|
||||
* <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
|
||||
* In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \\uD84D\\uDC56</li>
|
||||
* <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
|
||||
* In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \\U00023456</li>
|
||||
* <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
|
||||
* representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly.
|
||||
* In the Event the converter doesn't support the characters {&,#}[0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* &#144470; and Zero padding is ignored.</li>
|
||||
* <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly.
|
||||
* In the Event the converter doesn't support the characters {&,#,x}[0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \htmlonly&#x23456;\endhtmlonly</li>
|
||||
* </ul>
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
|
||||
* skips only UNASSIGNED_SEQUENCE depending on the context parameter,
|
||||
* simply ignoring those characters.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Skips any ILLEGAL_SEQUENCE
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
|
||||
* UNASSIGNED_SEQUENCE depending on context parameter, with the
|
||||
* Unicode substitution character, U+FFFD.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Substitutes any ILLEGAL_SEQUENCE
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
|
||||
* hexadecimal representation of the illegal bytes
|
||||
* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
|
||||
*
|
||||
* @param context This function currently recognizes the callback options:
|
||||
* UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
|
||||
* UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*UCNV_ERR_H*/
|
||||
456
app/src/main/cpp/icu4c/include/unicode/uconfig.h
Normal file
456
app/src/main/cpp/icu4c/include/unicode/uconfig.h
Normal file
@@ -0,0 +1,456 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: uconfig.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002sep19
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UCONFIG_H__
|
||||
#define __UCONFIG_H__
|
||||
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief User-configurable settings
|
||||
*
|
||||
* Miscellaneous switches:
|
||||
*
|
||||
* A number of macros affect a variety of minor aspects of ICU.
|
||||
* Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
|
||||
* and moved here to make them easier to find.
|
||||
*
|
||||
* Switches for excluding parts of ICU library code modules:
|
||||
*
|
||||
* Changing these macros allows building partial, smaller libraries for special purposes.
|
||||
* By default, all modules are built.
|
||||
* The switches are fairly coarse, controlling large modules.
|
||||
* Basic services cannot be turned off.
|
||||
*
|
||||
* Building with any of these options does not guarantee that the
|
||||
* ICU build process will completely work. It is recommended that
|
||||
* the ICU libraries and data be built using the normal build.
|
||||
* At that time you should remove the data used by those services.
|
||||
* After building the ICU data library, you should rebuild the ICU
|
||||
* libraries with these switches customized to your needs.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
|
||||
* prior to determining default settings for uconfig variables.
|
||||
*
|
||||
* @internal ICU 4.0
|
||||
*/
|
||||
#if defined(UCONFIG_USE_LOCAL)
|
||||
#include "uconfig_local.h"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DEBUG
|
||||
* Determines whether to include debugging code.
|
||||
* Automatically set on Windows, but most compilers do not have
|
||||
* related predefined macros.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_DEBUG
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_DEBUG)
|
||||
/*
|
||||
* _DEBUG is defined by Visual Studio debug compilation.
|
||||
* Do *not* test for its NDEBUG macro: It is an orthogonal macro
|
||||
* which disables assert().
|
||||
*/
|
||||
# define U_DEBUG 1
|
||||
# else
|
||||
# define U_DEBUG 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Determines whether to enable auto cleanup of libraries.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCLN_NO_AUTO_CLEANUP
|
||||
#define UCLN_NO_AUTO_CLEANUP 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DISABLE_RENAMING
|
||||
* Determines whether to disable renaming or not.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DISABLE_RENAMING
|
||||
#define U_DISABLE_RENAMING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
* Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
|
||||
* utypes.h includes those headers if this macro is defined to 0.
|
||||
* Otherwise, each of those headers must be included explicitly when using one of their macros.
|
||||
* Defaults to 0 for backward compatibility, except inside ICU.
|
||||
* @stable ICU 49
|
||||
*/
|
||||
#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
|
||||
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
|
||||
defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
|
||||
#else
|
||||
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_OVERRIDE_CXX_ALLOCATION
|
||||
* Determines whether to override new and delete.
|
||||
* ICU is normally built such that all of its C++ classes, via their UMemory base,
|
||||
* override operators new and delete to use its internal, customizable,
|
||||
* non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
|
||||
*
|
||||
* This is especially important when the application and its libraries use multiple heaps.
|
||||
* For example, on Windows, this allows the ICU DLL to be used by
|
||||
* applications that statically link the C Runtime library.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#ifndef U_OVERRIDE_CXX_ALLOCATION
|
||||
#define U_OVERRIDE_CXX_ALLOCATION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_ENABLE_TRACING
|
||||
* Determines whether to enable tracing.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_ENABLE_TRACING
|
||||
#define U_ENABLE_TRACING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ENABLE_PLUGINS
|
||||
* Determines whether to enable ICU plugins.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_ENABLE_PLUGINS
|
||||
#define UCONFIG_ENABLE_PLUGINS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_ENABLE_DYLOAD
|
||||
* Whether to enable Dynamic loading in ICU.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_ENABLE_DYLOAD
|
||||
#define U_ENABLE_DYLOAD 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CHECK_DYLOAD
|
||||
* Whether to test Dynamic loading as an OS capability.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_CHECK_DYLOAD
|
||||
#define U_CHECK_DYLOAD 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DEFAULT_SHOW_DRAFT
|
||||
* Do we allow ICU users to use the draft APIs by default?
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DEFAULT_SHOW_DRAFT
|
||||
#define U_DEFAULT_SHOW_DRAFT 1
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Custom icu entry point renaming */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_HAVE_LIB_SUFFIX
|
||||
* 1 if a custom library suffix is set.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_LIB_SUFFIX
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
|
||||
# define U_HAVE_LIB_SUFFIX 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_LIB_SUFFIX_C_NAME_STRING
|
||||
* Defines the library suffix as a string with C syntax.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_LIB_SUFFIX_C_NAME_STRING
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_LIB_SUFFIX_C_NAME)
|
||||
/* NOTE(review): '#s' stringizes its argument without macro-expanding it first, so the
   definition below appears to yield the literal text "U_LIB_SUFFIX_C_NAME" rather than
   the configured suffix; a two-level stringify helper is the usual remedy -- confirm
   before relying on this branch. */
# define CONVERT_TO_STRING(s) #s
|
||||
# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME)
|
||||
#else
|
||||
# define U_LIB_SUFFIX_C_NAME_STRING ""
|
||||
#endif
|
||||
|
||||
/* common/i18n library switches --------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ONLY_COLLATION
|
||||
* This switch turns off modules that are not needed for collation.
|
||||
*
|
||||
* It does not turn off legacy conversion because that is necessary
|
||||
* for ICU to work on EBCDIC platforms (for the default converter).
|
||||
* If you want "only collation" and do not build for EBCDIC,
|
||||
* then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_ONLY_COLLATION
|
||||
# define UCONFIG_ONLY_COLLATION 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_ONLY_COLLATION
|
||||
/* common library */
|
||||
# define UCONFIG_NO_BREAK_ITERATION 1
|
||||
# define UCONFIG_NO_IDNA 1
|
||||
|
||||
/* i18n library */
|
||||
# if UCONFIG_NO_COLLATION
|
||||
# error Contradictory collation switches in uconfig.h.
|
||||
# endif
|
||||
# define UCONFIG_NO_FORMATTING 1
|
||||
# define UCONFIG_NO_TRANSLITERATION 1
|
||||
# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
|
||||
#endif
|
||||
|
||||
/* common library switches -------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FILE_IO
|
||||
* This switch turns off all file access in the common library
|
||||
* where file access is only used for data loading.
|
||||
* ICU data must then be provided in the form of a data DLL (or with an
|
||||
* equivalent way to link to the data residing in an executable,
|
||||
* as in building a combined library with both the common library's code and
|
||||
* the data), or via udata_setCommonData().
|
||||
* Application data must be provided via udata_setAppData() or by using
|
||||
* "open" functions that take pointers to data, for example ucol_openBinary().
|
||||
*
|
||||
* File access is not used at all in the i18n library.
|
||||
*
|
||||
* File access cannot be turned off for the icuio library or for the ICU
|
||||
* test suites and ICU tools.
|
||||
*
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FILE_IO
|
||||
# define UCONFIG_NO_FILE_IO 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR)
|
||||
# error Contradictory file io switches in uconfig.h.
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_CONVERSION
|
||||
* ICU will not completely build (compiling the tools fails) with this
|
||||
* switch turned on.
|
||||
* This switch turns off all converters.
|
||||
*
|
||||
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
|
||||
* in utypes.h if char* strings in your environment are always in UTF-8.
|
||||
*
|
||||
* @stable ICU 3.2
|
||||
* @see U_CHARSET_IS_UTF8
|
||||
*/
|
||||
#ifndef UCONFIG_NO_CONVERSION
|
||||
# define UCONFIG_NO_CONVERSION 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_CONVERSION
|
||||
# define UCONFIG_NO_LEGACY_CONVERSION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ONLY_HTML_CONVERSION
|
||||
* This switch turns off all of the converters NOT listed in
|
||||
* the HTML encoding standard:
|
||||
* http://www.w3.org/TR/encoding/#names-and-labels
|
||||
*
|
||||
* This is not possible on EBCDIC platforms
|
||||
* because they need ibm-37 or ibm-1047 default converters.
|
||||
*
|
||||
* @stable ICU 55
|
||||
*/
|
||||
#ifndef UCONFIG_ONLY_HTML_CONVERSION
|
||||
# define UCONFIG_ONLY_HTML_CONVERSION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_LEGACY_CONVERSION
|
||||
* This switch turns off all converters except for
|
||||
* - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
|
||||
* - US-ASCII
|
||||
* - ISO-8859-1
|
||||
*
|
||||
* Turning off legacy conversion is not possible on EBCDIC platforms
|
||||
* because they need ibm-37 or ibm-1047 default converters.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_LEGACY_CONVERSION
|
||||
# define UCONFIG_NO_LEGACY_CONVERSION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_NORMALIZATION
|
||||
* This switch turns off normalization.
|
||||
* It implies turning off several other services as well, for example
|
||||
* collation and IDNA.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_NORMALIZATION
|
||||
# define UCONFIG_NO_NORMALIZATION 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
/* common library */
|
||||
/* ICU 50 CJK dictionary BreakIterator uses normalization */
|
||||
# define UCONFIG_NO_BREAK_ITERATION 1
|
||||
/* IDNA (UTS #46) is implemented via normalization */
|
||||
# define UCONFIG_NO_IDNA 1
|
||||
|
||||
/* i18n library */
|
||||
# if UCONFIG_ONLY_COLLATION
|
||||
# error Contradictory collation switches in uconfig.h.
|
||||
# endif
|
||||
# define UCONFIG_NO_COLLATION 1
|
||||
# define UCONFIG_NO_TRANSLITERATION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_BREAK_ITERATION
|
||||
* This switch turns off break iteration.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_BREAK_ITERATION
|
||||
# define UCONFIG_NO_BREAK_ITERATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_IDNA
|
||||
* This switch turns off IDNA.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_IDNA
|
||||
# define UCONFIG_NO_IDNA 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
* Determines the default UMessagePatternApostropheMode.
|
||||
* See the documentation for that enum.
|
||||
*
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API
|
||||
* On platforms where U_PLATFORM_HAS_WIN32_API is true, this switch determines
|
||||
* if the Windows platform APIs are used for LCID<->Locale Name conversions.
|
||||
* Otherwise, only the built-in ICU tables are used.
|
||||
*
|
||||
* @internal ICU 64
|
||||
*/
|
||||
#ifndef UCONFIG_USE_WINDOWS_LCID_MAPPING_API
|
||||
# define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1
|
||||
#endif
|
||||
|
||||
/* i18n library switches ---------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_COLLATION
|
||||
* This switch turns off collation and collation-based string search.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_COLLATION
|
||||
# define UCONFIG_NO_COLLATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FORMATTING
|
||||
* This switch turns off formatting and calendar/timezone services.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FORMATTING
|
||||
# define UCONFIG_NO_FORMATTING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_TRANSLITERATION
|
||||
* This switch turns off transliteration.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_TRANSLITERATION
|
||||
# define UCONFIG_NO_TRANSLITERATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
* This switch turns off regular expressions.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_SERVICE
|
||||
* This switch turns off service registration.
|
||||
*
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
#ifndef UCONFIG_NO_SERVICE
|
||||
# define UCONFIG_NO_SERVICE 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_HAVE_PARSEALLINPUT
|
||||
* This switch turns on the "parse all input" attribute. Binary incompatible.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_HAVE_PARSEALLINPUT
|
||||
# define UCONFIG_HAVE_PARSEALLINPUT 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
* This switch turns off filtered break iteration code.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
|
||||
#endif
|
||||
|
||||
#endif // __UCONFIG_H__
|
||||
159
app/src/main/cpp/icu4c/include/unicode/ucpmap.h
Normal file
159
app/src/main/cpp/icu4c/include/unicode/ucpmap.h
Normal file
@@ -0,0 +1,159 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucpmap.h
|
||||
// created: 2018sep03 Markus W. Scherer
|
||||
|
||||
#ifndef __UCPMAP_H__
|
||||
#define __UCPMAP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* This file defines an abstract map from Unicode code points to integer values.
|
||||
*
|
||||
* @see UCPMap
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
*/
|
||||
|
||||
/**
|
||||
* Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||
*
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef struct UCPMap UCPMap;
|
||||
|
||||
/**
|
||||
* Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
|
||||
* Most users should use UCPMAP_RANGE_NORMAL.
|
||||
*
|
||||
* @see ucpmap_getRange
|
||||
* @see ucptrie_getRange
|
||||
* @see umutablecptrie_getRange
|
||||
* @stable ICU 63
|
||||
*/
|
||||
enum UCPMapRangeOption {
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map.
|
||||
* Most users should use this option.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_NORMAL,
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
|
||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||
* The surrogateValue is not transformed via filter().
|
||||
* See U_IS_LEAD(c).
|
||||
*
|
||||
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||
*
|
||||
* This option is useful for maps that map surrogate code *units* to
|
||||
* special values optimized for UTF-16 string processing
|
||||
* or for special error behavior for unpaired surrogates,
|
||||
* but those values are not to be associated with the lead surrogate code *points*.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||
* except that all surrogates (U+D800..U+DFFF) are treated as having the
|
||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||
* The surrogateValue is not transformed via filter().
|
||||
* See U_IS_SURROGATE(c).
|
||||
*
|
||||
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||
*
|
||||
* This option is useful for maps that map surrogate code *units* to
|
||||
* special values optimized for UTF-16 string processing
|
||||
* or for special error behavior for unpaired surrogates,
|
||||
* but those values are not to be associated with the surrogate code *points*.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_FIXED_ALL_SURROGATES
|
||||
};
|
||||
/* C-style typedef so the enum type can be written without the "enum" keyword. */
/* NOTE(review): skipped when U_IN_DOXYGEN is defined, presumably so the documentation build does not list the type twice - confirm. */
#ifndef U_IN_DOXYGEN
|
||||
typedef enum UCPMapRangeOption UCPMapRangeOption;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the value for a code point as stored in the map, with range checking.
|
||||
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
|
||||
*
|
||||
* @param map the map
|
||||
* @param c the code point
|
||||
* @return the map value,
|
||||
* or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucpmap_get(const UCPMap *map, UChar32 c);
|
||||
|
||||
/**
|
||||
* Callback function type: Modifies a map value.
|
||||
* Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
|
||||
* The modified value will be returned by the getRange function.
|
||||
*
|
||||
* Can be used to ignore some of the value bits,
|
||||
* make a filter for one of several values,
|
||||
* return a value index computed from the map value, etc.
|
||||
*
|
||||
* @param context an opaque pointer, as passed into the getRange function
|
||||
* @param value a value from the map
|
||||
* @return the modified value
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef uint32_t U_CALLCONV
|
||||
UCPMapValueFilter(const void *context, uint32_t value);
|
||||
|
||||
/**
|
||||
* Returns the last code point such that all those from start to there have the same value.
|
||||
* Can be used to efficiently iterate over all same-value ranges in a map.
|
||||
* (This is normally faster than iterating over code points and get()ting each value,
|
||||
* but much slower than a data structure that stores ranges directly.)
|
||||
*
|
||||
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||
* the value to be delivered is passed through that function, and the return value is the end
|
||||
* of the range where all values are modified to the same actual value.
|
||||
* The value is unchanged if that function pointer is NULL.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* UChar32 start = 0, end;
|
||||
* uint32_t value;
|
||||
* while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
* NULL, NULL, &value)) >= 0) {
|
||||
* // Work with the range start..end and its value.
|
||||
* start = end + 1;
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param map the map
|
||||
* @param start range start
|
||||
* @param option defines whether surrogates are treated normally,
|
||||
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||
* @param filter a pointer to a function that may modify the map data value,
|
||||
* or NULL if the values from the map are to be used unmodified
|
||||
* @param context an opaque pointer that is passed on to the filter function
|
||||
* @param pValue if not NULL, receives the value that every code point start..end has;
|
||||
* may have been modified by filter(context, map value)
|
||||
* if that function pointer is not NULL
|
||||
* @return the range end code point, or -1 if start is not a valid code point
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucpmap_getRange(const UCPMap *map, UChar32 start,
|
||||
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
209
app/src/main/cpp/icu4c/include/unicode/uenum.h
Normal file
209
app/src/main/cpp/icu4c/include/unicode/uenum.h
Normal file
@@ -0,0 +1,209 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uenum.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:2
|
||||
*
|
||||
* created on: 2002jul08
|
||||
* created by: Vladimir Weinstein
|
||||
*/
|
||||
|
||||
#ifndef __UENUM_H
|
||||
#define __UENUM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
|
||||
/* Forward declaration only: this header just passes icu::StringEnumeration by pointer (see uenum_openFromStringEnumeration()). */
U_NAMESPACE_BEGIN
|
||||
class StringEnumeration;
|
||||
U_NAMESPACE_END
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: String Enumeration
|
||||
*/
|
||||
|
||||
/**
|
||||
* An enumeration object.
|
||||
* For usage in C programs.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
struct UEnumeration;
|
||||
/** structure representing an enumeration object instance @stable ICU 2.2 */
|
||||
typedef struct UEnumeration UEnumeration;
|
||||
|
||||
/**
|
||||
* Disposes of resources in use by the iterator. If en is NULL,
|
||||
* does nothing. After this call, any char* or UChar* pointer
|
||||
* returned by uenum_unext() or uenum_next() is invalid.
|
||||
* @param en UEnumeration structure pointer
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uenum_close(UEnumeration* en);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUEnumerationPointer
|
||||
* "Smart pointer" class, closes a UEnumeration via uenum_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the number of elements that the iterator traverses. If
|
||||
* the iterator is out-of-sync with its service, status is set to
|
||||
* U_ENUM_OUT_OF_SYNC_ERROR.
|
||||
* This is a convenience function. It can end up being very
|
||||
* expensive as all the items might have to be pre-fetched (depending
|
||||
* on the type of data being traversed). Use with caution and only
|
||||
* when necessary.
|
||||
* @param en UEnumeration structure pointer
|
||||
* @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
|
||||
* iterator is out of sync.
|
||||
* @return number of elements in the iterator
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uenum_count(UEnumeration* en, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns the next element in the iterator's list. If there are
|
||||
* no more elements, returns NULL. If the iterator is out-of-sync
|
||||
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
|
||||
* NULL is returned. If the native service string is a char* string,
|
||||
* it is converted to UChar* with the invariant converter.
|
||||
* The result is terminated by (UChar)0.
|
||||
* @param en the iterator object
|
||||
* @param resultLength pointer to receive the length of the result
|
||||
* (not including the terminating \\0).
|
||||
* If the pointer is NULL it is ignored.
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service.
|
||||
* @return a pointer to the string. The string will be
|
||||
* zero-terminated. The return pointer is owned by this iterator
|
||||
* and must not be deleted by the caller. The pointer is valid
|
||||
* until the next call to any uenum_... method, including
|
||||
* uenum_next() or uenum_unext(). When all strings have been
|
||||
* traversed, returns NULL.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
uenum_unext(UEnumeration* en,
|
||||
int32_t* resultLength,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns the next element in the iterator's list. If there are
|
||||
* no more elements, returns NULL. If the iterator is out-of-sync
|
||||
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
|
||||
* NULL is returned. If the native service string is a UChar*
|
||||
* string, it is converted to char* with the invariant converter.
|
||||
* The result is terminated by (char)0. If the conversion fails
|
||||
* (because a character cannot be converted) then status is set to
|
||||
* U_INVARIANT_CONVERSION_ERROR and the return value is undefined
|
||||
* (but non-NULL).
|
||||
* @param en the iterator object
|
||||
* @param resultLength pointer to receive the length of the result
|
||||
* (not including the terminating \\0).
|
||||
* If the pointer is NULL it is ignored.
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service. Set to
|
||||
* U_INVARIANT_CONVERSION_ERROR if the underlying native string is
|
||||
* UChar* and conversion to char* with the invariant converter
|
||||
* fails. This error pertains only to current string, so iteration
|
||||
* might be able to continue successfully.
|
||||
* @return a pointer to the string. The string will be
|
||||
* zero-terminated. The return pointer is owned by this iterator
|
||||
* and must not be deleted by the caller. The pointer is valid
|
||||
* until the next call to any uenum_... method, including
|
||||
* uenum_next() or uenum_unext(). When all strings have been
|
||||
* traversed, returns NULL.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uenum_next(UEnumeration* en,
|
||||
int32_t* resultLength,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Resets the iterator to the current list of service IDs. This
|
||||
* re-establishes sync with the service and rewinds the iterator
|
||||
* to start at the first element.
|
||||
* @param en the iterator object
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uenum_reset(UEnumeration* en, UErrorCode* status);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Given a StringEnumeration, wrap it in a UEnumeration. The
|
||||
* StringEnumeration is adopted; after this call, the caller must not
|
||||
* delete it (regardless of error status).
|
||||
* @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
|
||||
* @param ec the error code.
|
||||
* @return a UEnumeration wrapping the adopted StringEnumeration.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
|
||||
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
|
||||
* \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
|
||||
* @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
|
||||
* @param count length of the array
|
||||
* @param ec error code
|
||||
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
|
||||
* @see uenum_close
|
||||
* @stable ICU 50
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
|
||||
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
|
||||
* \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
|
||||
* @param strings array of const char* strings (each null terminated). All storage is owned by the caller.
|
||||
* @param count length of the array
|
||||
* @param ec error code
|
||||
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
|
||||
* @see uenum_close
|
||||
* @stable ICU 50
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
|
||||
UErrorCode* ec);
|
||||
|
||||
#endif
|
||||
709
app/src/main/cpp/icu4c/include/unicode/uiter.h
Normal file
709
app/src/main/cpp/icu4c/include/unicode/uiter.h
Normal file
@@ -0,0 +1,709 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2011 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uiter.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jan18
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UITER_H__
|
||||
#define __UITER_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Unicode Character Iteration
|
||||
*
|
||||
* @see UCharIterator
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
/* Forward declarations of the C++ classes that a UCharIterator can wrap (see the UCharIterator struct documentation). */
U_NAMESPACE_BEGIN
|
||||
|
||||
class CharacterIterator;
|
||||
class Replaceable;
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
struct UCharIterator;
|
||||
typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
|
||||
|
||||
/**
|
||||
* Origin constants for UCharIterator.getIndex() and UCharIterator.move().
|
||||
* @see UCharIteratorMove
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef enum UCharIteratorOrigin {
|
||||
/* Index origins: start, current position, limit, zero (index 0), and length; they take the implicit values 0..4 in this order. */
UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
|
||||
} UCharIteratorOrigin;
|
||||
|
||||
/** Constants for UCharIterator. @stable ICU 2.6 */
|
||||
enum {
|
||||
/**
|
||||
* Constant value that may be returned by UCharIteratorMove
|
||||
* indicating that the final UTF-16 index is not known, but that the move succeeded.
|
||||
* This can occur when moving relative to limit or length, or
|
||||
* when moving relative to the current index after a setState()
|
||||
* when the current UTF-16 index is not known.
|
||||
*
|
||||
* It would be very inefficient to have to count from the beginning of the text
|
||||
* just to get the current/limit/length index after moving relative to it.
|
||||
* The actual index can be determined with getIndex(UITER_CURRENT)
|
||||
* which will count the UChars if necessary.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UITER_UNKNOWN_INDEX=-2
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Constant for UCharIterator getState() indicating an error or
|
||||
* an unknown state.
|
||||
* Returned by uiter_getState()/UCharIteratorGetState
|
||||
* when an error occurs.
|
||||
* Also, some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position. This will be clearly documented
|
||||
* for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define UITER_NO_STATE ((uint32_t)0xffffffff)
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.getIndex().
|
||||
*
|
||||
* Gets the current position, or the start or limit of the
|
||||
* iteration range.
|
||||
*
|
||||
* This function may perform slowly for UITER_CURRENT after setState() was called,
|
||||
* or for UITER_LENGTH, because an iterator implementation may have to count
|
||||
* UChars if the underlying storage is not UTF-16.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param origin get the 0, start, limit, length, or current index
|
||||
* @return the requested index, or U_SENTINEL in an error condition
|
||||
*
|
||||
* @see UCharIteratorOrigin
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.move().
|
||||
*
|
||||
* Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
|
||||
*
|
||||
* Moves the current position relative to the start or limit of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code units forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
* Out of bounds movement will be pinned to the start or limit.
|
||||
*
|
||||
* This function may perform slowly for moving relative to UITER_LENGTH
|
||||
* because an iterator implementation may have to count the rest of the
|
||||
* UChars if the native storage is not UTF-16.
|
||||
*
|
||||
* When moving relative to the limit or length, or
|
||||
* relative to the current position after setState() was called,
|
||||
* move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
|
||||
* determination of the actual UTF-16 index.
|
||||
* The actual index can be determined with getIndex(UITER_CURRENT)
|
||||
* which will count the UChars if necessary.
|
||||
* See UITER_UNKNOWN_INDEX for details.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param delta can be positive, zero, or negative
|
||||
* @param origin move relative to the 0, start, limit, length, or current index
|
||||
* @return the new index, or U_SENTINEL on an error condition,
|
||||
* or UITER_UNKNOWN_INDEX when the index is not known.
|
||||
*
|
||||
* @see UCharIteratorOrigin
|
||||
* @see UCharIterator
|
||||
* @see UITER_UNKNOWN_INDEX
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.hasNext().
|
||||
*
|
||||
* Check if current() and next() can still
|
||||
* return another code unit.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return boolean value for whether current() and next() can still return another code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UBool U_CALLCONV
|
||||
UCharIteratorHasNext(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.hasPrevious().
|
||||
*
|
||||
* Check if previous() can still return another code unit.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return boolean value for whether previous() can still return another code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UBool U_CALLCONV
|
||||
UCharIteratorHasPrevious(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.current().
|
||||
*
|
||||
* Return the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV
|
||||
UCharIteratorCurrent(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.next().
|
||||
*
|
||||
* Return the code unit at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code unit (and post-increment the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV
|
||||
UCharIteratorNext(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.previous().
|
||||
*
|
||||
* Decrement the index and return the code unit from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the previous code unit (after pre-decrementing the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV
|
||||
UCharIteratorPrevious(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.reservedFn().
|
||||
* Reserved for future use.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param something some integer argument
|
||||
* @return some integer
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorReserved(UCharIterator *iter, int32_t something);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.getState().
|
||||
*
|
||||
* Get the "state" of the iterator in the form of a single 32-bit word.
|
||||
* It is recommended that the state value be calculated to be as small as
|
||||
* is feasible. For strings with limited lengths, fewer than 32 bits may
|
||||
* be sufficient.
|
||||
*
|
||||
* This is used together with setState()/UCharIteratorSetState
|
||||
* to save and restore the iterator position more efficiently than with
|
||||
* getIndex()/move().
|
||||
*
|
||||
* The iterator state is defined as a uint32_t value because it is designed
|
||||
* for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
|
||||
* of the character iterator.
|
||||
*
|
||||
* With some UCharIterator implementations (e.g., UTF-8),
|
||||
* getting and setting the UTF-16 index with existing functions
|
||||
* (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
|
||||
* relatively slow because the iterator has to "walk" from a known index
|
||||
* to the requested one.
|
||||
* This takes more time the farther it needs to go.
|
||||
*
|
||||
* An opaque state value allows an iterator implementation to provide
|
||||
* an internal index (UTF-8: the source byte array index) for
|
||||
* fast, constant-time restoration.
|
||||
*
|
||||
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
|
||||
* the UTF-16 index may not be restored as well, but the iterator can deliver
|
||||
* the correct text contents and move relative to the current position
|
||||
* without performance degradation.
|
||||
*
|
||||
* Some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position, in which case they return UITER_NO_STATE instead.
|
||||
* This will be clearly documented for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the state word
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorSetState
|
||||
* @see UITER_NO_STATE
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef uint32_t U_CALLCONV
|
||||
UCharIteratorGetState(const UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.setState().
|
||||
*
|
||||
* Restore the "state" of the iterator using a state word from a getState() call.
|
||||
* The iterator object need not be the same one as for which getState() was called,
|
||||
* but it must be of the same type (set up using the same uiter_setXYZ function)
|
||||
* and it must iterate over the same string
|
||||
* (binary identical regardless of memory address).
|
||||
* For more about the state word see UCharIteratorGetState.
|
||||
*
|
||||
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
|
||||
* the UTF-16 index may not be restored as well, but the iterator can deliver
|
||||
* the correct text contents and move relative to the current position
|
||||
* without performance degradation.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param state the state word from a getState() call
|
||||
* on a same-type, same-string iterator
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorGetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef void U_CALLCONV
|
||||
UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
|
||||
|
||||
|
||||
/**
|
||||
* C API for code unit iteration.
|
||||
* This can be used as a C wrapper around
|
||||
* CharacterIterator, Replaceable, or implemented using simple strings, etc.
|
||||
*
|
||||
* There are two roles for using UCharIterator:
|
||||
*
|
||||
* A "provider" sets the necessary function pointers and controls the "protected"
|
||||
* fields of the UCharIterator structure. A "provider" passes a UCharIterator
|
||||
* into C APIs that need a UCharIterator as an abstract, flexible string interface.
|
||||
*
|
||||
* Implementations of such C APIs are "callers" of UCharIterator functions;
|
||||
* they only use the "public" function pointers and never access the "protected"
|
||||
* fields directly.
|
||||
*
|
||||
* The current() and next() functions only check the current index against the
|
||||
* limit, and previous() only checks the current index against the start,
|
||||
* to see if the iterator already reached the end of the iteration range.
|
||||
*
|
||||
* The assumption - in all iterators - is that the index is moved via the API,
|
||||
* which means it won't go out of bounds, or the index is modified by
|
||||
* user code that knows enough about the iterator implementation to set valid
|
||||
* index values.
|
||||
*
|
||||
* UCharIterator functions return code unit values 0..0xffff,
|
||||
* or U_SENTINEL if the iteration bounds are reached.
|
||||
*
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
struct UCharIterator {
|
||||
/**
|
||||
* (protected) Pointer to string or wrapped object or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
const void *context;
|
||||
|
||||
/**
|
||||
* (protected) Length of string or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t length;
|
||||
|
||||
/**
|
||||
* (protected) Start index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t start;
|
||||
|
||||
/**
|
||||
* (protected) Current index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t index;
|
||||
|
||||
/**
|
||||
* (protected) Limit index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t limit;
|
||||
|
||||
/**
|
||||
* (protected) Used by UTF-8 iterators and possibly others.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t reservedField;
|
||||
|
||||
/**
|
||||
* (public) Returns the current position or the
|
||||
* start or limit index of the iteration range.
|
||||
*
|
||||
* @see UCharIteratorGetIndex
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorGetIndex *getIndex;
|
||||
|
||||
/**
|
||||
* (public) Moves the current position relative to the start or limit of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code units forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
*
|
||||
* @see UCharIteratorMove
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorMove *move;
|
||||
|
||||
/**
|
||||
* (public) Check if current() and next() can still
|
||||
* return another code unit.
|
||||
*
|
||||
* @see UCharIteratorHasNext
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorHasNext *hasNext;
|
||||
|
||||
/**
|
||||
* (public) Check if previous() can still return another code unit.
|
||||
*
|
||||
* @see UCharIteratorHasPrevious
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorHasPrevious *hasPrevious;
|
||||
|
||||
/**
|
||||
* (public) Return the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @see UCharIteratorCurrent
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorCurrent *current;
|
||||
|
||||
/**
|
||||
* (public) Return the code unit at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @see UCharIteratorNext
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorNext *next;
|
||||
|
||||
/**
|
||||
* (public) Decrement the index and return the code unit from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @see UCharIteratorPrevious
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorPrevious *previous;
|
||||
|
||||
/**
|
||||
* (public) Reserved for future use. Currently NULL.
|
||||
*
|
||||
* @see UCharIteratorReserved
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorReserved *reservedFn;
|
||||
|
||||
/**
|
||||
* (public) Return the state of the iterator, to be restored later with setState().
|
||||
* This function pointer is NULL if the iterator does not implement it.
|
||||
*
|
||||
* @see UCharIteratorGetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UCharIteratorGetState *getState;
|
||||
|
||||
/**
|
||||
* (public) Restore the iterator state from the state word from a call
|
||||
* to getState().
|
||||
* This function pointer is NULL if the iterator does not implement it.
|
||||
*
|
||||
* @see UCharIteratorSetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UCharIteratorSetState *setState;
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the code point
|
||||
* at the current index.
|
||||
*
|
||||
* Return the code point that includes the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
* If the current code unit is a lead or trail surrogate,
|
||||
* then the following or preceding surrogate is used to form
|
||||
* the code point value.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code point
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_GET
|
||||
* @see UnicodeString::char32At()
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_current32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the next code point.
|
||||
*
|
||||
* Return the code point at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code point (and post-increment the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_NEXT
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_next32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the previous code point.
|
||||
*
|
||||
* Decrement the index and return the code point from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the previous code point (after pre-decrementing the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_PREV
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_previous32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Get the "state" of the iterator in the form of a single 32-bit word.
|
||||
* This is a convenience function that calls iter->getState(iter)
|
||||
* if iter->getState is not NULL;
|
||||
* if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
|
||||
*
|
||||
* Some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position, in which case they return UITER_NO_STATE instead.
|
||||
* This will be clearly documented for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the state word
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorGetState
|
||||
* @see UITER_NO_STATE
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
uiter_getState(const UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Restore the "state" of the iterator using a state word from a getState() call.
|
||||
* This is a convenience function that calls iter->setState(iter, state, pErrorCode)
|
||||
* if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param state the state word from a getState() call
|
||||
* on a same-type, same-string iterator
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorSetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a string.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the string s
|
||||
* with iteration boundaries start=index=0 and length=limit=string length.
|
||||
* The "provider" may set the start, index, and limit values at any time
|
||||
* within the range 0..length.
|
||||
* The length field will be ignored.
|
||||
*
|
||||
* The string pointer s is set into UCharIterator.context without copying
|
||||
* or reallocating the string contents.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s String to iterate over
|
||||
* @param length Length of s, or -1 if NUL-terminated
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a UTF-16BE string
|
||||
* (byte vector with a big-endian pair of bytes per UChar).
|
||||
*
|
||||
* Everything works just like with a normal UChar iterator (uiter_setString),
|
||||
* except that UChars are assembled from byte pairs,
|
||||
* and that the length argument here indicates an even number of bytes.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s UTF-16BE string to iterate over
|
||||
* @param length Length of s as an even number of bytes, or -1 if NUL-terminated
|
||||
* (NUL means pair of 0 bytes at even index from s)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see uiter_setString
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a UTF-8 string.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the UTF-8 string s
|
||||
* with UTF-8 iteration boundaries 0 and length.
|
||||
* The implementation counts the UTF-16 index on the fly and
|
||||
* lazily evaluates the UTF-16 length of the text.
|
||||
*
|
||||
* The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
|
||||
* When the reservedField is not 0, then it contains a supplementary code point
|
||||
* and the UTF-16 index is between the two corresponding surrogates.
|
||||
* At that point, the UTF-8 index is behind that code point.
|
||||
*
|
||||
* The UTF-8 string pointer s is set into UCharIterator.context without copying
|
||||
* or reallocating the string contents.
|
||||
*
|
||||
* getState() returns a state value consisting of
|
||||
* - the current UTF-8 source byte index (bits 31..1)
|
||||
* - a flag (bit 0) that indicates whether the UChar position is in the middle
|
||||
* of a surrogate pair
|
||||
* (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
|
||||
*
|
||||
* getState() cannot also encode the UTF-16 index in the state value.
|
||||
* move(relative to limit or length), or
|
||||
* move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s UTF-8 string to iterate over
|
||||
* @param length Length of s in bytes, or -1 if NUL-terminated
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to wrap around a C++ CharacterIterator.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration using the
|
||||
* CharacterIterator charIter.
|
||||
*
|
||||
* The CharacterIterator pointer charIter is set into UCharIterator.context
|
||||
* without copying or cloning the CharacterIterator object.
|
||||
* The other "protected" UCharIterator fields are set to 0 and will be ignored.
|
||||
* The iteration index and boundaries are controlled by the CharacterIterator.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param charIter CharacterIterator to wrap
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a C++ Replaceable.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the
|
||||
* Replaceable rep with iteration boundaries start=index=0 and
|
||||
* length=limit=rep->length().
|
||||
* The "provider" may set the start, index, and limit values at any time
|
||||
* within the range 0..length=rep->length().
|
||||
* The length field will be ignored.
|
||||
*
|
||||
* The Replaceable pointer rep is set into UCharIterator.context without copying
|
||||
* or cloning/reallocating the Replaceable object.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param rep Replaceable to iterate over
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
|
||||
|
||||
#endif
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
1393
app/src/main/cpp/icu4c/include/unicode/uloc.h
Normal file
1393
app/src/main/cpp/icu4c/include/unicode/uloc.h
Normal file
File diff suppressed because it is too large
Load Diff
491
app/src/main/cpp/icu4c/include/unicode/umachine.h
Normal file
491
app/src/main/cpp/icu4c/include/unicode/umachine.h
Normal file
@@ -0,0 +1,491 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: umachine.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This file defines basic types and constants for ICU to be
|
||||
* platform-independent. umachine.h and utf.h are included into
|
||||
* utypes.h to provide all the general definitions for ICU.
|
||||
* All of these definitions used to be in utypes.h before
|
||||
* the UTF-handling macros made this unmaintainable.
|
||||
*/
|
||||
|
||||
#ifndef __UMACHINE_H__
|
||||
#define __UMACHINE_H__
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Basic types and constants for UTF
|
||||
*
|
||||
* <h2> Basic types and constants for UTF </h2>
|
||||
* This file defines basic types and constants for utf.h to be
|
||||
* platform-independent. umachine.h and utf.h are included into
|
||||
* utypes.h to provide all the general definitions for ICU.
|
||||
* All of these definitions used to be in utypes.h before
|
||||
* the UTF-handling macros made this unmaintainable.
|
||||
*
|
||||
*/
|
||||
/*==========================================================================*/
|
||||
/* Include platform-dependent definitions */
|
||||
/* which are contained in the platform-specific file platform.h */
|
||||
/*==========================================================================*/
|
||||
|
||||
#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
|
||||
|
||||
/*
|
||||
* ANSI C headers:
|
||||
* stddef.h defines wchar_t
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*==========================================================================*/
|
||||
/* For C wrappers, we use the symbol U_CAPI. */
|
||||
/* This works properly if the includer is C or C++. */
|
||||
/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_CFUNC
|
||||
* This is used in a declaration of a library private ICU C function.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_CDECL_BEGIN
|
||||
* This is used to begin a declaration of a library private ICU C API.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_CDECL_END
|
||||
* This is used to end a declaration of a library private ICU C API.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
// C++ includer: U_CFUNC and the U_CDECL_BEGIN/U_CDECL_END pair give declarations C linkage.
#ifdef __cplusplus
|
||||
# define U_CFUNC extern "C"
|
||||
# define U_CDECL_BEGIN extern "C" {
|
||||
# define U_CDECL_END }
|
||||
// C includer: U_CFUNC is a plain extern and the BEGIN/END markers expand to nothing.
#else
|
||||
# define U_CFUNC extern
|
||||
# define U_CDECL_BEGIN
|
||||
# define U_CDECL_END
|
||||
#endif
|
||||
|
||||
#ifndef U_ATTRIBUTE_DEPRECATED
|
||||
/**
|
||||
* \def U_ATTRIBUTE_DEPRECATED
|
||||
* This is used for GCC specific attributes
|
||||
* @internal
|
||||
*/
|
||||
#if U_GCC_MAJOR_MINOR >= 302
|
||||
# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
|
||||
/**
|
||||
* \def U_ATTRIBUTE_DEPRECATED
|
||||
* This is used for Visual C++ specific attributes
|
||||
* @internal
|
||||
*/
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
|
||||
#else
|
||||
# define U_ATTRIBUTE_DEPRECATED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
|
||||
#define U_CAPI U_CFUNC U_EXPORT
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/
|
||||
#define U_STABLE U_CAPI
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */
|
||||
#define U_DRAFT U_CAPI
|
||||
/** This is used to declare a function as a deprecated public ICU C API */
|
||||
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */
|
||||
#define U_OBSOLETE U_CAPI
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
|
||||
#define U_INTERNAL U_CAPI
|
||||
|
||||
/**
|
||||
* \def U_OVERRIDE
|
||||
* Defined to the C++11 "override" keyword if available.
|
||||
* Denotes a class or member which is an override of the base class.
|
||||
* May result in an error if it is applied to something that is not an override.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_OVERRIDE
|
||||
#define U_OVERRIDE override
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_FINAL
|
||||
* Defined to the C++11 "final" keyword if available.
|
||||
* Denotes a class or member which may not be overridden in subclasses.
|
||||
* May result in an error if subclasses attempt to override.
|
||||
* @internal
|
||||
*/
|
||||
#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
|
||||
#define U_FINAL final
|
||||
#endif
|
||||
|
||||
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
|
||||
// series of statements wrapped in { } blocks and the caller could choose to
|
||||
// either treat them as if they were actual functions and end the invocation
|
||||
// with a trailing ; creating an empty statement after the block or else omit
|
||||
// this trailing ; using the knowledge that the macro would expand to { }.
|
||||
//
|
||||
// But doing so doesn't work well with macros that look like functions and
|
||||
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
|
||||
// switches to the standard solution of wrapping such macros in do { } while.
|
||||
//
|
||||
// This will however break existing code that depends on being able to invoke
|
||||
// these macros without a trailing ; so to be able to remain compatible with
|
||||
// such code the wrapper is itself defined as macros so that it's possible to
|
||||
// build ICU 65 and later with the old macro behaviour, like this:
|
||||
//
|
||||
// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
|
||||
// runConfigureICU ...
|
||||
//
|
||||
|
||||
/**
|
||||
* \def UPRV_BLOCK_MACRO_BEGIN
|
||||
* Defined as the "do" keyword by default.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UPRV_BLOCK_MACRO_BEGIN
|
||||
#define UPRV_BLOCK_MACRO_BEGIN do
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UPRV_BLOCK_MACRO_END
|
||||
* Defined as "while (false)" by default.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UPRV_BLOCK_MACRO_END
|
||||
#define UPRV_BLOCK_MACRO_END while (false)
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* limits for int32_t etc., like in POSIX inttypes.h */
|
||||
/*==========================================================================*/
|
||||
|
||||
#ifndef INT8_MIN
|
||||
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT8_MIN ((int8_t)(-128))
|
||||
#endif
|
||||
#ifndef INT16_MIN
|
||||
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT16_MIN ((int16_t)(-32767-1))
|
||||
#endif
|
||||
#ifndef INT32_MIN
|
||||
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT32_MIN ((int32_t)(-2147483647-1))
|
||||
#endif
|
||||
|
||||
#ifndef INT8_MAX
|
||||
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT8_MAX ((int8_t)(127))
|
||||
#endif
|
||||
#ifndef INT16_MAX
|
||||
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT16_MAX ((int16_t)(32767))
|
||||
#endif
|
||||
#ifndef INT32_MAX
|
||||
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT32_MAX ((int32_t)(2147483647))
|
||||
#endif
|
||||
|
||||
#ifndef UINT8_MAX
|
||||
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT8_MAX ((uint8_t)(255U))
|
||||
#endif
|
||||
#ifndef UINT16_MAX
|
||||
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT16_MAX ((uint16_t)(65535U))
|
||||
#endif
|
||||
#ifndef UINT32_MAX
|
||||
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT32_MAX ((uint32_t)(4294967295U))
|
||||
#endif
|
||||
|
||||
#if defined(U_INT64_T_UNAVAILABLE)
|
||||
# error int64_t is required for decimal format and rule-based number format.
|
||||
#else
|
||||
# ifndef INT64_C
|
||||
/**
|
||||
* Provides a platform independent way to specify a signed 64-bit integer constant.
|
||||
* note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
# define INT64_C(c) c ## LL
|
||||
# endif
|
||||
# ifndef UINT64_C
|
||||
/**
|
||||
* Provides a platform independent way to specify an unsigned 64-bit integer constant.
|
||||
* note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
# define UINT64_C(c) c ## ULL
|
||||
# endif
|
||||
# ifndef U_INT64_MIN
|
||||
/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
|
||||
# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
|
||||
# endif
|
||||
# ifndef U_INT64_MAX
|
||||
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
|
||||
# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
|
||||
# endif
|
||||
# ifndef U_UINT64_MAX
|
||||
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
|
||||
# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Boolean data type */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* The ICU boolean type, a signed-byte integer.
|
||||
* ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
|
||||
* Also provides a fixed type definition, as opposed to
|
||||
* type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef int8_t UBool;
|
||||
|
||||
/**
|
||||
* \def U_DEFINE_FALSE_AND_TRUE
|
||||
* Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers.
|
||||
* These obsolete macros sometimes break compilation of other code that
|
||||
* defines enum constants or similar with these names.
|
||||
* C++ has long defined bool/false/true.
|
||||
* C99 also added definitions for these, although as macros; see stdbool.h.
|
||||
*
|
||||
* You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
|
||||
*
|
||||
* @internal ICU 68
|
||||
*/
|
||||
#ifdef U_DEFINE_FALSE_AND_TRUE
|
||||
// Use the predefined value.
|
||||
#elif defined(U_COMBINED_IMPLEMENTATION) || \
|
||||
defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
|
||||
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
|
||||
defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
// Inside ICU: Keep FALSE & TRUE available.
|
||||
# define U_DEFINE_FALSE_AND_TRUE 1
|
||||
#else
|
||||
// Outside ICU: Avoid collision with non-macro definitions of FALSE & TRUE.
|
||||
# define U_DEFINE_FALSE_AND_TRUE 0
|
||||
#endif
|
||||
|
||||
#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
|
||||
#ifndef TRUE
|
||||
/**
|
||||
* The TRUE value of a UBool.
|
||||
*
|
||||
* @deprecated ICU 68 Use standard "true" instead.
|
||||
*/
|
||||
# define TRUE 1
|
||||
#endif
|
||||
#ifndef FALSE
|
||||
/**
|
||||
* The FALSE value of a UBool.
|
||||
*
|
||||
* @deprecated ICU 68 Use standard "false" instead.
|
||||
*/
|
||||
# define FALSE 0
|
||||
#endif
|
||||
#endif // U_DEFINE_FALSE_AND_TRUE
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Unicode data types */
|
||||
/*==========================================================================*/
|
||||
|
||||
/* wchar_t-related definitions -------------------------------------------- */
|
||||
|
||||
/*
|
||||
* \def U_WCHAR_IS_UTF16
|
||||
* Defined if wchar_t uses UTF-16.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
/*
|
||||
* \def U_WCHAR_IS_UTF32
|
||||
* Defined if wchar_t uses UTF-32.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
// Auto-detect the wchar_t encoding only when neither macro has been predefined.
// The first matching branch wins; if none matches, neither macro gets defined.
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
|
||||
// __STDC_ISO_10646__ is defined: choose UTF-16 or UTF-32 from the wchar_t size.
# ifdef __STDC_ISO_10646__
|
||||
# if (U_SIZEOF_WCHAR_T==2)
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# elif (U_SIZEOF_WCHAR_T==4)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# endif
|
||||
// __UCS2__ is defined: UTF-16 only for platforms in the U_PF_OS390..U_PF_OS400 range with a 2-byte wchar_t.
# elif defined __UCS2__
|
||||
# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# endif
|
||||
// __UCS4__ is defined (or __UTF32__ on U_PF_OS400): UTF-32 only with a 4-byte wchar_t.
# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
|
||||
# if (U_SIZEOF_WCHAR_T==4)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# endif
|
||||
// No compiler hint: Darwin-based platforms, and Linux-based ones with a 4-byte wchar_t, are treated as UTF-32.
# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
// No compiler hint: platforms with the Win32 API are treated as UTF-16.
# elif U_PLATFORM_HAS_WIN32_API
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* UChar and UChar32 definitions -------------------------------------------- */
|
||||
|
||||
/** Number of bytes in a UChar. @stable ICU 2.0 */
|
||||
#define U_SIZEOF_UCHAR 2
|
||||
|
||||
/**
|
||||
* \def U_CHAR16_IS_TYPEDEF
|
||||
* If 1, then char16_t is a typedef and not a real type (yet)
|
||||
* @internal
|
||||
*/
|
||||
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
|
||||
// for AIX, uchar.h needs to be included
|
||||
# include <uchar.h>
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
|
||||
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#else
|
||||
# define U_CHAR16_IS_TYPEDEF 0
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* \var UChar
|
||||
*
|
||||
* The base type for UTF-16 code units and pointers.
|
||||
* Unsigned 16-bit integer.
|
||||
* Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
|
||||
*
|
||||
* UChar is configurable by defining the macro UCHAR_TYPE
|
||||
* on the preprocessor or compiler command line:
|
||||
* -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
|
||||
* (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
|
||||
* This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
|
||||
*
|
||||
* The default is UChar=char16_t.
|
||||
*
|
||||
* C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
|
||||
*
|
||||
* In C, char16_t is a simple typedef of uint_least16_t.
|
||||
* ICU requires uint_least16_t=uint16_t for data memory mapping.
|
||||
* On macOS, char16_t is not available because the uchar.h standard header is missing.
|
||||
*
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
|
||||
#if 1
|
||||
// #if 1 is normal. UChar defaults to char16_t in C++.
|
||||
// For configuration testing of UChar=uint16_t temporarily change this to #if 0.
|
||||
// The intltest Makefile #defines UCHAR_TYPE=char16_t,
|
||||
// so we only #define it to uint16_t if it is undefined so far.
|
||||
#elif !defined(UCHAR_TYPE)
|
||||
# define UCHAR_TYPE uint16_t
|
||||
#endif
|
||||
|
||||
// Select the underlying type of UChar; the first matching branch wins.
#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
|
||||
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
|
||||
// Inside the ICU library code, never configurable.
|
||||
typedef char16_t UChar;
|
||||
#elif defined(UCHAR_TYPE)
// Not inside the ICU library code: honor the UCHAR_TYPE macro when it is defined.
|
||||
typedef UCHAR_TYPE UChar;
|
||||
#elif (U_CPLUSPLUS_VERSION >= 11)
// No UCHAR_TYPE and U_CPLUSPLUS_VERSION is at least 11: use char16_t.
|
||||
typedef char16_t UChar;
|
||||
#else
// Otherwise fall back to uint16_t.
|
||||
typedef uint16_t UChar;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \var OldUChar
|
||||
* Default ICU 58 definition of UChar.
|
||||
* A base type for UTF-16 code units and pointers.
|
||||
* Unsigned 16-bit integer.
|
||||
*
|
||||
* Define OldUChar to be wchar_t if that is 16 bits wide.
|
||||
* If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if that macro is defined, or else uint16_t.
|
||||
*
|
||||
* This makes the definition of OldUChar platform-dependent
|
||||
* but allows direct string type compatibility with platforms with
|
||||
* 16-bit wchar_t types.
|
||||
*
|
||||
* This is how UChar was defined in ICU 58, for transition convenience.
|
||||
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
|
||||
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
|
||||
*
|
||||
* @stable ICU 59
|
||||
*/
|
||||
// Select the underlying type of OldUChar; unlike UChar, this chain never consults UCHAR_TYPE.
#if U_SIZEOF_WCHAR_T==2
// wchar_t is 2 bytes wide: use it directly.
|
||||
typedef wchar_t OldUChar;
|
||||
#elif defined(__CHAR16_TYPE__)
// wchar_t is not 2 bytes wide but the __CHAR16_TYPE__ macro is defined: use that type.
|
||||
typedef __CHAR16_TYPE__ OldUChar;
|
||||
#else
// Otherwise fall back to uint16_t.
|
||||
typedef uint16_t OldUChar;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Define UChar32 as a type for single Unicode code points.
|
||||
* UChar32 is a signed 32-bit integer (same as int32_t).
|
||||
*
|
||||
* The Unicode code point range is 0..0x10ffff.
|
||||
* All other values (negative or >=0x110000) are illegal as Unicode code points.
|
||||
* They may be used as sentinel values to indicate "done", "error"
|
||||
* or similar non-code point conditions.
|
||||
*
|
||||
* Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
|
||||
* to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
|
||||
* or else to be uint32_t.
|
||||
* That is, the definition of UChar32 was platform-dependent.
|
||||
*
|
||||
* @see U_SENTINEL
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
typedef int32_t UChar32;
|
||||
|
||||
/**
 * This value is intended for sentinel values for APIs that
 * (take or) return single code points (UChar32).
 * It is outside of the Unicode code point range 0..0x10ffff.
 *
 * For example, a "done" or "error" value in a new API
 * could be indicated with U_SENTINEL.
 *
 * ICU APIs designed before ICU 2.4 usually define service-specific "done"
 * values, mostly 0xffff.
 * Those may need to be distinguished from
 * actual U+ffff text contents by calling functions like
 * CharacterIterator::hasNext() or UnicodeString::length().
 *
 * @return -1
 * @see UChar32
 * @stable ICU 2.4
 */
#define U_SENTINEL (-1)
|
||||
|
||||
#include "unicode/urename.h"
|
||||
|
||||
#endif
|
||||
62
app/src/main/cpp/icu4c/include/unicode/umisc.h
Normal file
62
app/src/main/cpp/icu4c/include/unicode/umisc.h
Normal file
@@ -0,0 +1,62 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: umisc.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999oct15
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef UMISC_H
|
||||
#define UMISC_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: misc definitions
|
||||
*
|
||||
* This file contains miscellaneous definitions for the C APIs.
|
||||
*/
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
 * A struct representing a range of text containing a specific field.
 * All three members are plain 32-bit signed integers.
 * @stable ICU 2.0
 */
typedef struct UFieldPosition {
    /**
     * The field
     * @stable ICU 2.0
     */
    int32_t field;
    /**
     * The start of the text range containing field
     * @stable ICU 2.0
     */
    int32_t beginIndex;
    /**
     * The limit of the text range containing field
     * @stable ICU 2.0
     */
    int32_t endIndex;
} UFieldPosition;
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
/**
 * Opaque type returned by registerInstance, registerFactory and unregister for service registration.
 * Only declared when service registration is compiled in (UCONFIG_NO_SERVICE is 0 or undefined).
 * @stable ICU 2.6
 */
typedef const void* URegistryKey;
#endif
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
4757
app/src/main/cpp/icu4c/include/unicode/unistr.h
Normal file
4757
app/src/main/cpp/icu4c/include/unicode/unistr.h
Normal file
File diff suppressed because it is too large
Load Diff
324
app/src/main/cpp/icu4c/include/unicode/uobject.h
Normal file
324
app/src/main/cpp/icu4c/include/unicode/uobject.h
Normal file
@@ -0,0 +1,324 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: uobject.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jun26
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UOBJECT_H__
|
||||
#define __UOBJECT_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/platform.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Common ICU base class UObject.
|
||||
*/
|
||||
|
||||
/**
 * \def U_NO_THROW
 *         Since ICU 64, use U_NOEXCEPT instead.
 *
 *         Previously, define this to define the throw() specification so
 *                 certain functions do not throw any exceptions
 *
 *         UMemory operator new methods should have the throw() specification
 *         appended to them, so that the compiler adds the additional NULL check
 *         before calling constructors. Without, if <code>operator new</code> returns NULL the
 *         constructor is still called, and if the constructor references member
 *         data, (which it typically does), the result is a segmentation violation.
 *
 *         A definition supplied before this header is included takes precedence;
 *         otherwise U_NO_THROW is an alias for U_NOEXCEPT.
 *
 * @stable ICU 4.2. Since ICU 64, Use U_NOEXCEPT instead. See ICU-20422.
 */
#ifndef U_NO_THROW
#define U_NO_THROW U_NOEXCEPT
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* UClassID-based RTTI */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
 * UClassID is used to identify classes without using the compiler's RTTI.
 * This was used before C++ compilers consistently supported RTTI.
 * ICU 4.6 requires compiler RTTI to be turned on.
 *
 * Each class hierarchy which needs
 * to implement polymorphic clone() or operator==() defines two methods,
 * described in detail below.  UClassID values can be compared using
 * operator==(). Nothing else should be done with them.
 *
 * \par
 * In class hierarchies that implement "poor man's RTTI",
 * each concrete subclass implements getDynamicClassID() in the same way:
 *
 * \code
 *      class Derived {
 *      public:
 *          virtual UClassID getDynamicClassID() const
 *            { return Derived::getStaticClassID(); }
 *      }
 * \endcode
 *
 * Each concrete class implements getStaticClassID() as well, which allows
 * clients to test for a specific type.
 *
 * \code
 *      class Derived {
 *      public:
 *          static UClassID U_EXPORT2 getStaticClassID();
 *      private:
 *          static char fgClassID;
 *      }
 *
 *      // In Derived.cpp:
 *      UClassID Derived::getStaticClassID()
 *        { return (UClassID)&Derived::fgClassID; }
 *      char Derived::fgClassID = 0; // Value is irrelevant
 * \endcode
 * @stable ICU 2.0
 */
typedef void* UClassID;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* UMemory is the common ICU base class.
|
||||
* All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
|
||||
*
|
||||
* This is primarily to make it possible and simple to override the
|
||||
* C++ memory management by adding new/delete operators to this base class.
|
||||
*
|
||||
* To override ALL ICU memory management, including that from plain C code,
|
||||
* replace the allocation functions declared in cmemory.h
|
||||
*
|
||||
* UMemory does not contain any virtual functions.
|
||||
* Common "boilerplate" functions are defined in UObject.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API UMemory {
|
||||
public:
|
||||
|
||||
/* test versions for debugging shaper heap memory problems */
|
||||
#ifdef SHAPER_MEMORY_DEBUG
|
||||
static void * NewArray(int size, int count);
|
||||
static void * GrowArray(void * array, int newSize );
|
||||
static void FreeArray(void * array );
|
||||
#endif
|
||||
|
||||
#if U_OVERRIDE_CXX_ALLOCATION
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* simple, non-class types are allocated using the macros in common/cmemory.h
|
||||
* (uprv_malloc(), uprv_free(), uprv_realloc());
|
||||
* they or something else could be used here to implement C++ new/delete
|
||||
* for ICU4C C++ classes
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void * U_EXPORT2 operator new(size_t size) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* See new().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void * U_EXPORT2 operator new[](size_t size) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* simple, non-class types are allocated using the macros in common/cmemory.h
|
||||
* (uprv_malloc(), uprv_free(), uprv_realloc());
|
||||
* they or something else could be used here to implement C++ new/delete
|
||||
* for ICU4C C++ classes
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void U_EXPORT2 operator delete(void *p) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* See delete().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void U_EXPORT2 operator delete[](void *p) U_NOEXCEPT;
|
||||
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
/**
|
||||
* Override for ICU4C C++ memory management for STL.
|
||||
* See new().
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NOEXCEPT { return ptr; }
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management for STL.
|
||||
* See delete().
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
static inline void U_EXPORT2 operator delete(void *, void *) U_NOEXCEPT {}
|
||||
#endif /* U_HAVE_PLACEMENT_NEW */
|
||||
#if U_HAVE_DEBUG_LOCATION_NEW
|
||||
/**
|
||||
* This method overrides the MFC debug version of the operator new
|
||||
*
|
||||
* @param size The requested memory size
|
||||
* @param file The file where the allocation was requested
|
||||
* @param line The line where the allocation was requested
|
||||
*/
|
||||
static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NOEXCEPT;
|
||||
/**
|
||||
* This method provides a matching delete for the MFC debug new
|
||||
*
|
||||
* @param p The pointer to the allocated memory
|
||||
* @param file The file where the allocation was requested
|
||||
* @param line The line where the allocation was requested
|
||||
*/
|
||||
static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NOEXCEPT;
|
||||
#endif /* U_HAVE_DEBUG_LOCATION_NEW */
|
||||
#endif /* U_OVERRIDE_CXX_ALLOCATION */
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
UMemory &UMemory::operator=(const UMemory &);
|
||||
*/
|
||||
};
|
||||
|
||||
/**
|
||||
* UObject is the common ICU "boilerplate" class.
|
||||
* UObject inherits UMemory (starting with ICU 2.4),
|
||||
* and all other public ICU C++ classes
|
||||
* are derived from UObject (starting with ICU 2.2).
|
||||
*
|
||||
* UObject contains common virtual functions, in particular a virtual destructor.
|
||||
*
|
||||
* The clone() function is not available in UObject because it is not
|
||||
* implemented by all ICU classes.
|
||||
* Many ICU services provide a clone() function for their class trees,
|
||||
* defined on the service's C++ base class
|
||||
* (which itself is a subclass of UObject).
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
class U_COMMON_API UObject : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual ~UObject();
|
||||
|
||||
/**
|
||||
* ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
|
||||
* The base class implementation returns a dummy value.
|
||||
*
|
||||
* Use compiler RTTI rather than ICU's "poor man's RTTI".
|
||||
* Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
protected:
|
||||
// the following functions are protected to prevent instantiation and
|
||||
// direct use of UObject itself
|
||||
|
||||
// default constructor
|
||||
// inline UObject() {}
|
||||
|
||||
// copy constructor
|
||||
// inline UObject(const UObject &other) {}
|
||||
|
||||
#if 0
|
||||
// TODO Sometime in the future. Implement operator==().
|
||||
// (This comment inserted in 2.2)
|
||||
// some or all of the following "boilerplate" functions may be made public
|
||||
// in a future ICU4C release when all subclasses implement them
|
||||
|
||||
// assignment operator
|
||||
// (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
|
||||
// commented out because the implementation is the same as a compiler's default
|
||||
// UObject &operator=(const UObject &other) { return *this; }
|
||||
|
||||
// comparison operators
|
||||
virtual inline UBool operator==(const UObject &other) const { return this==&other; }
|
||||
inline UBool operator!=(const UObject &other) const { return !operator==(other); }
|
||||
|
||||
// clone() commented out from the base class:
|
||||
// some compilers do not support co-variant return types
|
||||
// (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
|
||||
// see also UObject class documentation.
|
||||
// virtual UObject *clone() const;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
UObject &UObject::operator=(const UObject &);
|
||||
*/
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
 * This is a simple macro to add ICU RTTI to an ICU object implementation.
 * This does not go into the header. This should only be used in *.cpp files.
 *
 * It defines both myClass::getStaticClassID() and myClass::getDynamicClassID().
 * The class ID is the address of a function-local static char, so only the
 * address matters, not the stored value.
 *
 * @param myClass The name of the class that needs RTTI defined.
 * @internal
 */
#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
    UClassID U_EXPORT2 myClass::getStaticClassID() { \
        static char classID = 0; \
        return (UClassID)&classID; \
    } \
    UClassID myClass::getDynamicClassID() const \
    { return myClass::getStaticClassID(); }
|
||||
|
||||
|
||||
/**
 * This macro adds ICU RTTI to an ICU abstract class implementation.
 * This macro should be invoked in *.cpp files.  The corresponding
 * header should declare getStaticClassID.
 *
 * Only myClass::getStaticClassID() is defined here; getDynamicClassID()
 * is not generated by this macro.
 *
 * @param myClass The name of the class that needs RTTI defined.
 * @internal
 */
#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
    UClassID U_EXPORT2 myClass::getStaticClassID() { \
        static char classID = 0; \
        return (UClassID)&classID; \
    }
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
1939
app/src/main/cpp/icu4c/include/unicode/urename.h
Normal file
1939
app/src/main/cpp/icu4c/include/unicode/urename.h
Normal file
File diff suppressed because it is too large
Load Diff
1689
app/src/main/cpp/icu4c/include/unicode/ustring.h
Normal file
1689
app/src/main/cpp/icu4c/include/unicode/ustring.h
Normal file
File diff suppressed because it is too large
Load Diff
1603
app/src/main/cpp/icu4c/include/unicode/utext.h
Normal file
1603
app/src/main/cpp/icu4c/include/unicode/utext.h
Normal file
File diff suppressed because it is too large
Load Diff
734
app/src/main/cpp/icu4c/include/unicode/utf16.h
Normal file
734
app/src/main/cpp/icu4c/include/unicode/utf16.h
Normal file
@@ -0,0 +1,734 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf16.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep09
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: 16-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (https://unicode-org.github.io/icu/userguide/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*/
|
||||
|
||||
#ifndef __UTF16_H__
|
||||
#define __UTF16_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 * The expansion is fully parenthesized so it can be embedded in any expression.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
|
||||
|
||||
/**
 * Is this code unit a lead surrogate (U+d800..U+dbff)?
 * Masks off the low 10 bits and compares the rest with 0xd800.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
|
||||
|
||||
/**
 * Is this code unit a trail surrogate (U+dc00..U+dfff)?
 * Masks off the low 10 bits and compares the rest with 0xdc00.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
|
||||
|
||||
/**
 * Is this code unit a surrogate (U+d800..U+dfff)?
 * Simply forwards to U_IS_SURROGATE.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
|
||||
|
||||
/**
 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
 * is it a lead surrogate?
 * Only bit 0x400 is inspected, so the result is meaningless for non-surrogates.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
|
||||
|
||||
/**
 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
 * is it a trail surrogate?
 * Only bit 0x400 is inspected, so the result is meaningless for non-surrogates.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
|
||||
|
||||
/**
 * Helper constant for U16_GET_SUPPLEMENTARY.
 * It folds the lead base (0xd800<<10), the trail base (0xdc00) and the
 * supplementary-plane offset (0x10000) into a single value to subtract.
 * @internal
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
|
||||
|
||||
/**
 * Get a supplementary code point value (U+10000..U+10ffff)
 * from its lead and trail surrogates.
 * The result is undefined if the input values are not
 * lead and trail surrogates.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
|
||||
|
||||
|
||||
/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The expansion is fully parenthesized so that the cast binds as one unit
 * wherever the macro is used (for example after sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
|
||||
|
||||
/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The expansion is fully parenthesized so that the cast binds as one unit
 * wherever the macro is used (for example after sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
|
||||
|
||||
/**
 * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * The comparison is done on the value converted to uint32_t.
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
|
||||
|
||||
/**
 * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
|
||||
|
||||
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * The macro arguments are evaluated more than once; do not pass
 * expressions with side effects.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to that unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * A trail surrogate at i==start is never combined with the unit before it,
 * and a lead surrogate at i+1==length is never combined with the unit after it.
 * The macro arguments are evaluated more than once; do not pass
 * expressions with side effects.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * The macro arguments are evaluated more than once; do not pass
 * expressions with side effects.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } else { \
                (c)=0xfffd; \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } else { \
                (c)=0xfffd; \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with forward iteration --------------------------------------- */
|
||||
|
||||
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * i must be a modifiable lvalue; it is incremented once or twice.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
 *
 * i must be a modifiable lvalue; it is incremented once or twice.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
 *
 * i must be a modifiable lvalue; it is incremented once or twice.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * i must be a modifiable lvalue, and c is evaluated more than once.
 *
 * @param s UChar * string buffer (written to, so it must not be const)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Safe" macro, checks for a valid code point.
 * If a surrogate pair is written, checks for sufficient space in the string.
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to true.
 *
 * Notes on what is and is not checked:
 * - Any value up to 0xffff is written as a single unit without further checks,
 *   so surrogate code points are stored as-is and the caller must ensure i<capacity.
 * - Negative values and values above 0x10ffff set isError and write nothing.
 * - On error, i is left unchanged.
 *
 * i must be a modifiable lvalue, and c is evaluated more than once.
 *
 * @param s UChar * string buffer (written to, so it must not be const)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=true; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * After a lead surrogate the offset is advanced by two units without
 * looking at the following unit.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The second unit is skipped only when the first is a lead surrogate,
 * the end of the string has not been reached, and the next unit is a
 * trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * n is evaluated once and copied into a local counter; a value of n<=0
 * leaves the offset unchanged.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * Iteration stops early when the offset reaches length, or, for a negative
 * length, when the unit at the offset is NUL.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
        U16_FWD_1(s, i, length); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * the offset only moves when i>start and the preceding unit is a lead surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */

/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16:
 * a trail surrogate is taken to be preceded by its lead surrogate,
 * which is skipped without being inspected.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * a second unit is only skipped when it lies at or after start and is a
 * lead surrogate preceding a trail surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 * Nothing is done when n is zero or negative.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * Iteration stops early once the offset reaches start.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        U16_BACK_1(s, start, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 * Only the unit before the offset is inspected.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * the offset only moves when start<i, the offset is before the string end
 * (or length is negative), and the units around it form a lead/trail pair.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
#endif
|
||||
882
app/src/main/cpp/icu4c/include/unicode/utf8.h
Normal file
882
app/src/main/cpp/icu4c/include/unicode/utf8.h
Normal file
@@ -0,0 +1,882 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf8.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: 8-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (https://unicode-org.github.io/icu/userguide/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*/
|
||||
|
||||
#ifndef __UTF8_H__
|
||||
#define __UTF8_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/* internal definitions ----------------------------------------------------- */

/**
 * Counts the trail bytes for a UTF-8 lead byte.
 * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @internal
 */
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (U8_IS_LEAD(leadByte) ? \
        ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
/**
 * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
 * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @internal
 */
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
/**
 * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
 * The result is assigned back to leadByte, so leadByte must be a modifiable lvalue.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 * @internal
 */
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
/**
 * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
 * Lead byte E0..EF bits 3..0 are used as byte index,
 * first trail byte bits 7..5 are used as bit index into that byte.
 * @see U8_IS_VALID_LEAD3_AND_T1
 * @internal
 */
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
/**
 * Internal 3-byte UTF-8 validity check.
 * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
 * The result is a bit mask value, not necessarily 1.
 * @internal
 */
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
/**
 * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
 * First trail byte bits 7..4 are used as byte index,
 * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
 * @see U8_IS_VALID_LEAD4_AND_T1
 * @internal
 */
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
/**
 * Internal 4-byte UTF-8 validity check.
 * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
 * The result is a bit mask value, not necessarily 1.
 * @internal
 */
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
/**
|
||||
* Function for handling "next code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
|
||||
|
||||
/**
|
||||
* Function for handling "append code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
|
||||
|
||||
/**
|
||||
* Function for handling "previous code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
|
||||
|
||||
/**
|
||||
* Function for handling "skip backward one code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */

/**
 * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_SINGLE(c) (((c)&0x80)==0)
/**
 * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
// 0x32=0xf4-0xc2
/**
 * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
/**
 * How many code units (bytes) are used for the UTF-8 encoding
 * of this Unicode code point?
 * c might be evaluated multiple times.
 * @param c 32-bit code point
 * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
 * @stable ICU 2.4
 */
#define U8_LENGTH(c) \
    ((uint32_t)(c)<=0x7f ? 1 : \
        ((uint32_t)(c)<=0x7ff ? 2 : \
            ((uint32_t)(c)<=0xd7ff ? 3 : \
                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
                ) \
            ) \
        ) \
    )
/**
 * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
 * @return 4
 * @stable ICU 2.4
 */
#define U8_MAX_LENGTH 4
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 * The result is undefined if the offset points to an illegal UTF-8
 * byte sequence.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
 *
 * The macro works on a local copy of the offset, which is why i itself is unchanged.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_GET
 * @stable ICU 2.4
 */
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_unsafe_index=(int32_t)(i); \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
} UPRV_BLOCK_MACRO_END
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to an illegal UTF-8 byte sequence, then
 * c is set to a negative value.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
 *
 * The macro works on a local copy of the offset, which is why i itself is unchanged.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset
 * @param i int32_t string offset, must be start<=i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_index=(i); \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to an illegal UTF-8 byte sequence, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_GET() if that distinction is important.
 *
 * The macro works on a local copy of the offset, which is why i itself is unchanged.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset
 * @param i int32_t string offset, must be start<=i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_GET
 * @stable ICU 51
 */
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_index=(i); \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */

/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * The result is undefined if the offset points to a trail byte
 * or an illegal UTF-8 sequence.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_NEXT
 * @stable ICU 2.4
 */
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        if((c)<0xe0) { \
            (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
        } else if((c)<0xf0) { \
            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
            (i)+=2; \
        } else { \
            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
            (i)+=3; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
 * c is set to a negative value.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
 * c is set to U+FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_NEXT() if that distinction is important.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_NEXT
 * @stable ICU 51
 */
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
/**
 * Shared implementation of U8_NEXT and U8_NEXT_OR_FFFD.
 * Reads the byte at offset i into c and post-increments i.
 * For a non-ASCII byte, the trail bytes are fetched, validated and assembled
 * one at a time; i advances past each trail byte that is accepted.
 * If the sequence is ill-formed or cut off by length, c is set to sub and
 * i stays just behind the last byte that was accepted.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset
 * @param length int32_t string length
 * @param c output UChar32 variable
 * @param sub value assigned to c for an ill-formed sequence
 * @internal
 */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        uint8_t __t = 0; \
        if((i)!=(length) && \
            /* fetch/validate/assemble all but last trail byte */ \
            ((c)>=0xe0 ? \
                ((c)<0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
                    U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
                    (__t&=0x3f, 1) \
                :  /* U+10000..U+10FFFF */ \
                    ((c)-=0xf0)<=4 && \
                    U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
                    ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
                    (__t=(s)[i]-0x80)<=0x3f) && \
                /* valid second-to-last trail byte */ \
                ((c)=((c)<<6)|__t, ++(i)!=(length)) \
            :  /* U+0080..U+07FF */ \
                (c)>=0xc2 && ((c)&=0x1f, 1)) && \
            /* last trail byte */ \
            (__t=(s)[i]-0x80)<=0x3f && \
            ((c)=((c)<<6)|__t, ++(i), 1)) { \
        } else { \
            (c)=(sub);  /* ill-formed*/ \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Append a code point to a string, overwriting 1 to 4 bytes.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * @param s uint8_t * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U8_APPEND
 * @stable ICU 2.4
 */
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); \
    if(__uc<=0x7f) { \
        (s)[(i)++]=(uint8_t)__uc; \
    } else { \
        if(__uc<=0x7ff) { \
            (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        } else { \
            if(__uc<=0xffff) { \
                (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Append a code point to a string, overwriting 1 to 4 bytes.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Safe" macro, checks for a valid code point.
 * If a non-ASCII code point is written, checks for sufficient space in the string.
 * If the code point is not valid or trail bytes do not fit,
 * then isError is set to true.
 * In that case nothing is written and the offset is not advanced.
 *
 * @param s uint8_t * string buffer (written to)
 * @param i int32_t string offset, must be i<capacity
 * @param capacity int32_t size of the string buffer
 * @param c UChar32 code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U8_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); \
    if(__uc<=0x7f) { \
        (s)[(i)++]=(uint8_t)__uc; \
    } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else { \
        (isError)=true; \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 * The step size is derived from the byte at the offset alone;
 * trail bytes are not inspected.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_FWD_1
 * @stable ICU 2.4
 */
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
} UPRV_BLOCK_MACRO_END
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * The offset always moves past the first byte; it moves past each following
 * trail byte only while the sequence stays valid and inside the string.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @see U8_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __b=(s)[(i)++]; \
    if(U8_IS_LEAD(__b) && (i)!=(length)) { \
        uint8_t __t1=(s)[i]; \
        if((0xe0<=__b && __b<0xf0)) { \
            if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } else if(__b<0xe0) { \
            if(U8_IS_TRAIL(__t1)) { \
                ++(i); \
            } \
        } else /* c>=0xf0 */ { \
            if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset forward by n code points, i.e. from one
 * code point boundary to the n-th following one.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * Nothing happens when n is zero or negative.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U8_FWD_N
 * @stable ICU 2.4
 */
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __left=(n); \
    for(; __left>0; --__left) { \
        U8_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset forward by n code points, i.e. from one
 * code point boundary to the n-th following one.
 * (Post-incrementing iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * Iteration stops early when the offset reaches length or, for a
 * negative length, when s[i] is the NUL byte.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U8_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __left=(n); \
    for(; __left>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__left) { \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Snap a random-access offset back to the start of the code point
 * that contains it.
 * While the offset points to a UTF-8 trail byte it is moved backward;
 * an offset that is not on a trail byte is left as it is.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_START
 * @stable ICU 2.4
 */
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Snap a random-access offset back to the start of the code point
 * that contains it.
 * If the offset points to a UTF-8 trail byte, the offset is replaced by
 * the result of utf8_back1SafeBody(); otherwise it is not modified.
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<=i
 * @see U8_SET_CP_START_UNSAFE
 * @see U8_TRUNCATE_IF_INCOMPLETE
 * @stable ICU 2.4
 */
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* only a trail byte needs the out-of-line backward search */ \
    if(U8_IS_TRAIL((s)[(i)])) { (i)=utf8_back1SafeBody(s, start, (i)); } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * If the string ends with a UTF-8 byte sequence that is valid so far
 * but incomplete, then reduce the length of the string to end before
 * the lead byte of that incomplete sequence.
 * For example, if the string ends with E1 80, the length is reduced by 2.
 *
 * In all other cases (the string ends with a complete sequence, or it is not
 * possible for any further trail byte to extend the trailing sequence)
 * the length remains unchanged.
 *
 * Useful for processing text split across multiple buffers
 * (save the incomplete sequence for later)
 * and for optimizing iteration
 * (check for string length only once per character).
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * Unlike U8_SET_CP_START(), this macro never reads s[length].
 *
 * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
 *
 * The string argument is parenthesized on every use, so an expression
 * such as buf+offset may be passed for s.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param length int32_t string length (usually start<=length)
 * @see U8_SET_CP_START
 * @stable ICU 61
 */
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((length)>(start)) { \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* a lone lead byte at the end: drop it */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* 3- or 4-byte lead followed by one acceptable trail byte */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                /* two trail bytes: only a 4-byte lead can still be incomplete */ \
                uint8_t __b3=(s)[(length)-3]; \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind an illegal UTF-8 sequence.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_PREV
 * @stable ICU 2.4
 */
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __byte, __trails=1, __bits=6; \
        \
        /* c is a trail byte: keep its low 6 bits */ \
        (c)&=0x3f; \
        /* fold in earlier bytes, 6 bits each, until a byte >=0xc0 is read */ \
        while((__byte=(s)[--(i)])<0xc0) { \
            (c)|=(UChar32)(__byte&0x3f)<<__bits; \
            ++__trails; \
            __bits+=6; \
        } \
        /* mask the byte that ended the loop, then add it above the trail bits */ \
        U8_MASK_LEAD_BYTE(__byte, __trails); \
        (c)|=(UChar32)__byte<<__bits; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
 *
 * A single-byte value is returned directly; anything else is decoded by
 * utf8_prevCharSafeBody(). The string argument is parenthesized before it
 * is cast, so an expression (e.g. a conditional) may be passed for s.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_PREV() if that distinction is important.
 *
 * A single-byte value is returned directly; anything else is decoded by
 * utf8_prevCharSafeBody(). The string argument is parenthesized before it
 * is cast, so an expression (e.g. a conditional) may be passed for s.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_PREV
 * @stable ICU 51
 */
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Step the string offset backward over one code point, i.e. from one
 * code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The offset is decremented at least once and then further for as long
 * as it lands on a trail byte.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_BACK_1
 * @stable ICU 2.4
 */
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    do { \
        --(i); \
    } while(U8_IS_TRAIL((s)[i])); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Step the string offset backward over one code point, i.e. from one
 * code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The offset is decremented once; if it then points to a trail byte,
 * it is replaced by the result of utf8_back1SafeBody().
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @see U8_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U8_IS_TRAIL((s)[i])) { \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset backward by n code points, i.e. from one
 * code point boundary to the n-th one before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * Nothing happens when n is zero or negative.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U8_BACK_N
 * @stable ICU 2.4
 */
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __left=(n); \
    for(; __left>0; --__left) { \
        U8_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset backward by n code points, i.e. from one
 * code point boundary to the n-th one before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * Iteration stops early once the offset is no longer greater than start.
 *
 * @param s const uint8_t * string
 * @param start int32_t index of the start of the string
 * @param i int32_t string offset, must be start<i
 * @param n number of code points to skip
 * @see U8_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __left=(n); \
    for(; __left>0 && (i)>(start); --__left) { \
        U8_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Snap a random-access offset forward to the code point boundary that
 * follows the code point it is in.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* back up to the start of the preceding code point ... */ \
    U8_BACK_1_UNSAFE(s, i); \
    /* ... then step forward over all of it */ \
    U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Snap a random-access offset forward to the code point boundary that
 * follows the code point it is in.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * No adjustment is attempted when the offset is not greater than start,
 * or when length is non-negative and the offset is not below it.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<=i<=length
 * @param length int32_t string length
 * @see U8_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i)) { \
        if((i)<(length) || (length)<0) { \
            U8_BACK_1(s, start, i); \
            U8_FWD_1(s, i, length); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
#endif
|
||||
732
app/src/main/cpp/icu4c/include/unicode/utypes.h
Normal file
732
app/src/main/cpp/icu4c/include/unicode/utypes.h
Normal file
@@ -0,0 +1,732 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1996-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* FILE NAME : UTYPES.H (formerly ptypes.h)
|
||||
*
|
||||
* Date Name Description
|
||||
* 12/11/96 helena Creation.
|
||||
* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
|
||||
* uint8, uint16, and uint32.
|
||||
* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
|
||||
* well as C++.
|
||||
* Modified to use memcpy() for uprv_arrayCopy() fns.
|
||||
* 04/14/97 aliu Added TPlatformUtilities.
|
||||
* 05/07/97 aliu Added import/export specifiers (replacing the old
|
||||
* broken EXT_CLASS). Added version number for our
|
||||
* code. Cleaned up header.
|
||||
* 6/20/97 helena Java class name change.
|
||||
* 08/11/98 stephen UErrorCode changed from typedef to enum
|
||||
* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
|
||||
* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
|
||||
* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
|
||||
* 04/20/99 stephen Cleaned up & reworked for autoconf.
|
||||
* Renamed to utypes.h.
|
||||
* 05/05/99 stephen Changed to use <inttypes.h>
|
||||
* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UTYPES_H
|
||||
#define UTYPES_H
|
||||
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "unicode/uconfig.h"
|
||||
#include <float.h>
|
||||
|
||||
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief Basic definitions for ICU, for both C and C++ APIs
|
||||
*
|
||||
* This file defines basic types, constants, and enumerations directly or
|
||||
* indirectly by including other header files, especially utf.h for the
|
||||
* basic character and string definitions and umachine.h for consistent
|
||||
* integer and other types.
|
||||
*/
|
||||
|
||||
|
||||
/**
 * \def U_SHOW_CPLUSPLUS_API
 * In a C++ translation unit this is defined as 1 unless the includer has
 * already defined it. In a C translation unit any earlier definition is
 * discarded and the value is forced to 0.
 * @internal
 */
#if !defined(__cplusplus)
#   undef U_SHOW_CPLUSPLUS_API
#   define U_SHOW_CPLUSPLUS_API 0
#elif !defined(U_SHOW_CPLUSPLUS_API)
#   define U_SHOW_CPLUSPLUS_API 1
#endif
|
||||
|
||||
/** @{ API visibility control */
|
||||
|
||||
/**
 * \def U_HIDE_DRAFT_API
 * Define this to 1 to request that draft API be "hidden"
 * @internal
 */
/**
 * \def U_HIDE_INTERNAL_API
 * Define this to 1 to request that internal API be "hidden"
 * @internal
 */
/*
 * Both defaults are gated on U_DEFAULT_SHOW_DRAFT; each one can be
 * suppressed individually by defining the matching U_SHOW_..._API macro.
 */
#if !U_DEFAULT_SHOW_DRAFT
#   ifndef U_SHOW_DRAFT_API
#       define U_HIDE_DRAFT_API 1
#   endif
#   ifndef U_SHOW_INTERNAL_API
#       define U_HIDE_INTERNAL_API 1
#   endif
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/* ICUDATA naming scheme */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
 * \def U_ICUDATA_TYPE_LETTER
 *
 * This is a platform-dependent string containing one letter:
 * - b for big-endian, ASCII-family platforms
 * - l for little-endian, ASCII-family platforms
 * - e for big-endian, EBCDIC-family platforms
 * This letter is part of the common data file name.
 * @stable ICU 2.0
 */

/**
 * \def U_ICUDATA_TYPE_LITLETTER
 * The non-string form of U_ICUDATA_TYPE_LETTER
 * @stable ICU 2.0
 */
#if U_CHARSET_FAMILY && U_IS_BIG_ENDIAN
    /* EBCDIC - should always be BE */
#   define U_ICUDATA_TYPE_LETTER "e"
#   define U_ICUDATA_TYPE_LITLETTER e
#elif U_CHARSET_FAMILY
    /* little-endian EBCDIC is not supported */
#   error "Don't know what to do with little endian EBCDIC!"
#   define U_ICUDATA_TYPE_LETTER "x"
#   define U_ICUDATA_TYPE_LITLETTER x
#elif U_IS_BIG_ENDIAN
    /* Big-endian ASCII */
#   define U_ICUDATA_TYPE_LETTER "b"
#   define U_ICUDATA_TYPE_LITLETTER b
#else
    /* Little-endian ASCII */
#   define U_ICUDATA_TYPE_LETTER "l"
#   define U_ICUDATA_TYPE_LITLETTER l
#endif
|
||||
|
||||
/**
 * A single string literal containing the icudata stub name, built by
 * concatenating "icudt", U_ICU_VERSION_SHORT and U_ICUDATA_TYPE_LETTER;
 * e.g. 'icudt18e' for ICU 1.8.x on EBCDIC.
 * @stable ICU 2.0
 */
#define U_ICUDATA_NAME    "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
#if !defined(U_HIDE_INTERNAL_API)
#define U_USRDATA_NAME    "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER  /**< @internal */
#define U_USE_USRDATA     0  /**< @internal */
#endif  /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
 * U_ICUDATA_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
 * Defined as a literal, not a string.
 * Tricky preprocessor use: the ## operator replaces macro parameters with the
 * literal text from the corresponding macro invocation, _before_ other macro
 * substitutions. Nested \#defines are needed to get the actual version number
 * rather than the literal text U_ICU_VERSION_MAJOR_NUM into the name.
 * The net result will be something of the form
 *     \#define U_ICUDATA_ENTRY_POINT icudt19_dat
 * @stable ICU 2.4
 */
#define U_ICUDATA_ENTRY_POINT  U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)

#if !defined(U_HIDE_INTERNAL_API)
/**
 * Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
 * @internal
 */
#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)

/**
 * Do not use.
 * @internal
 */
#if !defined(U_DEF_ICUDATA_ENTRY_POINT)
/* affected by symbol renaming. See platform.h */
#   ifdef U_LIB_SUFFIX_C_NAME
        /* a library suffix is configured: splice it in before the version */
#       define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff##major##_dat
#   else
#       define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
#   endif
#endif
#endif  /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
 * \def NULL
 * Fallback definition of NULL, used only when no earlier header has
 * defined it: nullptr for C++ and ((void *)0) for C.
 * @stable ICU 2.0
 */
#if !defined(NULL)
#   if defined(__cplusplus)
#       define NULL nullptr
#   else
#       define NULL ((void *)0)
#   endif
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Calendar/TimeZone data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
 * Date and Time data type.
 * This is a primitive data type that holds the date and time
 * as the number of milliseconds since 1970-jan-01, 00:00 UTC.
 * UTC leap seconds are ignored.
 * @stable ICU 2.0
 */
typedef double UDate;

/** The number of milliseconds per second @stable ICU 2.0 */
#define U_MILLIS_PER_SECOND        (1000)
/** The number of milliseconds per minute @stable ICU 2.0 */
#define U_MILLIS_PER_MINUTE       (60000)
/** The number of milliseconds per hour @stable ICU 2.0 */
#define U_MILLIS_PER_HOUR       (3600000)
/** The number of milliseconds per day @stable ICU 2.0 */
#define U_MILLIS_PER_DAY       (86400000)

/**
 * Maximum UDate value (DBL_MAX from <float.h>)
 * @stable ICU 4.8
 */
#define U_DATE_MAX DBL_MAX

/**
 * Minimum UDate value, the negation of U_DATE_MAX.
 * The replacement list is parenthesized so that the unary minus cannot
 * combine with tokens next to the macro at the point of use.
 * @stable ICU 4.8
 */
#define U_DATE_MIN (-U_DATE_MAX)
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Shared library/DLL import-export API control */
|
||||
/*===========================================================================*/
|
||||
|
||||
/*
|
||||
* Control of symbol import/export.
|
||||
* ICU is separated into three libraries.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_COMBINED_IMPLEMENTATION
|
||||
* Set to export library symbols from inside the ICU library
|
||||
* when all of ICU is in a single library.
|
||||
* This can be set as a compiler option while building ICU, and it
|
||||
* needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_DATA_API
|
||||
* Set to export library symbols from inside the stubdata library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_COMMON_API
|
||||
* Set to export library symbols from inside the common library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_I18N_API
|
||||
* Set to export library symbols from inside the i18n library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_LAYOUT_API
|
||||
* Set to export library symbols from inside the layout engine library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_LAYOUTEX_API
|
||||
* Set to export library symbols from inside the layout extensions library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_IO_API
|
||||
* Set to export library symbols from inside the ustdio library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_TOOLUTIL_API
|
||||
* Set to export library symbols from inside the toolutil library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
|
||||
#if defined(U_IN_DOXYGEN)
/* This definition is required when generating the API docs. */
#define U_COMBINED_IMPLEMENTATION 1
#endif

/*
 * Select import/export decoration for each ICU library's API macro.
 * The first matching *_IMPLEMENTATION macro wins: the library named by it
 * is decorated with U_EXPORT and every other one with U_IMPORT.
 * U_COMBINED_IMPLEMENTATION is tested first and exports everything;
 * U_STATIC_IMPLEMENTATION leaves every macro empty.
 */
#if defined(U_COMBINED_IMPLEMENTATION)
#   define U_DATA_API     U_EXPORT
#   define U_COMMON_API   U_EXPORT
#   define U_I18N_API     U_EXPORT
#   define U_LAYOUT_API   U_EXPORT
#   define U_LAYOUTEX_API U_EXPORT
#   define U_IO_API       U_EXPORT
#   define U_TOOLUTIL_API U_EXPORT
#elif defined(U_STATIC_IMPLEMENTATION)
#   define U_DATA_API
#   define U_COMMON_API
#   define U_I18N_API
#   define U_LAYOUT_API
#   define U_LAYOUTEX_API
#   define U_IO_API
#   define U_TOOLUTIL_API
#elif defined(U_COMMON_IMPLEMENTATION)
#   define U_DATA_API     U_IMPORT
#   define U_COMMON_API   U_EXPORT
#   define U_I18N_API     U_IMPORT
#   define U_LAYOUT_API   U_IMPORT
#   define U_LAYOUTEX_API U_IMPORT
#   define U_IO_API       U_IMPORT
#   define U_TOOLUTIL_API U_IMPORT
#elif defined(U_I18N_IMPLEMENTATION)
#   define U_DATA_API     U_IMPORT
#   define U_COMMON_API   U_IMPORT
#   define U_I18N_API     U_EXPORT
#   define U_LAYOUT_API   U_IMPORT
#   define U_LAYOUTEX_API U_IMPORT
#   define U_IO_API       U_IMPORT
#   define U_TOOLUTIL_API U_IMPORT
#elif defined(U_LAYOUT_IMPLEMENTATION)
#   define U_DATA_API     U_IMPORT
#   define U_COMMON_API   U_IMPORT
#   define U_I18N_API     U_IMPORT
#   define U_LAYOUT_API   U_EXPORT
#   define U_LAYOUTEX_API U_IMPORT
#   define U_IO_API       U_IMPORT
#   define U_TOOLUTIL_API U_IMPORT
#elif defined(U_LAYOUTEX_IMPLEMENTATION)
#   define U_DATA_API     U_IMPORT
#   define U_COMMON_API   U_IMPORT
#   define U_I18N_API     U_IMPORT
#   define U_LAYOUT_API   U_IMPORT
#   define U_LAYOUTEX_API U_EXPORT
#   define U_IO_API       U_IMPORT
#   define U_TOOLUTIL_API U_IMPORT
#elif defined(U_IO_IMPLEMENTATION)
#   define U_DATA_API     U_IMPORT
#   define U_COMMON_API   U_IMPORT
#   define U_I18N_API     U_IMPORT
#   define U_LAYOUT_API   U_IMPORT
#   define U_LAYOUTEX_API U_IMPORT
#   define U_IO_API       U_EXPORT
#   define U_TOOLUTIL_API U_IMPORT
#elif defined(U_TOOLUTIL_IMPLEMENTATION)
#   define U_DATA_API     U_IMPORT
#   define U_COMMON_API   U_IMPORT
#   define U_I18N_API     U_IMPORT
#   define U_LAYOUT_API   U_IMPORT
#   define U_LAYOUTEX_API U_IMPORT
#   define U_IO_API       U_IMPORT
#   define U_TOOLUTIL_API U_EXPORT
#else
    /* no *_IMPLEMENTATION macro is defined: import everything */
#   define U_DATA_API     U_IMPORT
#   define U_COMMON_API   U_IMPORT
#   define U_I18N_API     U_IMPORT
#   define U_LAYOUT_API   U_IMPORT
#   define U_LAYOUTEX_API U_IMPORT
#   define U_IO_API       U_IMPORT
#   define U_TOOLUTIL_API U_IMPORT
#endif
|
||||
|
||||
/**
 * \def U_STANDARD_CPP_NAMESPACE
 * Control of C++ Namespace: expands to the global scope qualifier (::)
 * when compiling as C++ and to nothing when compiling as C.
 * @stable ICU 2.0
 */
#if !defined(__cplusplus)
#   define U_STANDARD_CPP_NAMESPACE
#else
#   define U_STANDARD_CPP_NAMESPACE ::
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* UErrorCode */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Standard ICU4C error code type, a substitute for exceptions.
|
||||
*
|
||||
* Initialize the UErrorCode with U_ZERO_ERROR, and check for success or
|
||||
* failure using U_SUCCESS() or U_FAILURE():
|
||||
*
|
||||
* UErrorCode errorCode = U_ZERO_ERROR;
|
||||
* // call ICU API that needs an error code parameter.
|
||||
* if (U_FAILURE(errorCode)) {
|
||||
* // An error occurred. Handle it here.
|
||||
* }
|
||||
*
|
||||
* C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a
|
||||
* suitable subclass.
|
||||
*
|
||||
* For more information, see:
|
||||
* http://icu-project.org/userguide/conventions
|
||||
*
|
||||
* Note: By convention, ICU functions that take a reference (C++) or a pointer
|
||||
* (C) to a UErrorCode first test:
|
||||
*
|
||||
* if (U_FAILURE(errorCode)) { return immediately; }
|
||||
*
|
||||
* so that in a chain of such functions the first one that sets an error code
|
||||
* causes the following ones to not perform any operations.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum UErrorCode {
|
||||
/* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
|
||||
* and is that way because VC++ debugger displays first encountered constant,
|
||||
* which is not what the code is used for
|
||||
*/
|
||||
|
||||
U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
|
||||
|
||||
U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
|
||||
|
||||
U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
|
||||
|
||||
U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
|
||||
|
||||
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
|
||||
|
||||
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
|
||||
|
||||
U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
|
||||
|
||||
U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
|
||||
|
||||
U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
|
||||
|
||||
U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal UErrorCode warning value.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_ERROR_WARNING_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
U_ZERO_ERROR = 0, /**< No error, no warning. */
|
||||
|
||||
U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
|
||||
U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
|
||||
U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
|
||||
U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
|
||||
U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
|
||||
U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
|
||||
U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
|
||||
U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
|
||||
U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
|
||||
U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
|
||||
U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
|
||||
U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
|
||||
U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
|
||||
U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
|
||||
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
|
||||
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
|
||||
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
|
||||
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
|
||||
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
|
||||
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
|
||||
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
|
||||
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
|
||||
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
|
||||
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
|
||||
It is very possible that a circular alias definition has occurred */
|
||||
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
|
||||
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
|
||||
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
|
||||
U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
|
||||
U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
|
||||
U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* The input is impractically long for an operation.
|
||||
* It is rejected because it may lead to problems such as excessive
|
||||
* processing time, stack depth, or heap memory requirements.
|
||||
*
|
||||
* @draft ICU 68
|
||||
*/
|
||||
U_INPUT_TOO_LONG_ERROR = 31,
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest standard error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_STANDARD_ERROR_LIMIT = 32,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10000-0x100ff are reserved for Transliterator.
|
||||
*/
|
||||
U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
|
||||
U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
|
||||
U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
|
||||
U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
|
||||
U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
|
||||
U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
|
||||
U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
|
||||
U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
|
||||
U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
|
||||
U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
|
||||
U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
|
||||
U_MISSING_OPERATOR, /**< A rule contains no operator */
|
||||
U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
|
||||
U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
|
||||
U_MULTIPLE_CURSORS, /**< More than one cursor */
|
||||
U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
|
||||
U_TRAILING_BACKSLASH, /**< A dangling backslash */
|
||||
U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
|
||||
U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
|
||||
U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
|
||||
U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
|
||||
U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
|
||||
U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
|
||||
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
|
||||
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
|
||||
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
|
||||
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
|
||||
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
|
||||
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
|
||||
U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
|
||||
U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
|
||||
U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
|
||||
U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
|
||||
U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal Transliterator error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_PARSE_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10100-0x101ff are reserved for the formatting API.
|
||||
*/
|
||||
U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
|
||||
U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
|
||||
U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
|
||||
U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
|
||||
U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
|
||||
U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
|
||||
U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
|
||||
U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
|
||||
U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
|
||||
U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
|
||||
U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
|
||||
U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
|
||||
U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
|
||||
U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
|
||||
U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
|
||||
U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
|
||||
U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
|
||||
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
|
||||
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
|
||||
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
|
||||
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */
|
||||
U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal formatting API error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_FMT_PARSE_ERROR_LIMIT = 0x10114,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10200-0x102ff are reserved for BreakIterator.
|
||||
*/
|
||||
U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
|
||||
U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
|
||||
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
|
||||
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
|
||||
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
|
||||
U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */
|
||||
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
|
||||
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
|
||||
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
|
||||
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
|
||||
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
|
||||
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
|
||||
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
|
||||
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
|
||||
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal BreakIterator error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_BRK_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors.
|
||||
*/
|
||||
U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
|
||||
U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
|
||||
U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
|
||||
U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
|
||||
U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
|
||||
U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
|
||||
U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
|
||||
U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
|
||||
U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
|
||||
U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
|
||||
U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
|
||||
U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
|
||||
U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
|
||||
U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
|
||||
U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
|
||||
U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
|
||||
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
|
||||
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
|
||||
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
|
||||
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */
|
||||
U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal regular expression error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes.
|
||||
*/
|
||||
U_IDNA_PROHIBITED_ERROR=0x10400,
|
||||
U_IDNA_ERROR_START=0x10400,
|
||||
U_IDNA_UNASSIGNED_ERROR,
|
||||
U_IDNA_CHECK_BIDI_ERROR,
|
||||
U_IDNA_STD3_ASCII_RULES_ERROR,
|
||||
U_IDNA_ACE_PREFIX_ERROR,
|
||||
U_IDNA_VERIFICATION_ERROR,
|
||||
U_IDNA_LABEL_TOO_LONG_ERROR,
|
||||
U_IDNA_ZERO_LENGTH_LABEL_ERROR,
|
||||
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal IDNA error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_IDNA_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
/*
|
||||
* Aliases for StringPrep
|
||||
*/
|
||||
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
|
||||
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
|
||||
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes.
|
||||
*/
|
||||
U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
|
||||
U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
|
||||
U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal plug-in error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_PLUGIN_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UErrorCode;
|
||||
|
||||
/* Use the following to determine if an UErrorCode represents */
|
||||
/* operational success or failure. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Does the error code indicate success?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static
|
||||
inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
|
||||
/**
|
||||
* Does the error code indicate a failure?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static
|
||||
inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
|
||||
#else
|
||||
/**
|
||||
* Does the error code indicate success?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
|
||||
/**
|
||||
* Does the error code indicate a failure?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Return a string for a UErrorCode value.
|
||||
* The string will be the same as the name of the error code constant
|
||||
* in the UErrorCode enum above.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
u_errorName(UErrorCode code);
|
||||
|
||||
|
||||
#endif /* _UTYPES */
|
||||
198
app/src/main/cpp/icu4c/include/unicode/uvernum.h
Normal file
198
app/src/main/cpp/icu4c/include/unicode/uvernum.h
Normal file
@@ -0,0 +1,198 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2000-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* file name: uvernum.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* Created by: Vladimir Weinstein
|
||||
* Updated by: Steven R. Loomis
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: definitions of ICU version numbers
|
||||
*
|
||||
* This file is included by uversion.h and other files. This file contains only
|
||||
* macros and definitions. The actual version numbers are defined here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* IMPORTANT: When updating version, the following things need to be done:
|
||||
* source/common/unicode/uvernum.h - this file: update major, minor,
|
||||
* patchlevel, suffix, version, short version constants, namespace,
|
||||
* renaming macro, and copyright
|
||||
*
|
||||
* The following files need to be updated as well, which can be done
|
||||
* by running the UNIX makefile target 'update-windows-makefiles' in icu/source.
|
||||
*
|
||||
*
|
||||
* source/common/common_uwp.vcxproj
|
||||
* source/common/common.vcxproj - update 'Output file name' on the link tab so
|
||||
* that it contains the new major/minor combination
|
||||
* source/i18n/i18n.vcxproj - same as for the common.vcxproj
|
||||
* source/i18n/i18n_uwp.vcxproj - same as for the common_uwp.vcxproj
|
||||
* source/layoutex/layoutex.vcproj - same
|
||||
* source/stubdata/stubdata.vcproj - same as for the common.vcxproj
|
||||
* source/io/io.vcproj - same as for the common.vcxproj
|
||||
* source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
|
||||
* the new major/minor combination and the Unicode version.
|
||||
*/
|
||||
|
||||
#ifndef UVERNUM_H
|
||||
#define UVERNUM_H
|
||||
|
||||
/** The standard copyright notice that gets compiled into each library.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_COPYRIGHT_STRING \
|
||||
" Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html "
|
||||
|
||||
/** The current ICU major version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 69
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_MINOR_NUM 1
|
||||
|
||||
/** The current ICU patchlevel version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_PATCHLEVEL_NUM 0
|
||||
|
||||
/** The current ICU build level version as an integer.
|
||||
* This value is for use by ICU clients. It defaults to 0.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
|
||||
#define U_ICU_VERSION_BUILDLEVEL_NUM 0
|
||||
#endif
|
||||
|
||||
/** Glued version suffix for renamers
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _69
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
* @internal
|
||||
*/
|
||||
/**
|
||||
* \def U_DEF_ICU_ENTRY_POINT_RENAME
|
||||
* @internal
|
||||
*/
|
||||
/** Glued version suffix function for renamers
|
||||
* This value will change in the subsequent releases of ICU.
|
||||
* If a custom suffix (such as matching library suffixes) is desired, this can be modified.
|
||||
* Note that if present, platform.h may contain an earlier definition of this macro.
|
||||
* \def U_ICU_ENTRY_POINT_RENAME
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
/**
|
||||
* Disable the version suffix. Use the custom suffix if exists.
|
||||
* \def U_DISABLE_VERSION_SUFFIX
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DISABLE_VERSION_SUFFIX
|
||||
#define U_DISABLE_VERSION_SUFFIX 0
|
||||
#endif
|
||||
|
||||
#ifndef U_ICU_ENTRY_POINT_RENAME
|
||||
#ifdef U_HAVE_LIB_SUFFIX
|
||||
# if !U_DISABLE_VERSION_SUFFIX
|
||||
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
|
||||
# else
|
||||
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_LIB_SUFFIX_C_NAME)
|
||||
# endif
|
||||
#else
|
||||
# if !U_DISABLE_VERSION_SUFFIX
|
||||
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
|
||||
# else
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) x
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** The current ICU library version as a dotted-decimal string. The patchlevel
|
||||
* only appears in this string if it is non-zero.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "69.1"
|
||||
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
* This value will change in subsequent releases of ICU.
|
||||
*
|
||||
* Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
|
||||
* into one string without dots ("48").
|
||||
* Since ICU 49, it is the double-digit major ICU version number.
|
||||
* See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "69"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "69.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
* ICU collation framework version information
|
||||
* Version info that can be obtained from a collator is affected by these
|
||||
* numbers in a secret and magic way. Please use collator version as whole
|
||||
*===========================================================================
|
||||
*/
|
||||
|
||||
/**
|
||||
* Collation runtime version (sort key generator, strcoll).
|
||||
* If the version is different, sort keys for the same string could be different.
|
||||
* This value may change in subsequent releases of ICU.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define UCOL_RUNTIME_VERSION 9
|
||||
|
||||
/**
|
||||
* Collation builder code version.
|
||||
* When this is different, the same tailoring might result
|
||||
* in assigning different collation elements to code points.
|
||||
* This value may change in subsequent releases of ICU.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define UCOL_BUILDER_VERSION 9
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Constant 1.
|
||||
* This was intended to be the version of collation tailorings,
|
||||
* but instead the tailoring data carries a version number.
|
||||
* @deprecated ICU 54
|
||||
*/
|
||||
#define UCOL_TAILORINGS_VERSION 1
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#endif
|
||||
187
app/src/main/cpp/icu4c/include/unicode/uversion.h
Normal file
187
app/src/main/cpp/icu4c/include/unicode/uversion.h
Normal file
@@ -0,0 +1,187 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2000-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* file name: uversion.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* Created by: Vladimir Weinstein
|
||||
*
|
||||
* Gets included by utypes.h and Windows .rc files
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: API for accessing ICU version numbers.
|
||||
*/
|
||||
/*===========================================================================*/
|
||||
/* Main ICU version information */
|
||||
/*===========================================================================*/
|
||||
|
||||
#ifndef UVERSION_H
|
||||
#define UVERSION_H
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
|
||||
/* Actual version info lives in uvernum.h */
|
||||
#include "unicode/uvernum.h"
|
||||
|
||||
/** Maximum length of the copyright string.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_COPYRIGHT_STRING_LENGTH 128
|
||||
|
||||
/** An ICU version consists of up to 4 numbers from 0..255.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_MAX_VERSION_LENGTH 4
|
||||
|
||||
/** In a string, ICU version fields are delimited by dots.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_VERSION_DELIMITER '.'
|
||||
|
||||
/** The maximum length of an ICU version string.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_MAX_VERSION_STRING_LENGTH 20
|
||||
|
||||
/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
|
||||
* To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
|
||||
|
||||
/*===========================================================================*/
|
||||
/* C++ namespace if supported. Versioned unless versioning is disabled. */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* Define C++ namespace symbols. */
|
||||
#ifdef __cplusplus
|
||||
|
||||
/**
|
||||
* \def U_NAMESPACE_BEGIN
|
||||
* This is used to begin a declaration of a public ICU C++ API within
|
||||
* versioned-ICU-namespace block.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_NAMESPACE_END
|
||||
* This is used to end a declaration of a public ICU C++ API.
|
||||
* It ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_NAMESPACE_USE
|
||||
* This is used to specify that the rest of the code uses the
|
||||
* public ICU C++ API namespace.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_NAMESPACE_QUALIFIER
|
||||
* This is used to qualify that a function or class is part of
|
||||
* the public ICU C++ API namespace.
|
||||
*
|
||||
* This macro is unnecessary since ICU 49 requires namespace support.
|
||||
* You can just use "icu::" instead.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
# if U_DISABLE_RENAMING
|
||||
# define U_ICU_NAMESPACE icu
|
||||
namespace U_ICU_NAMESPACE { }
|
||||
# else
|
||||
# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu)
|
||||
namespace U_ICU_NAMESPACE { }
|
||||
namespace icu = U_ICU_NAMESPACE;
|
||||
# endif
|
||||
|
||||
# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
|
||||
# define U_NAMESPACE_END }
|
||||
# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
|
||||
# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
|
||||
|
||||
# ifndef U_USING_ICU_NAMESPACE
|
||||
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
|
||||
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
|
||||
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
# define U_USING_ICU_NAMESPACE 0
|
||||
# else
|
||||
# define U_USING_ICU_NAMESPACE 0
|
||||
# endif
|
||||
# endif
|
||||
# if U_USING_ICU_NAMESPACE
|
||||
U_NAMESPACE_USE
|
||||
# endif
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*===========================================================================*/
|
||||
/* General version helper functions. Definitions in putil.c */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Parse a string with dotted-decimal version information and
|
||||
* fill in a UVersionInfo structure with the result.
|
||||
* Definition of this function lives in putil.c
|
||||
*
|
||||
* @param versionArray The destination structure for the version information.
|
||||
* @param versionString A string with dotted-decimal version information,
|
||||
* with up to four non-negative number fields with
|
||||
* values of up to 255 each.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_versionFromString(UVersionInfo versionArray, const char *versionString);
|
||||
|
||||
/**
|
||||
* Parse a Unicode string with dotted-decimal version information and
|
||||
* fill in a UVersionInfo structure with the result.
|
||||
* Definition of this function lives in putil.c
|
||||
*
|
||||
* @param versionArray The destination structure for the version information.
|
||||
* @param versionString A Unicode string with dotted-decimal version
|
||||
* information, with up to four non-negative number
|
||||
* fields with values of up to 255 each.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
|
||||
|
||||
|
||||
/**
|
||||
* Write a string with dotted-decimal version information according
|
||||
* to the input UVersionInfo.
|
||||
* Definition of this function lives in putil.c
|
||||
*
|
||||
* @param versionArray The version information to be written as a string.
|
||||
* @param versionString A string buffer that will be filled in with
|
||||
* a string corresponding to the numeric version
|
||||
* information in versionArray.
|
||||
* The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_versionToString(const UVersionInfo versionArray, char *versionString);
|
||||
|
||||
/**
|
||||
* Gets the ICU release version. The version array stores the version information
|
||||
* for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.
|
||||
* Definition of this function lives in putil.c
|
||||
*
|
||||
* @param versionArray the version # information, the result will be filled in
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_getVersion(UVersionInfo versionArray);
|
||||
#endif
|
||||
13
app/src/main/cpp/ime/nlp/CMakeLists.txt
Normal file
13
app/src/main/cpp/ime/nlp/CMakeLists.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
# NLP helper library: token types and the suggestion list.
add_library(ime-nlp
    # Headers
    nlp.h
    token.h
    suggestion_list.h

    # Sources
    token.cpp
    suggestion_list.cpp
)
|
||||
32
app/src/main/cpp/ime/nlp/nlp.h
Normal file
32
app/src/main/cpp/ime/nlp/nlp.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLORISBOARD_NLP_H
|
||||
#define FLORISBOARD_NLP_H
|
||||
|
||||
#include <cstdint>
#include <string>

namespace ime::nlp {

/// Textual payload of a token.
using word_t = std::string;

/// Weight attached to a word; 16-bit unsigned. <cstdint> is included explicitly
/// so this header does not depend on another header dragging the type in.
using freq_t = std::uint16_t;

/// Mask constant with the low 8 bits set.
/// NOTE(review): presumably selects the frequency value out of a freq_t - confirm against users.
inline constexpr freq_t FREQ_VALUE_MASK = 0xFF;

/// Marker constant for possibly offensive words.
/// NOTE(review): the value 0x01 overlaps bit 0 of FREQ_VALUE_MASK - confirm how the two are combined.
inline constexpr freq_t FREQ_POSSIBLY_OFFENSIVE = 0x01;

} // namespace ime::nlp
|
||||
|
||||
#endif // FLORISBOARD_NLP_H
|
||||
98
app/src/main/cpp/ime/nlp/suggestion_list.cpp
Normal file
98
app/src/main/cpp/ime/nlp/suggestion_list.cpp
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggestion_list.h"

#include <algorithm>
#include <functional>
#include <utility>
|
||||
|
||||
using namespace ime::nlp;
|
||||
|
||||
/**
 * Creates an empty suggestion list that can hold at most `_maxSize` tokens.
 *
 * The backing vector is sized to `_maxSize` default-constructed tokens up front;
 * `internalSize` tracks how many of those slots are currently in use.
 *
 * @param _maxSize Maximum number of suggestions the list may contain.
 */
SuggestionList::SuggestionList(size_t _maxSize) :
    // Written in member declaration order, which is the order the members are
    // actually initialized in (avoids -Wreorder and misleading reads).
    isPrimaryTokenAutoInsert(false), tokens(_maxSize), internalSize(0), maxSize(_maxSize)
{ }
|
||||
|
||||
// Defaulted destructor: the members clean up after themselves.
SuggestionList::~SuggestionList() = default;
|
||||
|
||||
/**
 * Inserts `word` with weight `freq`, keeping the used part of the list sorted
 * by descending frequency.
 *
 * - Word already present: its frequency is raised to `freq` if `freq` is higher.
 * - Word absent and list not full: the word is appended.
 * - Word absent and list full: the word replaces the last (lowest-frequency)
 *   token if `freq` is higher than that token's frequency.
 *
 * @param word The word to add; moved from when a new token is stored.
 * @param freq The weight of the word.
 * @return true if the list was modified, false if the word was rejected.
 */
bool SuggestionList::add(word_t &&word, freq_t &&freq) {
    if (maxSize == 0) {
        // A zero-capacity list can never store a token; without this guard the
        // "list is full" branch below would index tokens[internalSize - 1] with
        // internalSize == 0, i.e. far out of bounds.
        return false;
    }

    const auto existingIndex = indexOfWord(word);
    if (existingIndex.has_value()) {
        // Bind by reference: updating a copy would silently drop the new
        // frequency while still reporting success.
        WeightedToken &existing = tokens[*existingIndex];
        if (existing.freq >= freq) {
            return false;
        }
        existing.freq = freq;
    } else if (internalSize < maxSize) {
        tokens[internalSize++] = WeightedToken(std::move(word), freq);
    } else {
        // The list is sorted descending, so the last used slot is the weakest.
        WeightedToken &weakest = tokens[internalSize - 1];
        if (weakest.freq >= freq) {
            return false;
        }
        weakest = WeightedToken(std::move(word), freq);
    }

    // Restore descending order over the used slots only.
    std::sort(tokens.begin(), tokens.begin() + internalSize, std::greater<>());
    return true;
}
|
||||
|
||||
/**
 * Empties the list. The backing vector is left untouched; only the used-slot
 * counter and the auto-insert flag are reset.
 */
void SuggestionList::clear() {
    isPrimaryTokenAutoInsert = false;
    internalSize = 0;
}
|
||||
|
||||
bool SuggestionList::contains(const WeightedToken &element) const {
|
||||
return indexOf(element).has_value();
|
||||
}
|
||||
|
||||
bool SuggestionList::containsWord(const word_t &word) const {
|
||||
return indexOfWord(word).has_value();
|
||||
}
|
||||
|
||||
/**
 * Returns the token stored at `index`.
 *
 * @param index Zero-based position within the used part of the list.
 * @return Pointer to the token, or nullptr if `index` is not below size().
 *         The pointer refers to storage owned by this list.
 */
const WeightedToken *SuggestionList::get(size_t index) const {
    // `index` is unsigned, so only the upper bound can be violated.
    return index < internalSize ? &tokens[index] : nullptr;
}
|
||||
|
||||
std::optional<size_t> SuggestionList::indexOf(const WeightedToken &element) const {
|
||||
for (size_t n = 0; n < internalSize; n++) {
|
||||
if (element == tokens[n]) {
|
||||
return n;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<size_t> SuggestionList::indexOfWord(const word_t &word) const {
|
||||
for (size_t n = 0; n < internalSize; n++) {
|
||||
if (word == tokens[n].data) {
|
||||
return n;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
bool SuggestionList::isEmpty() const {
|
||||
return internalSize == 0;
|
||||
}
|
||||
|
||||
size_t SuggestionList::size() const {
|
||||
return internalSize;
|
||||
}
|
||||
51
app/src/main/cpp/ime/nlp/suggestion_list.h
Normal file
51
app/src/main/cpp/ime/nlp/suggestion_list.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLORISBOARD_SUGGESTION_LIST_H
|
||||
#define FLORISBOARD_SUGGESTION_LIST_H
|
||||
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include "token.h"
|
||||
|
||||
namespace ime::nlp {
|
||||
|
||||
class SuggestionList {
|
||||
public:
|
||||
SuggestionList(size_t _maxSize);
|
||||
~SuggestionList();
|
||||
|
||||
bool add(word_t &&word, freq_t &&freq);
|
||||
void clear();
|
||||
bool contains(const WeightedToken &element) const;
|
||||
bool containsWord(const word_t &word) const;
|
||||
const WeightedToken *get(size_t index) const;
|
||||
std::optional<size_t> indexOf(const WeightedToken &element) const;
|
||||
std::optional<size_t> indexOfWord(const word_t &word) const;
|
||||
bool isEmpty() const;
|
||||
size_t size() const;
|
||||
|
||||
bool isPrimaryTokenAutoInsert;
|
||||
|
||||
private:
|
||||
std::vector<WeightedToken> tokens;
|
||||
size_t internalSize;
|
||||
size_t maxSize;
|
||||
};
|
||||
|
||||
} // namespace ime::nlp
|
||||
|
||||
#endif // FLORISBOARD_SUGGESTION_LIST_H
|
||||
61
app/src/main/cpp/ime/nlp/token.cpp
Normal file
61
app/src/main/cpp/ime/nlp/token.cpp
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "token.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace ime::nlp {
|
||||
|
||||
Token::Token() : data() {}
|
||||
Token::Token(word_t &&_data) : data(std::move(_data)) {}
|
||||
|
||||
bool operator==(const Token &t1, const Token &t2) {
|
||||
return t1.data == t2.data;
|
||||
}
|
||||
|
||||
bool operator!=(const Token &t1, const Token &t2) {
|
||||
return !(t1 == t2);
|
||||
}
|
||||
|
||||
WeightedToken::WeightedToken() : Token(), freq(0) {}
|
||||
WeightedToken::WeightedToken(word_t &&_data, freq_t _freq) : Token(std::move(_data)), freq(_freq) {}
|
||||
|
||||
bool operator==(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.data == t2.data && t1.freq == t2.freq;
|
||||
}
|
||||
|
||||
bool operator!=(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return !(t1 == t2);
|
||||
}
|
||||
|
||||
bool operator<(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.freq < t2.freq;
|
||||
}
|
||||
|
||||
bool operator<=(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.freq <= t2.freq;
|
||||
}
|
||||
|
||||
bool operator>(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.freq > t2.freq;
|
||||
}
|
||||
|
||||
bool operator>=(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.freq >= t2.freq;
|
||||
}
|
||||
|
||||
} // namespace ime::nlp
|
||||
51
app/src/main/cpp/ime/nlp/token.h
Normal file
51
app/src/main/cpp/ime/nlp/token.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLORISBOARD_TOKEN_H
|
||||
#define FLORISBOARD_TOKEN_H
|
||||
|
||||
#include "nlp.h"
|
||||
#include <string>
|
||||
|
||||
namespace ime::nlp {
|
||||
|
||||
class Token {
|
||||
public:
|
||||
word_t data;
|
||||
Token();
|
||||
Token(word_t &&_data);
|
||||
|
||||
friend bool operator==(const Token &t1, const Token &t2);
|
||||
friend bool operator!=(const Token &t1, const Token &t2);
|
||||
};
|
||||
|
||||
class WeightedToken : public Token {
|
||||
public:
|
||||
freq_t freq;
|
||||
WeightedToken();
|
||||
WeightedToken(word_t &&_data, freq_t _freq);
|
||||
|
||||
friend bool operator==(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator!=(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator<(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator<=(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator>(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator>=(const WeightedToken &t1, const WeightedToken &t2);
|
||||
};
|
||||
|
||||
} // namespace ime::nlp
|
||||
|
||||
#endif // FLORISBOARD_TOKEN_H
|
||||
10
app/src/main/cpp/ime/spelling/CMakeLists.txt
Normal file
10
app/src/main/cpp/ime/spelling/CMakeLists.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
# Spell-checking wrapper library around the bundled nuspell sources.
add_library(ime-spelling
    # Headers
    spellingdict.h

    # Sources
    spellingdict.cpp
)
|
||||
51
app/src/main/cpp/ime/spelling/spellingdict.cpp
Normal file
51
app/src/main/cpp/ime/spelling/spellingdict.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "spellingdict.h"
|
||||
#include "utils/log.h"
|
||||
|
||||
using namespace ime::spellcheck;
|
||||
|
||||
SpellingDict::SpellingDict(const nuspell::Dictionary& dict) : dictionary(std::make_unique<nuspell::Dictionary>(dict))
|
||||
{ }
|
||||
|
||||
// Defaulted destructor: the unique_ptr member releases the wrapped dictionary.
SpellingDict::~SpellingDict() = default;
|
||||
|
||||
/**
 * Loads a dictionary from `basePath` and wraps it in a heap-allocated
 * SpellingDict.
 *
 * @param basePath Path handed to nuspell::Dictionary::load_from_path().
 * @return A pointer obtained with `new` (the caller is responsible for
 *         deleting it), or nullptr if loading failed. Failures are logged,
 *         never thrown.
 */
SpellingDict* SpellingDict::load(const std::string &basePath) {
    utils::start_stdout_stderr_logger("spell-floris");

    SpellingDict *loaded = nullptr;
    try {
        auto parsed = nuspell::Dictionary::load_from_path(basePath);
        loaded = new SpellingDict(parsed);
    } catch (const nuspell::Dictionary_Loading_Error& e) {
        utils::log_error("SpellingDict.load()", e.what());
    } catch (...) {
        utils::log_error("SpellingDict.load()", "An unknown error occurred!");
    }
    return loaded;
}
|
||||
|
||||
bool SpellingDict::spell(const std::string& word) {
|
||||
bool result = dictionary->spell(word);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Asks the wrapped dictionary for suggestions for `word`.
 *
 * @return The vector filled in by the dictionary's suggest() call.
 */
std::vector<std::string> SpellingDict::suggest(const std::string &word) {
    std::vector<std::string> suggestions;
    dictionary->suggest(word, suggestions);
    return suggestions;
}
|
||||
42
app/src/main/cpp/ime/spelling/spellingdict.h
Normal file
42
app/src/main/cpp/ime/spelling/spellingdict.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLORISBOARD_SPELLINGDICT_H
|
||||
#define FLORISBOARD_SPELLINGDICT_H
|
||||
|
||||
#include "nuspell/dictionary.hxx"
#include <memory>
#include <string>
#include <vector>

namespace ime::spellcheck {

/**
 * Thin wrapper owning a nuspell::Dictionary.
 *
 * <memory> is included directly because the class stores a std::unique_ptr;
 * the header must not rely on nuspell's headers pulling it in.
 */
class SpellingDict {
public:
    /** Stores a copy of `dict`. */
    SpellingDict(const nuspell::Dictionary& dict);
    ~SpellingDict();

    /**
     * Loads a dictionary from `basePath`.
     * @return A pointer the caller must delete, or nullptr on failure.
     */
    static SpellingDict* load(const std::string& basePath);

    /** Result of the wrapped dictionary's spell() for `word`. */
    bool spell(const std::string& word);
    /** Suggestions produced by the wrapped dictionary for `word`. */
    std::vector<std::string> suggest(const std::string& word);

private:
    std::unique_ptr<nuspell::Dictionary> dictionary;
};

} // namespace ime::spellcheck
|
||||
|
||||
#endif // FLORISBOARD_SPELLINGDICT_H
|
||||
61
app/src/main/cpp/nuspell/CMakeLists.txt
Normal file
61
app/src/main/cpp/nuspell/CMakeLists.txt
Normal file
@@ -0,0 +1,61 @@
|
||||
# Bundled Nuspell sources, built as a plain library target.
add_library(nuspell
    aff_data.cxx
    aff_data.hxx
    checker.cxx
    checker.hxx
    suggester.cxx
    suggester.hxx
    dictionary.cxx
    dictionary.hxx
    finder.cxx
    finder.hxx
    unicode.hxx
    utils.cxx
    utils.hxx
    structures.hxx
)
|
||||
|
||||
# Namespaced alias so the target can also be referred to as Nuspell::nuspell.
add_library(Nuspell::nuspell ALIAS nuspell)
|
||||
|
||||
#include(GenerateExportHeader)
|
||||
#generate_export_header(nuspell)
|
||||
|
||||
#set(nuspell_headers aff_data.hxx checker.hxx suggester.hxx dictionary.hxx
|
||||
# finder.hxx structures.hxx unicode.hxx
|
||||
# ${CMAKE_CURRENT_BINARY_DIR}/nuspell_export.h)
|
||||
#[[set_target_properties(nuspell PROPERTIES
|
||||
PUBLIC_HEADER "${nuspell_headers}"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
SOVERSION ${PROJECT_VERSION_MAJOR}
|
||||
CXX_VISIBILITY_PRESET hidden)]]
|
||||
|
||||
#target_compile_features(nuspell PUBLIC cxx_std_17)
|
||||
|
||||
#[[target_include_directories(nuspell
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
|
||||
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
|
||||
|
||||
target_link_libraries(nuspell PUBLIC ICU::uc ICU::data)
|
||||
|
||||
add_executable(nuspell-bin main.cxx)
|
||||
set_target_properties(nuspell-bin PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY ../tools
|
||||
RUNTIME_OUTPUT_NAME nuspell)
|
||||
target_compile_definitions(nuspell-bin PRIVATE
|
||||
PROJECT_VERSION=\"${PROJECT_VERSION}\")
|
||||
target_link_libraries(nuspell-bin nuspell)
|
||||
if (BUILD_SHARED_LIBS AND WIN32)
|
||||
# This should be PRE_LINK (or PRE_BUILD), so Vcpkg's POST_BUILD
|
||||
# step (see VCPKG_APPLOCAL_DEPS) that copies dll can pick up nuspell.dll
|
||||
# inside the folder ../tools.
|
||||
add_custom_command(TARGET nuspell-bin PRE_LINK
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
$<TARGET_FILE:nuspell> $<TARGET_FILE_DIR:nuspell-bin>)
|
||||
endif()
|
||||
|
||||
if (NOT subproject)
|
||||
install(TARGETS nuspell
|
||||
EXPORT NuspellTargets
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/nuspell)
|
||||
install(EXPORT NuspellTargets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/nuspell
|
||||
NAMESPACE Nuspell::)
|
||||
install(TARGETS nuspell-bin DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
endif()]]
|
||||
1064
app/src/main/cpp/nuspell/aff_data.cxx
Normal file
1064
app/src/main/cpp/nuspell/aff_data.cxx
Normal file
File diff suppressed because it is too large
Load Diff
173
app/src/main/cpp/nuspell/aff_data.hxx
Normal file
173
app/src/main/cpp/nuspell/aff_data.hxx
Normal file
@@ -0,0 +1,173 @@
|
||||
/* Copyright 2016-2021 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Nuspell.
|
||||
*
|
||||
* Nuspell is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Nuspell is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef NUSPELL_AFF_DATA_HXX
|
||||
#define NUSPELL_AFF_DATA_HXX
|
||||
|
||||
#include "nuspell_export.h"
|
||||
#include "structures.hxx"
|
||||
|
||||
#include <iosfwd>
|
||||
#include <unicode/locid.h>
|
||||
|
||||
namespace nuspell {
|
||||
inline namespace v5 {
|
||||
|
||||
class Encoding {
|
||||
std::string name;
|
||||
|
||||
NUSPELL_EXPORT auto normalize_name() -> void;
|
||||
|
||||
public:
|
||||
enum Enc_Type { SINGLEBYTE = false, UTF8 = true };
|
||||
|
||||
Encoding() = default;
|
||||
explicit Encoding(const std::string& e) : name(e) { normalize_name(); }
|
||||
explicit Encoding(std::string&& e) : name(move(e)) { normalize_name(); }
|
||||
explicit Encoding(const char* e) : name(e) { normalize_name(); }
|
||||
auto& operator=(const std::string& e)
|
||||
{
|
||||
name = e;
|
||||
normalize_name();
|
||||
return *this;
|
||||
}
|
||||
auto& operator=(std::string&& e)
|
||||
{
|
||||
name = move(e);
|
||||
normalize_name();
|
||||
return *this;
|
||||
}
|
||||
auto& operator=(const char* e)
|
||||
{
|
||||
name = e;
|
||||
normalize_name();
|
||||
return *this;
|
||||
}
|
||||
auto empty() const { return name.empty(); }
|
||||
auto& value() const { return name; }
|
||||
auto is_utf8() const { return name == "UTF-8"; }
|
||||
auto value_or_default() const -> std::string
|
||||
{
|
||||
if (name.empty())
|
||||
return "ISO8859-1";
|
||||
else
|
||||
return name;
|
||||
}
|
||||
operator Enc_Type() const { return is_utf8() ? UTF8 : SINGLEBYTE; }
|
||||
};
|
||||
|
||||
// Kinds of flag representation; stored in Aff_Data::flag_type.
// NOTE(review): presumably mirrors the flag format option of the .aff file - confirm in aff_data.cxx.
enum class Flag_Type { SINGLE_CHAR, DOUBLE_CHAR, NUMBER, UTF8 };
|
||||
|
||||
/**
 * @internal
 * @brief Map between words and word flags.
 *
 * Flags are stored as part of the container. In the future, flags might be
 * stored elsewhere (flag aliases), with this container storing pointers to
 * them.
 *
 * Morphological data is not stored, as it is a low-priority feature and out
 * of scope.
 */
|
||||
using Word_List = Hash_Multimap<std::string, Flag_Set>;
|
||||
|
||||
struct Aff_Data {
|
||||
static constexpr auto HIDDEN_HOMONYM_FLAG = char16_t(-1);
|
||||
static constexpr auto MAX_SUGGESTIONS = size_t(16);
|
||||
|
||||
// spell checking options
|
||||
Word_List words;
|
||||
Prefix_Table prefixes;
|
||||
Suffix_Table suffixes;
|
||||
|
||||
bool complex_prefixes;
|
||||
bool fullstrip;
|
||||
bool checksharps;
|
||||
bool forbid_warn;
|
||||
char16_t compound_onlyin_flag;
|
||||
char16_t circumfix_flag;
|
||||
char16_t forbiddenword_flag;
|
||||
char16_t keepcase_flag;
|
||||
char16_t need_affix_flag;
|
||||
char16_t warn_flag;
|
||||
|
||||
// compounding options
|
||||
char16_t compound_flag;
|
||||
char16_t compound_begin_flag;
|
||||
char16_t compound_last_flag;
|
||||
char16_t compound_middle_flag;
|
||||
Compound_Rule_Table compound_rules;
|
||||
|
||||
// spell checking options
|
||||
Break_Table break_table;
|
||||
Substr_Replacer input_substr_replacer;
|
||||
std::string ignored_chars;
|
||||
icu::Locale icu_locale;
|
||||
Substr_Replacer output_substr_replacer;
|
||||
|
||||
// suggestion options
|
||||
Replacement_Table replacements;
|
||||
std::vector<Similarity_Group> similarities;
|
||||
std::string keyboard_closeness;
|
||||
std::string try_chars;
|
||||
// Phonetic_Table phonetic_table;
|
||||
|
||||
char16_t nosuggest_flag;
|
||||
char16_t substandard_flag;
|
||||
unsigned short max_compound_suggestions;
|
||||
unsigned short max_ngram_suggestions;
|
||||
unsigned short max_diff_factor;
|
||||
bool only_max_diff;
|
||||
bool no_split_suggestions;
|
||||
bool suggest_with_dots;
|
||||
|
||||
// compounding options
|
||||
unsigned short compound_min_length;
|
||||
unsigned short compound_max_word_count;
|
||||
char16_t compound_permit_flag;
|
||||
char16_t compound_forbid_flag;
|
||||
char16_t compound_root_flag;
|
||||
char16_t compound_force_uppercase;
|
||||
bool compound_more_suffixes;
|
||||
bool compound_check_duplicate;
|
||||
bool compound_check_rep;
|
||||
bool compound_check_case;
|
||||
bool compound_check_triple;
|
||||
bool compound_simplified_triple;
|
||||
bool compound_syllable_num;
|
||||
unsigned short compound_syllable_max;
|
||||
std::string compound_syllable_vowels;
|
||||
std::vector<Compound_Pattern> compound_patterns;
|
||||
|
||||
// data members used only while parsing
|
||||
Flag_Type flag_type;
|
||||
Encoding encoding;
|
||||
std::vector<Flag_Set> flag_aliases;
|
||||
std::string wordchars; // deprecated?
|
||||
|
||||
auto parse_aff(std::istream& in) -> bool;
|
||||
auto parse_dic(std::istream& in) -> bool;
|
||||
auto parse_aff_dic(std::istream& aff, std::istream& dic)
|
||||
{
|
||||
if (parse_aff(aff))
|
||||
return parse_dic(dic);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
} // namespace v5
|
||||
} // namespace nuspell
|
||||
#endif // NUSPELL_AFF_DATA_HXX
|
||||
2009
app/src/main/cpp/nuspell/checker.cxx
Normal file
2009
app/src/main/cpp/nuspell/checker.cxx
Normal file
File diff suppressed because it is too large
Load Diff
352
app/src/main/cpp/nuspell/checker.hxx
Normal file
352
app/src/main/cpp/nuspell/checker.hxx
Normal file
@@ -0,0 +1,352 @@
|
||||
/* Copyright 2016-2021 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Nuspell.
|
||||
*
|
||||
* Nuspell is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Nuspell is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef NUSPELL_CHECKER_HXX
|
||||
#define NUSPELL_CHECKER_HXX
|
||||
|
||||
#include "aff_data.hxx"
|
||||
|
||||
namespace nuspell {
|
||||
inline namespace v5 {
|
||||
|
||||
/** Position of the word being affix-checked: standalone or inside a compound. */
enum Affixing_Mode {
	FULL_WORD = 0,
	AT_COMPOUND_BEGIN = 1,
	AT_COMPOUND_END = 2,
	AT_COMPOUND_MIDDLE = 3
};
|
||||
|
||||
struct Affixing_Result_Base {
|
||||
Word_List::const_pointer root_word = {};
|
||||
|
||||
operator Word_List::const_pointer() const { return root_word; }
|
||||
auto& operator*() const { return *root_word; }
|
||||
auto operator->() const { return root_word; }
|
||||
};
|
||||
|
||||
template <class T1 = void, class T2 = void>
|
||||
struct Affixing_Result : Affixing_Result_Base {
|
||||
const T1* a = {};
|
||||
const T2* b = {};
|
||||
|
||||
Affixing_Result() = default;
|
||||
Affixing_Result(Word_List::const_reference r, const T1& a, const T2& b)
|
||||
: Affixing_Result_Base{&r}, a{&a}, b{&b}
|
||||
{
|
||||
}
|
||||
};
|
||||
template <class T1>
|
||||
struct Affixing_Result<T1, void> : Affixing_Result_Base {
|
||||
const T1* a = {};
|
||||
|
||||
Affixing_Result() = default;
|
||||
Affixing_Result(Word_List::const_reference r, const T1& a)
|
||||
: Affixing_Result_Base{&r}, a{&a}
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Affixing_Result<void, void> : Affixing_Result_Base {
|
||||
Affixing_Result() = default;
|
||||
Affixing_Result(Word_List::const_reference r) : Affixing_Result_Base{&r}
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
struct Compounding_Result {
|
||||
Word_List::const_pointer word_entry = {};
|
||||
unsigned char num_words_modifier = {};
|
||||
signed char num_syllable_modifier = {};
|
||||
bool affixed_and_modified = {}; /**< non-zero affix */
|
||||
operator Word_List::const_pointer() const { return word_entry; }
|
||||
auto& operator*() const { return *word_entry; }
|
||||
auto operator->() const { return word_entry; }
|
||||
};
|
||||
|
||||
struct Checker : public Aff_Data {
|
||||
enum Forceucase : bool {
|
||||
FORBID_BAD_FORCEUCASE = false,
|
||||
ALLOW_BAD_FORCEUCASE = true
|
||||
};
|
||||
|
||||
enum Hidden_Homonym : bool {
|
||||
ACCEPT_HIDDEN_HOMONYM = false,
|
||||
SKIP_HIDDEN_HOMONYM = true
|
||||
};
|
||||
Checker()
|
||||
: Aff_Data() // we explicity do value init so content is zeroed
|
||||
{
|
||||
}
|
||||
auto spell_priv(std::string& s) const -> bool;
|
||||
auto spell_break(std::string& s, size_t depth = 0) const -> bool;
|
||||
auto spell_casing(std::string& s) const -> const Flag_Set*;
|
||||
auto spell_casing_upper(std::string& s) const -> const Flag_Set*;
|
||||
auto spell_casing_title(std::string& s) const -> const Flag_Set*;
|
||||
auto spell_sharps(std::string& base, size_t n_pos = 0, size_t n = 0,
|
||||
size_t rep = 0) const -> const Flag_Set*;
|
||||
|
||||
auto check_word(std::string& s, Forceucase allow_bad_forceucase = {},
|
||||
Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> const Flag_Set*;
|
||||
auto check_simple_word(std::string& word,
|
||||
Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> const Flag_Set*;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto affix_NOT_valid(const Prefix& a) const;
|
||||
template <Affixing_Mode m>
|
||||
auto affix_NOT_valid(const Suffix& a) const;
|
||||
template <Affixing_Mode m, class AffixT>
|
||||
auto outer_affix_NOT_valid(const AffixT& a) const;
|
||||
template <class AffixT>
|
||||
auto is_circumfix(const AffixT& a) const;
|
||||
template <Affixing_Mode m>
|
||||
auto is_valid_inside_compound(const Flag_Set& flags) const;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_prefix_only(std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<Prefix>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_suffix_only(std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<Suffix>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto
|
||||
strip_prefix_then_suffix(std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<Suffix, Prefix>;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto strip_pfx_then_sfx_2(const Prefix& pe, std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<Suffix, Prefix>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto
|
||||
strip_suffix_then_prefix(std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<Prefix, Suffix>;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto strip_sfx_then_pfx_2(const Suffix& se, std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<Prefix, Suffix>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_prefix_then_suffix_commutative(
|
||||
std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<Suffix, Prefix>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_pfx_then_sfx_comm_2(const Prefix& pe, std::string& word,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<Suffix, Prefix>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto
|
||||
strip_suffix_then_suffix(std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<Suffix, Suffix>;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto strip_sfx_then_sfx_2(const Suffix& se1, std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<Suffix, Suffix>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto
|
||||
strip_prefix_then_prefix(std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<Prefix, Prefix>;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto strip_pfx_then_pfx_2(const Prefix& pe1, std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<Prefix, Prefix>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_prefix_then_2_suffixes(
|
||||
std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto strip_pfx_2_sfx_3(const Prefix& pe1, const Suffix& se1,
|
||||
std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_suffix_prefix_suffix(
|
||||
std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto strip_s_p_s_3(const Suffix& se1, const Prefix& pe1,
|
||||
std::string& word,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_2_suffixes_then_prefix(
|
||||
std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto strip_2_sfx_pfx_3(const Suffix& se1, const Suffix& se2,
|
||||
std::string& word,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_suffix_then_2_prefixes(
|
||||
std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto strip_sfx_2_pfx_3(const Suffix& se1, const Prefix& pe1,
|
||||
std::string& s,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
// Declaration only. Const member function template whose mode `m` defaults
// to FULL_WORD. Takes the string `word` by non-const reference and a
// Hidden_Homonym that is value-initialised when omitted; returns
// Affixing_Result<>.
// NOTE(review): the name suggests the order prefix, suffix, prefix --
// confirm against the definition.
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_prefix_suffix_prefix(
|
||||
std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
// Declaration only. Const member function template; `m` must be given
// explicitly. Takes a Prefix `pe1` and a Suffix `se1` by const reference,
// the string `word` by non-const reference and a Hidden_Homonym (no
// default); returns Affixing_Result<>.
// NOTE(review): presumably the third step of strip_prefix_suffix_prefix --
// confirm against the definition.
template <Affixing_Mode m>
|
||||
auto strip_p_s_p_3(const Prefix& pe1, const Suffix& se1,
|
||||
std::string& word,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
// Declaration only. Const member function template whose mode `m` defaults
// to FULL_WORD. Takes the string `word` by non-const reference and a
// Hidden_Homonym that is value-initialised when omitted; returns
// Affixing_Result<>.
// NOTE(review): the name suggests two prefixes and then one suffix are
// stripped -- confirm against the definition.
template <Affixing_Mode m = FULL_WORD>
|
||||
auto strip_2_prefixes_then_suffix(
|
||||
std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
// Declaration only. Const member function template; `m` must be given
// explicitly. Takes two Prefix entries `pe1` and `pe2` by const reference,
// the string `word` by non-const reference and a Hidden_Homonym (no
// default); returns Affixing_Result<>.
// NOTE(review): presumably the third step of strip_2_prefixes_then_suffix --
// confirm against the definition.
template <Affixing_Mode m>
|
||||
auto strip_2_pfx_sfx_3(const Prefix& pe1, const Prefix& pe2,
|
||||
std::string& word,
|
||||
Hidden_Homonym skip_hidden_homonym) const
|
||||
-> Affixing_Result<>;
|
||||
|
||||
// Declaration only. Non-template const overload of check_compound: takes the
// string `word` by non-const reference and a Forceucase value, and returns a
// Compounding_Result.
// NOTE(review): presumably the entry point that starts the compound check
// for a whole word -- confirm against the definition.
auto check_compound(std::string& word,
|
||||
Forceucase allow_bad_forceucase) const
|
||||
-> Compounding_Result;
|
||||
|
||||
// Declaration only. Template overload of check_compound; the mode `m`
// defaults to AT_COMPOUND_BEGIN. Takes `word` and `part` by non-const
// reference, the sizes `start_pos` and `num_part` by value and a Forceucase
// value; returns a Compounding_Result.
// NOTE(review): `start_pos` / `num_part` look like the offset into `word`
// and the count of parts found so far -- confirm against the definition.
template <Affixing_Mode m = AT_COMPOUND_BEGIN>
|
||||
auto check_compound(std::string& word, size_t start_pos,
|
||||
size_t num_part, std::string& part,
|
||||
Forceucase allow_bad_forceucase) const
|
||||
-> Compounding_Result;
|
||||
|
||||
// Declaration only. Const member function template; the mode `m` defaults to
// AT_COMPOUND_BEGIN. Takes `word` and `part` by non-const reference, the
// sizes `start_pos`, `i` and `num_part` by value and a Forceucase value;
// returns a Compounding_Result.
// NOTE(review): `i` is presumably the candidate split position inside
// `word` -- confirm against the definition.
template <Affixing_Mode m = AT_COMPOUND_BEGIN>
|
||||
auto check_compound_classic(std::string& word, size_t start_pos,
|
||||
size_t i, size_t num_part,
|
||||
std::string& part,
|
||||
Forceucase allow_bad_forceucase) const
|
||||
-> Compounding_Result;
|
||||
|
||||
// Declaration only. Const member function template; the mode `m` defaults to
// AT_COMPOUND_BEGIN. Same parameter list as check_compound_classic: `word`
// and `part` by non-const reference, `start_pos`, `i` and `num_part` by
// value, plus a Forceucase value; returns a Compounding_Result.
// NOTE(review): how the pattern replacements named here are applied is not
// visible from the declaration -- see the definition.
template <Affixing_Mode m = AT_COMPOUND_BEGIN>
|
||||
auto check_compound_with_pattern_replacements(
|
||||
std::string& word, size_t start_pos, size_t i, size_t num_part,
|
||||
std::string& part, Forceucase allow_bad_forceucase) const
|
||||
-> Compounding_Result;
|
||||
|
||||
// Declaration only. Const member function template; `m` must be given
// explicitly. Takes the string `s` by non-const reference and returns a
// Compounding_Result.
template <Affixing_Mode m>
|
||||
auto check_word_in_compound(std::string& s) const -> Compounding_Result;
|
||||
|
||||
// Declaration only. Const member function taking a Prefix by const reference
// and returning an unsigned char.
// NOTE(review): what the returned modifier is added to is not visible here.
auto calc_num_words_modifier(const Prefix& pfx) const -> unsigned char;
|
||||
|
||||
// Declaration only. Const member function template; `m` must be given
// explicitly. Takes a Word_List::const_reference `we` and returns a signed
// char, so the result can be negative.
template <Affixing_Mode m>
|
||||
auto calc_syllable_modifier(Word_List::const_reference we) const
|
||||
-> signed char;
|
||||
|
||||
// Declaration only. Overload of calc_syllable_modifier that additionally
// takes a Suffix `sfx` by const reference; `m` must be given explicitly.
// Returns a signed char, so the result can be negative.
template <Affixing_Mode m>
|
||||
auto calc_syllable_modifier(Word_List::const_reference we,
|
||||
const Suffix& sfx) const -> signed char;
|
||||
|
||||
// Declaration only. Const member function taking a std::string_view (so the
// argument is not copied or modified) and returning a size_t count.
auto count_syllables(std::string_view word) const -> size_t;
|
||||
|
||||
// Declaration only. Const member function taking `word` and `part` by
// non-const reference, a vector of pointers to const Flag_Set (`words_data`)
// by non-const reference, the size `start_pos` by value and a Forceucase
// value; returns a Compounding_Result.
// NOTE(review): `words_data` presumably accumulates the flag sets of the
// parts matched so far -- confirm against the definition.
auto check_compound_with_rules(std::string& word,
|
||||
std::vector<const Flag_Set*>& words_data,
|
||||
size_t start_pos, std::string& part,
|
||||
Forceucase allow_bad_forceucase) const
|
||||
|
||||
|
||||
-> Compounding_Result;
|
||||
// Declaration only. Const member function taking the string `word` by
// non-const reference and returning bool.
// NOTE(review): the non-const reference implies `word` may be modified
// during the call; whether it is restored afterwards is not visible here.
auto is_rep_similar(std::string& word) const -> bool;
|
||||
};
|
||||
|
||||
template <Affixing_Mode m>
|
||||
auto Checker::affix_NOT_valid(const Prefix& e) const
|
||||
{
|
||||
if (m == FULL_WORD && e.cont_flags.contains(compound_onlyin_flag))
|
||||
return true;
|
||||
if (m == AT_COMPOUND_END &&
|
||||
!e.cont_flags.contains(compound_permit_flag))
|
||||
return true;
|
||||
if (m != FULL_WORD && e.cont_flags.contains(compound_forbid_flag))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
template <Affixing_Mode m>
|
||||
auto Checker::affix_NOT_valid(const Suffix& e) const
|
||||
{
|
||||
if (m == FULL_WORD && e.cont_flags.contains(compound_onlyin_flag))
|
||||
return true;
|
||||
if (m == AT_COMPOUND_BEGIN &&
|
||||
!e.cont_flags.contains(compound_permit_flag))
|
||||
return true;
|
||||
if (m != FULL_WORD && e.cont_flags.contains(compound_forbid_flag))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
template <Affixing_Mode m, class AffixT>
|
||||
auto Checker::outer_affix_NOT_valid(const AffixT& e) const
|
||||
{
|
||||
if (affix_NOT_valid<m>(e))
|
||||
return true;
|
||||
if (e.cont_flags.contains(need_affix_flag))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
template <class AffixT>
|
||||
auto Checker::is_circumfix(const AffixT& a) const
|
||||
{
|
||||
return a.cont_flags.contains(circumfix_flag);
|
||||
}
|
||||
|
||||
// Reports whether the continuation flags of `inner` contain the flag of
// `outer`.
template <class AffixInner, class AffixOuter>
auto cross_valid_inner_outer(const AffixInner& inner, const AffixOuter& outer)
{
    const auto& allowed = inner.cont_flags;
    return allowed.contains(outer.flag);
}
|
||||
|
||||
template <class Affix>
|
||||
auto cross_valid_inner_outer(const Flag_Set& word_flags, const Affix& afx)
|
||||
{
|
||||
return word_flags.contains(afx.flag);
|
||||
}
|
||||
|
||||
} // namespace v5
|
||||
} // namespace nuspell
|
||||
#endif // NUSPELL_CHECKER_HXX
|
||||
1
app/src/main/cpp/nuspell/clang-format.sh
Normal file
1
app/src/main/cpp/nuspell/clang-format.sh
Normal file
@@ -0,0 +1 @@
|
||||
# Reformat in place (-i) every file in the current directory whose name
# matches *.[ch]xx (that is, *.cxx and *.hxx), taking the style from the
# project's .clang-format file (-style=file).
clang-format -style=file -i *.[ch]xx
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user